This version uses Milvus through Docker Compose, so you must have Docker installed to run this notebook. Milvus is started with `docker compose up -d`, as shown in the cell below.

In [None]:
# One-time setup: uncomment both lines on the first run, then comment them out again.
# python-dotenv is listed because the notebook imports `dotenv`; langchain-community is
# imported as well and is presumably pulled in by `langchain` -- TODO confirm for your version.
# %pip install pymilvus milvus langchain sentence-transformers tiktoken octoai-sdk python-dotenv
# Start the Milvus services in the background (needs the leading `!` to run as a shell command).
# !docker compose up -d

In [1]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.embeddings import OctoAIEmbeddings
from langchain_community.vectorstores import Milvus
from dotenv import load_dotenv
import os

In [5]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with a readable message when the token is missing. The previous
# `os.environ[...] = os.getenv(...)` raised an opaque TypeError in that case and
# was a no-op otherwise, because load_dotenv() already populates os.environ.
if not os.getenv("OCTOAI_API_TOKEN"):
    raise RuntimeError(
        "OCTOAI_API_TOKEN is not set. Export it or add it to a .env file "
        "next to this notebook before running this cell."
    )

# Names of the source documents; sorted so chunks are ingested in a stable order
# (os.listdir returns entries in arbitrary order).
files = sorted(os.listdir("./data"))

# Accumulator for the chunked Document objects built in the next cell.
file_texts = []

# Chat-completion LLM served by OctoAI. The system message is sent with every
# request; max_tokens caps the length of each generated answer.
llm = OctoAIEndpoint(
    endpoint_url="https://text.octoai.run/v1/chat/completions",
    model_kwargs={
        "model": "mixtral-8x7b-instruct-fp16",
        "max_tokens": 128,
        "presence_penalty": 0,
        "temperature": 0.01,
        "top_p": 0.9,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant. Keep your responses limited to one short paragraph if possible.",
            },
        ],
    },
)

# Embedding model used both to index the chunks and to embed incoming questions.
embeddings = OctoAIEmbeddings(endpoint_url="https://text.octoai.run/v1/embeddings")

In [6]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every chunk.
file_texts = []

# The splitter does not depend on the file being processed, so build it once.
# Chunk sizes are measured in tiktoken tokens: 512 per chunk with a 64-token overlap.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512,
    chunk_overlap=64,
)

for file in files:
    path = f"./data/{file}"
    # Skip sub-directories (e.g. notebook checkpoint folders); open() would fail on them.
    if not os.path.isfile(path):
        continue

    with open(path, encoding="utf8") as f:
        file_text = f.read()

    # doc_title is the file name up to its first dot; chunk_num is the chunk's
    # position within that file.
    doc_title = file.split(".")[0]
    for i, chunked_text in enumerate(text_splitter.split_text(file_text)):
        file_texts.append(
            Document(
                page_content=chunked_text,
                metadata={"doc_title": doc_title, "chunk_num": i},
            )
        )

In [7]:
# Embed every chunk and store it in the "water_reports" collection of the
# Milvus instance listening on localhost:19530.
# drop_old=True recreates the collection on each run; without it, re-running
# this cell adds the same chunks again and retrieval returns duplicates.
# NOTE: this deletes any existing "water_reports" collection on that server.
vector_store = Milvus.from_documents(
    file_texts,
    embedding=embeddings,
    connection_args={"host": "localhost", "port": 19530},
    collection_name="water_reports",
    drop_old=True,
)

# Expose the store through the retriever interface used by the chain below.
retriever = vector_store.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = PromptTemplate.from_template(template)


def format_docs(docs):
    """Render retrieved chunks as plain text for the prompt.

    Each chunk is prefixed with its source document title so the model can tell
    the reports apart. Passing the retriever output straight into the template
    would instead insert the Python repr of a list of Document objects.
    """
    return "\n\n".join(
        f"[{doc.metadata.get('doc_title', 'unknown')}]\n{doc.page_content}"
        for doc in docs
    )


# The incoming question is sent to the retriever (whose results are formatted
# into the context) and also passed through unchanged as {question}; the filled
# prompt goes to the LLM and the reply is returned as a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
# Sample questions for the RAG chain. The last one spans two source documents;
# its embedded newline and indentation are part of the text sent to the chain.
question_array = [
    "What is the water quality for the city of NYC?",
    "What is the iron level for the city of Kent?",
    "What is the water quality for the city of Washington DC?",
    (
        "According to the level and filters document and the Kent water report,\n"
        "       How many filters for chlorine do I need for the city of Kent ?"
    ),
]

# Run the multi-document question (index 3) and display the model's answer.
selected_question = question_array[3]
chain.invoke(selected_question)

" Based on the provided Kent 2022 Water Quality Report, the Chlorine level is 1.86 ppm. However, the document does not provide information about filters or filter requirements. To determine the number of filters needed for Chlorine removal, you would need to consider the specific filter's removal capacity and the flow rate of the water system. Please consult the manufacturer's guidelines for the specific filter you are using or plan to use."