In [35]:
import langchain
from langchain.document_loaders import UnstructuredFileLoader
import os
from dotenv import load_dotenv
# Read variables from a local .env file (if any) into the process environment,
# so the API key never has to be hardcoded in the notebook.
load_dotenv()

# OpenAI API key; this is None when OPENAI_KEY is not set.
key = os.environ.get("OPENAI_KEY")

In [36]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import TextLoader

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [78]:
# Folder holding the plain-text policy documents that make up the knowledge base.
POLICIES_DIR = 'data/policies'

# One TextLoader per .txt file in the policies folder.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the document (and therefore chunk) order reproducible between runs.
loaders = [
    TextLoader(os.path.join(POLICIES_DIR, file))
    for file in sorted(os.listdir(POLICIES_DIR))
    if file.endswith('.txt')
]

# Read every file and gather the resulting documents into a single flat list.
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [79]:
# Cut the loaded policy documents into small, non-overlapping chunks.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=250,
)
documents = text_splitter.split_documents(docs)

# Embed each chunk with OpenAI and index the vectors in Chroma.
# Per this cell's output, the store is in-memory and transient, so it has to
# be rebuilt whenever the kernel restarts.
embeddings = OpenAIEmbeddings(openai_api_key=key)
vectorstore = Chroma.from_documents(documents, embeddings)

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


In [80]:
# Sanity check: number of chunks produced by the splitter.
len(documents)

119

In [81]:
# System prompt that sets the assistant's persona and tone.
# {context} is a template placeholder; presumably the chain fills it with the
# retrieved policy chunks at query time (confirm against the chain's docs).
system_template = """You are a sales agent for a pest control company. Your goal is to be helpful for the customer and sell them a recurring pest, mosquito, rodent and/or termite program. Be informal and avoid long responses. Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{context}"""

# Chat prompt: the system instructions followed by the user's question,
# which is substituted into the {question} placeholder.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

In [82]:
# Question-answering chain over the vector store, using the custom chat prompt
# for the answering step. temperature=0 is presumably chosen to keep the
# answers as repeatable as possible.
qa = ChatVectorDBChain.from_llm(
    ChatOpenAI(openai_api_key=key, temperature=0),
    vectorstore,
    qa_prompt=prompt,
)

Exiting: Cleaning up .chroma directory


In [91]:
# Ask a single question; the history is empty because this is the first turn.
chat_history = []
query = "How long have you been in business?"
result = qa(dict(question=query, chat_history=chat_history))

In [92]:
# Show only the answer text from the chain's result dict.
print(result["answer"])

We have been in business since 1968, so we have over 54 years of experience in pest control services.


0.004