In [1]:
from langchain_openai import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True)

llm = ChatOpenAI(temperature=0.1)

cache_dir = LocalFileStore('./cache/')

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

loader = UnstructuredFileLoader('./document.txt')
documents = loader.load_and_split(text_splitter=splitter)

  memory = ConversationBufferMemory(return_messages=True)
  loader = UnstructuredFileLoader('./document.txt')
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
Created a chunk of size 717, which is longer than the specified 600
Created a chunk of size 608, which is longer than the specified 600
Created a chunk of size 642, which is longer than the specified 600
Created a chunk of size 1444, which is longer than the specified 600
Created a chunk of size 1251, which is longer than the specified 600
Created a chunk of size 1012, which is longer than the specified 600
Created a chunk of size 2313, which is longer than the specified 600
Created a chunk of size 1458, which is longer than the specified 600
Created a chunk of size 1673, which is longer than the specified 600
Created a chunk of size 742, which is longer than the specified 600
Created a chunk of size 669, which is longer than the specified 600
Created a chunk of si

In [2]:
embeddings = CacheBackedEmbeddings.from_bytes_store(
    OpenAIEmbeddings(),
    cache_dir
)

vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)

  OpenAIEmbeddings(),


In [3]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer the question using only the following context. If you don't know the answer just say 'I don't know', don't make it up:\n\n{context}"),
        ("human", "{question}"),
    ]
)

retriever = vector_store.as_retriever()

chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

In [4]:
def add_message(question):
    response = chain.invoke(question)
    memory.save_context({"input": question}, {"output": response.content})
    return response

add_message("Is Aaronson guilty?")
add_message("What message did he write in the table?")
add_message("Who is Julia?")


AIMessage(content='Julia is a character mentioned in the provided text excerpts.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 557, 'total_tokens': 570, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-eebeb906-2ed0-4a26-b86d-18cb5f8e291a-0', usage_metadata={'input_tokens': 557, 'output_tokens': 13, 'total_tokens': 570, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [5]:
memory.load_memory_variables({})

{'history': [HumanMessage(content='Is Aaronson guilty?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What message did he write in the table?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='He wrote: "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE" on the table.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Who is Julia?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Julia is a character mentioned in the provided text excerpts.', additional_kwargs={}, response_metadata={})]}