In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

llm = ChatOpenAI(
    temperature=0.1,
)

memory = ConversationBufferMemory(
    llm=llm,
    max_token_limit=120,
    return_messages=True,
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/document.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriver = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)


def load_memory(_):
    return memory.load_memory_variables({})["history"]


def retrieve_context(inputs):
    docs = retriver.get_relevant_documents(inputs["question"])
    return "\n\n".join([doc.page_content for doc in docs])


chain = (
    {
        "context": retrieve_context,
        "question": RunnablePassthrough(),
        "history": load_memory,
    }
    | prompt
    | llm
)


def invoke_chain(question):
    result = chain.invoke({"question": question})
    memory.save_context(
        {"input": question},
        {"output": result.content},
    )
    print(result)


invoke_chain("Is Aaronson guilty?")

  memory = ConversationBufferMemory(
  loader = UnstructuredFileLoader("./files/document.txt")
  docs = retriver.get_relevant_documents(inputs["question"])


content='Yes, according to the information provided, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 2163, 'total_tokens': 2190, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BEHJxU0jc90ApX9jItTKisL1bC4lD', 'finish_reason': 'stop', 'logprobs': None} id='run-a734df34-9e59-47ca-8758-34968b9b8628-0' usage_metadata={'input_tokens': 2163, 'output_tokens': 27, 'total_tokens': 2190, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [7]:
invoke_chain("What message did he write on the table?")

content='He wrote "2+2=5" on the table.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 2309, 'total_tokens': 2323, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BEHKL7FqV7ZCGszXoAT0xn1WUwP9R', 'finish_reason': 'stop', 'logprobs': None} id='run-e45426e9-cf9c-4be1-87f3-f26e12fadadc-0' usage_metadata={'input_tokens': 2309, 'output_tokens': 14, 'total_tokens': 2323, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [6]:
invoke_chain("Who is Julia?")

content="I don't know." additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 2140, 'total_tokens': 2146, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BEHKHJsgpFubuChFJ7LJk6f9taK97', 'finish_reason': 'stop', 'logprobs': None} id='run-253ba014-c4c9-43ad-a764-163739efa1c9-0' usage_metadata={'input_tokens': 2140, 'output_tokens': 6, 'total_tokens': 2146, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
