In [1]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough
from langchain.memory import ConversationBufferMemory

In [2]:
# load, split
splitter = CharacterTextSplitter.from_tiktoken_encoder( # .from_tiktoken_encoder(): count length of tokens (just how the model counts)
    separator="\n", # split standard
    chunk_size=600,
    chunk_overlap=100,
)
loader = TextLoader("./assignment_4.txt")
docs = loader.load_and_split(text_splitter=splitter)

# embed
cache_dir = LocalFileStore("./.cache")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever() # input: user question, output: list of relevant doc chunks

# llm
llm = ChatOpenAI()

# memory
memory = ConversationBufferMemory(
    return_messages=True, # keep all messages in memory
    memory_key="history", # key to use in prompt
)
def load_memory(_):
    return memory.load_memory_variables({})["history"]

# stuff
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are a helpful assistant. 
            Answer questions using only the following context. 
            \n
            {chunk}
            \n
            Also, remember the conversation history below.
            """,
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

# chain
chain = {"chunk": retriever, "history": load_memory, "question": RunnablePassthrough()} | prompt | llm

In [3]:
chain.invoke("Is Aaronson guilty?")

AIMessage(content='Yes, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.', response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 2488, 'total_tokens': 2508, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5e518f4c-8dda-49db-88ea-7e85dd409f71-0')

In [4]:
chain.invoke("What message did he write in the table?")

AIMessage(content='He wrote: \n\nFREEDOM IS SLAVERY\nTWO AND TWO MAKE FIVE\nGOD IS POWER', response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 2466, 'total_tokens': 2491, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-dc8a5053-34fb-475c-aeaf-efcbb78a91ab-0')

In [5]:
chain.invoke("Who is Julia?")

AIMessage(content="Julia is a character mentioned in the text. She is someone who Winston has a connection with and has not betrayed, according to O'Brien.", response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 2478, 'total_tokens': 2507, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0095570e-2102-43d0-a9c1-6d8f8c410f1f-0')