In [26]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory
from langchain.callbacks import StreamingStdOutCallbackHandler

# Chat model used to answer questions. With streaming=True and the stdout
# callback handler, generated tokens are printed as they arrive.
llm = ChatOpenAI(
  temperature=0.1,
  streaming=True,
  callbacks=[StreamingStdOutCallbackHandler()],
  model="gpt-3.5-turbo",
)

# Conversation history. return_messages=True yields message objects, which is
# the form the prompt's MessagesPlaceholder expects.
# No `llm` argument here: ConversationBufferMemory stores the messages verbatim
# and has no `llm` field (only the summarising memory classes use one).
memory = ConversationBufferMemory(
  return_messages=True,
)
# On-disk byte store that holds previously computed embeddings.
cache_dir = LocalFileStore("./.cache/embeddings/assignment04")

splitter = CharacterTextSplitter(
  separator="\n",
  chunk_size=600,
  chunk_overlap=100,
)

# Load the document and split it into chunks.
# NOTE(review): the splitter only cuts on "\n", so a line longer than
# chunk_size appears to be kept whole -- this looks like the source of the
# "Created a chunk of size ..." warnings emitted by this cell.
loader = UnstructuredFileLoader("./.cache/files/document.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Embed the chunks, caching the vectors on disk so re-runs can reuse them.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
  embeddings,
  cache_dir,
  # Key cache entries by embedding model so vectors produced by a different
  # model are never returned for the same text. Entries written before this
  # namespace was added will simply be recomputed once.
  namespace=embeddings.model,
)
vector_store = FAISS.from_documents(
  documents=docs,
  embedding=cached_embeddings,
)
# LCEL "stuff" chain inputs: a retriever over the FAISS index, and a chat
# prompt made of (1) system instructions carrying the retrieved context,
# (2) the prior conversation turns, and (3) the new user question.
retriever = vector_store.as_retriever()
prompt = ChatPromptTemplate.from_messages([
  # Spelling fix in the instruction text: "knwo" -> "know".
  ("system", "You are a helpful assistant. Answer questions using the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"),
  MessagesPlaceholder(variable_name="history"),
  ("user", "{question}"),
])

def load_memory(_):
  """Return the stored conversation history.

  The single positional argument is the value the chain pipes in; it is
  ignored because the history lives in the module-level `memory` object.
  """
  memory_variables = memory.load_memory_variables({})
  return memory_variables["history"]

def format_docs(documents):
  """Join the text of the retrieved documents into one context string.

  Without this step the prompt's {context} slot is filled with the Python
  repr of a list of Document objects (metadata and escaped newlines
  included) rather than the passages themselves.
  """
  return "\n\n".join(document.page_content for document in documents)


# The chain input is the raw question string. It is fanned out three ways:
#   context  -> retriever lookup, flattened to plain text
#   history  -> prior turns loaded from memory (the input itself is ignored)
#   question -> passed through unchanged
chain = {
  "context": retriever | RunnableLambda(format_docs),
  "history": RunnableLambda(load_memory),
  "question": RunnablePassthrough(),
} | prompt | llm

def invoke_chain(question):
  """Run the chain on `question` and record the exchange in memory.

  Nothing is returned; the question is stored under "input" and the reply's
  `.content` under "output".
  """
  reply = chain.invoke(question)
  turn_input = {"input": question}
  turn_output = {"output": reply.content}
  memory.save_context(turn_input, turn_output)

Created a chunk of size 717, which is longer than the specified 600
Created a chunk of size 608, which is longer than the specified 600
Created a chunk of size 642, which is longer than the specified 600
Created a chunk of size 1444, which is longer than the specified 600
Created a chunk of size 1251, which is longer than the specified 600
Created a chunk of size 1012, which is longer than the specified 600
Created a chunk of size 1493, which is longer than the specified 600
Created a chunk of size 819, which is longer than the specified 600
Created a chunk of size 1458, which is longer than the specified 600
Created a chunk of size 1411, which is longer than the specified 600
Created a chunk of size 742, which is longer than the specified 600
Created a chunk of size 669, which is longer than the specified 600
Created a chunk of size 906, which is longer than the specified 600
Created a chunk of size 703, which is longer than the specified 600
Created a chunk of size 1137, which is lon

In [27]:
# First question after the setup cell; the conversation memory is created empty there.
invoke_chain("Is Aaronson guilty?")

According to the document, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.

In [28]:
# Follow-up phrased with a pronoun ("he"). The chat history is supplied to the
# LLM only; the retriever is queried with this raw question text.
invoke_chain("What message did he write on the table?")

He wrote "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE" on the table.

In [29]:
# Earlier turns are still sent along through the prompt's history placeholder.
invoke_chain("Who is Julia?")

Julia is a character mentioned in the documents provided. She is someone who the main character has strong feelings for and is willing to protect, even at his own expense.

In [30]:
# The answer is streamed to stdout by the LLM callback, then the turn is saved to memory.
invoke_chain("Why O'Brien is angry?")

O'Brien is angry because Winston made a statement that O'Brien found to be foolish and dangerous. This caused O'Brien to increase the level of pain inflicted on Winston using the dial.

In [31]:
# Inspect what invoke_chain has stored: a dict whose "history" entry is the
# list of human/AI messages accumulated so far.
memory.load_memory_variables({})

{'history': [HumanMessage(content='Is Aaronson guilty?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='According to the document, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What message did he write on the table?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='He wrote "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE" on the table.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Who is Julia?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Julia is a character mentioned in the documents provided. She is someone who the main character has strong feelings for and is willing to protect, even at his own expense.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content="Why O'Brien is angry?", additional_kwargs={}, response_metadata={}),
  AIMessage(content="O'Brien is angry because Winsto