In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

llm = ChatOpenAI()

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader(f"./files/chapter_one.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

chain.run("Describe Victory Mansions")

  llm = ChatOpenAI()
  loader = UnstructuredFileLoader(f"./files/chapter_one.txt")
  embeddings = OpenAIEmbeddings()
  _warn_about_sha1_encoder()
  chain.run("Describe Victory Mansions")


'Victory Mansions is a building where Winston Smith resides in the novel "1984." It is a run-down apartment complex located in Airstrip One, which is a province of Oceania. The hallway of Victory Mansions smells of boiled cabbage and old rag mats. The building has a faulty lift and Winston often has to climb seven flights of stairs to reach his flat. Inside the flat, there is a telescreen, which is a device that continuously broadcasts propaganda and can never be fully turned off. The neighborhood surrounding Victory Mansions is depicted as grimy, with dilapidated buildings and bombed sites. The overall atmosphere of Victory Mansions reflects the oppressive and austere setting of the dystopian society depicted in the novel.'