In [11]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model used to write the final answer (library defaults).
llm = ChatOpenAI()

# On-disk byte store that persists embedding vectors between runs.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk size and overlap are measured with the tiktoken
# encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

# Load the document and cut it into chunks in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder so each text is only embedded once and then read back
# from `cache_dir`. The namespace keys the cache by embedding model name:
# without it, switching models would silently reuse vectors computed by the
# previous model for the same text.
# NOTE: entries written by earlier runs without a namespace are not reused;
# they are simply re-embedded once under the namespaced key.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the FAISS index from the chunks via the cached embedder.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# "stuff" chain: retrieved chunks are placed together into a single prompt.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Kept as the last expression so the notebook cell displays the answer.
chain.run("Describe Victory Mansions")

"Victory Mansions is the building where Winston Smith lives. It is described as having glass doors at the entrance, but the hallway has a strong smell of boiled cabbage and old rag mats. The building is run-down, with rotting nineteenth-century houses surrounding it. The flats are located on the upper floors, and Winston's flat is seven flights up. The building is poorly maintained, with the elevator rarely working and the electricity being cut off during daylight hours as part of an economy drive. Inside the flat, there is a telescreen on the wall, which cannot be completely shut off. Overall, Victory Mansions is depicted as a grim and deteriorating place to live."