In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by the chains defined later in this notebook.
llm = ChatOpenAI(temperature=0.1)

# Local file store handed to the cache-backed embeddings below.
cache_dir = LocalFileStore("./.cache/")

# Newline-separated splitter built through the tiktoken-encoder constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

# Load the source file and split it into chunks in a single call.
loader = UnstructuredFileLoader("./files/chapter_one.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings so they are backed by the file store above.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embeddings,
    document_embedding_cache=cache_dir,
)

# Index the chunks in FAISS and expose the index as a retriever.
vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=cached_embeddings,
)
retriever = vectorstore.as_retriever()


# Prompt applied to one document portion at a time. The system message is
# spelled out as adjacent literals so its exact whitespace is visible.
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "\n"
        "            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''\n"
        "            -------\n"
        "            {context}\n"
        "            ",
    ),
    ("human", "{question}"),
])

# Pipe the formatted prompt into the chat model.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run ``map_doc_chain`` on every document and merge the responses.

    Args:
        inputs: Mapping with a ``'documents'`` entry (an iterable of objects
            exposing ``page_content``) and a ``'question'`` entry.

    Returns:
        The ``content`` of each chain response, in document order, joined
        by blank lines.
    """
    retrieved = inputs['documents']
    query = inputs['question']

    extracts = []
    for doc in retrieved:
        response = map_doc_chain.invoke({
            "context": doc.page_content,
            "question": query,
        })
        extracts.append(response.content)

    return "\n\n".join(extracts)

# Pair the retriever's result with the untouched input, then hand both to
# map_docs.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)


# Prompt for the final answer. The system message is spelled out as adjacent
# literals so its exact whitespace (including the trailing space on the first
# line) is visible.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Given the following extracted parts of a long document and a question, create a final answer. \n"
            "     If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n"
            "     ------\n"
            "     {context}\n"
            "     ",
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: map_chain supplies the context, which is formatted into
# final_prompt together with the original question and sent to the model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

In [4]:
# Run the full chain on a descriptive sample question.
chain.invoke("Describe Victory Mansions")

AIMessage(content="Victory Mansions is a building located in London, specifically in Airstrip One, which is the chief city of Oceania. It is a residential building complex where Winston resides. The building is described as having glass doors at the entrance. The hallway of Victory Mansions has a distinct smell of boiled cabbage and old rag mats. On one wall of the hallway, there is a large colored poster depicting the face of a man in his forties with a black mustache. \n\nThe flat in Victory Mansions is situated on the seventh floor. Winston, who is described as a small and frail figure wearing blue overalls, takes his time climbing the stairs due to his varicose ulcer. On each landing of the building, there is a poster with the face of Big Brother, which seems to follow you with its eyes. \n\nInside Winston's flat, there is a telescreen, which is an oblong metal plaque on the right-hand wall. The telescreen cannot be completely shut off and serves as a constant surveillance device. 

In [5]:
# Run the full chain on a short factual sample question.
chain.invoke("Where does Winston go to work?")

AIMessage(content='Winston goes to work at the Ministry of Truth.')