In [None]:
### Installs ###
# %pip install langchain openai pypdf faiss-cpu python-dotenv tiktoken

In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
import glob

In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Setup

In [None]:
def find_pdf_files(directory):
    """Return the absolute paths of every ``*.pdf`` file under ``directory``.

    The search is recursive. The process working directory is left untouched,
    so relative paths used elsewhere in the notebook keep resolving to the
    same location after this function has been called.

    Args:
        directory: Root folder to search; absolute, or relative to the
            current working directory.

    Returns:
        A sorted list of absolute path strings (empty when nothing matches).

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
        NotADirectoryError: If ``directory`` exists but is not a directory.
    """
    root = os.fspath(directory)

    # Fail loudly on a bad root instead of silently returning an empty list.
    if not os.path.exists(root):
        raise FileNotFoundError(f"No such directory: {root!r}")
    if not os.path.isdir(root):
        raise NotADirectoryError(f"Not a directory: {root!r}")

    # Escape the root so glob metacharacters in the folder name ("[", "*",
    # "?") are matched literally; only the trailing pattern is a wildcard.
    pattern = os.path.join(glob.escape(root), '**', '*.pdf')

    return sorted(
        os.path.abspath(match) for match in glob.glob(pattern, recursive=True)
    )

In [None]:
def load_and_split(path, chunk_size=1000, chunk_overlap=0, separator="\n"):
    """Load one PDF and split its contents into chunks ready for embedding.

    The defaults reproduce the splitter settings this notebook has always
    used, so existing ``load_and_split(path)`` calls behave the same.

    Args:
        path: Location of the PDF file, handed to ``PyPDFLoader``.
        chunk_size: Forwarded to ``CharacterTextSplitter(chunk_size=...)``.
        chunk_overlap: Forwarded to ``CharacterTextSplitter(chunk_overlap=...)``.
        separator: Forwarded to ``CharacterTextSplitter(separator=...)``.

    Returns:
        The result of ``CharacterTextSplitter.split_documents`` applied to
        the documents produced by ``PyPDFLoader.load``.
    """
    documents = PyPDFLoader(file_path=path).load()

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separator=separator,
    )
    return text_splitter.split_documents(documents=documents)

In [None]:
# Folder holding the source PDFs. The default is the location this notebook
# was originally run against; set the PDF_DATA_DIR environment variable to
# point at another folder without editing this cell.
DATA_DIR = os.environ.get(
    "PDF_DATA_DIR",
    "/Users/aditkapoor/Local Documents/Work/Cognizant/langchain-proj/assets/data",
)

# File names to index, kept in their original (unsorted) order.
PDF_NAMES = [
    # "analogue-mind.pdf",  # disabled in the original list
    "gray-city.pdf",
    "irl.pdf",
    "joeys-journey.pdf",
    "singing-peddler.pdf",
    "memoirs.pdf",
    "small-little-circle.pdf",
    "veracious.pdf",
]

pdf_files = [os.path.join(DATA_DIR, name) for name in PDF_NAMES]

In [None]:
# A single embeddings client is shared by every store built below.
embeddings = OpenAIEmbeddings()

# Build one FAISS store per PDF, in the same order as pdf_files.
vectorstores = [
    FAISS.from_documents(load_and_split(pdf_path), embeddings)
    for pdf_path in pdf_files
]

In [None]:
### DEBUG CELL ###
# vectorstores[0].docstore._dict

In [None]:
# merged_store is the same object as vectorstores[0], so the merges below
# modify that list entry in place.
# NOTE(review): because of that, re-running this cell without first
# rebuilding `vectorstores` would merge the same stores again — confirm
# whether that is acceptable.
merged_store = vectorstores[0]
for extra_store in vectorstores[1:]:
    merged_store.merge_from(extra_store)

# Persist the combined index under a path relative to the working directory.
merged_store.save_local("faiss_index_project")

# Demonstration

In [3]:
# Reload the index saved under "faiss_index_project" and wire it into a
# retrieval QA chain that uses the 'stuff' chain type.
embeddings = OpenAIEmbeddings()
new_vectorstore = FAISS.load_local("faiss_index_project", embeddings)

llm = OpenAI()
retriever = new_vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
)

In [4]:
# NOTE(review): "yet at" looks like a typo for "yell at"; the wording is left
# untouched here because the recorded answer was produced from this exact text.
question = "Who did Clyde yet at about giving omniscient third person narrators too much information? If you don't know, say you don't know."
result = qa.run(question)
print(result)

 Clyde's mom.


In [5]:
# Ask the chain a question about the memoir and show its answer.
question = "In the memoir, why was the father sad after receiving the letter?"
result = qa.run(question)
print(result)

 The father was sad after receiving the letter because it was the last letter he had from his son, who had been killed two weeks prior.


In [6]:
# Ask the chain an open-ended question about the poem and show its answer.
question = "In the crowded streets of Delhi, what stories do the patches on people's clothing tell, as described in the poem? Reflect on the diversity and history embedded in these patches."
result = qa.run(question)
print(result)

 The patches on people's clothing in the crowded streets of Delhi tell stories of their life hardships and struggles. The diversity and history of these patches reflect the diversity of the city, with its mix of huge mansions and slums, and its long history of people from many different backgrounds living together.


In [7]:
# Ask the chain a question that names a document by title ("IRL").
question = "How does \"IRL\" portray the world post-internet and modern relationships?"
result = qa.run(question)
print(result)


IRL portrays the world post-internet and modern relationships as a place where people can meet and form connections with each other through online interactions, and then meet in person and have a real, meaningful experience together. It shows that even though people may be distant from each other geographically, they can still form strong connections and relationships.


In [8]:
# Ask the chain a question that also requests an approximate answer length.
question = "In \"Veracious\", why is the author so mad at his Uncle despite his untimely death? Give me around 5 sentences."
result = qa.run(question)
print(result)

 In "Veracious", the author is mad at his Uncle despite his untimely death because he had been getting his life together in the last few months. The author had looked up to him as a child, and his death has left his mother vulnerable and broken. The author feels that his Uncle was selfish in death for leaving his grandmother alone in the big house and leaving everyone to deal with the loss of two brothers in such a short time. Furthermore, the author feels that his Uncle could have waited to die and allowed his family to spend more time with him. The death of his Uncle has left a deep hole in the author's heart that will never be filled.


In [9]:
# Ask the chain a question that does not name the document it refers to.
question = "Describe the Author's relationship with Maggi. Why was he so attached to her?"
result = qa.run(question)
print(result)

 The author was very attached to Maggi because she was comforting and understanding in a difficult time. Maggi was present with the author when he was grieving over his student's death and provided him comfort and companionship. He found solace in Maggi's presence and was grateful for her companionship.
