In [26]:
import os
from langchain_community.document_loaders.directory import DirectoryLoader
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader, PyPDFLoader
from langchain_openai import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from dotenv import load_dotenv, find_dotenv

# Locate a .env file with find_dotenv() and load its variables into the process environment.
load_dotenv(find_dotenv())

True

In [4]:
# Load every PDF found in the research-papers directory.
dir_path = "ml_research_papers/"
loader = PyPDFDirectoryLoader(dir_path)

docs = loader.load()

# Fail here with a clear message instead of later during embedding: the loader
# can come back with an empty list (no error) when the directory is missing or
# contains no PDFs.
assert docs, f"No PDF pages were loaded from {dir_path!r}; check the path."

In [13]:
# Break the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splitted_docs = text_splitter.split_documents(docs)

In [14]:
# Create the embeddings and store them in a persistent Chroma database.
# Embedding only happens when no database exists yet: calling
# Chroma.from_documents against an already-populated directory would re-embed
# every chunk and append duplicate entries to the stored collection.

persist_dir = "db"

openai_embedding = OpenAIEmbeddings()

if os.path.isdir(persist_dir) and os.listdir(persist_dir):
    # Reuse the previously persisted store.
    # NOTE: delete the directory to force a rebuild after the PDFs change.
    vectordb = Chroma(
        persist_directory=persist_dir,
        embedding_function=openai_embedding,
    )
else:
    vectordb = Chroma.from_documents(
        documents=splitted_docs,
        embedding=openai_embedding,
        persist_directory=persist_dir,
    )

In [15]:
# Explicitly persist the vector store (it was created with persist_directory=persist_dir).
# NOTE(review): recent Chroma releases reportedly persist automatically — confirm whether this call is still required.
vectordb.persist()

In [16]:
# Re-open the vector store from its persist directory, reusing the same embedding function.
vectordb = Chroma(embedding_function=openai_embedding, persist_directory=persist_dir)

# Making Retriever

In [17]:
# Expose the vector store as a retriever; no options are passed, so library defaults apply.
retriever = vectordb.as_retriever()

In [21]:
# Sanity-check retrieval: pass any query/line from a research paper.
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# `.invoke` call style used for the QA chain.
result_docs = retriever.invoke("the encoder maps an input sequence of symbol representations")

result_docs

[Document(page_content='Here, the encoder maps an input sequence of symbol representations (x1, ..., x n)to a sequence\nof continuous representations z= (z1, ..., z n). Given z, the decoder then generates an output\nsequence (y1, ..., y m)of symbols one element at a time. At each step the model is auto-regressive\n[10], consuming the previously generated symbols as additional input when generating the next.\n2', metadata={'page': 1, 'source': 'ml_research_papers\\attention_is_all_you_need.pdf'}),
 Document(page_content='tokens in the sequence. To this end, we add "positional encodings" to the input embeddings at the\nbottoms of the encoder and decoder stacks. The positional encodings have the same dimension dmodel\nas the embeddings, so that the two can be summed. There are many choices of positional encodings,\nlearned and fixed [9].\nIn this work, we use sine and cosine functions of different frequencies:\nPE(pos,2i)=sin(pos/100002i/d model)\nPE(pos,2i+1)=cos(pos/100002i/d model)\nwh

In [22]:
# Print the first retrieved document's text so its embedded newlines render as line breaks.
print(result_docs[0].page_content)

Here, the encoder maps an input sequence of symbol representations (x1, ..., x n)to a sequence
of continuous representations z= (z1, ..., z n). Given z, the decoder then generates an output
sequence (y1, ..., y m)of symbols one element at a time. At each step the model is auto-regressive
[10], consuming the previously generated symbols as additional input when generating the next.
2


In [23]:
# Inspect which search type the retriever is configured with.
retriever.search_type

'similarity'

# Interact with the PDFs using LLMs

In [27]:
# Make sure OPENAI_API_KEY exists in the environment; an existing value is left as-is.
# NOTE(review): when the variable is unset it becomes an empty string, so a missing key is not reported here.
os.environ.setdefault("OPENAI_API_KEY", "")

In [30]:
# Build a question-answering chain on top of the retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    retriever=retriever,
    return_source_documents=True,  # keep the retrieved documents in the response
    chain_type="stuff",
)

In [32]:
def preprocess_llm_response(resp):
    """Print the answer from a QA-chain response, followed by its supporting documents.

    Args:
        resp: Mapping holding the answer text under ``"result"`` and, optionally,
            an iterable of retrieved documents under ``"source_documents"``.

    Returns:
        None. Everything is written to standard output.

    Raises:
        KeyError: If ``resp`` has no ``"result"`` entry.
    """
    print(resp["result"])
    print("\n \n Supporting Documents")
    # "source_documents" is absent when the chain was not asked to return sources;
    # treat that (or an explicit None) as "no supporting documents" instead of raising.
    for src_doc in resp.get("source_documents") or []:
        print(src_doc)

In [36]:
# Ask the chain a question, then print the answer together with its sources.
llm_resp = qa_chain.invoke(dict(query="What was different in self attention?"))

preprocess_llm_response(llm_resp)

 Self-attention is an attention mechanism that relates different positions of a single sequence in order to compute a representation of the sequence. This is different from recurrent and convolutional layers, which are commonly used for mapping one variable-length sequence of symbol representations to another sequence of equal length. Self-attention is also faster than recurrent layers in terms of computational complexity. 

 
 Supporting Documents
page_content='in the distance between positions, linearly for ConvS2S and logarithmically for ByteNet. This makes\nit more difficult to learn dependencies between distant positions [ 12]. In the Transformer this is\nreduced to a constant number of operations, albeit at the cost of reduced effective resolution due\nto averaging attention-weighted positions, an effect we counteract with Multi-Head Attention as\ndescribed in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof