In [1]:
!pip install langchain langchain-openai pypdf unstructured pdfminer.six pdf2image pillow_heif unstructured_inference pikepdf chromadb lark

import os
os.environ["OPENAI_API_KEY"] = "YOUR API KEY"

Collecting langchain
  Downloading langchain-0.1.9-py3-none-any.whl (816 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/817.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/817.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━[0m [32m553.0/817.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m809.0/817.0 kB[0m [31m9.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m817.0/817.0 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-openai
  Downloading langchain_openai-0.0.7-py3-none-any.whl (33 kB)
Collecting pypdf
  Downloading pypdf-4.0.2-py3-none-any.whl (283 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.0/284.0 kB[0m [31m11.2 MB/s[0m eta [36

In [2]:
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# NOTE(review): later LangChain releases appear to move OnlinePDFLoader and Chroma
# to the langchain_community package — confirm before upgrading langchain.

# Tunable inputs for this cell, named so a reader can adjust them in one place.
PAPER_URL = "https://arxiv.org/pdf/1706.03762.pdf"
CHUNK_SIZE = 2000      # chunk_size passed to the splitter
CHUNK_OVERLAP = 200    # chunk_overlap passed to the splitter

# The splitter tries the separators in order: paragraph breaks, line breaks,
# spaces, and finally the empty string.
recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", " ", ""],
)

# Load the PDF from the URL and split it into chunks.
loader = OnlinePDFLoader(PAPER_URL)
data = loader.load_and_split(text_splitter=recursive_splitter)

# Embed the chunks with OpenAI embeddings and store them in a Chroma vector store.
db = Chroma.from_documents(data, OpenAIEmbeddings())

# Report how many entries were stored, using the public `get()` accessor instead
# of reaching into the private `_collection` attribute.
print(len(db.get()["ids"]))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


23


# Asking Questions About The Paper

In [4]:
from langchain_openai import ChatOpenAI

# Chat model reused by every chain built later in the notebook.
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo",
)

In [6]:
from langchain.chains import RetrievalQA

# The question put to the paper.
question = "What does the encoder portion of the transformer do?"

# QA chain that answers with `llm`, using the vector store as its retriever.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())

# Run the chain and display the full response dictionary.
result = qa_chain.invoke({"query": question})
result

{'query': 'What does the encoder portion of the transformer do?',
 'result': 'The encoder portion of the Transformer maps an input sequence of symbol representations to a sequence of continuous representations using stacked self-attention and point-wise, fully connected layers. The encoder consists of a stack of identical layers, each containing a multi-head self-attention mechanism and a position-wise fully connected feed-forward network. The encoder also employs residual connections and layer normalization to enhance the learning process.'}

In [7]:
# Display only the answer text stored under the "result" key of the chain's response.
result["result"]

'The encoder portion of the Transformer maps an input sequence of symbol representations to a sequence of continuous representations using stacked self-attention and point-wise, fully connected layers. The encoder consists of a stack of identical layers, each containing a multi-head self-attention mechanism and a position-wise fully connected feed-forward network. The encoder also employs residual connections and layer normalization to enhance the learning process.'

### Make It Return Its Source Documents

In [8]:
# Same question as before; this time the chain is also asked to hand back the
# documents it retrieved.
question = "What does the encoder portion of the transformer do?"

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    return_source_documents=True,
    retriever=db.as_retriever(),
)

# The response now carries a "source_documents" entry next to "query" and "result".
result = qa_chain.invoke({"query": question})
result

{'query': 'What does the encoder portion of the transformer do?',
 'result': 'The encoder portion of the Transformer maps an input sequence of symbol representations to a sequence of continuous representations using stacked self-attention and point-wise, fully connected layers. The encoder consists of a stack of identical layers, each containing a multi-head self-attention mechanism and a position-wise fully connected feed-forward network. The encoder also employs residual connections and layer normalization to enhance the learning process.',
 'source_documents': [Document(page_content='3 Model Architecture\n\nMost competitive neural sequence transduction models have an encoder-decoder structure [5, 2, 35]. Here, the encoder maps an input sequence of symbol representations (x1, ..., xn) to a sequence of continuous representations z = (z1, ..., zn). Given z, the decoder then generates an output sequence (y1, ..., ym) of symbols one element at a time. At each step the model is auto-regre

# Let's Have A Conversation About The Paper

In [9]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Conversation memory kept under the "chat_history" key and returned as messages.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Retriever backed by the vector store built earlier.
retriever = db.as_retriever()

# Conversational chain wired to the shared LLM, the retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [10]:
# First turn of the conversation. The question is set in this cell so it does not
# depend on a `question` variable left over from an earlier cell; the other
# conversation cells define their question the same way.
question = "What does the encoder portion of the transformer do?"
result = qa.invoke({"question": question})
result['answer']

"The encoder portion of the Transformer maps an input sequence of symbol representations to a sequence of continuous representations using stacked self-attention and point-wise, fully connected layers. The encoder consists of a stack of identical layers, each containing a multi-head self-attention mechanism and a position-wise fully connected feed-forward network. The output of each sub-layer is passed through a residual connection and layer normalization. The encoder's role is to process the input sequence and create meaningful continuous representations that can be used by the decoder to generate an output sequence."

In [11]:
# Next turn: a new question sent through the same conversational chain.
question = "How does the multi-head self-attention mechanism work?"
result = qa.invoke(input={"question": question})
result["answer"]

'The multi-head self-attention mechanism works by linearly projecting the queries, keys, and values multiple times with different learned linear projections to different dimensions. These projected versions are then used to perform the attention function in parallel, allowing the model to jointly attend to information from different representation subspaces at different positions. The outputs from these parallel attention functions are concatenated, projected again, and used as the final values. This approach enables the model to capture dependencies and relationships across different parts of the input sequence effectively.'

In [12]:
# Follow-up whose "they" only makes sense given the previous turn.
question = "On which dimension are they concatenated?"
result = qa.invoke(input={"question": question})
result["answer"]

'The outputs from the parallel attention functions are concatenated on the dimension of the final values, which is the dv-dimensional output values.'

In [13]:
# Clarifying follow-up to the previous answer.
question = "Do you mean they are concatenated on the columns?"
result = qa.invoke(input={"question": question})
result["answer"]

'Yes, the outputs from the parallel attention functions are concatenated along the columns.'

### BONUS: Customizing The QA Prompt

In [15]:
# Imports for this cell, grouped at the top rather than scattered between statements.
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Build prompt: the template is filled with the retrieved context and the user's
# question, and instructs the model on answer length and the closing phrase.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Run chain: the custom prompt is handed to the underlying chain via chain_type_kwargs.
question = "How did the transformer network perform?"
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=db.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Invoke the chain and display only the answer text.
result = qa_chain.invoke({"query": question})
result["result"]

'The Transformer network performed well, achieving state-of-the-art results on machine translation tasks such as English-to-German and English-to-French. The model outperformed previously reported models and ensembles, even with a base model. Thanks for asking!'