In [3]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain_openai import OpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.document_loaders import PyPDFDirectoryLoader

In [4]:
# Loader over the PDF files in the local "pdfs" directory (the path is
# relative to the notebook's working directory).
loader = PyPDFDirectoryLoader("pdfs")

In [6]:
# Read the PDFs into a list of Document objects; each one carries `source`
# and `page` metadata (see the output of the next cell).
data = loader.load()

In [7]:
# Inspect the first loaded Document to sanity-check the extracted text.
data[0]

Document(metadata={'source': 'pdfs\\NIPS-2017-attention-is-all-you-need-Paper.pdf', 'page': 0}, page_content='Attention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗†\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter configuration: chunks of at most 500 units with a 20-unit overlap
# between neighbours (units are characters under the splitter's default
# length function - confirm if a custom one is ever passed).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

In [9]:
# Break every loaded Document into smaller chunks using the splitter
# configured above; the chunks are what gets embedded and indexed later.
text_chunks = text_splitter.split_documents(data) 

In [10]:
# Spot-check one chunk. print() is used so the embedded newlines render as
# real line breaks instead of "\n" escapes.
# NOTE(review): index 75 is hard-coded and assumes at least 76 chunks exist;
# it raises IndexError on a smaller corpus.
print(text_chunks[75].page_content)

[31] Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V Le, Mohammad Norouzi, Wolfgang
Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey, et al. Google’s neural machine
translation system: Bridging the gap between human and machine translation. arXiv preprint
arXiv:1609.08144 , 2016.
[32] Jie Zhou, Ying Cao, Xuguang Wang, Peng Li, and Wei Xu. Deep recurrent models with
fast-forward connections for neural machine translation. CoRR , abs/1606.04199, 2016.
11


### Creating the DB

In [15]:
from langchain import embeddings

In [16]:
# Build the Chroma vector store for the chunks, reusing an index that is
# already on disk instead of rebuilding it.
persist_directory = 'db'

# NOTE(review): no API key is configured in the visible cells; presumably the
# OpenAI key is picked up from the environment - confirm.
embedding = OpenAIEmbeddings()

# Chroma.from_documents adds to whatever is already stored under
# `persist_directory`, so re-running this cell would embed every chunk again
# (extra API cost) and store duplicates that crowd the retriever's results.
# Open the store first and only index the chunks when it is still empty.
# Delete the 'db' directory to force a rebuild after changing the PDFs or the
# splitter settings.
vectordb = Chroma(persist_directory=persist_directory,
                  embedding_function=embedding)
if not vectordb.get(limit=1)["ids"]:
    vectordb = Chroma.from_documents(documents=text_chunks,
                                     embedding=embedding,
                                     persist_directory=persist_directory)

In [17]:
# Flush the index to `persist_directory` on disk.
# NOTE(review): recent Chroma releases reportedly persist automatically and
# deprecate this call - confirm against the installed version before relying
# on it or removing it.
vectordb.persist()

In [18]:
# Drop the in-memory handle so the following cells demonstrate that the
# store can be reopened purely from what was written to disk.
vectordb = None

In [20]:
# Confirm the handle is gone (expected output: NoneType).
type(vectordb)

NoneType

In [21]:
# Reopen the persisted store from disk. The same embedding object is passed
# again as the store's embedding function.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

### Making the retriever

In [22]:
# Wrap the store in a retriever with default search settings; the query in
# the next cell returned 4 documents in the recorded run.
retriver = vectordb.as_retriever()

In [23]:
# Fetch the chunks most similar to the question. `invoke` is the Runnable
# entry point that supersedes the deprecated `get_relevant_documents`; it
# returns the same list of Documents.
docs = retriver.invoke("How many attention heads was using in this paper?")

In [25]:
# Number of chunks the default retriever returned for the query.
len(docs)

4

In [27]:
# Third hit; in the recorded run this is the chunk that states the paper
# uses h = 8 parallel attention heads.
docs[2]

Document(metadata={'page': 4, 'source': 'pdfs\\NIPS-2017-attention-is-all-you-need-Paper.pdf'}, page_content='MultiHead( Q,K,V ) = Concat(head 1,...,head h)WO\nwhere head i= Attention( QWQ\ni,KWK\ni,VWV\ni)\nWhere the projections are parameter matrices WQ\ni∈Rdmodel×dk,WK\ni∈Rdmodel×dk,WV\ni∈Rdmodel×dv\nandWO∈Rhdv×dmodel.\nIn this work we employ h= 8 parallel attention layers, or heads. For each of these we use\ndk=dv=dmodel/h= 64 . Due to the reduced dimension of each head, the total computational cost\nis similar to that of single-head attention with full dimensionality.')

In [28]:
# Second retriever over the same store, limited to the top two matches per query.
retrive = vectordb.as_retriever(search_kwargs={'k': 2})
retrive.search_kwargs  # echo the settings to confirm k was applied

{'k': 2}

In [29]:
# Run the same question through the k=2 retriever. `invoke` replaces the
# deprecated `get_relevant_documents` and returns the same list of Documents.
doc = retrive.invoke("How many attention heads was using in this paper?")
len(doc)  # expected: 2, matching search_kwargs

2

In [30]:
# Show both hits. In the recorded output neither chunk contains the head
# count; the chunk that does was the third hit of the earlier default query,
# so k=2 is too tight for this question.
doc

[Document(metadata={'page': 7, 'source': 'pdfs\\NIPS-2017-attention-is-all-you-need-Paper.pdf'}, page_content='development set, newstest2013. We used beam search as described in the previous section, but no\ncheckpoint averaging. We present these results in Table 3.\nIn Table 3 rows (A), we vary the number of attention heads and the attention key and value dimensions,\nkeeping the amount of computation constant, as described in Section 3.2.2. While single-head\nattention is 0.9 BLEU worse than the best setting, quality also drops off with too many heads.'),
 Document(metadata={'page': 10, 'source': 'pdfs\\NIPS-2017-attention-is-all-you-need-Paper.pdf'}, page_content='[21] Minh-Thang Luong, Hieu Pham, and Christopher D Manning. Effective approaches to attention-\nbased neural machine translation. arXiv preprint arXiv:1508.04025 , 2015.\n[22] Ankur Parikh, Oscar Täckström, Dipanjan Das, and Jakob Uszkoreit. A decomposable attention\nmodel. In Empirical Methods in Natural Language Process