In [21]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os

In [None]:
!mkdir pdfs

In [2]:
loader = PyPDFDirectoryLoader("pdfs")

In [3]:
data = loader.load()

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

In [5]:
text_chunks = text_splitter.split_documents(data)

In [6]:
print(text_chunks[0])

page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto' metadata={'source': 'pdfs\\transformer-paper.pdf', 'page': 0}


In [7]:
print(text_chunks[1].page_content)

aidan@cs.toronto.eduŁukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,


In [8]:
print(len(text_chunks))

91


In [9]:
os.environ['OPENAI_API_KEY'] = ""

In [10]:
embedding = OpenAIEmbeddings()

In [11]:
len(embedding.embed_query("How are you"))

1536

In [14]:

os.environ['PINECONE_API_KEY'] = ''

In [15]:
index_name = "genai-foundations"
docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embedding, index_name = index_name)

In [16]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1d68aa29610>

In [17]:
query = "What is a transformer"

In [18]:
docs = docsearch.similarity_search(query)

In [19]:
docs

[Document(page_content='7 Conclusion\nIn this work, we presented the Transformer, the first sequence transduction model based entirely on\nattention, replacing the recurrent layers most commonly used in encoder-decoder architectures with\nmulti-headed self-attention.\nFor translation tasks, the Transformer can be trained significantly faster than architectures based\non recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014'),
 Document(page_content='7 Conclusion\nIn this work, we presented the Transformer, the first sequence transduction model based entirely on\nattention, replacing the recurrent layers most commonly used in encoder-decoder architectures with\nmulti-headed self-attention.\nFor translation tasks, the Transformer can be trained significantly faster than architectures based\non recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014'),
 Document(page_content='aidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszka

In [22]:
llm = OpenAI()

In [23]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=docsearch.as_retriever())

In [25]:
qa.invoke(query)

{'query': 'What is a transformer',
 'result': ' A transformer is a sequence transduction model that uses attention instead of recurrent layers to connect an encoder and decoder in neural networks. It was proposed as a simpler and faster alternative to existing architectures for translation tasks. '}

In [28]:
import sys
while True:
    user_input = input("Input Prompt: ")
    if user_input == 'exit':
        print('Exiting..')
        sys.exit()

    if user_input == "":
        continue
    result = qa.invoke(user_input)
    print(f"Answer: {result['result']}")

Answer:  Ashish, Noam, Niki, and Illia Polosukhin were all involved in the design and implementation of the Transformer model. It is not clear who was the sole inventor of the model, as it appears to have been a collaborative effort.
Exiting..


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
