In [3]:
from langchain_community.document_loaders import PyPDFLoader
from colorama import Fore, Style
from pprint import pprint
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

import os
from dotenv import load_dotenv

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

In [4]:
load_dotenv()

True

In [5]:
loader = PyPDFLoader("data/attention.pdf")
print(f"{Fore.RED}Loading document: attention.pdf{Style.RESET_ALL}")
docs = loader.load()

print(f"{Fore.RED}Type of docs: {Style.RESET_ALL}{type(docs)}")
print(f"{Fore.RED}Length of docs: {Style.RESET_ALL}{len(docs)}")
print(f"{Fore.RED}Type of list item: {Style.RESET_ALL}{type(docs[0])}")

[31mLoading document: attention.pdf[0m
[31mType of docs: [0m<class 'list'>
[31mLength of docs: [0m15
[31mType of list item: [0m<class 'langchain_core.documents.base.Document'>


In [11]:
# for document in docs:
#     print(document)

In [6]:
splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
splitted_docs = splitter.split_documents(docs)

print(f"{Fore.RED}Type of splitted docs: {Style.RESET_ALL}{type(splitted_docs)}")
print(f"{Fore.RED}Length of splitted docs: {Style.RESET_ALL}{len(splitted_docs)}")
print(f"{Fore.RED}Type of list item: {Style.RESET_ALL}{type(splitted_docs[0])}")

db = FAISS.from_documents(splitted_docs, embedding=OpenAIEmbeddings())

llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-3.5-turbo",
                 temperature=0.5)

prompt = ChatPromptTemplate.from_template("""
                                      Hello! you are a helpful assistant who can help answer queries based on provided context.
                                      {context}
                                      Question: {input}
                                      """)
# prompt = ChatPromptTemplate.from_messages([
#     SystemMessage(content="Hello! you are a helpful assistant who can help answer queries based on provided context."),
#     HumanMessage(content=f"Below is the context: \n {context} and the question is: {input}"),
# ])

document_chain = create_stuff_documents_chain(llm, prompt)
retriever = db.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)



[31mType of splitted docs: [0m<class 'list'>
[31mLength of splitted docs: [0m27
[31mType of list item: [0m<class 'langchain_core.documents.base.Document'>


In [7]:
# getting the query from user
# user_query = input(f"{Fore.RED}Enter your query from the document: {Style.RESET_ALL}")
user_query = input("Enter your query from the document:")

In [10]:
# invoking the retrieval chain
response = retrieval_chain.invoke({"input":user_query})
print(response)
print(f"{Fore.RED}Answer from RAG: {Style.RESET_ALL}\n{response['answer']}")

{'input': 'What is attention?', 'context': [Document(id='531a7a82-ad66-4382-8063-9da27bfe05ce', metadata={'source': 'data/attention.pdf', 'page': 12, 'page_label': '13'}, page_content='Attention Visualizations\nInput-Input Layer5\nIt\nis\nin\nthis\nspirit\nthat\na\nmajority\nof\nAmerican\ngovernments\nhave\npassed\nnew\nlaws\nsince\n2009\nmaking\nthe\nregistration\nor\nvoting\nprocess\nmore\ndifficult\n.\n<EOS>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\nIt\nis\nin\nthis\nspirit\nthat\na\nmajority\nof\nAmerican\ngovernments\nhave\npassed\nnew\nlaws\nsince\n2009\nmaking\nthe\nregistration\nor\nvoting\nprocess\nmore\ndifficult\n.\n<EOS>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\n<pad>\nFigure 3: An example of the attention mechanism following long-distance dependencies in the\nencoder self-attention in layer 5 of 6. Many of the attention heads attend to a distant dependency of\nthe verb ‘making’, completing the phrase ‘making...more difficult’. Attentions here shown only for\nthe word ‘making’. 