# Building a First RAG Pipeline

In [1]:
#Importing libraries 
from langchain_community.document_loaders import PyPDFLoader


In [2]:
# Relative path of the PDF that the loader will read
FILE_PATH = "llama2.pdf"

In [3]:
# Bind a PDF loader to the configured document path
loader = PyPDFLoader(file_path=FILE_PATH)

# Read the document eagerly and keep everything the loader returns in `pages`
pages = loader.load()

Alternatively, the pages can be loaded asynchronously and lazily, one page at a time:

In [4]:
# Rebuild `pages` from scratch using the loader's asynchronous lazy iterator,
# appending each item as it is yielded.
# NOTE(review): a bare top-level `async for` only runs where top-level await is
# supported (e.g. a notebook cell) — confirm before moving this into a plain script.
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [5]:
# Count the documents already held in `pages` rather than calling loader.load()
# again, which would re-read the whole PDF just to measure its length.
len(pages)

77

There are 77 pages within the given document. 

In [6]:
# Chunking setup: RecursiveCharacterTextSplitter cuts documents into overlapping pieces
from langchain_text_splitters import RecursiveCharacterTextSplitter

# chunk_size and chunk_overlap are both tunable hyperparameters
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

In [7]:
# Apply the splitter to the loaded pages to obtain the smaller chunks
split_docs = splitter.split_documents(documents=pages)

In [8]:
# How many chunks did the splitter produce?
len(split_docs)

615


We can see that the 77 page-level documents were further split into 615 chunks.

In [9]:
import faiss 
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore 

In [10]:
# Embedding model: the "all-MiniLM-L6-v2" checkpoint wrapped by HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [11]:

#Storing the split documents in a vector store
from langchain_community.vectorstores.utils import DistanceStrategy

# Size the index from the embedding model itself instead of hard-coding 384,
# so changing the model in the embeddings cell cannot leave the index with the
# wrong dimension.
embedding_dim = len(embeddings.embed_query("dimension probe"))

# Flat index that ranks vectors by inner product.
# NOTE(review): inner product only equals cosine similarity for unit-length
# vectors — confirm the embedding model normalizes its output.
index = faiss.IndexFlatIP(embedding_dim)

#Create vector store object
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    # The wrapper assumes Euclidean distance unless told otherwise; declare the
    # inner-product metric so score thresholds and relevance scores are
    # interpreted in the right direction (higher = more similar).
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [12]:
# Insert the chunks into the vector store; the cell displays the IDs it returns.
# NOTE(review): re-running this cell presumably inserts the same chunks a second
# time — confirm, and rebuild the vector store before re-running if so.
vector_store.add_documents(split_docs)

['2371086f-74fc-4048-a585-a7afdc1d224f',
 '2ef50e03-d300-4f76-87e5-fb899112ba35',
 'ee364638-1dc6-42cf-bebb-d0965df5dda9',
 'e13a1a8f-be8b-4d1a-b7fa-a1d9f6dca536',
 '8a1d6897-0fc4-4a9f-835f-a9949d087984',
 '99e8618e-f0eb-4547-96b8-b4d62d1c1b00',
 'b9b70bb3-a4d2-4c46-82c7-1bb1d80e4a76',
 '68cdc2ab-7a39-4f7e-b93d-5f93d078c270',
 '857f4bc2-27ec-4956-8f67-c263f8c750a7',
 '520cb994-d0b6-45ec-a6bd-c469ac235390',
 'aacb3f45-60aa-4581-ae7a-88378d1d350c',
 '9c4369b5-3c17-47d4-ad56-bfb4e65ee4b2',
 '277afc7c-bbd8-4048-96a5-3bda1c74d537',
 'c82da2c4-3ef3-436e-830b-b17229faf9e6',
 'af7be6ba-6e94-4e39-9caa-1561303c569e',
 '353540d0-c391-4db5-88cd-374c97f42b77',
 'e470730d-9582-435d-9c5c-8cb26adbca1e',
 '2c91c570-c27e-4016-adea-50cc16ec5d47',
 'ee1a32a2-04c7-40f2-9226-c0d551a37c36',
 '0e91974c-eb5a-413e-a745-9a1617503c43',
 'dbe7fc8f-6d0a-4951-bb71-f7cdefb9813e',
 '39abe80b-d763-4b47-bfac-37367cbb0e8a',
 '8fdbbbb1-7a72-4855-9c2f-9ec8d3f00cf0',
 'e9f66e84-e6c7-43c6-9cb7-b2f6f1f259ab',
 'bf6af698-c950-

In [None]:
# Expose the vector store as a retriever.
# "k" is a hyperparameter: the number of records to return per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [14]:
# Sanity-check the retriever on its own with a sample question
retriever.invoke("What is a llama model?")

[Document(id='bf6af698-c950-41d5-9c43-8fd31132de3c', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'llama2.pdf', 'total_pages': 77, 'page': 3, 'page_label': '4'}, page_content='work (Section 6), and conclusions (Section 7).\n‡https://ai.meta.com/resources/models-and-libraries/llama/\n§We are delaying the release of the 34B model due to a lack of time to sufficiently red team.\n¶https://ai.meta.com/llama\n‖https://github.com/facebookresearch/llama\n4'),
 Document(id='8b9e4d30-444a-43b3-bbef-9db67d01eaeb', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:3

In [15]:
# Chat model used as the generation step of the pipeline
from langchain_google_genai import ChatGoogleGenerativeAI

# NOTE(review): no API key is passed here, so credentials presumably come from
# the environment — confirm before running on a fresh machine.
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [16]:
# Fetch the ready-made RAG prompt identified as "rlm/rag-prompt"
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

In [17]:
# Pretty-print the message templates that make up the pulled prompt
import pprint

pprint.pprint(prompt.messages)


[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [18]:
#Importing the string output parser and the passthrough runnable
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

Create Chain

In [19]:
#Helper that flattens a collection of documents into one context string
def format_docs(docs):
    """Join the ``page_content`` of every document in ``docs`` into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in their original order, separated by a newline, a space
        and another newline. An empty iterable yields an empty string.
    """
    separator = "\n \n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

In [20]:
#Assembling the RAG chain (retrieve -> prompt -> model -> plain string)
rag_chain = (
    {
        # "context": the retriever's documents, flattened to text by format_docs
        "context": retriever | format_docs,
        # "question": filled by RunnablePassthrough()
        "question": RunnablePassthrough(),
    }
    | prompt  # fills the prompt's {context} and {question} variables
    | model  # generation step
    | StrOutputParser()  # parses the model output
)

In [21]:
# Run the complete pipeline on a sample question and display the answer
rag_chain.invoke("What is llama model?")

'Llama is a large language model developed by Meta.  There are different versions, including Llama 1 and Llama 2, with varying parameter sizes.  Llama 2 models are intended for commercial and research use and are available under a custom commercial license.'

In [22]:
# Same question again, pretty-printed; this invokes the chain a second time
pprint.pprint(rag_chain.invoke("What is llama model?"))

('Llama is a large language model developed by Meta.  It has various versions, '
 'including tuned versions for chat and pretrained models adaptable to '
 'different natural language tasks.  Llama 2, a newer version, is intended for '
 'commercial and research use.')
