In [2]:
import os
from getpass import getpass

from langchain.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Credentials: use the GOOGLE_API_KEY already present in the environment and
# only prompt (without echoing) when it is missing. This avoids overwriting a
# real key with a placeholder and keeps the secret out of the notebook file.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")

# Chat model used to generate answers; temperature=0 requests the least random
# output and max_output_tokens caps the length of each reply.
gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_output_tokens=350,
    temperature=0,
)

In [4]:
# Location of the PDF to index. Override with the PDF_PATH environment variable;
# the default is ml.pdf resolved against the kernel's working directory
# (assumes the notebook is run from the project folder that holds ml.pdf).
PDF_PATH = os.environ.get("PDF_PATH", "ml.pdf")

loader = PyPDFLoader(PDF_PATH)

# Read the PDF into a list of documents for the splitter below.
docs = loader.load()

In [5]:
# Break the loaded documents into overlapping chunks so each piece is small
# enough to embed and retrieve on its own. Consecutive chunks share up to
# 200 units of text (characters, with the splitter's default length function).
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=500,
)
data_splits = splitter.split_documents(documents=docs)

In [6]:
# Embedding model that turns text chunks (and later, questions) into vectors.
g_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed every chunk and index the vectors in a Chroma vector store.
# NOTE(review): no persist_directory is passed, so the index presumably lives
# only in memory and is rebuilt on each run - confirm.
db = Chroma.from_documents(
    embedding=g_embeddings,
    documents=data_splits,
)

db

<langchain_community.vectorstores.chroma.Chroma at 0x1fb458a2510>

In [7]:
# Uncomment to inspect the embedding vector returned for a sample question:
# g_embeddings.embed_query("What is Machine Learning?")

[-0.002020119223743677,
 -0.09197726100683212,
 0.013470837846398354,
 0.023921845480799675,
 0.03767101839184761,
 0.006959029007703066,
 0.012941496446728706,
 -0.012158864177763462,
 -0.020664989948272705,
 0.014488447457551956,
 0.0277852825820446,
 0.030256079509854317,
 0.009191661141812801,
 -0.03013002872467041,
 0.013186061754822731,
 -0.06057530269026756,
 -0.01603669673204422,
 0.012678037397563457,
 0.01813325844705105,
 -0.04596344381570816,
 0.005129850935190916,
 0.000629669870249927,
 -0.009937318041920662,
 -0.04723094776272774,
 0.021955957636237144,
 -0.03315555676817894,
 0.04502347111701965,
 -0.07196810096502304,
 -0.01697438582777977,
 0.07038077712059021,
 -0.07031456381082535,
 0.008445795625448227,
 -0.018277738243341446,
 0.02408171445131302,
 0.05798165500164032,
 -0.03836032375693321,
 0.03304428979754448,
 0.03480685502290726,
 0.011865177191793919,
 -0.0003612941945903003,
 -0.03448954224586487,
 -0.010199001990258694,
 -0.023041857406497,
 -0.00625643599

In [8]:
# Prompt template for the RAG chain. It has two placeholders:
#   {context} - filled with the retrieved document text
#   {input}   - filled with the user's question
# The pieces are written out line by line so the exact whitespace sent to the
# model (including the trailing spaces before some newlines) is visible.
PROMPT_TEMPLATE = (
    "\n"
    "Answer the following question based only on the provided context. \n"
    "Think step by step before providing a detailed answer. \n"
    "I will tip you $1000 if the user finds the answer helpful. \n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "Question: {input}"
)

chat_prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

In [9]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Build the "stuff documents" chain: the documents are formatted into the
# prompt's {context} slot, the filled prompt is sent to the Gemini chat model,
# and the reply is parsed to a plain string.
combined_chain = create_stuff_documents_chain(
    llm=gemini_llm,
    prompt=chat_prompt,
)

combined_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nAnswer the following question based only on the provided context. \nThink step by step before providing a detailed answer. \nI will tip you $1000 if the user finds the answer helpful. \n<context>\n{context}\n</context>\nQuestion: {input}'))])
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', temperature=0.0, max_output_tokens=350, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001FB44F7EA20>, async_client=<google.ai.generativelanguage_v1beta.services.generative_service.async_client.GenerativeServiceAsyncClient object at 0x000001FB4506D850>, default_metadata=())
| StrOutputParser(), config={'run_na

In [10]:
# Expose the Chroma store through the retriever interface so the retrieval
# chain can look up documents for a question.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001FB458A2510>)

In [11]:
from langchain.chains import create_retrieval_chain

# Full RAG pipeline: the retriever is queried with the incoming "input" value
# to populate "context", then the document chain produces the "answer".
retriver_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combined_chain,
)

retriver_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001FB458A2510>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nAnswer the following question based only on the provided context. \nThink step by step before providing a detailed answer. \nI will tip you $1000 if the user finds the answer helpful. \n<context>\n{context}\n</context>\nQuestion: {input}'))])
            | ChatGoogleGener

In [12]:
# Ask the user for a question. Surrounding whitespace is dropped, and a blank
# question is rejected up front instead of being sent to the retrieval chain.
question = input("Please enter your question: ").strip()
if not question:
    raise ValueError("The question must not be empty.")

# Run the retrieval chain on the question.
response_from_RAG = retriver_chain.invoke({"input": question})

# The generated text is stored under the "answer" key of the result.
response_from_RAG['answer']

'Here\'s a breakdown of the answer, based on the provided context:\n\n**1. Identify the Relevant Section:**\n\n* The context focuses on "Supervised Learning" and provides a detailed explanation of its concepts. \n\n**2. Extract Key Information:**\n\n* **Definition:** Supervised learning aims to build models that can make accurate predictions on new, unseen data.\n* **Goal:** The goal is to achieve good "generalization" – the ability of the model to perform well on data it hasn\'t seen during training.\n* **Challenges:** Overfitting and underfitting are potential issues that can hinder generalization.\n* **Importance of Data:** The context emphasizes that having more data is often more beneficial than complex model tuning.\n\n**3. Formulate the Answer:**\n\nSupervised learning is a type of machine learning where the algorithm learns from labeled data. This means the data includes both inputs (features) and corresponding outputs (labels). The goal of supervised learning is to build a mod