# **Gemini Agentic RAG Model**

In [2]:


# --- Installations ---
# Each line shells out to pip (-qqq suppresses pip's normal output). No versions are
# pinned, so every run resolves whatever releases are current at that moment.
# Core LLM libraries (local-model stack; imported below via transformers)
!pip install -qqq torch transformers accelerate bitsandbytes
# LangChain core
!pip install -qqq langchain
# LangChain community for HuggingFacePipeline and potentially other local integrations
!pip install -qqq langchain-community
# For local embedding models (sentence-transformers is a common choice)
!pip install -qqq sentence-transformers
# Vector store - ChromaDB
!pip install -qqq chromadb
# Document loading from web
!pip install -qqq beautifulsoup4 # for WebBaseLoader's dependency bs4


# Token counting, used by the text splitter's from_tiktoken_encoder constructor
!pip install -qqq tiktoken
# For environment variables
!pip install -qqq python-dotenv



# --- Imports ---
import os
from dotenv import load_dotenv

# Modified LangChain imports for local models
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings # For local embeddings

from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts import PromptTemplate
from langchain.tools import tool
from langchain.tools.render import render_text_description_and_args
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnablePassthrough


print("Libraries installed and imported successfully!")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━



Libraries installed and imported successfully!


In [3]:
#Gemini - LLM Model
# --- Setup and LLM Initialization for Google Gemini API ---

# --- CRITICAL: Comprehensive Installations
# Ensure necessary LangChain components are correctly installed.
# NOTE(review): this removes and reinstalls LangChain packages that the first cell
# has already imported; a kernel restart may be needed before the freshly installed
# versions are the ones actually loaded — confirm. No versions are pinned.
!pip uninstall -y langchain langchain-community langchain-google-genai langchain-core # Clean previous installs
!pip install -qqq langchain        # Core LangChain
!pip install -qqq langchain-community # For common components
!pip install -qqq langchain-google-genai # For Google Gemini integration
!pip install -qqq python-dotenv

# Other dependencies the tutorial uses later (installing now to avoid future issues)
!pip install -qqq chromadb
!pip install -qqq beautifulsoup4
!pip install -qqq tiktoken

# Printed unconditionally: a failed pip command above does not stop this line from running.
print("Required libraries installed.")

# --- Imports for LLM initialization ---
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI # Import for Gemini LLM
from langchain.prompts import PromptTemplate

# --- Set up your Google API Key ---
# The key must never be written into the notebook source: it is taken from the
# process environment or a local .env file and, only if neither provides it,
# requested interactively without echoing the input.
# Any key that was ever saved inside a notebook should be treated as leaked and revoked.
load_dotenv(override=True)  # Load any .env files; their values override existing env vars

if not os.getenv("GOOGLE_API_KEY"):
    # stdlib; imported here because it is only needed for this fallback prompt
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ").strip()

# Verify the API key is available before any Gemini client is constructed
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Google API Key not found. Please ensure it's set in os.environ or your .env file.")

# Gemini model used by the chat client created later in this cell
GEMINI_MODEL_NAME = "gemini-1.5-flash"

print(f"Google API Key and model name ({GEMINI_MODEL_NAME}) set.")


# Gemini chat client: temperature=0 and a 250-token cap on each reply.
llm = ChatGoogleGenerativeAI(model=GEMINI_MODEL_NAME, max_output_tokens=250, temperature=0)

print("LLM (Google Gemini) initialized.")



Found existing installation: langchain 0.3.26
Uninstalling langchain-0.3.26:
  Successfully uninstalled langchain-0.3.26
Found existing installation: langchain-community 0.3.26
Uninstalling langchain-community-0.3.26:
  Successfully uninstalled langchain-community-0.3.26
[0mFound existing installation: langchain-core 0.3.66
Uninstalling langchain-core-0.3.66:
  Successfully uninstalled langchain-core-0.3.66
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-generativeai 0.8.5 requires google-ai-generativelanguage==0.6.15, but you have google-ai-generativelanguage 0.6.18 which is incompatible.[0m[31m
[0mRequired libraries installed.
Google API Key and model name (gemini-1.5-flash) set.
LLM (Google Gemini) initialized.


In [4]:

# Single-variable prompt: {query} is filled in with the user's question when the chain is invoked.
template = (
    "Answer the {query} accurately. "
    "If you do not know the answer, simply say you do not know."
)
prompt = PromptTemplate.from_template(template=template)

print("Prompt template created.")



Prompt template created.


In [5]:

# Set up the chain with our prompt and our LLM.
# Despite the name, `agent` here is just the prompt piped into the model: no tools,
# no retrieval. It binds whatever object `prompt` refers to when this cell runs.
agent = prompt | llm

print("Agent chain created.")



Agent chain created.


In [6]:

# Baseline check: a question the bare prompt | llm chain should answer on its own.
query1 = 'What sport is played at the US Open?'
print(f"\nInvoking the agent with query: '{query1}'")
result1 = agent.invoke(dict(query=query1))
print("Output:", result1.content)


Invoking the agent with query: 'What sport is played at the US Open?'
Output: Tennis


In [7]:
# Baseline check: a 2024 fact, asked of the bare prompt | llm chain (no retrieval).
query2 = 'Where was the 2024 US Open Tennis Championship?'
print(f"\nInvoking the agent with query: '{query2}'")
result2 = agent.invoke(dict(query=query2))
print("Output:", result2.content)



Invoking the agent with query: 'Where was the 2024 US Open Tennis Championship?'
Output: The 2024 US Open Tennis Championship was held at the USTA Billie Jean King National Tennis Center in Flushing Meadows, New York.


In [8]:

# Probe the model's knowledge limit with a 2025 event; `.content` is the plain-text answer.
query = 'Where was the 2025 US Open Tennis Championship?'
print(f"\nInvoking the agent with query: '{query}'")
result = agent.invoke(dict(query=query))
print("Output:", result.content)


Invoking the agent with query: 'Where was the 2025 US Open Tennis Championship?'
Output: The 2025 US Open Tennis Championship has not yet been held.  Therefore, I do not know the answer.


In [9]:
# Second knowledge-limit probe: the IPL 2025 question that the RAG tool is built for later.
query = 'Who won IPL 2025 CUP?'
print(f"\nInvoking the agent with query: '{query}'")
result = agent.invoke(dict(query=query))
print("Output:", result.content)


Invoking the agent with query: 'Who won IPL 2025 CUP?'
Output: I do not know.  The IPL 2025 season has not yet been played.


In [12]:
# --- Step 5. Establish the knowledge base and retriever (Part 1 - IPL 2025 Focus) ---

# Source pages for the knowledge base; all of them cover IPL 2025 only.
urls = [
    'https://www.jagranjosh.com/general-knowledge/ipl-2025-winner-rcb-royal-challengers-bengaluru-captain-coach-and-overall-performance-1748975062-1',
    'https://timesofindia.indiatimes.com/sports/cricket/ipl/top-stories/ipl-winners-list-20082025-final-results-opponents-margins/articleshow/121604424.cms',
    'https://www.hindustantimes.com/cricket/ipl-2025-awards-list-who-won-the-orange-cap-purple-cap-emerging-player-award-check-full-list-of-winners-101749010568299.html',
    'https://en.wikipedia.org/wiki/2025_Indian_Premier_League' # Wikipedia is generally good for static content
]

# One WebBaseLoader per URL; `docs` ends up as a list of per-URL document lists.
print("Loading documents from IPL 2025 URLs... This may take a few moments.")
docs = [WebBaseLoader(page_url).load() for page_url in urls]
print("Documents loaded.")

# Concatenate the per-URL lists into one flat list of documents.
docs_list = sum(docs, [])

# Show the start of the first document (text and metadata) as a quick sanity check.
if not docs_list:
    print("No documents were loaded or docs_list is empty after loading.")
else:
    print("\nSample document (first loaded page content) :")
    print(docs_list[0].page_content[:1000])
    print(docs_list[0].metadata)



Loading documents from IPL 2025 URLs... This may take a few moments.
Documents loaded.

Sample document (first loaded page content) :
IPL 2025 Winner: Royal Challengers Bengaluru (RCB) won the 18th Edition of TATA IPL; Check Overall Team Performance Check MP Board 5th, 8th Re-Exam Result Here Check Results   School+UP BoardBihar BoardMP BoardRJ BoardMH BoardUK BoardHP BoardGJ BoardICSE BoardCollegesJobs+Sarkari NaukriSarkari ResultGKResults+JAC 12th ResultJAC Result 2025RBSE Result 2025CBSE 10th ResultCBSE 12th ResultExamsEducation NewsCurrent AffairsMock TestMore+‡§π‡§ø‡§®‡•ç‡§¶‡•ÄVideosShort VideosWeb StoriesSchoolCollegesJobsGKExamsResultsCurrent AffairsEducation NewsCBSEMock TestWeb Stories‡§π‡§ø‡§®‡•ç‡§¶‡•ÄVideosShort VideosFocusSRMU LucknowViksit BiharSanskriti UniversityJames Dyson AwardHealthcare DegreesMP Board ResultRPF ConstableLongest DayCampus Connect Quick Links School & Boards   College Admission   Govt Jobs Alert & Prep   Current Affairs  GK & Aptitude   Homegeneral kno

In [13]:
# --- Step 5. Establish the knowledge base and retriever (Part 2: Split documents) ---

# Import the necessary splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Initialize the text splitter using from_tiktoken_encoder, so chunk_size and
# chunk_overlap below are measured in tiktoken tokens rather than characters.
# NOTE(review): no encoding_name/model_name is passed, so the library's default
# encoding applies — presumably "gpt2", not 'cl100k_base' (the GPT-4 /
# GPT-3.5-turbo encoding); confirm against the installed langchain-text-splitters.
# Either way the token counts only approximate Gemini's own tokenization.
# Chunk size and overlap are important parameters for RAG performance.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, # Smaller chunks can be more precise
    chunk_overlap=0   # No overlap for simplicity here, but overlap can improve context
)

# Apply the splitter to our loaded documents
doc_splits = text_splitter.split_documents(docs_list)

# Report how many chunks were produced and preview the first one.
print(f"Number of original documents: {len(docs_list)}")
print(f"Number of chunks after splitting: {len(doc_splits)}")
print("\nSample chunk (first chunk content):")
print(doc_splits[0].page_content[:500]) # Print first 500 characters of the first chunk
print(doc_splits[0].metadata)



Number of original documents: 4
Number of chunks after splitting: 167

Sample chunk (first chunk content):
IPL 2025 Winner: Royal Challengers Bengaluru (RCB) won the 18th Edition of TATA IPL; Check Overall Team Performance Check MP Board 5th, 8th Re-Exam Result Here Check Results   School+UP BoardBihar BoardMP BoardRJ BoardMH BoardUK BoardHP BoardGJ BoardICSE BoardCollegesJobs+Sarkari NaukriSarkari ResultGKResults+JAC 12th ResultJAC Result 2025RBSE Result 2025CBSE 10th ResultCBSE 12th ResultExamsEducation NewsCurrent AffairsMock TestMore+‡§π‡§ø‡§®‡•ç‡§¶‡•ÄVideosShort VideosWeb StoriesSchoolCollegesJo
{'source': 'https://www.jagranjosh.com/general-knowledge/ipl-2025-winner-rcb-royal-challengers-bengaluru-captain-coach-and-overall-performance-1748975062-1', 'title': 'IPL 2025 Winner: Royal Challengers Bengaluru (RCB) won the 18th Edition of TATA IPL; Check Overall Team Performance ', 'description': 'Royal Challengers Bengaluru (RCB) clinched the IPL 2025 title, ending an 11-year wait to 

Part 3: Initialize the Embedding Model
Because the LLM already comes from Google's ecosystem (Gemini), the most natural choice for embeddings is Google's `GoogleGenerativeAIEmbeddings` (which uses models such as text-embedding-004 or embedding-001). It also runs via the API, so it adds no local RAM pressure.



In [16]:
# --- Step 5. Establish the knowledge base and retriever (Part 3: Initialize Embedding Model) ---

# Google's hosted embedding client
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# The model id is given in its fully qualified form, i.e. with the "models/" prefix.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

print("\nEmbedding model (GoogleGenerativeAIEmbeddings) initialized with correct model name.")



Embedding model (GoogleGenerativeAIEmbeddings) initialized with correct model name.


Part 4: Create the Vector Store (Chroma DB)

In [17]:
# --- Step 5. Establish the knowledge base and retriever (Part 4: Create Vector Store) ---

from langchain_community.vectorstores import Chroma

# Embed every chunk with `embeddings` and store the vectors in a named Chroma collection.
print("Creating Chroma vector store... This may take a moment.")
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    documents=doc_splits,
    collection_name="ipl-2025-rag-chroma",
)
print("Vector store created and documents embedded.")



Creating Chroma vector store... This may take a moment.
Vector store created and documents embedded.


In [18]:
# --- Step 5. Establish the knowledge base and retriever (Part 5: Set up Retriever) ---

# Set up the retriever from the vector store.
# No arguments are passed, so the vector store's default retrieval settings apply.
retriever = vectorstore.as_retriever()

print("Retriever set up.")



Retriever set up.


In [19]:
# --- Step 6. Define the agent's RAG tool ---

from langchain.tools import tool

# RAG tool: lets the agent query the IPL 2025 retriever.
# The docstring is kept verbatim — presumably it is what the agent sees as the tool description.
@tool
def get_ipl_2025_context(question: str):
    """Get context about the Indian Premier League (IPL) 2025, including team performance and the winner."""
    # Hand back whatever the retriever returns for the question, unmodified.
    return retriever.invoke(question)

# Tools the agent is allowed to call
tools = [get_ipl_2025_context]

print("RAG tool 'get_ipl_2025_context' defined and added to tools list.")



RAG tool 'get_ipl_2025_context' defined and added to tools list.


In [None]:
# --- Step 7. Establish the prompt template (Structured Chat Prompt) ---
# Draft cell, never executed: its triple-quoted `system_prompt` literal was cut off
# before the closing quotes, which is a SyntaxError on "Restart & Run All". The
# complete prompt is defined in the Step 7 cell that follows, so this cell keeps
# only its (idempotent) imports.

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.tools.render import render_text_description_and_args

In [20]:
# --- Step 7. Define the agent's prompt (COMPLETE BLOCK) ---


from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain.tools.render import render_text_description_and_args # Needed to render tool descriptions for the prompt

# This defines the strict format the agent must follow for its thoughts and actions.
# {tools} and {tool_names} are template variables filled in when the prompt is
# partialled; literal JSON braces are escaped as {{ and }}.
# Every ``` fence in the examples is opened and closed, so the format the model is
# asked to imitate is itself well-formed.
system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools: {tools}

Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).

Valid "action" values: "Final Answer" or {tool_names}

Provide only ONE action per $JSON_BLOB, as shown:

```

{{

"action": $TOOL_NAME,

"action_input": $INPUT

}}

```

Follow this format:

Question: input question to answer

Thought: consider previous and subsequent steps

Action:

```

$JSON_BLOB

```

Observation: action result

... (repeat Thought/Action/Observation N times)

Thought: I know what to respond

Action:

```

{{

"action": "Final Answer",

"action_input": "Final response to human"

}}

```

Begin! Reminder to ALWAYS respond with a valid json blob of a single action.

Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation"""

print("Agent system prompt format defined.")


Agent system prompt format defined.


In [21]:
# Human turn: the user's input, then the agent's running scratchpad, then a format reminder.
human_prompt = (
    "{input}\n"
    "\n"
    "{agent_scratchpad}\n"
    "\n"
    "(reminder to always respond in a JSON blob)"
)
print("Human prompt format defined.")

Human prompt format defined.


In [22]:
# --- Step 7. Define the agent's prompt (Part 3: Final ChatPromptTemplate Assembly) ---

# Message order: system instructions, earlier turns (the placeholder is optional,
# so it may be left unfilled), then the human turn.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history", optional=True),
    ("human", human_prompt),
])

print("Final ChatPromptTemplate assembled.")



Final ChatPromptTemplate assembled.


In [23]:
# --- Step 7. Define the agent's prompt (Part 4: Partial Prompt for Tools) ---

# Pre-fill the two tool-related variables of the system prompt:
#   {tool_names} - comma-separated names of the tools from Step 6
#   {tools}      - the text rendering produced by render_text_description_and_args
prompt = prompt.partial(
    tool_names=", ".join(t.name for t in tools),
    tools=render_text_description_and_args(list(tools)),
)

print("Prompt template finalized with tool information.")



Prompt template finalized with tool information.


In [24]:
# --- Step 8. Set up the agent's memory and chain (COMPLETE BLOCK) ---

# Imports for this step (ensure they are all present)
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnablePassthrough
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents import AgentExecutor

# 1. Conversation memory. The key matches the "chat_history" placeholder in the prompt,
#    and history is kept as message objects rather than one string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
print("ConversationBufferMemory initialized.")


# 2. Agent chain: add scratchpad and history to the inputs, render the prompt,
#    call the LLM, then parse its JSON reply with JSONAgentOutputParser.
def _scratchpad_from_steps(inputs):
    """Render the `intermediate_steps` entry of the inputs as text via format_log_to_str."""
    return format_log_to_str(inputs["intermediate_steps"])


def _history_from_memory(_inputs):
    """Return the messages currently held in `memory`; the chain inputs are ignored."""
    return memory.chat_memory.messages


chain = (
    RunnablePassthrough.assign(
        agent_scratchpad=_scratchpad_from_steps,
        chat_history=_history_from_memory,
    )
    | prompt
    | llm
    | JSONAgentOutputParser()
)
print("Agent chain (RunnablePassthrough | prompt | llm | JSONAgentOutputParser) defined.")

# 3. Executor: wires the chain to its tools and memory. Parsing errors are handled
#    instead of raised, and verbose mode prints the agent's steps as it runs.
agent_executor = AgentExecutor(
    agent=chain,
    tools=tools,
    memory=memory,
    verbose=True,
    handle_parsing_errors=True,
)
print("AgentExecutor initialized.")



ConversationBufferMemory initialized.
Agent chain (RunnablePassthrough | prompt | llm | JSONAgentOutputParser) defined.
AgentExecutor initialized.


  memory = ConversationBufferMemory(


Step 9. Generate responses with the agentic RAG system

In [25]:
# --- Step 9. Generate responses with the agentic RAG system

def _ask_agent(question, answer_heading):
    """Run one question through `agent_executor`, print the heading and the answer, and return the response."""
    print(f"\nAsking the agent: '{question}'")
    response = agent_executor.invoke({"input": question})
    print(answer_heading)
    print(response["output"])
    return response


print("--- Testing Agent with RAG Tool (IPL 2025 Queries) ---")

# 1. A question that needs the RAG tool (IPL 2025 winner)
question_ipl_winner = "Who won the IPL 2025 tournament and where was the final played?"
response_ipl_winner = _ask_agent(question_ipl_winner, "\nAgent's Final Answer for IPL 2025 Winner:")

# 2. A more detailed IPL 2025 question
question_ipl_details = "Tell me about the key players and their performance for the IPL 2025 winning team."
response_ipl_details = _ask_agent(question_ipl_details, "\nAgent's Final Answer for IPL 2025 Details:")

# 3. A general-knowledge question, to see whether the agent skips the tool when it is not needed
question_general = 'What is the capital of France?'
response_general = _ask_agent(question_general, "\nAgent's Final Answer for General Knowledge:")

print("\n--- End of Agent Testing ---")

--- Testing Agent with RAG Tool (IPL 2025 Queries) ---

Asking the agent: 'Who won the IPL 2025 tournament and where was the final played?'


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
  "action": "get_ipl_2025_context",
  "action_input": {
    "question": "Who won the IPL 2025 tournament and where was the final played?"
  }
}
```
[0m[36;1m[1;3m[Document(metadata={'title': '2025 Indian Premier League - Wikipedia', 'language': 'en', 'source': 'https://en.wikipedia.org/wiki/2025_Indian_Premier_League'}, page_content='Suspension and rescheduling\nOn 9 May 2025, the remaining matches were suspended due to the 2025 India–Pakistan crisis.[15] The revised schedule was announced on 12 May, with the remaining matches resumed on 17 May and were held across six venues.[16][17] On 20 May the playoff fixtures were confirmed[18] with Qualifier 1 and Eliminator played in Maharaja Yadavindra Singh International Cricket Stadium on 29 and 30 May, and Qualifier 2 and final pl