In [17]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from langchain_tavily import TavilySearch
from langgraph.prebuilt import create_react_agent

In [18]:
# Load the environment variables.
# Presumably this is where the API clients created later in the notebook get
# their credentials from (a local .env file) — confirm against the project setup.
# The call is left as the cell's last expression, so its return value is echoed.
load_dotenv()

True

In [19]:
# Chat-model handle: Google's "gemini-2.5-flash", sampled at temperature 0.7
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.7,
)

In [29]:
# Send a single prompt to the chat model and print the text of its reply.
# `resp` is kept as a notebook-level name because the next cell inspects its type.
resp = llm.invoke("Tell me about RAG in two sentences")
print(resp.content)

RAG (Retrieval-Augmented Generation) enhances large language models by first retrieving relevant information from an external knowledge base based on the user's query. This retrieved context is then provided to the LLM alongside the original prompt, enabling it to generate more accurate, grounded, and up-to-date responses while reducing the likelihood of hallucinations.


In [30]:
# Show the concrete class of the object currently bound to `resp`
print(type(resp))

<class 'langchain_core.messages.ai.AIMessage'>


In [20]:
# Tavily web-search tool, configured for the "general" topic and a single
# result per query
tool = TavilySearch(max_results=1, topic="general")

In [None]:
# Call the search tool directly and print the text of the top hit.
resp = tool.invoke({"query":"Who won the 100 years war?"})

# A search can legitimately come back with no hits (or without a "results"
# entry at all), so guard the lookup instead of indexing element 0 blindly.
search_hits = resp.get("results") or []
if search_hits:
    print(search_hits[0]["content"])
else:
    print("No search results returned.")

{'query': 'Who won the 100 years war?', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://en.wikipedia.org/wiki/Hundred_Years%27_War', 'title': "Hundred Years' War - Wikipedia", 'content': "The **Hundred Years' War** (French: *Guerre de Cent Ans*; 1337–1453) was a conflict between the kingdoms of England and France and a civil war in France during the Late Middle Ages. In the early years of the war, the English, led by King Edward III and his son Edward the Black Prince, saw resounding successes, notably at the battles of Crécy (1346) and Poitiers (1356), where King John II of France was taken prisoner.", 'score': 0.8719229, 'raw_content': None}], 'response_time': 0.0, 'request_id': '35362eb5-88a0-4fac-8fd7-e3f980f42f13'}


In [34]:
# Question the agent will be asked in the next cell
user_input = "When did Charlie Kirk Die. Only include wikipedia sources"

# ReAct-style agent: the chat model plus the Tavily search tool it may call
agent = create_react_agent(model=llm, tools=[tool])



In [35]:
# Run the agent on the user's question.
# The returned state holds the whole conversation under "messages". How many
# entries it contains depends on how many tool calls the agent decided to make,
# so the answer is read from the END of the list rather than a fixed position
# (index 3 is only correct for exactly one tool round-trip).
resp = agent.invoke({"messages":user_input})
print(resp["messages"][-1].content)

{'messages': [HumanMessage(content='When did Charlie Kirk Die. Only include wikipedia sources', additional_kwargs={}, response_metadata={}, id='fc0c03be-8371-4297-88f4-a336b8517ec1'), AIMessage(content='', additional_kwargs={'function_call': {'name': 'tavily_search', 'arguments': '{"include_domains": ["wikipedia.com"], "query": "When did Charlie Kirk die"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--2d93f011-3b13-400a-8abe-6c0e0f5d4270-0', tool_calls=[{'name': 'tavily_search', 'args': {'include_domains': ['wikipedia.com'], 'query': 'When did Charlie Kirk die'}, 'id': 'ceb89602-c4d8-4652-bedc-dee4b17b8d54', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1520, 'output_tokens': 101, 'total_tokens': 1621, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 71}}), ToolMessage(content='{"query": "When did Charlie Kirk die", "f

In [33]:
# Show the concrete class of the object currently bound to `resp`
print(type(resp))

<class 'dict'>


In [22]:
from sentence_transformers import SentenceTransformer

# Load the model for generating sentence embeddings.
# `model` is reused by later cells to encode both the stored sentences and queries.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [23]:
# Sample inputs: two short sentences plus single words from two loose themes
sentences = ["This is an example sentence", "Each sentence is converted", "apple", "banana", "windows", "android"]

# Encode each input string into a sentence embedding (not a per-word embedding)
embeddings = model.encode(sentences)

  return forward_call(*args, **kwargs)


In [1]:
import os

from pinecone import Pinecone

# Initialize a Pinecone client.
# The key is taken from the PINECONE_API_KEY environment variable instead of a
# literal in the notebook, so a real secret is never saved or shared with the file.
client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [2]:
# Name of the dense index that relies on Pinecone's integrated embeddings
index_name = "quickstart-py"

# Create the index only when no index with this name exists yet
if not client.has_index(index_name):
    # Integrated embedding settings: the model to embed with, and a field map
    # that points the model's "text" input at each record's "chunk_text" field
    integrated_embedding = {
        "model": "llama-text-embed-v2",
        "field_map": {"text": "chunk_text"},
    }
    client.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed=integrated_embedding,
    )

In [3]:
# Sample corpus for the integrated-embedding index: 50 short factual sentences.
# Each record has a unique "_id", the sentence itself under "chunk_text", and a
# free-form "category" label.
records = [
    { "_id": "rec1", "chunk_text": "The Eiffel Tower was completed in 1889 and stands in Paris, France.", "category": "history" },
    { "_id": "rec2", "chunk_text": "Photosynthesis allows plants to convert sunlight into energy.", "category": "science" },
    { "_id": "rec3", "chunk_text": "Albert Einstein developed the theory of relativity.", "category": "science" },
    { "_id": "rec4", "chunk_text": "The mitochondrion is often called the powerhouse of the cell.", "category": "biology" },
    { "_id": "rec5", "chunk_text": "Shakespeare wrote many famous plays, including Hamlet and Macbeth.", "category": "literature" },
    { "_id": "rec6", "chunk_text": "Water boils at 100°C under standard atmospheric pressure.", "category": "physics" },
    { "_id": "rec7", "chunk_text": "The Great Wall of China was built to protect against invasions.", "category": "history" },
    { "_id": "rec8", "chunk_text": "Honey never spoils due to its low moisture content and acidity.", "category": "food science" },
    { "_id": "rec9", "chunk_text": "The speed of light in a vacuum is approximately 299,792 km/s.", "category": "physics" },
    { "_id": "rec10", "chunk_text": "Newton's laws describe the motion of objects.", "category": "physics" },
    { "_id": "rec11", "chunk_text": "The human brain has approximately 86 billion neurons.", "category": "biology" },
    { "_id": "rec12", "chunk_text": "The Amazon Rainforest is one of the most biodiverse places on Earth.", "category": "geography" },
    { "_id": "rec13", "chunk_text": "Black holes have gravitational fields so strong that not even light can escape.", "category": "astronomy" },
    { "_id": "rec14", "chunk_text": "The periodic table organizes elements based on their atomic number.", "category": "chemistry" },
    { "_id": "rec15", "chunk_text": "Leonardo da Vinci painted the Mona Lisa.", "category": "art" },
    { "_id": "rec16", "chunk_text": "The internet revolutionized communication and information sharing.", "category": "technology" },
    { "_id": "rec17", "chunk_text": "The Pyramids of Giza are among the Seven Wonders of the Ancient World.", "category": "history" },
    { "_id": "rec18", "chunk_text": "Dogs have an incredible sense of smell, much stronger than humans.", "category": "biology" },
    { "_id": "rec19", "chunk_text": "The Pacific Ocean is the largest and deepest ocean on Earth.", "category": "geography" },
    { "_id": "rec20", "chunk_text": "Chess is a strategic game that originated in India.", "category": "games" },
    { "_id": "rec21", "chunk_text": "The Statue of Liberty was a gift from France to the United States.", "category": "history" },
    { "_id": "rec22", "chunk_text": "Coffee contains caffeine, a natural stimulant.", "category": "food science" },
    { "_id": "rec23", "chunk_text": "Thomas Edison invented the practical electric light bulb.", "category": "inventions" },
    { "_id": "rec24", "chunk_text": "The moon influences ocean tides due to gravitational pull.", "category": "astronomy" },
    { "_id": "rec25", "chunk_text": "DNA carries genetic information for all living organisms.", "category": "biology" },
    { "_id": "rec26", "chunk_text": "Rome was once the center of a vast empire.", "category": "history" },
    { "_id": "rec27", "chunk_text": "The Wright brothers pioneered human flight in 1903.", "category": "inventions" },
    { "_id": "rec28", "chunk_text": "Bananas are a good source of potassium.", "category": "nutrition" },
    { "_id": "rec29", "chunk_text": "The stock market fluctuates based on supply and demand.", "category": "economics" },
    { "_id": "rec30", "chunk_text": "A compass needle points toward the magnetic north pole.", "category": "navigation" },
    { "_id": "rec31", "chunk_text": "The universe is expanding, according to the Big Bang theory.", "category": "astronomy" },
    { "_id": "rec32", "chunk_text": "Elephants have excellent memory and strong social bonds.", "category": "biology" },
    { "_id": "rec33", "chunk_text": "The violin is a string instrument commonly used in orchestras.", "category": "music" },
    { "_id": "rec34", "chunk_text": "The heart pumps blood throughout the human body.", "category": "biology" },
    { "_id": "rec35", "chunk_text": "Ice cream melts when exposed to heat.", "category": "food science" },
    { "_id": "rec36", "chunk_text": "Solar panels convert sunlight into electricity.", "category": "technology" },
    { "_id": "rec37", "chunk_text": "The French Revolution began in 1789.", "category": "history" },
    { "_id": "rec38", "chunk_text": "The Taj Mahal is a mausoleum built by Emperor Shah Jahan.", "category": "history" },
    { "_id": "rec39", "chunk_text": "Rainbows are caused by light refracting through water droplets.", "category": "physics" },
    { "_id": "rec40", "chunk_text": "Mount Everest is the tallest mountain in the world.", "category": "geography" },
    { "_id": "rec41", "chunk_text": "Octopuses are highly intelligent marine creatures.", "category": "biology" },
    { "_id": "rec42", "chunk_text": "The speed of sound is around 343 meters per second in air.", "category": "physics" },
    { "_id": "rec43", "chunk_text": "Gravity keeps planets in orbit around the sun.", "category": "astronomy" },
    { "_id": "rec44", "chunk_text": "The Mediterranean diet is considered one of the healthiest in the world.", "category": "nutrition" },
    { "_id": "rec45", "chunk_text": "A haiku is a traditional Japanese poem with a 5-7-5 syllable structure.", "category": "literature" },
    { "_id": "rec46", "chunk_text": "The human body is made up of about 60% water.", "category": "biology" },
    { "_id": "rec47", "chunk_text": "The Industrial Revolution transformed manufacturing and transportation.", "category": "history" },
    { "_id": "rec48", "chunk_text": "Vincent van Gogh painted Starry Night.", "category": "art" },
    { "_id": "rec49", "chunk_text": "Airplanes fly due to the principles of lift and aerodynamics.", "category": "physics" },
    { "_id": "rec50", "chunk_text": "Renewable energy sources include wind, solar, and hydroelectric power.", "category": "energy" }
]

In [4]:
# Get a handle on the index named by `index_name`; later cells search through it
dense_index = client.Index(index_name)

# Upsert (store) the records in the "example-namespace" namespace of that index
dense_index.upsert_records("example-namespace", records)

In [5]:
import time
# NOTE(review): fixed 10-second pause — presumably gives the freshly upserted
# records time to show up in the index statistics; confirm the delay is needed.
time.sleep(10)

# View stats for the current index
stats = dense_index.describe_index_stats()

In [6]:
# Bare expression: the notebook displays the index statistics as the cell output
stats

{'dimension': 1024,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'example-namespace': {'vector_count': 50}},
 'total_vector_count': 50,
 'vector_type': 'dense'}

In [14]:
query = "Famous historical structures and monuments"

# Search request: ask for the 10 records closest to the query text
search_request = {
    "top_k": 10,
    "inputs": {"text": query},
}

# Run the search inside the namespace the records were stored in
results = dense_index.search(namespace="example-namespace", query=search_request)

# One aligned line per hit: id, rounded score, category and the stored text
for hit in results["result"]["hits"]:
    print(f"id: {hit['_id']:<5} | score: {round(hit['_score'], 2):< 5} | category: {hit['fields']['category']:<10} | text: {hit['fields']['chunk_text']:<50}")
    

id: rec17 | score:  0.28 | category: history    | text: The Pyramids of Giza are among the Seven Wonders of the Ancient World.
id: rec38 | score:  0.19 | category: history    | text: The Taj Mahal is a mausoleum built by Emperor Shah Jahan.
id: rec5  | score:  0.18 | category: literature | text: Shakespeare wrote many famous plays, including Hamlet and Macbeth.
id: rec15 | score:  0.1  | category: art        | text: Leonardo da Vinci painted the Mona Lisa.          
id: rec50 | score:  0.09 | category: energy     | text: Renewable energy sources include wind, solar, and hydroelectric power.
id: rec1  | score:  0.08 | category: history    | text: The Eiffel Tower was completed in 1889 and stands in Paris, France.
id: rec26 | score:  0.08 | category: history    | text: Rome was once the center of a vast empire.        
id: rec7  | score:  0.08 | category: history    | text: The Great Wall of China was built to protect against invasions.
id: rec47 | score:  0.07 | category: history    | t

In [15]:
# Repeat the search for `query`, this time asking Pinecone to rerank the hits.
# Same request as the plain search: top 10 records for the query text
search_request = {
    "top_k": 10,
    "inputs": {"text": query},
}

# Reranking options: which model to use, how many results to rerank, and the
# record field the reranker looks at
rerank_options = {
    "model": "bge-reranker-v2-m3",
    "top_n": 10,
    "rank_fields": ["chunk_text"],
}

results = dense_index.search(
    namespace="example-namespace",
    query=search_request,
    rerank=rerank_options,
)

# One aligned line per hit: id, rounded score, category and the stored text
for hit in results["result"]["hits"]:
    print(f"id: {hit['_id']:<5} | score: {round(hit['_score'], 2):< 5} | category: {hit['fields']['category']:<10} | text: {hit['fields']['chunk_text']:<50}")



id: rec1  | score:  0.11 | category: history    | text: The Eiffel Tower was completed in 1889 and stands in Paris, France.
id: rec38 | score:  0.06 | category: history    | text: The Taj Mahal is a mausoleum built by Emperor Shah Jahan.
id: rec7  | score:  0.06 | category: history    | text: The Great Wall of China was built to protect against invasions.
id: rec21 | score:  0.02 | category: history    | text: The Statue of Liberty was a gift from France to the United States.
id: rec17 | score:  0.02 | category: history    | text: The Pyramids of Giza are among the Seven Wonders of the Ancient World.
id: rec26 | score:  0.01 | category: history    | text: Rome was once the center of a vast empire.        
id: rec15 | score:  0.01 | category: art        | text: Leonardo da Vinci painted the Mona Lisa.          
id: rec5  | score:  0.0  | category: literature | text: Shakespeare wrote many famous plays, including Hamlet and Macbeth.
id: rec47 | score:  0.0  | category: history    | text:

In [16]:
# Delete the index named by `index_name`.
# NOTE(review): after this runs, cells that use the existing `dense_index`
# handle presumably stop working until the index is created again — confirm.
client.delete_index(index_name)

In [28]:


# Custom tool handed to the agent below: a stub that ignores real weather data.
# The docstring is left untouched on purpose — the agent framework presumably
# passes it to the model as the tool's description (confirm).
def get_weather(city:str) -> str:
    """Get Weather for a given city"""
    forecast = f"It's always sunny in {city}!"
    return forecast

# Agent whose only tool is the stubbed weather lookup
agent = create_react_agent(model=llm, tools=[get_weather])

# Ask one user question and keep the full returned state in `resp`
weather_question = {"role":"user","content":"what is the weather in sf?"}
resp = agent.invoke({"messages":[weather_question]})

# Dump the whole state so every intermediate message is visible
print(resp)

{'messages': [HumanMessage(content='what is the weather in sf?', additional_kwargs={}, response_metadata={}, id='8401aa35-06ef-4232-ae47-99dbdf043ec2'), AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_weather', 'arguments': '{"city": "San Francisco"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--0abb3de3-e59d-44b3-8f40-fcce2cc1a4b0-0', tool_calls=[{'name': 'get_weather', 'args': {'city': 'San Francisco'}, 'id': '6b4921d2-53c2-449b-9404-2886d094d9ef', 'type': 'tool_call'}], usage_metadata={'input_tokens': 46, 'output_tokens': 81, 'total_tokens': 127, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 65}}), ToolMessage(content="It's always sunny in San Francisco!", name='get_weather', id='9a8839a6-6ae8-4594-b273-6175fd9a8e85', tool_call_id='6b4921d2-53c2-449b-9404-2886d094d9ef'), AIMessage(content="It's alw

In [3]:
from pypdf import PdfReader

# Open the PDF for reading.
# NOTE(review): absolute, machine-specific path — this cell only runs where
# that exact file exists; consider a path relative to the notebook.
pdf_path = "C:\\Users\\Zafar\\Downloads\\AIML_Task_1[1].pdf"
reader = PdfReader(pdf_path)

# Report how many pages the document contains
page_count = len(reader.pages)
print(page_count)

4


In [4]:
# Take the first page of the document ...
page = reader.pages[0]

# ... and print whatever text can be extracted from it
first_page_text = page.extract_text()
print(first_page_text)

SPADES AI/ML – Project 1
Building a LangChain-based AI Agent using Gemini API
Objective
The purpose of this task is to help you gain hands-on familiarity with:
•Setting up and using theGemini API (Free Tier)for LLM access.
•Building a modularAI Agentusing theLangChainframework.
•Integrating tools that extend model capabilities:
1.Tavily Search Tool– for real-time web retrieval.
2.Retrieval-Augmented Generation (RAG) Tool– using embeddings from
HuggingFaceand vector storage viaPinecone.
•Creating a simple and functionalGradio frontendfor user interaction.
1. High-Level Description
Your task is to design an intelligent chatbot that:
•UsesLangChainas its orchestration framework.
•EmploysGemini(Google’s multimodal LLM) as the reasoning backbone.
•Incorporates two tools:
1. TheTavily Search Toolto answer questions requiring external, real-time
information.
2. ARAG pipelinethat embeds and retrieves context using HuggingFace mod-
els and Pinecone.
•Presents responses interactively through aGr

In [10]:
import os

from pinecone import Pinecone, ServerlessSpec

# Initialize a Pinecone client.
# The key is taken from the PINECONE_API_KEY environment variable instead of a
# literal in the notebook, so a real secret is never saved or shared with the file.
client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [3]:
from sentence_transformers import SentenceTransformer

# Load the model for generating sentence embeddings.
# `model` is reused by later cells to encode both the stored sentences and the query.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [8]:
# Sample inputs: two short sentences plus single words from two loose themes
sentences = ["This is an example sentence", "Each sentence is converted", "apple", "banana", "windows", "android"]

# Encode each input string into a sentence embedding (not a per-word embedding)
embeddings = model.encode(sentences)


384


In [12]:
# Create a dense index with separate embeddings, i.e. vectors are computed
# locally by `model` and uploaded, rather than embedded by Pinecone.
index_name = "storage-py"

# If the index does not already exist
if not client.has_index(index_name):
    client.create_index(
        name = index_name,
        # The index dimension must equal the length of the vectors produced by
        # the embedding model, so ask the model instead of hard-coding 384.
        dimension=model.get_sentence_embedding_dimension(),
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [64]:
# One record per embedding: ids run "vec1", "vec2", ..., the vector is stored
# as a plain list, and the source sentence is kept as metadata
records = [
    {
        "id": "vec" + str(position + 1),
        "values": vector.tolist(),
        "metadata": {"sentence": sentences[position]},
    }
    for position, vector in enumerate(embeddings)
]


In [65]:
# Target the index named by `index_name`
dense_index = client.Index(index_name)


# Upsert the records into the "example-namespace" namespace.
# Left as the cell's last expression so the call's result is displayed.
dense_index.upsert(namespace="example-namespace", vectors=records)

{'upserted_count': 6}

In [66]:
# Fetch the index statistics and print them
stats = dense_index.describe_index_stats()
print(stats)

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'example-namespace': {'vector_count': 6}},
 'total_vector_count': 6,
 'vector_type': 'dense'}


In [71]:
# Embed the query word with the same model used for the stored sentences and
# turn the vector into a plain list for the request
query = model.encode("orange").tolist()

# Fetch the 3 closest vectors from the namespace, returning both the stored
# values and the metadata for each match
results = dense_index.query(
    vector=query,
    namespace="example-namespace",
    top_k=3,
    include_metadata=True,
    include_values=True,
)

  return forward_call(*args, **kwargs)


In [74]:
# Print the stored sentence and similarity score of every returned match
for match in results["matches"]:
    matched_sentence = match["metadata"]["sentence"]
    print("Sentence:", matched_sentence, " | ", "Score: ", match["score"])

Sentence: banana  |  Score:  0.518210411
Sentence: apple  |  Score:  0.372343063
Sentence: windows  |  Score:  0.353139877
