In [None]:
from langchain_core.prompts import ChatPromptTemplate
from pprint import pprint
from groq import Groq
from dotenv import load_dotenv
from IPython.display import display_markdown
import os
from IPython.display import Markdown
from langchain_mistralai import ChatMistralAI
from typing import TypedDict
from langchain_groq import ChatGroq

### Some free LLM providers you can set up (be mindful of their limits, use delays if needed)

You can use any of the models you want, **as long as you're able to complete the assignment** <br>
A word of caution: for tasks involving tool-calling, Llama is terrible. Small Mistral/Gemini models work fine most of the time.

1. https://console.groq.com/keys
2. https://ai.google.dev/gemini-api/docs/models#experimental
3. https://docs.mistral.ai/models

In [None]:
from langchain_mistralai import ChatMistralAI
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

# Pull API keys from a local .env file into the process environment.
load_dotenv()

## Feel free to switch up the models. E.g. Mistral's limits are imposed per model, so you can switch between small/medium.

# llm = ChatMistralAI(api_key=os.getenv("MISTRAL_API_KEY"), model="mistral-medium-latest")

# Fail fast with a readable message: assigning None into os.environ (which is
# what happens when the key is missing) raises an opaque TypeError instead.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY is not set. Add it to your .env file or environment.")
os.environ["GROQ_API_KEY"] = groq_api_key

# Raw Groq client (no key passed explicitly) and the LangChain chat model used by the nodes below.
client = Groq()
llm = ChatGroq(api_key=groq_api_key, model="llama-3.3-70b-versatile")


# llm = ChatOpenAI(
#     model="gemini-flash-lite-latest",
#     api_key=os.getenv("GEMINI_API_KEY"),
#     base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
# )

## **Section 1: The Reflection Pattern with LangGraph**

In this section, you'll learn to build agentic workflows using `LangGraph`, a powerful library from LangChain for creating complex, stateful, and potentially cyclical agent runtimes. Think of it as building a flowchart for your agents.

We'll start with a very simple "A to B" workflow to understand the core concepts, and then you'll apply that knowledge to build a more advanced reflection agent.

### **Part 1: A Beginner's Guide to LangGraph**

Before we build a looping agent, let's understand the basics with a simple, linear task:
1.  **Agent 1 (Poet):** Writes a short poem about the sky.
2.  **Agent 2 (Translator):** Translates that poem into French.

This will teach you the three core components of any LangGraph workflow:

1.  **The State:** A shared object that holds information and is passed between agents.
2.  **The Nodes:** The "workers" or agents in our graph. Each node is a Python function that performs an action.
3.  **The Edges:** The connections that define the path of the workflow, directing the flow from one node to the next.

#### **Step 1: Define the Graph State**

Our state needs to hold the original poem and the translated version. We use a `TypedDict` for this.

In [None]:
from typing import TypedDict

class PoemWorkflowState(TypedDict):
    """A state that holds the poem and its translation.

    This is the schema handed to `StateGraph`; each node returns a dict
    containing only the field(s) it wants to update.
    """
    # Poem text returned by `poet_node`.
    poem: str
    # Translation of `poem` returned by `translator_node`.
    translated_poem: str

#### **Step 2: Define the Agent Nodes**

Each agent is a function that takes the current `state` as input and returns a dictionary with the fields it wants to update.

In [None]:
def poet_node(state: PoemWorkflowState):
    """Generate the poem that starts the workflow.

    Args:
        state: Current graph state. It is not read here because the prompt is fixed.

    Returns:
        dict: `{"poem": <text of the model's reply>}`, used to update the `poem` field.
    """
    print("--- ✒️ POET NODE ---")
    prompt = ChatPromptTemplate.from_template("Write the first four (non-identical) stanza of Iqbal's Jawab-e-Shikwa poem in Urdu")
    # The template has no variables, so the chain is invoked with an empty input dict.
    poem_result = (prompt | llm).invoke({})
    # Return a dictionary to update the 'poem' field in the state
    return {"poem": poem_result.content}

def translator_node(state: PoemWorkflowState):
    """Translate the poem held in the state into English.

    Args:
        state: Current graph state; `state["poem"]` must already be populated
            (the graph wires this node after the poet node).

    Returns:
        dict: `{"translated_poem": <text of the model's reply>}`.
    """
    print("--- 🌐 TRANSLATOR NODE ---")
    prompt = ChatPromptTemplate.from_template("Translate the following poem into English:\n\n{poem}")
    # The 'poem' field was populated by the previous node
    translation_result = (prompt | llm).invoke({"poem": state["poem"]})
    # Return a dictionary to update the 'translated_poem' field
    return {"translated_poem": translation_result.content}

#### **Step 3: Wire up the Graph**

Now we define the flowchart: start at the `poet_node`, then go to the `translator_node`, and then end.

In [None]:
from langgraph.graph import StateGraph, END

# Create a new graph whose shared state follows the PoemWorkflowState schema
workflow = StateGraph(PoemWorkflowState)

# Add the two nodes we defined; the string is the name the edges below refer to
workflow.add_node("poet", poet_node)
workflow.add_node("translator", translator_node)

# Set the entry point of the workflow
workflow.set_entry_point("poet")

# Define the connections (edges)
# After the 'poet' node, the workflow should go to the 'translator' node
workflow.add_edge("poet", "translator")
# The 'translator' node is the last step, so we connect it to the special END node
workflow.add_edge("translator", END)

# Compile the graph into a runnable app
# NOTE: the name `app` is reassigned later in the notebook when the reflection graph is compiled.
app = workflow.compile()

#### **Step 4: Run the Workflow**


In [None]:
# The poet node's prompt is fixed, so the graph is started with an empty state.
final_state = app.invoke({})

# The returned state holds the fields written by both nodes.
print("\n--- ✅ WORKFLOW COMPLETE ---")
print("\nOriginal Poem:")
print(final_state['poem'])
print("\nTranslated Poem:")
print(final_state['translated_poem'])

Note: all these free models are terrible


### **Part 2: Your Task - Build a Reflection Agent** [30 marks]


Now that you understand the basics of `State`, `Nodes`, and `Edges`, you will build the more complex reflection agent. This agent will have a **cyclical** workflow: **Generate -> Reflect -> (Decide) -> Generate...**

**Goal:** Create a workflow that writes a Python script to scrape Hacker News, and then iteratively refines it based on expert critique.

Follow the `TODO` comments below to implement the full graph.


https://console.groq.com/keys

https://docs.langchain.com/oss/python/langgraph

In [None]:
# Let's save Mistral credits for later and use Groq for this section.

# Do not assign a literal key here: the setup cell at the top already loaded
# GROQ_API_KEY from .env, and overwriting it with a placeholder string makes
# every subsequent Groq call fail authentication.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError("GROQ_API_KEY is not set. Add it to your .env file or environment.")

# Both clients are created without an explicit key, so they rely on the environment variable.
client = Groq()
llm = ChatGroq(model="llama-3.3-70b-versatile")

#### **`TODO 1`: Define the Graph State** [5 marks]


In [21]:
from typing import List, TypedDict

class GraphState(TypedDict):
    """Shared state of the generate/reflect loop."""
    # TODO: Define the fields for the graph state.
    # You will need:
    # task: str - The user's initial request
    # code: str - The Python code generated by the agent
    # critiques: List[str] - A list of critiques from the reflection agent
    # revisions: int - A counter for how many revisions have been made

    # Supplied by the caller in the initial input.
    task: str
    # Written by `generation_node` on every pass.
    code: str
    # Grown by `reflection_node`, which appends one review per pass.
    critiques: List[str]
    # Incremented by `generation_node`; read by `should_continue` to stop the loop.
    revisions: int

#### **`TODO 2 & 3`: Implement the Agent Nodes** [10 marks]


In [26]:
# --- GENERATION NODE ---
def generation_node(state: GraphState):
    """Generate the initial code, or revise the current code using the critiques.

    Args:
        state: Current graph state. `task` is required; `critiques` and
            `revisions` default to `[]` and `0` when absent (first pass);
            `code` is required on revision passes.

    Returns:
        dict: `{"code": <model reply>, "revisions": <previous count + 1>}`.
    """
    print("--- 💻 GENERATING CODE ---")
    task = state["task"]
    critiques = state.get("critiques", [])
    revisions = state.get("revisions", 0)

    if revisions == 0:
        # First pass: nothing to revise yet, so only the task is sent.
        prompt = ChatPromptTemplate.from_template(
            "Write Python code to accomplish the following task:\n\n{task}"
        )
        inputs = {"task": task}
    else:
        # Revision pass: the task is repeated alongside the code and critiques so
        # the model cannot drift away from the original requirements while fixing.
        prompt = ChatPromptTemplate.from_template(
            "Revise the following Python code based on these critiques. "
            "The revised code must still accomplish the original task.\n\n"
            "Task:\n{task}\n\n"
            "Code:\n{code}\n\n"
            "Critiques:\n{critiques}"
        )
        inputs = {
            "task": task,
            "code": state["code"],
            # Join the list of critiques into a single string for the prompt
            "critiques": "\n".join(critiques),
        }

    # Single invoke/return path shared by both branches.
    code_result = (prompt | llm).invoke(inputs)
    return {"code": code_result.content, "revisions": revisions + 1}

# --- REFLECTION NODE ---
def reflection_node(state: GraphState):
    """Review the current code and append the review to the critique history.

    Args:
        state: Current graph state. `code` is required; `critiques` defaults
            to `[]` when absent.

    Returns:
        dict: `{"critiques": <existing critiques + [new review text]>}`.
    """
    print("--- 🤔 REFLECTING ON CODE ---")
    code_to_review = state["code"]
    critiques = state.get("critiques", [])

    # The reviewer persona goes in the system message and the code under review
    # in the user message, so the instructions stay separate from the code text.
    prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            "You are a Senior Python Developer. Review the code you are given "
            "and provide constructive critiques.",
        ),
        ("human", "Please review the following code:\n\n{code}"),
    ])
    review_result = (prompt | llm).invoke({"code": code_to_review})

    # Build a new list instead of mutating the one held in the state.
    return {"critiques": critiques + [review_result.content]}


#### **`TODO 4`: Implement the Conditional Edge** [5 marks]


In [27]:
from langgraph.graph import END

def should_continue(state: GraphState):
    """Route the graph after a reflection pass.

    Returns "end" once `state["revisions"]` has reached 2, otherwise
    "continue"; a one-line status message is printed either way.
    """
    revision_cap_reached = state["revisions"] >= 2
    if not revision_cap_reached:
        # Still under the cap: loop back for another pass.
        print("Revisions remaining. Continuing workflow.")
        return "continue"
    print("Reached maximum revisions. Ending workflow.")
    return "end"

#### **`TODO 5`: Wire Up the Graph** [10 marks]


https://docs.langchain.com/oss/python/langgraph/graph-api#conditional-edges

In [28]:
# Build the graph
# NOTE: StateGraph is imported in the earlier poem-workflow cell, so that cell must run first.
workflow = StateGraph(GraphState)

# TODO: Add the nodes, entry point, and edges to the workflow.
# 1. Add the "generator" and "reflector" nodes.
# 2. Set the entry point to be the "generator".
# 3. Add a standard edge from the "generator" to the "reflector".
# 4. Add a CONDITIONAL edge from the "reflector" node.
#    - This edge should call your 'should_continue' function.
#    - If the function returns "continue", the graph should go back to the "generator" node.
#    - If the function returns "end", the graph should go to the END node.
workflow.add_node("generator", generation_node)
workflow.add_node("reflector", reflection_node)
workflow.set_entry_point("generator")
workflow.add_edge("generator", "reflector")
# The mapping translates should_continue's return value into the next node.
workflow.add_conditional_edges("reflector", should_continue, {"continue": "generator", "end": END})


# Compile the graph into a runnable app
# NOTE: this rebinds `app`, replacing the poem workflow compiled earlier.
app = workflow.compile()

In [29]:
task = "Create a Python function using requests and BeautifulSoup that scrapes the titles of the top 5 articles from the Hacker News homepage (https://news.ycombinator.com)."
# The initial state only needs the task: generation_node and reflection_node
# fall back to defaults for 'revisions' and 'critiques' when they are absent.
initial_input = {"task": task}

final_state = app.invoke(initial_input)

print("\n--- ✨ FINAL, REFINED CODE ---")
# The model's reply is markdown (prose plus fenced code), so render it as such.
display(Markdown(final_state['code']))


--- üíª GENERATING CODE ---
--- ü§î REFLECTING ON CODE ---
--- ü§î REFLECTING ON CODE ---
Revisions remaining. Continuing workflow.
--- üíª GENERATING CODE ---
Revisions remaining. Continuing workflow.
--- üíª GENERATING CODE ---
--- ü§î REFLECTING ON CODE ---
--- ü§î REFLECTING ON CODE ---
Reached maximum revisions. Ending workflow.

--- ‚ú® FINAL, REFINED CODE ---
Reached maximum revisions. Ending workflow.

--- ‚ú® FINAL, REFINED CODE ---


The provided code has been revised to address the critiques mentioned. Here is the updated code with explanations and improvements:

```python
import requests
from bs4 import BeautifulSoup
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constant variables
HACKER_NEWS_URL = "https://news.ycombinator.com"
NUM_ARTICLES = 5

def scrape_hacker_news(url: str = HACKER_NEWS_URL, num_articles: int = NUM_ARTICLES) -> list:
    """
    Scrapes the titles of the top articles from the Hacker News homepage.

    Args:
        url (str): The URL of the Hacker News homepage. Defaults to HACKER_NEWS_URL.
        num_articles (int): The number of articles to scrape. Defaults to NUM_ARTICLES.

    Returns:
        list: A list of article titles.
    """
    try:
        # Send a GET request to the Hacker News homepage
        headers = {"User-Agent": "Hacker News Article Scraper"}
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the HTML response using BeautifulSoup
        soup = BeautifulSoup(response.text, "lxml")  # Using lxml parser for better performance

        # Find all article titles on the page
        titles = [item.text.strip() for item in soup.find_all("a", class_="storylink")]

        # Return the top articles
        return titles[:num_articles]

    except requests.RequestException as e:
        logger.error(f"Request error: {e}")
        return []
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return []

def main():
    # Example usage
    article_titles = scrape_hacker_news()
    for i, title in enumerate(article_titles):
        logger.info(f"Article {i+1}: {title}")

if __name__ == "__main__":
    main()
```

### Explanation of Changes

1. **Error Handling**: Added try-except blocks to handle potential errors during the request or parsing process.
2. **User-Agent Header**: Specified a User-Agent header in the request to avoid being blocked by the server.
3. **Constant Variables**: Extracted constant variables for the URL and number of articles to improve readability and maintainability.
4. **Type Hints**: Added type hints for function parameters and return types to enable static type checking and improve code readability.
5. **BeautifulSoup Parser**: Changed the parser to `lxml` for better performance and handling of complex HTML structures.
6. **Logging**: Added logging statements to help with debugging and monitoring the script's execution.

### Improvements

* Improved error handling and logging to make the script more robust and maintainable.
* Used a more efficient parser for better performance.
* Added type hints to improve code readability and enable static type checking.
* Extracted constant variables to improve readability and maintainability.

### Tests and Example Uses

* Run the script to scrape the top 5 article titles from the Hacker News homepage.
* Modify the `HACKER_NEWS_URL` and `NUM_ARTICLES` variables to scrape different numbers of articles or use a different URL.
* Use the `logger` object to log custom messages or errors.

## **Section 2: Tool Calling with LangChain**


An LLM's knowledge is frozen in time and it has no access to the outside world. To build truly powerful applications, we need to give our agents **tools**—functions they can call to interact with APIs, databases, or any other external system.

LangChain provides a seamless way to equip agents with tools and let them decide when to use them.

### **Part 1: A Beginner's Guide to Tool Calling**

Let's start with a very simple example: giving an agent a calculator.

This will teach you the three key components of a LangChain tool-calling agent:
1.  **The Tool:** A Python function decorated with `@tool`.
2.  **The Agent:** The "brain" that decides which tool to use. We'll use `create_agent`.
3.  **The Runtime:** The loop that actually executes the tool calls and passes the results back to the agent. With `create_agent` this is built into the returned agent object, so no separate `AgentExecutor` is needed.

#### **Step 1: Define a Tool**

Any Python function can become a tool. The magic is in the `@tool` decorator, which automatically converts the function's signature and docstring into a format the LLM can understand.

> **Important:** A clear, descriptive docstring is crucial. The agent uses the docstring to figure out *what the tool does* and *when to use it*.


In [30]:
from langchain_core.tools import tool

@tool
def multiply(a: int, b: int) -> int:
    """Multiplies two integers together."""
    # The docstring above is deliberately left as-is: per the notebook text, the
    # agent uses it to decide what the tool does and when to call it.
    # Trace line so the cell output shows that the tool was actually invoked.
    print("Bro needs to multiply", a, "and", b)
    return a * b

# We create a list of all the tools the agent will have access to.
# This list is passed as `tools=` when the agent is created below.
tools = [multiply]

#### **Step 2: Create a Tool-Calling Agent and Executor**


Now we assemble the agent. We need the LLM, our list of tools, and a special prompt.

The prompt is the agent's instruction manual. With `create_agent` we only need to supply a `system_prompt`; the agent keeps track of its previous tool calls and their results in its message history, so no explicit `agent_scratchpad` placeholder is required.

In [None]:
from langchain.agents import create_agent
from langchain_groq import ChatGroq

# Initialize the LLM
# NOTE: this rebinds the notebook-wide `llm`; the key is read from the environment at call time.
llm = ChatGroq(api_key = os.getenv("GROQ_API_KEY"),model="llama-3.3-70b-versatile")

# The agent is created with the model, tools, and a system prompt.
# This single object is now the complete, runnable agent.
# `tools` is the list defined in the previous cell (currently just `multiply`).
agent = create_agent(
    model=llm,
    tools=tools,
    system_prompt="You are a helpful assistant. You must use your tools to answer questions."
)

#### **Step 3: Run the Agent**


Let's ask a question that requires the agent to use its `multiply` tool.


In [32]:
# The agent takes a dict with a "messages" list; each message is a role/content dict.
result = agent.invoke({
    "messages": [
        {"role": "user", "content": "What is 8 times 7?"}
    ]
})

# The final answer is in the 'content' of the last message in the output.
final_answer = result["messages"][-1].content
print(final_answer)

Bro needs to multiply 8 and 7
The answer to 8 times 7 is 56.
The answer to 8 times 7 is 56.


### **Part 2: Your Task - Build a Multi-Tool Travel Agent [10 marks]**


You will use the modern `create_agent` function to build a "Travel Agent" that can use **multiple tools** to answer a complex user query.

**Goal:** Create a single agent that can help a user plan a trip by providing information on flights, weather, and local events.

Follow the `TODO` comments below to implement the full agent.

#### **`TODO 1`: Create the Tools [5 marks]**
Define three distinct Python functions. Since we don't have real APIs for this, you will create **mock functions** that return hardcoded string data. Each function must have a clear docstring explaining what it does.

In [37]:
import json
from langchain_core.tools import tool

# TODO: Define and decorate three mock tools.
# 1. get_flight_info(origin: str, destination: str, month: str) -> str
#    - Docstring: "Provides fictional flight prices and availability for a trip."
#    - Returns a JSON string with flight details.
@tool
def get_flight_info(origin: str, destination: str, month: str) -> str:
    """Provides fictional flight prices and availability for a trip."""
    # Hardcoded mock fares as (airline, price, availability) rows; the request
    # parameters are only echoed back and do not influence the fares.
    mock_fares = (
        ("Pakistan International Airlines", 350, "Available"),
        ("FlyJinnah", 400, "Limited Seats"),
        ("Air Sial", 300, "Sold Out"),
    )
    payload = {"origin": origin, "destination": destination, "month": month}
    payload["flights"] = [
        {"airline": airline, "price": price, "availability": availability}
        for airline, price, availability in mock_fares
    ]
    # Serialized to a JSON string, as the declared return type requires.
    return json.dumps(payload)

# 2. get_weather_forecast(city: str, month: str) -> str
#    - Docstring: "Provides a fictional weather forecast for a specific city and month."
#    - Returns a JSON string with weather details.
@tool
def get_weather_forecast(city: str, month: str) -> str:
    """Provides a fictional weather forecast for a specific city and month."""
    # Mock data: the same forecast is returned for every city/month, with the
    # request parameters echoed back.
    return json.dumps({
        "city": city,
        "month": month,
        "forecast": {
            # Temperatures use a real degree sign (U+00B0); json.dumps emits it
            # as the ASCII escape \u00b0 by default.
            "average_high": "30°C",
            "average_low": "20°C",
            "precipitation": "50mm"
        }
    })

# 3. search_city_events(city: str, month: str) -> str
#    - Docstring: "Provides a list of major fictional events for a specific city and month."
#    - Returns a JSON string with event details.
@tool
def search_city_events(city: str, month: str) -> str:
    """Provides a list of major fictional events for a specific city and month."""
    # Hardcoded mock schedule as (name, day-of-month, description) rows.
    schedule = (
        ("Hasan Raheem Concert", "15", "A grand music festival featuring Hasan Raheem."),
        ("Pakwheels Car Mela", "22", "A car mela where you can buy and sell vehicles."),
        ("Topics in LLMs Final Presentation", "28", "The final..."),
    )
    # Each date is the requested month joined to the fixed day, e.g. "June-15".
    events = [
        {"name": name, "date": f"{month}-{day}", "description": description}
        for name, day, description in schedule
    ]
    return json.dumps({"city": city, "month": month, "events": events})


# TODO: Create a list called `travel_tools` that contains all three decorated tool objects.
# This is the list the travel agent receives as `tools=` in the next code cell.
travel_tools = [get_flight_info, get_weather_forecast, search_city_events]

#### **`TODO 2`: Create and Run the Agent 🚀 [5 marks]**

In [38]:
from langchain.agents import create_agent
from langchain_groq import ChatGroq

# TODO: Assemble and run the agent.
# 1. Initialize the LLM
# NOTE: this rebinds the notebook-wide `llm`.
llm = ChatGroq(api_key = os.getenv("GROQ_API_KEY"),model="llama-3.3-70b-versatile")

# 2. Define a clear and concise system prompt for your travel agent.
system_prompt = "You are a travel planning assistant. Use the available tools to help users plan their trips by providing information on flights, weather, and local events."

# 3. Create the agent using create_agent, passing the llm, tools list, and system prompt.
# NOTE: this rebinds `agent`, replacing the calculator agent built earlier.
agent = create_agent(
    model=llm,
    tools=travel_tools,
    system_prompt=system_prompt
)

# 4. Define the user's request; it asks for all three kinds of information the tools provide.
user_message_content = "Help me plan a trip to Tel Aviv from Tehran for this June. I need to know about flights, weather, and any major events."

# 5. Invoke the agent with the correct message format and print the final answer.
result = agent.invoke({
    "messages": [
        {"role": "user", "content": user_message_content}
    ]
})
# The final answer is the content of the last message in the returned conversation.
final_answer = result["messages"][-1].content
print(final_answer)

For your trip to Tel Aviv from Tehran in June, there are several flights available with prices ranging from $300 to $400. The weather in Tel Aviv during June is expected to be warm with average highs of 30¬∞C and average lows of 20¬∞C, with a precipitation of 50mm. As for major events, there are a few happening in Tel Aviv during June, including the Hasan Raheem Concert on June 15th, the Pakwheels Car Mela on June 22nd, and the Topics in LLMs Final Presentation on June 28th.


## **Section 3: Advanced Multi-Agent Collaboration with LangGraph**


You have now mastered the core patterns of agentic design: stateful workflows with `LangGraph`, tool use with `create_agent`, and multi-agent collaboration. This final project will challenge you to combine all these skills to build a sophisticated, practical research crew.

Your goal is to create a multi-agent system that can verify a claim by consulting multiple sources, cross-referencing their findings, and looping its research until it reaches a confident conclusion.

### **Using the Tavily Search Tool**
For this task, we will use the **Tavily Search API** for live web searches. It is a powerful tool designed specifically for LLM agents.


https://www.tavily.com/

In [43]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# It is recommended to use a secrets manager for your keys.
# Fail fast with a readable message: assigning None into os.environ (which is
# what happens when the key is missing) raises an opaque TypeError instead.
tavily_api_key = os.getenv("TAVILY_API_KEY")
if not tavily_api_key:
    raise RuntimeError("TAVILY_API_KEY is not set. Add it to your .env file or environment.")
os.environ["TAVILY_API_KEY"] = tavily_api_key


Also, you're free to use any model from here on. But be warned: several models (e.g. the Llama model we used earlier) are absolutely terrible at calling tools appropriately. Mistral should work fine most of the time.

In [44]:
from langchain_tavily import TavilySearch
from langchain.agents import create_agent
from langchain_groq import ChatGroq

# You can configure the tool's parameters, like max_results, upon instantiation
search_tool = TavilySearch(max_results=3)


# You can then pass this tool in a list to your agent like this and it will automatically perform a search if needed
# NOTE: this switches the notebook-wide `llm` to Mistral; ChatMistralAI is imported in the first cell.
llm = ChatMistralAI(api_key=os.getenv("MISTRAL_API_KEY"), model="mistral-small-latest")
# Demonstration agent only: it is created here but not invoked in this cell.
my_agent = create_agent(
    model=llm,
    tools=[search_tool], # Pass the tool here
    system_prompt="You are a web researcher."
)

### **Your Task: Build the Fact-Checking Crew [30 marks]**

#### **High-Level Architecture**

Your crew will now include a skeptical agent to ensure a balanced perspective.
1.  **Start with a claim.**
2.  **Initial Research:** A news-focused agent will search for evidence supporting the claim.
3.  **Adversarial Research:** A "Devil's Advocate" agent will actively search for evidence that *contradicts* the claim.
4.  **Verification:** A lead verifier will analyze both the supporting and contradictory evidence, produce a consolidated analysis, and decide on a verdict: `CONFIRMED`, `CONTRADICTED`, or `NEEDS_MORE_INFO`.
5.  **Conditional Loop:** If the verdict is `NEEDS_MORE_INFO`, the graph loops back for another round of research.
6.  **Final Report:** A writer agent takes the final, verified analysis and produces a polished report.

#### **`TODO 1`: Define the Graph State [5 marks]**
The state needs to track the claim, the findings from the pro and con agents, the verifier's analysis, and the final report.

In [70]:
from typing import TypedDict, List

class FactCheckCrewState(TypedDict):
    """Shared state passed between the fact-checking crew's nodes."""
    # TODO: Define the fields for the graph state.
    # claim: - The initial user claim to be verified.
    # revision_number: - A counter for the number of research loops.
    # supporting_evidence: - The output from the news search agent.
    # counter_evidence:  - The output from the Devil's Advocate agent.
    # verified_analysis:  - The consolidated analysis from the lead verifier.
    # final_verdict:  - The final verdict ("CONFIRMED", "CONTRADICTED", etc.).
    # final_report:  - The final, polished report from the writer.

    # Read by both research nodes as the user message for their agents.
    claim: str
    # Incremented by `lead_verifier_node`; read by `should_continue_verification`.
    revision_number: int
    # Written by `supporting_evidence_node`.
    supporting_evidence: str
    # Written by `devils_advocate_node`.
    counter_evidence: str
    # Written by `lead_verifier_node` from the "analysis" key of the model's JSON reply.
    verified_analysis: str
    # Written by `lead_verifier_node` from the "verdict" key of the model's JSON reply.
    final_verdict: str
    # Written by `report_writer_node`.
    final_report: str

#### **`TODO 2`: Define the Agent Nodes [15 marks]**
You will now create four distinct agent nodes.

In [76]:
from langchain.agents import create_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_tavily import TavilySearch
import json

#Use this search tool to enable live searches
# NOTE: this rebinds `search_tool` (previously created with max_results=3).
search_tool = TavilySearch(max_results=4)

# NOTE(review): this switches `llm` back to the Groq Llama model, although the notebook
# text above warns that Llama is poor at tool calling — confirm this is intended for the
# two search-enabled research nodes.
llm = ChatGroq(api_key = os.getenv("GROQ_API_KEY"),model="llama-3.3-70b-versatile")

# --- SUPPORTING EVIDENCE AGENT ---
def _run_research_agent(system_prompt: str, claim: str) -> str:
    """Run a one-off, search-enabled agent on `claim` and return its final message text.

    Args:
        system_prompt: Persona/instructions that decide which side the agent researches.
        claim: The claim under investigation; sent as the single user message.

    Returns:
        str: Content of the last message in the agent's returned conversation.
    """
    agent = create_agent(
        model=llm,
        tools=[search_tool],
        system_prompt=system_prompt,
    )
    result = agent.invoke({"messages": [{"role": "user", "content": claim}]})
    # The agent's final answer is the last message of the returned conversation.
    return result["messages"][-1].content


# --- SUPPORTING EVIDENCE AGENT ---
def supporting_evidence_node(state: FactCheckCrewState):
    """Agent 1: Uses the search tool to find recent news and articles that SUPPORT the claim.

    Args:
        state: Current graph state; only `state["claim"]` is read.

    Returns:
        dict: `{"supporting_evidence": <agent's final message text>}`.
    """
    print("--- 🔎 SUPPORTING EVIDENCE NODE ---")
    evidence = _run_research_agent(
        "You are a research assistant. Find credible news, studies, and expert opinions that support the claim.",
        state["claim"],
    )
    return {"supporting_evidence": evidence}


# --- DEVIL'S ADVOCATE AGENT ---
def devils_advocate_node(state: FactCheckCrewState):
    """Agent 2: Uses the search tool to find evidence that CONTRADICTS the claim.

    Args:
        state: Current graph state; only `state["claim"]` is read.

    Returns:
        dict: `{"counter_evidence": <agent's final message text>}`.
    """
    print("--- 😈 DEVIL'S ADVOCATE NODE ---")
    evidence = _run_research_agent(
        "You are a Devil's Advocate. Find counter-arguments, dissenting opinions, and evidence that debunks or challenges the claim.",
        state["claim"],
    )
    return {"counter_evidence": evidence}

# --- LEAD VERIFIER AGENT ---
def _parse_verifier_output(raw_text: str) -> dict:
    """Extract `{"analysis", "verdict"}` from the verifier model's raw reply.

    Chat models often wrap the requested JSON in a markdown fence or add a
    sentence around it, so the outermost `{...}` span is parsed rather than the
    whole reply. A reply that cannot be parsed, or whose verdict is not one of
    the three allowed values, is reported as "NEEDS_MORE_INFO" instead of
    crashing the graph.
    """
    valid_verdicts = ("CONFIRMED", "CONTRADICTED", "NEEDS_MORE_INFO")
    start, end = raw_text.find("{"), raw_text.rfind("}")
    parsed = None
    if start != -1 and end > start:
        try:
            parsed = json.loads(raw_text[start:end + 1])
        except json.JSONDecodeError:
            parsed = None
    if not isinstance(parsed, dict):
        print("Verifier reply was not valid JSON; treating verdict as NEEDS_MORE_INFO.")
        # Keep the raw reply as the analysis so nothing the model said is lost.
        return {"analysis": raw_text.strip(), "verdict": "NEEDS_MORE_INFO"}

    # Normalize case/whitespace so e.g. "confirmed" still routes correctly.
    verdict = str(parsed.get("verdict", "")).strip().upper()
    if verdict not in valid_verdicts:
        print(f"Unexpected verdict {verdict!r}; treating verdict as NEEDS_MORE_INFO.")
        verdict = "NEEDS_MORE_INFO"
    return {"analysis": str(parsed.get("analysis", "")), "verdict": verdict}


def lead_verifier_node(state: FactCheckCrewState):
    """Agent 3: Synthesizes both sides and makes a verdict. Does not use tools.

    Args:
        state: Current graph state. Reads `claim`, `supporting_evidence` and
            `counter_evidence`; `revision_number` defaults to 0 when absent.

    Returns:
        dict: Updates for `verified_analysis`, `final_verdict` (always one of
        "CONFIRMED", "CONTRADICTED", "NEEDS_MORE_INFO") and `revision_number`
        (previous value + 1).
    """
    print("--- ⚖️ LEAD VERIFIER NODE ---")
    prompt = ChatPromptTemplate.from_template(
        "You are a neutral judge. Weigh the following evidence about the claim.\n\n"
        # The claim itself is included so the judge knows what the evidence is about.
        "Claim:\n{claim}\n\n"
        "Supporting Evidence:\n{supporting_evidence}\n\n"
        "Counter Evidence:\n{counter_evidence}\n\n"
        "You MUST respond with ONLY a valid JSON object (no other text before or after). "
        "The JSON must have exactly two keys:\n"
        '- "analysis": A summary of the verified findings.\n'
        '- "verdict": One of exactly these three values: "CONFIRMED", "CONTRADICTED", or "NEEDS_MORE_INFO".\n\n'
        "Example format:\n"
        # Doubled braces are literal braces in the prompt template.
        '{{"analysis": "Your analysis here", "verdict": "CONFIRMED"}}'
    )
    verification_result = (prompt | llm).invoke({
        "claim": state["claim"],
        "supporting_evidence": state["supporting_evidence"],
        "counter_evidence": state["counter_evidence"],
    })
    parsed = _parse_verifier_output(verification_result.content)
    return {
        "verified_analysis": parsed["analysis"],
        "final_verdict": parsed["verdict"],
        # .get: the first pass may run before any counter has been stored.
        "revision_number": state.get("revision_number", 0) + 1,
    }

# --- REPORT WRITER AGENT ---
def report_writer_node(state: FactCheckCrewState):
    """Agent 4: Writes the final, polished report.

    Args:
        state: Current graph state. Reads `claim`, `verified_analysis` and
            `final_verdict`.

    Returns:
        dict: `{"final_report": <text of the model's reply>}`.
    """
    print("--- 🖋️ REPORT WRITER NODE ---")
    prompt = ChatPromptTemplate.from_template(
        "You are a report writer. Based on the following claim, analysis and verdict, write a clear, neutral, and well-structured report for a general audience:\n\n"
        # The claim is passed along so the report can state what was being checked.
        "Claim:\n{claim}\n\n"
        "Analysis:\n{verified_analysis}\n\n"
        "Verdict:\n{final_verdict}"
    )
    report_result = (prompt | llm).invoke({
        "claim": state["claim"],
        "verified_analysis": state["verified_analysis"],
        "final_verdict": state["final_verdict"],
    })
    return {"final_report": report_result.content}


#### **`TODO 3`: Implement the Conditional Edge [5 marks]**
This function will read the `final_verdict` and decide the next step for the graph.

In [77]:
def should_continue_verification(state: FactCheckCrewState):
    """Pick the graph's next step from the lead verifier's verdict.

    Returns "continue_research" only while the verdict is "NEEDS_MORE_INFO"
    and ``revision_number`` is below 2; every other combination returns
    "finish_report".
    """
    print("--- üîÅ DECISION NODE ---")
    verdict = state["final_verdict"]
    revisions_done = state["revision_number"]
    wants_more_research = verdict == "NEEDS_MORE_INFO" and revisions_done < 2
    if not wants_more_research:
        print("Sufficient information gathered. Finishing report.")
        return "finish_report"
    print("More research needed. Continuing verification loop.")
    return "continue_research"

#### **`TODO 4`: Wire Up the Graph [5 marks]**


In [79]:
from langgraph.graph import StateGraph, END

# TODO: Build the graph.
# 1. Instantiate StateGraph with your FactCheckCrewState.
# 2. Add all four of your nodes.
# 3. Define the workflow:
#    - The entry point is "supporting_evidence".
#    - supporting_evidence -> devils_advocate -> lead_verifier
#    - After "lead_verifier", add the CONDITIONAL edge.
#      - "continue_research" path should loop back to "supporting_evidence".
#      - "finish_report" path should go to "report_writer".
#    - "report_writer" is the final step before the END.
# 4. Compile the graph.
workflow = StateGraph(FactCheckCrewState)

# Register the four agents under the names the edges below refer to.
crew_nodes = {
    "supporting_evidence": supporting_evidence_node,
    "devils_advocate": devils_advocate_node,
    "lead_verifier": lead_verifier_node,
    "report_writer": report_writer_node,
}
for node_name, node_fn in crew_nodes.items():
    workflow.add_node(node_name, node_fn)

# Linear part of the pipeline: evidence -> devil's advocate -> verifier.
workflow.set_entry_point("supporting_evidence")
workflow.add_edge("supporting_evidence", "devils_advocate")
workflow.add_edge("devils_advocate", "lead_verifier")

# After the verifier, either loop back for more research or write the report.
verdict_routes = {
    "continue_research": "supporting_evidence",
    "finish_report": "report_writer",
}
workflow.add_conditional_edges("lead_verifier", should_continue_verification, verdict_routes)
workflow.add_edge("report_writer", END)

app = workflow.compile()

#### **Run Your Completed Crew**


In [82]:
# Example claim used to run the compiled crew end to end.
claim = "Pakistan gained independence in 1947."
# The graph is started with the claim and a revision counter of 0.
initial_input = {"claim": claim, "revision_number": 0}

# Execute the compiled graph; the returned state is read for 'final_report' below.
final_state = app.invoke(initial_input)

print("\n\n--- ‚úÖ FINAL REPORT ---")
print(final_state['final_report'])

--- üîé SUPPORTING EVIDENCE NODE ---
--- üòà DEVIL'S ADVOCATE NODE ---
--- ‚öñÔ∏è LEAD VERIFIER NODE ---
--- üîÅ DECISION NODE ---
Sufficient information gathered. Finishing report.
--- üñãÔ∏è REPORT WRITER NODE ---


--- ‚úÖ FINAL REPORT ---
**Report: Confirmation of Pakistan's Independence**

**Introduction**

This report aims to provide a clear and concise overview of the historical fact of Pakistan's independence. Following a thorough analysis of available evidence and historical records, we are able to confirm the country's independence in 1947.

**Key Findings**

The evidence confirms that Pakistan gained independence on August 14, 1947. This date is widely recognized and supported by historical events, including the swearing-in of Muhammad Ali Jinnah as the first governor general of Pakistan. Additionally, the release of commemorative postage stamps on this occasion further substantiates the fact of Pakistan's independence.

**Historical Context**

While there may be ongoing

## **Section 4: A Comparative Study of Fact-Checking Agents** [20 marks]

How much better is a complex agent than a simple one? You will answer that question by empirically evaluating three different fact-checking methods against a real-world dataset of claims.

#### **The Goal**

You will take a dataset of fact-checked claims and run each claim through three different verifiers:
1.  **Method 1: The Zero-Shot LLM:** A baseline agent with no tools, relying solely on its internal knowledge.
2.  **Method 2: The Simple Search Agent:** A single agent equipped with a web search tool (a basic RAG approach).
3.  **Method 3: The Advanced Research Crew:** The multi-agent, adversarial fact-checking crew you just built.

Finally, you will compare the accuracy of each method to determine the value of agentic complexity.

#### **Setup: Loading the Dataset**


In [6]:
import pandas as pd
from tqdm import tqdm

# Read the dataset and keep its first 10 rows as the evaluation sample.
# A missing file is reported and leaves an empty sample.
try:
    df = pd.read_csv('claims.csv')
except FileNotFoundError:
    print("Error: 'claims.csv' not found.")
    claims_sample = pd.DataFrame()
else:
    claims_sample = df.iloc[:10].copy()

def normalize_verdict(verdict):
    """Map a raw ground-truth label to the string "True" or "False".

    The verifiers in this notebook return the capitalised strings "True" /
    "False" and accuracy is computed by exact string equality, so the ground
    truth uses the same spelling.

    Args:
        verdict: Raw label from the dataset. Booleans/numbers are judged by
            truthiness; strings are parsed (case-insensitive, surrounding
            whitespace ignored); None and NaN count as "False".

    Returns:
        "True" or "False".
    """
    # NaN is truthy in Python, so a missing label must be caught explicitly.
    if verdict is None or (isinstance(verdict, float) and verdict != verdict):
        return "False"
    if isinstance(verdict, str):
        # bool("False") is True, so textual labels have to be parsed.
        # NOTE(review): graded labels such as "mostly true" map to "False"
        # here; confirm against the actual values in claims.csv.
        true_labels = {"true", "t", "yes", "y", "1"}
        return "True" if verdict.strip().lower() in true_labels else "False"
    return "True" if verdict else "False"
    

# Skip labelling when the sample is empty (the load step falls back to an
# empty DataFrame if 'claims.csv' is missing).
if not claims_sample.empty:
    # Derive the evaluation label from the dataset's 'text review' column.
    claims_sample['ground_truth'] = claims_sample['text review'].apply(normalize_verdict)

Note: You might want to enforce structured outputs to ensure your final answer is a True/False: <br> 
https://docs.langchain.com/oss/python/langchain/structured-output <br>
https://forum.langchain.com/t/make-a-llm-with-structured-output-call-a-tool/622

### **Task 1: Baseline Fact-Checker (Zero-Shot LLM) [5 marks]**

This agent has no access to the outside world. It will make its judgment based only on the information it was trained on.

**Your Task:** Implement the `verify_claim_zero_shot` function. This function should use a simple LLM chain to classify a claim as "True" or "False".


In [7]:
# All our necessary imports from previous sections
from langchain.agents import create_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_tavily import TavilySearch
from dotenv import load_dotenv
import os

load_dotenv()

# This cell reads the Groq key from GROQ_API_KEY2. Check it before exporting:
# assigning None into os.environ raises an opaque TypeError, and the
# diagnostic print below would never be reached.
groq_api_key = os.getenv("GROQ_API_KEY2")
print("Using GROQ API Key:", groq_api_key is not None)
if groq_api_key is None:
    raise RuntimeError(
        "GROQ_API_KEY2 is not set; add it to your .env file or environment."
    )
os.environ["GROQ_API_KEY"] = groq_api_key

llm = ChatGroq(api_key=groq_api_key, model="llama-3.3-70b-versatile")
search_tool = TavilySearch(max_results=4)

def verify_claim_zero_shot(claim: str) -> str:
    """Classify a claim as "True" or "False" with no external tools.

    The claim is sent to the module-level ``llm`` with an instruction to answer
    with a single word. The reply is parsed leniently: any reply containing
    "true" (case-insensitive) yields "True", everything else yields "False".
    """
    fact_check_prompt = ChatPromptTemplate.from_template(
        "You are a fact-checker. Verify the following claim:\n\n{claim}\n\n"
        "Respond with ONLY the word 'True' if the claim is correct, or 'False' if it is incorrect."
    )
    reply = (fact_check_prompt | llm).invoke({"claim": claim})
    normalized_reply = reply.content.strip().lower()
    return "True" if "true" in normalized_reply else "False"

# --- Run the evaluation ---
print("Running Zero-Shot Verifier...")
results_zero_shot = []
# Append one verdict per claim so partial results remain if a call fails midway.
claim_progress = tqdm(claims_sample['claim'], desc="Zero-Shot Verification")
for claim in claim_progress:
    results_zero_shot.append(verify_claim_zero_shot(claim))

claims_sample['zero_shot_verdict'] = results_zero_shot

Using GROQ API Key: True
Running Zero-Shot Verifier...


Zero-Shot Verification: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:02<00:00,  3.51it/s]


### **Task 2: Simple RAG Fact-Checker (Agent with Search Tool) [5 marks]**
This agent represents a standard RAG (Retrieval-Augmented Generation) approach. It can search the web for information before making a decision.


**Your Task:** Implement the `verify_claim_with_search` function. This function will create a simple agent equipped with the `TavilySearch` tool.


In [12]:
def verify_claim_with_search(claim: str) -> str:
    """Verify a claim with a single agent that can search the web.

    An agent is built from the module-level ``llm`` and ``search_tool`` and is
    instructed to finish its answer with the word 'True' or 'False'.

    Parsing of the final message:
      1. If the reply ends with "true"/"false" (trailing punctuation or
         markdown allowed), that closing word is the verdict. This avoids
         misreading replies such as "...it is not true. False".
      2. Otherwise fall back to the lenient rule: "True" if the text contains
         "true" anywhere, else "False".

    Args:
        claim: The claim to check, sent to the agent as the user message.

    Returns:
        "True" or "False".
    """
    import re  # local import keeps this cell runnable on its own

    agent = create_agent(
        model=llm,
        tools=[search_tool],
        system_prompt="You are a fact-checker with access to a web search tool. Use the tool to find information and make a final judgment on the claim. You must end your final response with ONLY the word 'True' or 'False'."
    )
    result = agent.invoke({
        "messages": [
            {"role": "user", "content": claim}
        ]
    })
    final_output = result["messages"][-1].content.strip().lower()

    # Prefer the verdict the model was told to put at the very end.
    closing_verdict = re.search(r"\b(true|false)\b[^a-z0-9]*$", final_output)
    if closing_verdict:
        return "True" if closing_verdict.group(1) == "true" else "False"

    # No explicit closing verdict: keep the lenient containment check.
    if "true" in final_output:
        return "True"
    return "False"

# --- Run the evaluation ---
print("\nRunning Simple Search Verifier...")
results_with_search = []
# Append one verdict per claim so partial results remain if a call fails midway.
claim_progress = tqdm(claims_sample['claim'], desc="Search Verification")
for claim in claim_progress:
    results_with_search.append(verify_claim_with_search(claim))

claims_sample['simple_search_verdict'] = results_with_search


Running Simple Search Verifier...


Search Verification:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 5/10 [02:16<02:16, 27.28s/it]


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01kacdbddhf0ftfct93hyasf0j` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 98430, Requested 1841. Please try again in 3m54.144s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}

### **Task 3: Advanced Fact-Checker (Multi-Agent Crew) [5 marks]**

**You may re-use the multi-agent crew you built in the previous section.**

**Your Task:** Implement the `verify_claim_with_crew` function. This function will invoke your crew and, most importantly, translate its complex output (`CONFIRMED`, `CONTRADICTED`, `NEEDS_MORE_INFO`) into the simple "True"/"False" format required for evaluation.

Note: Restrict the number of revisions to 2. If the crew is not able to arrive at a definitive answer, output 'uncertain'. I'll leave it up to you to think and decide if such behavior should be penalized. You may compute your accuracy accordingly.

In [None]:
# First, bring over your complete, working Multi-Agent Crew graph from the previous section.
# (The full solution code for the "Devil's Advocate" crew is assumed to be here)
# ... app = workflow.compile() ...
load_dotenv()
llm = ChatGroq(api_key=os.getenv("GROQ_API_KEY"), model="llama-3.3-70b-versatile")

workflow = StateGraph(FactCheckCrewState)

# Register the four agents under the names the edges below refer to.
crew_nodes = {
    "supporting_evidence": supporting_evidence_node,
    "devils_advocate": devils_advocate_node,
    "lead_verifier": lead_verifier_node,
    "report_writer": report_writer_node,
}
for node_name, node_fn in crew_nodes.items():
    workflow.add_node(node_name, node_fn)

# Linear part of the pipeline: evidence -> devil's advocate -> verifier.
workflow.set_entry_point("supporting_evidence")
workflow.add_edge("supporting_evidence", "devils_advocate")
workflow.add_edge("devils_advocate", "lead_verifier")

# After the verifier, either loop back for more research or write the report.
verdict_routes = {
    "continue_research": "supporting_evidence",
    "finish_report": "report_writer",
}
workflow.add_conditional_edges("lead_verifier", should_continue_verification, verdict_routes)
workflow.add_edge("report_writer", END)

app = workflow.compile()

def verify_claim_with_crew(claim: str) -> str:
    """Run the multi-agent crew on a claim and reduce its verdict to a label.

    The compiled graph ``app`` is invoked with the claim and a revision counter
    of 0. Its ``final_verdict`` is translated as:
      - "CONFIRMED"    -> "True"
      - "CONTRADICTED" -> "False"
      - anything else  -> "Other" (no definitive answer)
    """
    crew_state = app.invoke({"claim": claim, "revision_number": 0})
    crew_verdict = crew_state["final_verdict"]
    if crew_verdict == "CONFIRMED":
        return "True"
    if crew_verdict == "CONTRADICTED":
        return "False"
    # NEEDS_MORE_INFO, or any unexpected value, is reported as undecided.
    return "Other"

# --- Run the evaluation ---
print("\nRunning Multi-Agent Crew Verifier...")
results_with_crew = []
# Ensure your crew's LangGraph `app` is defined and compiled in a cell above this one!
claim_progress = tqdm(claims_sample['claim'], desc="Crew Verification")
for claim in claim_progress:
    results_with_crew.append(verify_claim_with_crew(claim))

claims_sample['crew_verdict'] = results_with_crew


Running Multi-Agent Crew Verifier...


Crew Verification:   0%|          | 0/10 [00:00<?, ?it/s]

--- üîé SUPPORTING EVIDENCE NODE ---


Crew Verification:   0%|          | 0/10 [00:00<?, ?it/s]


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01kacdbddhf0ftfct93hyasf0j` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 99650, Requested 1824. Please try again in 21m13.536s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}

### **Final Evaluation & Analysis [5 marks]**

Now for the moment of truth. We will calculate the accuracy of each method by comparing its verdicts to the ground truth from the dataset.

**Your Task:** Run the evaluation code and then, in the final markdown cell, write a brief analysis of the results.

In [None]:
# --- Calculate Accuracy ---
ground_truth = claims_sample['ground_truth'].values


def _verdict_accuracy(verdict_column: str) -> float:
    """Return the fraction of rows whose verdict matches the ground truth.

    Both sides are compared as stripped, lower-cased strings, because the
    verifiers emit "True"/"False" and a case-sensitive comparison against
    differently-cased labels would score every row as wrong. Any other label
    (for example the crew's "Other") never matches and counts as incorrect.
    """
    predicted = claims_sample[verdict_column].astype(str).str.strip().str.lower()
    expected = claims_sample['ground_truth'].astype(str).str.strip().str.lower()
    return (predicted.values == expected.values).mean()


accuracy_zero_shot = _verdict_accuracy('zero_shot_verdict')
accuracy_simple_search = _verdict_accuracy('simple_search_verdict')
accuracy_crew = _verdict_accuracy('crew_verdict')

print("--- Final Results ---")
print(f"Zero-Shot LLM Accuracy: {accuracy_zero_shot:.2%}")
print(f"Simple Search Agent Accuracy: {accuracy_simple_search:.2%}")
print(f"Multi-Agent Crew Accuracy: {accuracy_crew:.2%}")

print("\n--- Detailed Comparison ---")
display(claims_sample[['claim', 'ground_truth', 'zero_shot_verdict', 'simple_search_verdict', 'crew_verdict']])

#### **Analysis of Results**

**(TODO: Write your analysis here in this markdown cell.)**

1. **Which method performed the best? Why?**
2. **Do you think these results were expected?**
3. **If the multi-agent crew was not able to output a final answer - what's better from a social welfare perspective: an output you are not confident about or refraining from giving an output if you are not confident?**