# Deep Research AI Agentic System
    This notebook implements a multi-agent system for deep research with:
    1. Research Agent - Focuses on data collection and information gathering
    2. Answer Agent - Synthesizes information and drafts coherent answers

In [1]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode
from langchain_core.messages import HumanMessage, AnyMessage, AIMessage
from typing_extensions import Annotated
from langgraph.graph.message import add_messages
from typing import TypedDict

In [2]:
from dotenv import load_dotenv
import os
load_dotenv()
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
if os.getenv("TAVILY_API_KEY") is None:
    raise ValueError("TAVILY_API_KEY is not set")
if os.getenv("GROQ_API_KEY") is None:
    raise ValueError("GROQ_API_KEY is not set")


In [3]:
from langchain_community.tools import ArxivQueryRun, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper

arxiv_wrapper = ArxivAPIWrapper(top_k_results=2, doc_content_chars_max=100)
wikipedia_wrapper = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=100)

arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_wrapper, description="Search for papers on a given topic")
wikipedia_tool = WikipediaQueryRun(api_wrapper=wikipedia_wrapper)

In [4]:
tavily_tool = TavilySearchResults(max_results=5)
llm = ChatGroq(model_name="llama3-8b-8192", temperature=0)
tools = [arxiv_tool, wikipedia_tool, tavily_tool]
llm_with_tools = llm.bind_tools(tools)

In [5]:
# Define agent state
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]

# Research Agent: Crawls websites for online information
def research_agent(state: AgentState):
    query_text = state["messages"][-1].content  # Extract text content from message
    response = tavily_tool.invoke({"query": query_text})  # Fetch data using Tavily

    # Ensure the response is wrapped in an AIMessage
    return {"messages": [AIMessage(content=str(response))]}

# Answer Drafting Agent: Processes research data and generates a meaningful response
def answer_drafting_agent(state: AgentState):
    research_data = state["messages"][-1].content  # Extract text content from message
    response = llm.invoke(f"Draft an answer based on the provided research data: {research_data}")  # Pass as raw string (not HumanMessage)
    return {"messages": [AIMessage(content=str(response))]}

In [6]:
# Build LangGraph
build_graph = StateGraph(AgentState)
build_graph.add_node("research_agent", research_agent)
build_graph.add_node("answer_drafting_agent", answer_drafting_agent)
build_graph.add_node("tools", ToolNode([tavily_tool]))

# Define execution flow
build_graph.add_edge(START, "research_agent")
build_graph.add_edge("research_agent", "answer_drafting_agent")
build_graph.add_edge("answer_drafting_agent", END)

graph = build_graph.compile()

In [7]:
print(graph.get_graph().print_ascii())

      +-----------+        
      | __start__ |        
      +-----------+        
            *              
            *              
            *              
    +----------------+     
    | research_agent |     
    +----------------+     
            *              
            *              
            *              
+-----------------------+  
| answer_drafting_agent |  
+-----------------------+  
            *              
            *              
            *              
       +---------+         
       | __end__ |         
       +---------+         
None


In [8]:
import json
from langchain_core.messages import HumanMessage
messages = graph.invoke({"messages": "hey my name is jay, tell me recent news on quantum computing"})
for message in messages["messages"]:
  message.pretty_print()

formatted_messages = []

for message in messages["messages"]:
    if isinstance(message, AIMessage) and isinstance(message.content, list):
        for news in message.content:
            formatted_messages.append({
                "title": news["title"],
                "url": news["url"],
                "content": news["content"][:200] + "..."  # Truncated for readability
            })

# Print JSON formatted output
print(json.dumps(formatted_messages, indent=4))

[]


In [15]:
print(messages["messages"])

[HumanMessage(content='hey my name is jay, tell me recent news on quantum computing', additional_kwargs={}, response_metadata={}, id='463f92e8-b568-4dfc-b5a4-0c25e7848e85'), AIMessage(content='[{\'title\': \'Quantum Computers News - ScienceDaily\', \'url\': \'https://www.sciencedaily.com/news/computers_math/quantum_computers/\', \'content\': \'Wednesday, February 19, 2025\\n\\nTuesday, February 18, 2025\\n\\nFriday, February 14, 2025\\n\\nTuesday, February 11, 2025\\n\\nMonday, February 10, 2025\\n\\nFriday, February 7, 2025\\n\\nWednesday, February 5, 2025\\n\\nWednesday, January 29, 2025\\n\\nTuesday, January 28, 2025\\n\\nMonday, January 27, 2025\\n\\nFriday, January 24, 2025\\n\\nThursday, January 23, 2025\\n\\nThursday, January 16, 2025\\n\\nTuesday, January 14, 2025\\n\\nMonday, January 13, 2025\\n\\nThursday, January 9, 2025\\n\\nWednesday, January 8, 2025\\n\\nMonday, January 6, 2025 [...] Thursday, October 31, 2024\\n\\nWednesday, October 30, 2024\\n\\nMonday, October 28, 2024

In [19]:
import json

for msg in messages["messages"]:
    print(f"Type: {type(msg).__name__}")

    # Attempt to parse content if it's a string representation of a list/dictionary
    try:
        parsed_content = json.loads(msg.content)
    except (json.JSONDecodeError, TypeError):
        print(f"Failed to parse content:")
        parsed_content = msg.content  # Keep it as-is if parsing fails

    if isinstance(parsed_content, list):  # If it's a list of dictionaries
        for item in parsed_content:
            print(json.dumps(item, indent=4))  # Pretty print each dictionary
    else:
        print(f"Content: {parsed_content}")

    print("-" * 40)  # Separator for readability

Type: HumanMessage
Failed to parse content:
Content: hey my name is jay, tell me recent news on quantum computing
----------------------------------------
Type: AIMessage
Failed to parse content:
Content: [{'title': 'Quantum Computers News - ScienceDaily', 'url': 'https://www.sciencedaily.com/news/computers_math/quantum_computers/', 'content': 'Wednesday, February 19, 2025\n\nTuesday, February 18, 2025\n\nFriday, February 14, 2025\n\nTuesday, February 11, 2025\n\nMonday, February 10, 2025\n\nFriday, February 7, 2025\n\nWednesday, February 5, 2025\n\nWednesday, January 29, 2025\n\nTuesday, January 28, 2025\n\nMonday, January 27, 2025\n\nFriday, January 24, 2025\n\nThursday, January 23, 2025\n\nThursday, January 16, 2025\n\nTuesday, January 14, 2025\n\nMonday, January 13, 2025\n\nThursday, January 9, 2025\n\nWednesday, January 8, 2025\n\nMonday, January 6, 2025 [...] Thursday, October 31, 2024\n\nWednesday, October 30, 2024\n\nMonday, October 28, 2024\n\nThursday, October 24, 2024\n\nThu