<a href="https://colab.research.google.com/github/amalsalilan/Infosys-Springboard-Virtual-Internship-6.0-Open-Deep-Researcher-batch-2/blob/Tejas_V/Opendeep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Install required packages (run once)
!pip install -q google-generativeai langgraph langchain langchain-google-genai tavily-python python-dotenv


In [None]:
# 2. Imports & API setup
import os
import time
from typing import TypedDict, List

from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph, END
from tavily import TavilyClient

# Load secrets from Colab: copy both API keys out of Colab's `userdata` store
# into environment variables. TAVILY_API_KEY is read back explicitly below;
# GOOGLE_API_KEY is presumably picked up from the environment by the Gemini
# client (not shown here - confirm against the langchain-google-genai docs).
os.environ["GOOGLE_API_KEY"]  = userdata.get('GOOGLE_API_KEY')
os.environ["TAVILY_API_KEY"]  = userdata.get('TAVILY_API_KEY')

# Initialize LLM and Search client. Both are module-level objects that the
# agent functions in later cells reference by name (`llm`, `tavily_client`),
# so this cell must run before any agent is invoked.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)
tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])


In [None]:
# 3. Define state and initialization
class ResearchState(TypedDict):
    """Shared state dictionary handed from one pipeline agent to the next."""

    # The user's research question; clarify_agent may append extra context.
    original_query: str
    # Sub-questions chosen in planner_agent (scoping_agent may filter them).
    sub_questions: List[str]
    # One dict per search hit with "question", "title", "url", "content" keys.
    search_results: List[dict]
    # Report text produced by writer_agent (the LLM is prompted for markdown).
    synthesized_report: str
    # Label of the most recently finished stage, e.g. "planning_complete".
    current_step: str

def initialize_research(query: str) -> ResearchState:
    """Build the starting pipeline state for ``query``.

    Args:
        query: The user's research question, stored as ``original_query``.

    Returns:
        A new state dict whose list/report fields are empty and whose
        ``current_step`` is "initialized".
    """
    fresh_state: ResearchState = {
        "original_query": query,
        "sub_questions": [],
        "search_results": [],
        "synthesized_report": "",
        "current_step": "initialized",
    }
    return fresh_state


In [None]:
# 4. Planner Agent with user selection
def _parse_sub_questions(text: str) -> List[str]:
    """Extract questions from numbered ("1.", "1)") or bulleted ("-", "* ") lines."""
    questions = []
    for raw_line in text.strip().splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line[0].isdigit():
            remainder = line.lstrip("0123456789")
            # Remove only the list delimiter right after the number. Splitting
            # on the first "." anywhere in the line would cut questions that
            # contain their own periods (e.g. "U.S.").
            if remainder[:1] in (".", ")", ":"):
                remainder = remainder[1:]
            question = remainder.strip()
        elif line.startswith("-") or line.startswith(("* ", "• ")):
            question = line.lstrip("-*•").strip()
        else:
            # Not a list item (heading, preamble, ...): ignore it.
            continue
        if question:
            questions.append(question)
    return questions


def _pick_by_number(choice: str, candidates: List[str]) -> List[str]:
    """Map a "1, 3 4"-style selection onto ``candidates``, skipping bad or repeated picks."""
    picked = []
    # Accept commas and/or whitespace as separators.
    for token in choice.replace(",", " ").split():
        if not token.isdecimal():
            continue
        position = int(token)
        if 1 <= position <= len(candidates):
            question = candidates[position - 1]
            # A repeated index would only trigger a duplicate search later.
            if question not in picked:
                picked.append(question)
    return picked


def planner_agent(state: ResearchState) -> ResearchState:
    """Propose sub-questions with the LLM and let the user pick or replace them.

    Reads ``state["original_query"]``, asks the LLM for 3-5 sub-questions,
    prints them, and then blocks on ``input()`` so the user can either select
    candidates by number or type custom questions (one per line, finished by
    an empty line). If nothing valid is chosen, the first three candidates
    are used.

    Args:
        state: Pipeline state; must contain ``original_query``.

    Returns:
        The same ``state`` object with ``sub_questions`` filled in and
        ``current_step`` set to "planning_complete".
    """
    print("\n🧠 PLANNER AGENT")
    planning_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a research planning expert. Break down the user's question into 3–5 focused sub-questions, one per line, numbered."""),
        ("user", "Main research question: {query}")
    ])
    chain = planning_prompt | llm
    response = chain.invoke({"query": state["original_query"]})

    # Fall back to the main question itself when no list items were found.
    candidates = _parse_sub_questions(response.content) or [state["original_query"]]

    # Display and let user choose/edit
    print("\nAuto-generated sub-questions:")
    for i, q in enumerate(candidates, 1):
        print(f"  {i}. {q}")
    print("\nEnter the numbers of the ones you want (comma-separated),")
    print("or type new/edited questions one per line, ending with an empty line.")

    choice = input("Selection or new questions: ").strip()
    if all(ch.isdigit() or ch in ", " for ch in choice):
        # Only digits, commas and spaces (or nothing): a numeric selection.
        chosen = _pick_by_number(choice, candidates)
    else:
        # Free text: the line just typed is itself the first custom question,
        # exactly as the instructions above invite, so keep it.
        chosen = [choice]
        print("Add more custom sub-questions, one per line. Submit an empty line to finish.")
        while True:
            line = input().strip()
            if not line:
                break
            chosen.append(line)

    # Fallback: empty or entirely invalid selection.
    if not chosen:
        chosen = candidates[:3]
        print(f"\nℹ️ No valid selection; defaulting to the first {len(chosen)} sub-question(s).")

    state["sub_questions"] = chosen
    state["current_step"] = "planning_complete"
    print(f"\n✅ Using {len(chosen)} sub-questions:")
    for i, q in enumerate(chosen, 1):
        print(f"  {i}. {q}")
    return state


In [None]:
# 5. Searcher Agent
def searcher_agent(state: ResearchState) -> ResearchState:
    """Run one Tavily search per sub-question and store the trimmed hits.

    For every entry in ``state["sub_questions"]`` up to three results are
    requested; each hit is kept as a dict with "question", "title", "url" and
    "content" (content cut to 800 characters). A failing search is reported
    on stdout and skipped. If no hit was collected at all, a single
    "No results" placeholder entry is stored instead.

    Returns:
        The same ``state`` object with ``search_results`` filled in and
        ``current_step`` set to "search_complete".
    """
    print("\n🔍 SEARCHER AGENT")
    questions = state["sub_questions"]
    total = len(questions)
    hits = []
    for position, question in enumerate(questions, start=1):
        print(f" [{position}/{total}] Searching: {question}")
        # Short pause before each request (presumably to stay polite to the API).
        time.sleep(0.5)
        try:
            response = tavily_client.search(
                query=question, search_depth="basic", max_results=3
            )
            for item in response.get("results", []):
                hit = {
                    "question": question,
                    "title": item.get("title", ""),
                    "url": item.get("url", ""),
                    "content": item.get("content", "")[:800],
                }
                hits.append(hit)
        except Exception as err:
            # Best effort: one failed search must not abort the others.
            print(f"  ⚠️ Error: {err}")

    if not hits:
        # Give the writer something to cite even when every search came back empty.
        hits = [{
            "question": state["original_query"],
            "title": "No results",
            "url": "",
            "content": "No data",
        }]
    state["search_results"] = hits
    state["current_step"] = "search_complete"
    return state


In [None]:
# 6. Writer Agent
def writer_agent(state: ResearchState) -> ResearchState:
    """Ask the LLM to turn the collected search hits into a report.

    Each entry of ``state["search_results"]`` is rendered as a numbered
    "[Source N]" block; the blocks, the original query and the sub-questions
    are sent to the LLM with instructions to write a markdown report.

    Returns:
        The same ``state`` object with ``synthesized_report`` set to the LLM
        response content and ``current_step`` set to "complete".
    """
    print("\n✍️ WRITER AGENT")

    # One text block per hit, numbered from 1 so the LLM can cite "[Source N]".
    source_blocks = []
    for number, hit in enumerate(state["search_results"], start=1):
        source_blocks.append(
            f"[Source {number}] ({hit['question']})\nTitle: {hit['title']}\n{hit['content']}\nURL: {hit['url']}"
        )
    context = "\n\n".join(source_blocks)

    system_message = """You are a research writer. Synthesize the info into a markdown report with:
1. Introduction
2. Main Findings by sub-question
3. Key Insights
4. Conclusion
5. Sources (use [Source X] citations)"""
    user_message = """Original question: {query}

Sub-questions:
{sub_questions}

Data:
{context}

Write the report now in markdown."""
    report_prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("user", user_message),
    ])

    template_values = {
        "query": state["original_query"],
        "sub_questions": "\n".join(f"- {q}" for q in state["sub_questions"]),
        "context": context,
    }
    reply = (report_prompt | llm).invoke(template_values)

    state["synthesized_report"] = reply.content
    state["current_step"] = "complete"
    return state


In [None]:
# 1. Clarify Agent: ask user to refine main query scope
def clarify_agent(state):
    """Optionally append user-supplied context to the main research query.

    Prompts on stdin for extra context. A non-empty answer is appended to
    ``state["original_query"]`` in parentheses; an empty answer leaves the
    query untouched. Either way ``current_step`` becomes "clarify_complete".

    Returns:
        The same ``state`` object, updated in place.
    """
    print("\n🔍 CLARIFY AGENT")
    print("Please provide any additional context or constraints for your research question.")
    print("For example: specify time period, geographic region, target audience, or topic focus.")

    extra_context = input("Additional context (or press Enter to skip): ").strip()
    if not extra_context:
        print("✅ No additional context provided.")
    else:
        state["original_query"] = state["original_query"] + f" ({extra_context})"
        print(f"✅ Query refined to: {state['original_query']}")

    state["current_step"] = "clarify_complete"
    return state


In [None]:
# 2. Scoping Agent: narrow down or confirm sub-question scope
def scoping_agent(state):
    """Optionally filter the planned sub-questions by user-supplied keywords.

    Prompts on stdin for comma-separated keywords. A sub-question is kept when
    it contains at least one keyword (case-insensitive substring match). If no
    keywords are given, or no sub-question matches, the list is left as is.
    ``current_step`` is always set to "scoping_complete".

    Returns:
        The same ``state`` object, updated in place.
    """
    print("\n🎯 SCOPING AGENT")
    print("Review the planned sub-questions and optionally narrow them by keyword or topic.")
    print("Enter keywords to filter sub-questions, or press Enter to keep all.")

    raw_keywords = input("Scope keywords (comma-separated): ").strip().lower()
    # Discard blank entries (from "gpu," or "gpu,,cpu"): an empty string is a
    # substring of every question and would make the filter match everything.
    keywords = [kw.strip() for kw in raw_keywords.split(",") if kw.strip()]

    if keywords:
        filtered = [
            q for q in state["sub_questions"]
            if any(kw in q.lower() for kw in keywords)
        ]
        if filtered:
            state["sub_questions"] = filtered
            print(f"✅ Sub-questions scoped to {len(filtered)} items.")
        else:
            print("⚠️ No sub-questions matched—keeping original list.")
    else:
        print("✅ No scoping applied; all sub-questions retained.")

    state["current_step"] = "scoping_complete"
    return state


In [None]:
# 3. Build the pipeline: clarify → planner → scoping → searcher → writer
def create_research_app_with_clarify():
    """Assemble and compile the five-stage research workflow graph.

    The stages run strictly in sequence, starting at "clarify" and ending
    after "writer".

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    # Stages in execution order; each one feeds the next.
    stages = [
        ("clarify", clarify_agent),
        ("planner", planner_agent),
        ("scoping", scoping_agent),
        ("searcher", searcher_agent),
        ("writer", writer_agent),
    ]

    graph = StateGraph(ResearchState)
    for stage_name, stage_fn in stages:
        graph.add_node(stage_name, stage_fn)
    graph.set_entry_point("clarify")

    # Link every stage to its successor; the last stage links to END.
    stage_names = [stage_name for stage_name, _ in stages]
    for upstream, downstream in zip(stage_names, stage_names[1:] + [END]):
        graph.add_edge(upstream, downstream)
    return graph.compile()

research_app = create_research_app_with_clarify()


In [None]:
# 4. Reflection Step: ask user for feedback after report
def reflect_with_user(final_state):
    """Collect three free-text feedback answers and attach them to the state.

    Prints the review questions, then prompts on stdin once per question.
    A blank answer is recorded as "No comment". The answers are stored, in
    order, under ``final_state["reflection"]``.

    Returns:
        The same ``final_state`` object, updated in place.
    """
    for banner_line in (
        "\n💭 REFLECTION",
        "Please review the report and answer:",
        "1. Were the sub-questions relevant and comprehensive?",
        "2. Is any key aspect missing?",
        "3. How would you improve the report or add detail?",
    ):
        print(banner_line)

    prompts = (
        "1. Relevance/completeness of sub-questions?",
        "2. Missing aspects?",
        "3. Improvements or additions?",
    )
    # Attach reflections to state
    final_state["reflection"] = [
        input(f"{prompt}\n> ").strip() or "No comment" for prompt in prompts
    ]
    print("\n✅ Thank you for your feedback!")
    return final_state


In [None]:
# 5. Main entry point, now including reflection
def start_research_with_clarify():
    """Run the interactive research workflow end to end.

    Prompts for a research question, runs it through ``research_app``, prints
    the synthesized report, and then collects the user's reflections via
    ``reflect_with_user``. If the question is blank, a notice is printed and
    the function returns without running the pipeline.

    Returns:
        None. All results are printed to stdout.
    """
    print("\n=== OpenDeepResearcher with Clarify & Reflection ===")
    q = input("Enter your research question: ").strip()
    if not q:
        # A blank query would still drive LLM and search calls with nothing
        # meaningful to research, so stop before the pipeline starts.
        print("⚠️ No research question entered; nothing to research.")
        return
    state = initialize_research(q)
    final = research_app.invoke(state)
    print("\n=== Research Report ===\n")
    print(final["synthesized_report"])
    final = reflect_with_user(final)
    # Optionally save reflections
    print("\nYour reflections:", final.get("reflection", []))

# Run the enhanced workflow. This call executes as soon as the cell runs and
# blocks on interactive input() prompts, starting with the research question.
start_research_with_clarify()



=== OpenDeepResearcher with Clarify & Reflection ===
Enter your research question: which is the best laptop

🔍 CLARIFY AGENT
Please provide any additional context or constraints for your research question.
For example: specify time period, geographic region, target audience, or topic focus.
Additional context (or press Enter to skip): india,gamers
✅ Query refined to: which is the best laptop (india,gamers)

🧠 PLANNER AGENT

Auto-generated sub-questions:
  1. What are the most important hardware specifications for gaming laptops in India (e.g., CPU, GPU, RAM, storage, display)?
  2. What are the top-rated gaming laptop brands and models currently available in the Indian market, considering different price points?
  3. What are the common issues or complaints reported by gamers in India regarding specific laptop models or brands?
  4. How do factors like cooling solutions, battery life, and display quality impact the overall gaming experience on laptops in the Indian climate?
  5. What 