<a href="https://colab.research.google.com/github/amalsalilan/Infosys-Springboard-Virtual-Internship-6.0-Open-Deep-Researcher-batch-2/blob/Tejas_V/OpenChatResponse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade langgraph google-generativeai tavily-python --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.3/153.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.9/43.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.7/216.7 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from getpass import getpass
import json, re, traceback, time
from typing import TypedDict

# LangGraph StateGraph
from langgraph.graph import StateGraph

# Google Gemini client
import google.generativeai as genai

# Tavily client
from tavily import TavilyClient

# Enter API keys
# getpass() is used so the keys are typed at a prompt instead of being
# hard-coded into the notebook source.
GENAI_API_KEY = getpass("Enter Google Gemini API Key: ")
TAVILY_API_KEY = getpass("Enter Tavily API Key: ")

# Configure clients
# genai is configured once at module level; `tavily` is the shared client
# object that tavily_search() reads as a global.
genai.configure(api_key=GENAI_API_KEY)
tavily = TavilyClient(api_key=TAVILY_API_KEY)

Enter Google Gemini API Key: ··········
Enter Tavily API Key: ··········


In [None]:

class ResearchState(TypedDict, total=False):
    """State dictionary handed from node to node in the research graph.

    Declared with ``total=False``, so every key is optional.
    """

    # Raw text typed by the user.
    user_input: str
    # Clarity verdict, refined question, or follow-up prompt set by
    # clarification_agent.
    clarification: str
    # Final query string set by query_generator.
    query: str
    # Label describing how the answer was produced.
    pipeline: str
    # Answer text shown to the user.
    summary: str

In [None]:

def safe_extract_genai_text(response):
    """Return the plain text carried by a Gemini response object.

    Access patterns are tried in order:

    1. ``response.text`` (may raise, e.g. when the response has no valid part);
    2. the text of the parts of each entry in ``response.candidates``
       (first candidate that yields text wins), falling back to that
       candidate's own ``text`` attribute;
    3. ``str(response)`` as a last resort.

    Args:
        response: Object returned by ``GenerativeModel.generate_content`` (or
            anything with a similar attribute layout).

    Returns:
        The stripped text, or ``""`` if even ``str(response)`` fails.
    """
    # Pattern 1: convenience accessor. It can raise on blocked/empty
    # responses, hence the guard.
    try:
        quick_text = getattr(response, "text", None)
        if quick_text:
            return quick_text.strip()
    except Exception:
        pass

    # Pattern 2: walk candidates -> content -> parts. ``candidates`` is a
    # sequence; a single candidate object is tolerated for backward
    # compatibility with the previous implementation.
    try:
        candidates = getattr(response, "candidates", None)
        if candidates is None:
            candidates = []
        elif hasattr(candidates, "content") or hasattr(candidates, "text"):
            candidates = [candidates]

        for candidate in candidates:
            content = getattr(candidate, "content", None)
            parts = getattr(content, "parts", None) or []
            pieces = [
                piece
                for piece in (getattr(part, "text", None) for part in parts)
                if isinstance(piece, str) and piece
            ]
            joined = "".join(pieces).strip()
            if joined:
                return joined

            candidate_text = getattr(candidate, "text", None)
            if isinstance(candidate_text, str) and candidate_text.strip():
                return candidate_text.strip()
    except Exception:
        pass

    # Pattern 3: last resort so callers always receive a string.
    try:
        return str(response)
    except Exception:
        return ""

In [None]:

def _parse_clarity_verdict(raw_text):
    """Return the first JSON object found in *raw_text*, or ``{}`` if none parses."""
    if not isinstance(raw_text, str):
        return {}

    attempts = [raw_text]
    # Models often wrap JSON in prose or code fences; retry on the outermost
    # brace-delimited span.
    embedded = re.search(r"\{.*\}", raw_text, flags=re.DOTALL)
    if embedded:
        attempts.append(embedded.group(0))

    for attempt in attempts:
        try:
            parsed = json.loads(attempt)
        except ValueError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}


def clarification_agent(state: ResearchState) -> ResearchState:
    """Graph node: judge how clear the user's question is.

    Sets ``state["clarification"]`` to one of:

    * ``"Could you type your question?"`` when the input is empty;
    * ``"This request is clear"`` for memory-style inputs ("my name is ...",
      "previous question"), for questions the model rates ``clear``, and
      when the model call itself fails (best-effort default);
    * the model's refined question when it rates the input
      ``vague_guessable`` and supplies a rewrite;
    * ``"Could you provide more details about your question?"`` otherwise.

    Args:
        state: Current graph state; only ``user_input`` is read.

    Returns:
        The same state dict, mutated in place.
    """
    user_input = (state.get("user_input") or "").strip()
    if not user_input:
        state["clarification"] = "Could you type your question?"
        return state

    # Inputs that the later nodes answer from memory never need clarification.
    lowered = user_input.lower()
    if re.search(r"\bmy name is\b", lowered) or re.search(
        r"\b(previous|last)\s*(que|question|query|sawal)\b", lowered
    ):
        state["clarification"] = "This request is clear"
        return state

    prompt = f"""
You are a system that assesses clarity of user research questions.

Question: "{user_input}"

Classify the question into one of:
- clear
- vague_guessable
- too_vague

If you return "vague_guessable", provide a short "refined_question" that is a reasonable interpretation.
Respond ONLY in JSON with keys: "status" and "refined_question" (string or empty).
Example:
{{"status":"vague_guessable", "refined_question":"..."}}
"""

    try:
        model = genai.GenerativeModel("gemini-1.5-flash")
        response = model.generate_content(prompt)
        verdict = _parse_clarity_verdict(safe_extract_genai_text(response))
        # Coerce defensively: a JSON null or number here must not raise and
        # thereby be mistaken for an API failure (which defaults to "clear").
        status = str(verdict.get("status") or "").strip().lower()
        refined = str(verdict.get("refined_question") or "").strip()
    except Exception:
        # Best effort: if the model is unreachable, let the question through.
        traceback.print_exc()
        status, refined = "clear", ""

    if status == "clear":
        state["clarification"] = "This request is clear"
    elif status == "vague_guessable" and refined:
        state["clarification"] = refined
    else:
        state["clarification"] = "Could you provide more details about your question?"

    return state

In [None]:

def query_generator(state: ResearchState) -> ResearchState:
    """Graph node: turn the clarification verdict into the final query.

    * Clear request, or the empty-input prompt
      ("Could you type your question?"): the query is the user's own text.
      For empty input that leaves the query empty, so the research node
      reports "No query provided." instead of researching the prompt text.
    * "Could you provide ..." follow-up: the user's text annotated with the
      follow-up question.
    * Anything else is treated as a refined question and used as the query,
      falling back to the user's text when the clarification is empty.

    Args:
        state: Current graph state; reads ``clarification`` and ``user_input``.

    Returns:
        The same state dict with ``query`` set.
    """
    # ``or ""`` also covers keys that are present but None.
    clarification = state.get("clarification") or ""
    user_input = state.get("user_input") or ""

    if clarification in ("This request is clear", "Could you type your question?"):
        state["query"] = user_input
    elif clarification.startswith("Could you provide"):
        state["query"] = f"{user_input} (needs clarification: {clarification})"
    else:
        state["query"] = clarification or user_input

    return state

In [None]:

def _coerce_need_search(raw_text):
    """Read ``need_search`` from the model's reply; default to True when unclear."""
    if not isinstance(raw_text, str):
        return True

    attempts = [raw_text]
    # Retry on the outermost brace-delimited span when the reply has extra text.
    embedded = re.search(r"\{.*\}", raw_text, flags=re.DOTALL)
    if embedded:
        attempts.append(embedded.group(0))

    for attempt in attempts:
        try:
            parsed = json.loads(attempt)
        except ValueError:
            continue
        if not isinstance(parsed, dict):
            continue
        flag = parsed.get("need_search", True)
        if isinstance(flag, str):
            # bool("false") is True, so string flags need explicit handling.
            return flag.strip().lower() not in {"false", "no", "0", ""}
        return bool(flag)
    return True


def decide_search(query: str) -> bool:
    """Ask Gemini whether *query* needs a live web search.

    Args:
        query: The research question.

    Returns:
        ``True`` if the model says a search is needed. Also ``True`` when the
        reply cannot be parsed or the model call fails, so the pipeline
        defaults to searching.
    """
    try:
        prompt = f"""
You are a decision module. Given a research question, answer whether it requires real-time web search
or can be answered from general knowledge (no web search). Return JSON: {{"need_search": true/false}}.

Question: "{query}"
"""
        model = genai.GenerativeModel("gemini-1.5-flash")
        response = model.generate_content(prompt)
        return _coerce_need_search(safe_extract_genai_text(response))
    except Exception:
        # Fail open, but leave a trace instead of hiding the failure.
        traceback.print_exc()
        return True

def tavily_search(query: str, max_results: int = 5):
    """Run a web search through the module-level ``tavily`` client.

    The client is probed for ``search``, then ``query``, then ``run``, and
    the first method found is called.

    Args:
        query: Search text.
        max_results: Result cap forwarded to ``search``/``query`` (``run`` is
            called with the query only).

    Returns:
        Whatever the client method returns, or ``""`` when the client exposes
        none of the methods or the call raises. The exception is printed, not
        propagated.
    """
    try:
        if hasattr(tavily, "search"):
            return tavily.search(query, max_results=max_results)
        if hasattr(tavily, "query"):
            return tavily.query(query, max_results=max_results)
        if hasattr(tavily, "run"):
            return tavily.run(query)
    except Exception:
        # Keep the best-effort contract, but make the failure visible.
        traceback.print_exc()
    return ""

def research_pipeline(state: ResearchState) -> ResearchState:
    """Graph node: answer the query from memory, web search, or the model.

    Resolution order:

    1. Empty query: placeholder answer.
    2. "previous/last question" phrasing: the most recent entry of
       ``memory["history"]``.
    3. Query containing "my name": the stored ``name`` fact.
    4. Query equal (case-insensitively) to a stored fact key: that fact.
    5. Otherwise ``decide_search`` picks between "Tavily + Gemini" and
       "Gemini Only". If a search was wanted but returned nothing, the model
       answers alone and the pipeline label says so.

    Args:
        state: Current graph state; only ``query`` is read.

    Returns:
        The same state dict with ``pipeline`` and ``summary`` set. Errors in
        the model step are reported in those fields rather than raised.
    """
    query = (state.get("query") or "").strip()
    if not query:
        state["pipeline"] = "No query provided."
        state["summary"] = "No summary available."
        return state

    lowered = query.lower()
    facts = memory.get("facts", {})
    history = memory.get("history", [])

    if re.search(r"\b(previous|last)\s*(que|question|query|sawal)\b", lowered):
        if history:
            last_q = history[-1]["Q"]
            state["pipeline"] = "Retrieved from memory (history)."
            state["summary"] = f"Your previous question was: '{last_q}'"
        else:
            state["pipeline"] = "Memory empty."
            state["summary"] = "There is no previous question in memory."
        return state

    if "my name" in lowered:
        name = facts.get("name")
        state["pipeline"] = "Retrieved from memory (facts)."
        # Avoid rendering the placeholder as if it were the name itself.
        state["summary"] = f"Your name is {name}." if name else "I don’t know your name yet."
        return state

    # Single pass over the stored facts for a case-insensitive key match.
    matched = next((value for key, value in facts.items() if key.lower() == lowered), None)
    if matched is not None:
        state["pipeline"] = f"Retrieved from memory: {matched}"
        state["summary"] = matched
        return state

    try:
        need_search = decide_search(query)
        # Built inside the try so a construction failure is reported in the
        # state like any other model error.
        model = genai.GenerativeModel("gemini-1.5-flash")

        tavily_results = tavily_search(query, max_results=5) if need_search else None
        if tavily_results:
            combined_info = f"Query: {query}\n\nWeb results (short): {json.dumps(tavily_results, default=str)[:4000]}\n\nSummarize the key findings in 3-5 bullet points."
            response = model.generate_content(combined_info)
            state["pipeline"] = "Tavily + Gemini"
        else:
            prompt = f"Query: {query}\nProvide a concise answer or short summary (3-5 lines)."
            response = model.generate_content(prompt)
            # tavily_search returns "" on failure; do not claim web results
            # that were never obtained.
            state["pipeline"] = (
                "Gemini Only (web search unavailable)" if need_search else "Gemini Only"
            )
        state["summary"] = safe_extract_genai_text(response)
    except Exception as e:
        traceback.print_exc()
        state["pipeline"] = f"Error during research: {str(e)}"
        state["summary"] = "An error occurred while fetching results."

    return state

In [None]:

# Linear workflow: ClarificationAgent -> QueryGenerator -> ResearchPipeline.
graph = StateGraph(ResearchState)

# Each node is a function that receives the state dict and returns it.
graph.add_node("ClarificationAgent", clarification_agent)
graph.add_node("QueryGenerator", query_generator)
graph.add_node("ResearchPipeline", research_pipeline)

graph.set_entry_point("ClarificationAgent")
graph.add_edge("ClarificationAgent", "QueryGenerator")
graph.add_edge("QueryGenerator", "ResearchPipeline")
# Declare the terminal node explicitly instead of relying on the library
# treating a node without outgoing edges as the end of the run.
graph.set_finish_point("ResearchPipeline")

# `app` is the runnable object that chat() invokes.
app = graph.compile()
print("✅ StateGraph compiled successfully.")

✅ StateGraph compiled successfully.


In [None]:
# Initialize memory and provide a chat() function to process one input at a time
# "facts"   -> persistent knowledge about the user (e.g. name), keyed by fact name
# "history" -> conversation log, appended to by chat()
memory = {"facts": {}, "history": []}

def extract_facts_with_gemini(text: str):
    """Ask Gemini for personal facts mentioned in *text*.

    Args:
        text: One user utterance.

    Returns:
        A list of dicts as returned by the model (expected shape
        ``{"key": ..., "value": ...}``). Non-dict items are dropped. Returns
        ``[]`` when nothing is found, the reply cannot be parsed, or the model
        call fails; the exception is printed, never raised.
    """
    prompt = f"""
Extract any personal facts (name, age, location, role, company) from the following user sentence.
Return a JSON list of objects with "key" and "value". If none, return [].

Sentence: "{text}"
"""
    try:
        # Model construction is inside the try so that it is non-fatal too.
        model = genai.GenerativeModel("gemini-1.5-flash")
        response = model.generate_content(prompt)
        text_out = safe_extract_genai_text(response)
    except Exception:
        traceback.print_exc()
        return []

    if not isinstance(text_out, str):
        return []

    attempts = [text_out]
    # Retry on the outermost bracketed span when the reply has extra text
    # around the list (prose, code fences, or a wrapping object).
    embedded = re.search(r"\[.*\]", text_out, flags=re.DOTALL)
    if embedded:
        attempts.append(embedded.group(0))

    for attempt in attempts:
        try:
            parsed = json.loads(attempt)
        except ValueError:
            continue
        if isinstance(parsed, list):
            # The caller reads each item with .get(), so keep only dicts.
            return [item for item in parsed if isinstance(item, dict)]
    return []

def _extract_stated_name(user_input):
    """Return the first word after "my name is", cleaned up, or "" if absent."""
    found = re.search(r"\bmy name is\b\s*(\S+)", user_input, flags=re.IGNORECASE)
    if not found:
        return ""
    # Drop surrounding punctuation so "Bob." / "Bob!" are stored as "Bob".
    token = found.group(1).strip(".,!?;:\"'()[]")
    if not token:
        return ""
    # Normalise all-lower / all-upper input; keep deliberate mixed case.
    if token.islower() or token.isupper():
        return token.capitalize()
    return token


def chat(user_input: str, remember_name_rule: bool = True):
    """Process one user message: update memory, run the graph, log the turn.

    Steps:

    1. "what is my <key>" is answered directly from ``memory["facts"]``.
    2. Otherwise facts are stored: the "my name is ..." rule when
       *remember_name_rule* is true and the phrase is present, else whatever
       ``extract_facts_with_gemini`` returns.
    3. The compiled graph ``app`` is invoked and its result printed.
    4. The turn is appended to ``memory["history"]`` (recall turns too, so
       "previous question" lookups see them).

    Args:
        user_input: Raw text typed by the user.
        remember_name_rule: Use the rule-based "my name is" extraction
            instead of the model when the phrase is present.

    Returns:
        The final state dict with keys ``user_input``, ``clarification``,
        ``query``, ``pipeline`` and ``summary``.
    """
    lowered = user_input.lower()

    def _log_turn(result):
        # One history record per processed input.
        memory["history"].append({
            "timestamp": time.time(),
            "Q": user_input,
            "clarification": result.get("clarification", ""),
            "query": result.get("query", ""),
            "pipeline": result.get("pipeline", ""),
            "A": result.get("summary", ""),
        })

    # Direct recall comes first: it needs no model call, and a question
    # should not be mined for new facts.
    if lowered.startswith("what is my"):
        # Trailing punctuation would otherwise make "name?" miss "name".
        key = lowered[len("what is my"):].strip(" \t?.!")
        val = memory["facts"].get(key, "I don’t know yet.")
        print(f"Memory: {val}")
        recalled = {"user_input": user_input, "clarification": "", "query": "", "pipeline": "recall", "summary": val}
        _log_turn(recalled)
        return recalled

    if remember_name_rule and re.search(r"\bmy name is\b", lowered):
        name = _extract_stated_name(user_input)
        if name:
            memory["facts"]["name"] = name
            print(f"✅ Stored name='{name}' in memory.")
    else:
        try:
            facts_list = extract_facts_with_gemini(user_input) or []
        except Exception:
            # Fact extraction is optional; never block the conversation on it.
            traceback.print_exc()
            facts_list = []
        for fact in facts_list:
            if not isinstance(fact, dict):
                continue
            raw_key = fact.get("key")
            raw_value = fact.get("value")
            # Values may arrive as numbers (e.g. age); store them as text.
            key = "" if raw_key is None else str(raw_key).lower().strip()
            value = "" if raw_value is None else str(raw_value).strip()
            if key and value:
                memory["facts"][key] = value
                print(f"✅ I'll remember your {key} = {value}")

    state: ResearchState = {
        "user_input": user_input,
        "clarification": "",
        "query": "",
        "summary": "",
        "pipeline": ""
    }

    try:
        state = app.invoke(state)
    except Exception as e:
        traceback.print_exc()
        state["pipeline"] = f"Graph invocation error: {str(e)}"
        state["summary"] = ""

    print("\n###  Clarification Agent")
    print(state.get("clarification", ""))
    print("\n###  Final Research Query")
    print(state.get("query", ""))
    print("\n###  Research Pipeline")
    print(state.get("pipeline", ""))
    print("\n###  Final Summary")
    print(state.get("summary", ""))

    _log_turn(state)

    return state

In [None]:

# Interactive read-eval-print loop around chat().
print(" OpenDeepResearcher Chatbot (type 'quit' or 'exit' to stop)\n")

while True:
    try:
        user_input = input("You: ").strip()
        if user_input.lower() in ["quit", "exit", "bye"]:
            print(" Goodbye! Session ended.")
            break

        state = chat(user_input)

    except (KeyboardInterrupt, EOFError):
        # EOFError: stdin was closed. Without this it would hit the generic
        # handler below and the loop would spin forever on the same error.
        print("\n Interrupted. Goodbye!")
        break
    except Exception as e:
        # Keep the session alive on per-message failures.
        print(f" Error: {e}")

 OpenDeepResearcher Chatbot (type 'quit' or 'exit' to stop)

You: who is the best footballer of the world current

###  Clarification Agent
Who is currently considered the best footballer in the world, based on widely accepted awards and rankings?

###  Final Research Query
Who is currently considered the best footballer in the world, based on widely accepted awards and rankings?

###  Research Pipeline
Tavily + Gemini

###  Final Summary
* **Rodri is the current Ballon d'Or winner (2024):**  Multiple sources cite Rodri as the recipient of the 2024 Ballon d'Or award, making him the current holder of the prestigious title.

* **Ousmane Dembélé is a leading contender for the 2025 Ballon d'Or:** Several sources place Dembélé at the top or near the top of their 2025 Ballon d'Or power rankings.

* **Lamine Yamal is also a prominent contender for 2025:**  He consistently appears high in various 2025 Ballon d'Or prediction lists.

* **No single definitive "best" player exists beyond the curre