In [10]:
#!/usr/bin/env python3
"""
main_wikipedia_only.py

Multi-agent research assistant that uses ONLY Wikipedia (no ML models, no external APIs).
- Planner: simple templated questions derived from the topic.
- Search: Wikipedia REST summary endpoint + search fallback.
- Synthesizer: combine the Wikipedia extracts into a final report.

Usage:
    python main_wikipedia_only.py
Then enter a topic when prompted.

Notes:
- Requires 'requests' (pip install requests).
- Prints only the final report (no extra debug output).
"""

import re
import time
from typing import List, Tuple, Optional
from urllib.parse import quote, quote_plus

import requests

USER_AGENT = "WikiResearchAssistant/1.0 (contact: none)"

WIKIPEDIA_SUMMARY_URL = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"
WIKIPEDIA_SEARCH_URL = "https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={}&format=json&utf8=1"

# -------------------------
# Utilities: sanitize topic / question -> query
# -------------------------
def sanitize_topic(raw: str) -> str:
    """
    Reduce free-form user input to a bare topic string.

    A single conversational lead-in ("who is", "tell me about", ...) is
    removed from the start, case-insensitively, and trailing question marks,
    exclamation marks and whitespace are removed from the end. Without the
    trailing cleanup, input such as "What is physics?" would yield the topic
    "physics?", which then leaks into templated text built from the topic.

    Args:
        raw: Text as typed by the user; may be empty or None.

    Returns:
        The cleaned topic, or "" when nothing usable remains.
    """
    if not raw:
        return ""
    s = raw.strip()
    # Lead-in list is the union of the phrases recognised here and in
    # extract_query_from_question, so both helpers accept the same phrasing.
    s = re.sub(
        r'^(who is|who was|what is|what are|tell me about|give me info on|info about'
        r'|give me details on|provide info on)\s+',
        '',
        s,
        flags=re.I,
    )
    # Drop question-style endings ("physics?", "Ada Lovelace ?!").
    s = re.sub(r'[\s?!]+$', '', s)
    return s.strip()

def extract_query_from_question(question: str) -> str:
    """
    Turn a natural-language question into a lookup query.

    One leading interrogative phrase (matched case-insensitively) is cut off
    the front, then trailing spaces, question marks and periods are removed.
    """
    lead_in = r'^(who is|who was|what is|what are|tell me about|give me details on|provide info on)\s+'
    text = question.strip()
    # The pattern is anchored at the start, so at most one prefix can match.
    found = re.match(lead_in, text, flags=re.I)
    if found:
        text = text[found.end():]
    return text.strip().rstrip(" ?.")

# -------------------------
# Wikipedia helpers
# -------------------------
def wikipedia_summary(title_or_query: str, timeout: int = 10) -> Optional[str]:
    """
    Fetch a page summary from the Wikipedia REST summary endpoint.

    Two title candidates are tried in order: the input with spaces replaced
    by underscores, then the input verbatim (skipped when identical to the
    first). For each candidate the response's 'extract' is preferred and its
    'description' is used as a fallback.

    Args:
        title_or_query: Page title or free-text guess at one.
        timeout: Per-request timeout in seconds, passed to requests.get.

    Returns:
        The stripped summary text, or None when the input is empty, no
        candidate yields usable text, or a network error/timeout occurs.
    """
    if not title_or_query:
        return None
    headers = {"User-Agent": USER_AGENT}
    # dict.fromkeys de-duplicates while keeping order, so a title without
    # spaces is not requested twice.
    candidates = dict.fromkeys((title_or_query.replace(" ", "_"), title_or_query))
    for candidate in candidates:
        # The title is a URL *path* segment: percent-encode everything,
        # including "/" (safe=""). quote_plus is for query strings and would
        # turn spaces into "+", which a path treats as a literal plus sign.
        url = WIKIPEDIA_SUMMARY_URL.format(quote(candidate, safe=""))
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # network or timeout -> treat as no result
            return None
        if r.status_code != 200:
            # e.g. 404: try the next candidate
            continue
        try:
            j = r.json()
        except ValueError:
            # body was not valid JSON -> treat this candidate as a miss
            continue
        if not isinstance(j, dict):
            continue
        # 'extract' is the human readable summary; 'description' is a
        # shorter fallback that sometimes exists when 'extract' is empty.
        for key in ("extract", "description"):
            text = j.get(key)
            if isinstance(text, str) and text.strip():
                return text.strip()
    return None

def wikipedia_search_then_summary(query: str, timeout: int = 10) -> Optional[str]:
    """
    Use the Wikipedia search API to find the top-ranked page title, then
    return that page's summary via wikipedia_summary.

    Args:
        query: Free-text search string.
        timeout: Per-request timeout in seconds, used for the search request
            and forwarded to wikipedia_summary.

    Returns:
        The summary text of the first search hit, or None when the query is
        empty, the search request fails or returns a non-200 status, the
        response is not usable JSON, there are no hits, or the hit has no
        summary.
    """
    if not query:
        return None
    headers = {"User-Agent": USER_AGENT}
    # The text lands in the query string here, where "+" does encode a space.
    url = WIKIPEDIA_SEARCH_URL.format(quote_plus(query))
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        if r.status_code != 200:
            return None
        j = r.json()
        if not isinstance(j, dict):
            return None
        hits = j.get("query", {}).get("search", [])
        if not hits:
            return None
        title = hits[0].get("title")
        if not title:
            return None
        # fetch summary for that title
        return wikipedia_summary(title, timeout=timeout)
    except (requests.RequestException, ValueError):
        # Network failure/timeout, or a body that is not valid JSON
        # (Response.json() signals that with a ValueError subclass).
        return None

# -------------------------
# Agents (Wikipedia-only)
# -------------------------
def planner_agent(topic: str) -> List[str]:
    """
    Deterministic planner: expand the sanitized topic into three fixed
    question templates. Yields an empty list when no topic remains after
    sanitizing.
    """
    subject = sanitize_topic(topic)
    if subject:
        return [
            f"Who is {subject}?",
            f"What are the most important facts about {subject}?",
            f"What are recent developments related to {subject}?",
        ]
    return []

def search_agent(question: str) -> str:
    """
    Answer a question from Wikipedia.

    The question is reduced to a query, which is looked up first as a page
    title and then through the search API. The first non-empty summary is
    returned with a "(Wikipedia) " prefix; otherwise a short placeholder
    message is returned.
    """
    query = extract_query_from_question(question)
    if not query:
        return "(No query generated from question.)"

    # Direct title lookup first, search-based lookup second.
    for lookup in (wikipedia_summary, wikipedia_search_then_summary):
        text = lookup(query)
        if text:
            return f"(Wikipedia) {text}"

    return "(Wikipedia) No article found for the query."

def synthesizer_agent(topic: str, research_results: List[Tuple[str, str]]) -> str:
    """
    Assemble the final report: an introduction naming the sanitized topic,
    one bullet per (question, answer) pair, and a fixed conclusion.
    """
    subject = sanitize_topic(topic)
    sections = [
        f"Introduction:\nThis report summarizes information about {subject} using only Wikipedia content.\n\n",
        "Findings:\n",
    ]

    for question, answer in research_results:
        text = answer.strip()
        # Cap long answers: keep the first 800 characters, cut back to the
        # last period inside that window, and mark the cut with an ellipsis.
        if len(text) > 800:
            text = text[:800].rsplit(".", 1)[0] + "..."
        sections.append(f"- {question}: {text}\n\n")

    sections.append(
        "Conclusion:\nThis concludes the synthesized summary based solely on Wikipedia extracts.\n"
    )
    return "".join(sections)

# -------------------------
# Conductor / main (CLI)
# -------------------------
def main():
    """
    Command-line conductor: prompt for a topic, run planner -> search ->
    synthesizer, and print the final report.

    Exits early with a short message when the prompt is interrupted, when
    stdin is closed or exhausted, or when no usable topic is given.
    """
    try:
        topic = input("What topic would you like me to research today? ").strip()
    except (KeyboardInterrupt, EOFError):
        # input() raises EOFError when stdin is closed or empty (Ctrl-D,
        # piped input); handle it like Ctrl-C instead of dumping a traceback.
        print("\nInterrupted. Exiting.")
        return

    if not topic:
        print("A topic is required. Exiting.")
        return

    questions = planner_agent(topic)
    if not questions:
        # planner_agent returns an empty list when the sanitized topic is empty
        print("A topic is required. Exiting.")
        return

    research_results = []
    for index, q in enumerate(questions):
        if index:
            # be polite to Wikipedia: pause between requests (no need to
            # wait again after the last one)
            time.sleep(0.3)
        research_results.append((q, search_agent(q)))

    final_report = synthesizer_agent(topic, research_results)

    # Print ONLY the final report
    print("\n\n--- FINAL RESEARCH REPORT ---\n")
    print(final_report)
    print("\n--- END OF REPORT ---\n")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



--- FINAL RESEARCH REPORT ---

Introduction:
This report summarizes information about physics using only Wikipedia content.

Findings:
- Who is physics?: (Wikipedia) Physics is the scientific study of matter, its fundamental constituents, its motion and behavior through space and time, and the related entities of energy and force. It is one of the most fundamental scientific disciplines. A scientist who specializes in the field of physics is called a physicist.

- What are the most important facts about physics?: (Wikipedia) Physics is the scientific study of matter, its fundamental constituents, its motion and behavior through space and time, and the related entities of energy and force. It is one of the most fundamental scientific disciplines. A scientist who specializes in the field of physics is called a physicist.

- What are recent developments related to physics?: (Wikipedia) The Solvay Conferences have been devoted to preeminent unsolved problems in both physics and chemistry