In [49]:
import os 
import re
import json
from groq import Groq
import datetime
from dotenv import load_dotenv

In [50]:
load_dotenv()

True

In [51]:
search_api_key = os.getenv("TAVILY_SEARCH_API")

In [52]:
# os.chdir("..")

In [53]:
base_data_path = r"C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data"


if os.path.exists(base_data_path):
    print("It exists")
else:
    print("Does not exist")

It exists


In [54]:
folders = [
    os.path.join(base_data_path, f) # Gives me the path so I can run datetime on it and get the most recent folder 
    for f in os.listdir(base_data_path) # Grabs all the folders in it
    if os.path.isdir(os.path.join(base_data_path, f)) # Validates that the path actually exists
]


In [55]:
latest_folder = max(folders, key=os.path.getctime)
print("Latest folder:", latest_folder)


Latest folder: C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data\research_on_python


## Temporary List to store data 

In [56]:
messages = []

In [57]:
tasks_path = os.path.join(latest_folder, "tasks.json")

with open(tasks_path, "r") as f:
    tasks = json.load(f)

In [58]:
from tavily import TavilyClient

tavily = TavilyClient(api_key=search_api_key)

def fetch_search_results(query):

    response = tavily.search(
        query=query,
        search_depth="basic",
        max_results=5
    )

    structured_results = []

    for result in response["results"]:
        structured_results.append({
            "title": result["title"],
            "url": result["url"],
            "snippet": result["content"][:500]  # limit tokens
        })

    return structured_results

In [59]:
retriever_system_prompt = """
You are a research retrieval agent.
Your job: Extract accurate facts from the provided search results for the following task:
Task: {task_description}
Rules:
- Use ONLY the given search results
- Do NOT invent information
- Keep summaries concise and factual
- Include the source URL for every item
- If results are insufficient, return an empty list

IMPORTANT: Your response must contain ONLY the <answer> block below. No extra text, no explanation, no preamble.

<answer>
[
  {{
    "source": "...",
    "title": "...",
    "summary": "...",
    "key_points": ["...", "..."]
  }}
]
</answer>

Do not write anything before or after the <answer> tags.
"""

In [60]:
all_results = {}

In [61]:
retriever_agent = Groq()

for task in tasks:
    task_description = task["description"]
    formatted_prompt = retriever_system_prompt.format(task_description=task_description)
    search_results = fetch_search_results(task_description)

    messages = [
        {"role": "system", "content": formatted_prompt},
        {"role": "user", "content": f"Here are the search results:\n\n{search_results}"}
    ]

    completion = retriever_agent.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        stream=False
    )

    reply = completion.choices[0].message.content

    print("Task:", task_description)
    print(reply)
    print("=" * 50)

Task: Identify historical development and milestones of the Python programming language
<answer>
[
  {
    "source": "Facebook",
    "title": "Python programming language history and overview",
    "summary": "Python programming language created by Guido van Rossum, conceived in the late 1980s",
    "key_points": [
      "Conceived in the late 1980s by Guido van Rossum",
      "First full release (1.0) in 1994",
      "Developed as a productivity-boosting alternative to traditional programming languages"
    ]
  },
  {
    "source": "Cornell University's Computing and Visualization Club",
    "title": "Introduction to Python Programming - Overview - History",
    "summary": "Python was initially developed by Guido van Rossum",
    "key_points": [
      "Development began in December 1989",
      "First full release (1.0) in 1994",
      "2.0 release in 2000 and its base for many programs currently available"
    ]
  },
  {
    "source": "Exyte",
    "title": "A brief history of Python 

In [62]:
agent_output = reply
agent_output

'<answer>\n[\n  {\n    "source": "Medium",\n    "title": "Python Development Use Cases Across Industries",\n    "summary": "Across all industries, Python is widely used for AI and machine learning solutions, business process automation, and data engineering and analytics.",\n    "key_points": ["AI and machine learning solutions", "Business process automation", "Data engineering and analytics"]\n  },\n  {\n    "source": "edX",\n    "title": "Python use cases",\n    "summary": "Python is used for traditional programming tasks, data analysis, and even AI development.",\n    "key_points": ["Traditional programming tasks", "Data analysis", "AI development"]\n  },\n  {\n    "source": "3ritechnologies",\n    "title": "10 Companies Using Python in 2026 | Real Use Cases & Career ...",\n    "summary": "Companies using Python in 2026 include data-driven decision-making and system automation.",\n    "key_points": ["Data analysis", "Automation", "Scalable systems"]\n  },\n  {\n    "source": "Mobilu

In [63]:
start = reply.find("<answer>") + len("<answer>")
end = reply.find("</answer>")
json_text = reply[start:end].strip()

try:
    task_results = json.loads(json_text)
except json.JSONDecodeError as e:
    print(f"Parse failed for task '{task_description}': {e}")
    print("Raw output:", json_text)
    task_results = []

all_results[task_description] = task_results  # keyed by task

with open(os.path.join(latest_folder, "retrieval_results.json"), "w") as f:
    json.dump(all_results, f, indent=4)

print(all_results)

{'Explore use cases and applications of Python in various industries (e.g., data science, web development, automation)': [{'source': 'Medium', 'title': 'Python Development Use Cases Across Industries', 'summary': 'Across all industries, Python is widely used for AI and machine learning solutions, business process automation, and data engineering and analytics.', 'key_points': ['AI and machine learning solutions', 'Business process automation', 'Data engineering and analytics']}, {'source': 'edX', 'title': 'Python use cases', 'summary': 'Python is used for traditional programming tasks, data analysis, and even AI development.', 'key_points': ['Traditional programming tasks', 'Data analysis', 'AI development']}, {'source': '3ritechnologies', 'title': '10 Companies Using Python in 2026 | Real Use Cases & Career ...', 'summary': 'Companies using Python in 2026 include data-driven decision-making and system automation.', 'key_points': ['Data analysis', 'Automation', 'Scalable systems']}, {'