In [1]:
import os 
import re
import json
from groq import Groq
import datetime
from dotenv import load_dotenv

In [2]:
# Load variables from a .env file into the process environment (python-dotenv),
# so the os.getenv lookup in the next cell can see them.
load_dotenv()

True

In [3]:
# Tavily search key taken from the environment; None when the variable is unset.
search_api_key = os.environ.get("TAVILY_SEARCH_API")

In [4]:
# os.chdir("..")

In [5]:
# Folder whose sub-directories are scanned below for the most recent one.
# Set the MODEL_OUTPUT_DATA_DIR environment variable to point somewhere else;
# the original machine-specific location is kept as the default.
base_data_path = os.getenv(
    "MODEL_OUTPUT_DATA_DIR",
    r"C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data",
)

# isdir rather than exists: a later cell calls os.listdir on this path,
# which only works when it is a directory (a plain file would pass `exists`).
if os.path.isdir(base_data_path):
    print("It exists")
else:
    print("Does not exist")

It exists


In [6]:
# Full paths of every entry directly under base_data_path that is a directory.
# Full paths (not bare names) are kept so timestamps can be read from them later.
folders = list(
    filter(
        os.path.isdir,  # drops regular files and anything else that is not a directory
        (os.path.join(base_data_path, entry) for entry in os.listdir(base_data_path)),
    )
)


In [7]:
# max() on an empty list fails with a bare "empty sequence" message;
# report the actual problem (no run folders to choose from) instead.
if not folders:
    raise ValueError(f"No sub-folders found under {base_data_path!r}")

# Choose the folder with the greatest ctime. Note that os.path.getctime is the
# creation time on Windows but the last metadata-change time on Unix.
latest_folder = max(folders, key=os.path.getctime)
print("Latest folder:", latest_folder)


Latest folder: C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data\research_on_java


## Temporary list to store chat messages

In [8]:
messages = []

In [9]:
# tasks.json lives inside the most recent output folder selected above.
tasks_path = os.path.join(latest_folder, "tasks.json")

# Read explicitly as UTF-8: without `encoding`, open() falls back to the
# locale's code page (e.g. cp1252 on Windows), which can mis-decode JSON text.
with open(tasks_path, "r", encoding="utf-8") as f:
    tasks = json.load(f)

In [10]:
# Groq API client used for the chat-completion calls. No key is passed here,
# so it is presumably picked up from the environment — TODO confirm.
# NOTE: the retrieval-loop cell further down constructs this client again.
retriever_agent = Groq()

In [11]:
from tavily import TavilyClient

# Tavily search client, created with the key read from TAVILY_SEARCH_API earlier
# (search_api_key is None if that variable was not set).
tavily = TavilyClient(api_key=search_api_key)

def fetch_search_results(query, max_results=5, snippet_chars=500, client=None):
    """Run a basic-depth web search and return trimmed, uniform result records.

    Args:
        query: Search query string passed to the search client.
        max_results: Maximum number of results requested from the client.
        snippet_chars: Maximum number of characters kept from each result's
            content, to limit the tokens later sent to the language model.
        client: Object exposing ``search(query=..., search_depth=..., max_results=...)``
            and returning a dict with a ``"results"`` list. Defaults to the
            module-level ``tavily`` client.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and ``"snippet"``.
        Missing or ``None`` fields become empty strings; a response without
        results yields an empty list.
    """
    search_client = tavily if client is None else client

    response = search_client.search(
        query=query,
        search_depth="basic",
        max_results=max_results
    )

    structured_results = []

    # `or []` covers both a missing "results" key and an explicit None.
    for result in response.get("results") or []:
        structured_results.append({
            "title": result.get("title") or "",
            "url": result.get("url") or "",
            # `or ""` guards against content being absent or None before slicing.
            "snippet": (result.get("content") or "")[:snippet_chars]  # limit tokens
        })

    return structured_results

In [12]:
# System-prompt template for the retrieval agent.
# It is a str.format template: {task_description} is filled in per task, and the
# doubled braces {{ }} render as literal { } in the JSON example.
retriever_system_prompt = """
You are a research retrieval agent.
Your job: Extract accurate facts from the provided search results for the following task:
Task: {task_description}

Rules:
- Use ONLY the given search results
- Do NOT invent information
- Keep summaries concise and factual
- Include the source URL for every item
- If results are insufficient, return an empty list

IMPORTANT: Your response must contain ONLY the <answer> block below. No extra text, no explanation, no preamble.

<answer>
[
  {{
    "source": "...",
    "title": "...",
    "summary": "...",
    "key_points": ["...", "..."]
  }}
]
</answer>

Do not write anything before or after the <answer> tags.
"""

# The raw template is intentionally NOT appended to `messages` here: it still
# contains the unfilled {task_description} placeholder, the retrieval loop builds
# its own message list from the formatted prompt, and appending on every re-run
# of this cell would pile up duplicate entries.

In [13]:
retriever_agent = Groq()

# One entry per task: {"task": <description>, "reply": <raw model text>}.
# `reply` and `messages` are rebound on every iteration, so without this list
# only the final task's output would still be available after the loop.
task_replies = []

for task in tasks:

    task_description = task["description"]

    # Fill the per-task placeholder in the system-prompt template.
    formatted_prompt = retriever_system_prompt.format(
        task_description=task_description
    )

    # The task description doubles as the web-search query.
    search_results = fetch_search_results(task_description)

    # Fresh message state per task
    messages = [
        {"role": "system", "content": formatted_prompt},
        {
            "role": "user",
            "content": f"Here are the search results:\n\n{search_results}"
        }
    ]

    completion = retriever_agent.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        stream=False
    )

    reply = completion.choices[0].message.content

    # Keep every task's reply, not just the last one.
    task_replies.append({"task": task_description, "reply": reply})

    print("Task:", task_description)
    print(reply)
    print("=" * 50)

Task: Investigate the history and evolution of the Java programming language
<answer>
[
  {
    "source": "https://www.theknowledgeacademy.com/blog/history-and-evolution-of-java/",
    "title": "History and Evolution of Java Programming Language",
    "summary": "A comprehensive guide on the history and evolution of Java.",
    "key_points": []
  },
  {
    "source": "https://www.freejavaguide.com/history.html",
    "title": "History of Java programming language | www.freejavaguide.com",
    "summary": "Java is an object-oriented programming language developed by James Gosling and colleagues at Sun Microsystems in the early 1990s.",
    "key_points": [
      "Java is intended to be compiled to a bytecode,",
      "Java is run by a Java Virtual Machine",
      "Java has networking support and remote code execution"
    ]
  },
  {
    "source": "https://pvs-studio.com/en/blog/posts/java/1256/",
    "title": "History of Java: evolution, legal battles with Microsoft, Mars ...",
    "summar

In [14]:
from pprint import pprint

In [15]:
# The retrieval loop rebinds `messages` on every iteration, so this shows only
# the system and user messages built for the last task.
pprint(messages)

[{'content': '\n'
             'You are a research retrieval agent.\n'
             'Your job: Extract accurate facts from the provided search '
             'results for the following task:\n'
             'Task: Explore the most popular libraries, frameworks, and tools '
             'used in Java development\n'
             '\n'
             'Rules:\n'
             '- Use ONLY the given search results\n'
             '- Do NOT invent information\n'
             '- Keep summaries concise and factual\n'
             '- Include the source URL for every item\n'
             '- If results are insufficient, return an empty list\n'
             '\n'
             'IMPORTANT: Your response must contain ONLY the <answer> block '
             'below. No extra text, no explanation, no preamble.\n'
             '\n'
             '<answer>\n'
             '[\n'
             '  {\n'
             '    "source": "...",\n'
             '    "title": "...",\n'
             '    "summary": "...",\n'
  

In [16]:
# `reply` is overwritten on each pass of the retrieval loop, so this captures
# only the final task's response.
agent_output = reply
agent_output

'<answer>\n[\n  {\n    "source": "https://medium.com/thefreshwrites/exploring-the-most-common-java-libraries-frameworks-6c7ae547d23c",\n    "title": "The Spring Framework",\n    "summary": "The most popular Java framework for building enterprise-level applications.",\n    "key_points": ["maps Java objects to database tables", "builds enterprise-level applications"]\n  },\n  {\n    "source": "https://www.diffblue.com/resources/most-popular-libraries-used-by-java-developers/",\n    "title": "Popular libraries used by Java developers",\n    "summary": "Essential libraries in the Java ecosystem.",\n    "key_points": ["enhance productivity", "simplify complex tasks"]\n  },\n  {\n    "source": "https://rollbar.com/blog/most-popular-java-web-frameworks/",\n    "title": "Most Popular Java Web Frameworks",\n    "summary": "Complete programming model built on and with Java.",\n    "key_points": ["Spring Boot", "modern Java features", "excellent developer experience"]\n  },\n  {\n    "source": "h

In [17]:
def extract_answer_json(text):
    """Parse the JSON payload enclosed in the first <answer>...</answer> pair.

    Args:
        text: Raw model reply expected to contain an <answer> block.

    Returns:
        The decoded JSON value found between the tags.

    Raises:
        ValueError: If no complete <answer>...</answer> block is present
            (for example a truncated reply), or if the enclosed text is not
            valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    # Non-greedy + DOTALL: span from the first opening tag to the nearest
    # closing tag, across newlines. A plain str.find would return -1 for a
    # missing tag and silently slice the wrong part of the reply.
    match = re.search(r"<answer>(.*?)</answer>", text, flags=re.DOTALL)
    if match is None:
        raise ValueError("Reply does not contain a complete <answer>...</answer> block")
    return json.loads(match.group(1).strip())


search_result = extract_answer_json(agent_output)

# Written next to tasks.json in the latest output folder; UTF-8 is pinned so the
# file does not depend on the platform's default encoding.
with open(os.path.join(latest_folder, "retrieval_results.json"), "w", encoding="utf-8") as f:
    json.dump(search_result, f, indent=4)

print(search_result)

[{'source': 'https://medium.com/thefreshwrites/exploring-the-most-common-java-libraries-frameworks-6c7ae547d23c', 'title': 'The Spring Framework', 'summary': 'The most popular Java framework for building enterprise-level applications.', 'key_points': ['maps Java objects to database tables', 'builds enterprise-level applications']}, {'source': 'https://www.diffblue.com/resources/most-popular-libraries-used-by-java-developers/', 'title': 'Popular libraries used by Java developers', 'summary': 'Essential libraries in the Java ecosystem.', 'key_points': ['enhance productivity', 'simplify complex tasks']}, {'source': 'https://rollbar.com/blog/most-popular-java-web-frameworks/', 'title': 'Most Popular Java Web Frameworks', 'summary': 'Complete programming model built on and with Java.', 'key_points': ['Spring Boot', 'modern Java features', 'excellent developer experience']}, {'source': 'https://finitestate.io/blog/top-10-java-libraries', 'title': 'Top 10 Java Libraries Every Developer Should