In [57]:
import os 
import re
import json
from groq import Groq
import datetime

In [58]:
# os.chdir("..")

In [59]:
# Root directory that holds one sub-folder per research run.
# The location can be overridden with the MODEL_OUTPUT_DATA_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original hard-coded path is used, so existing runs are unaffected.
base_data_path = os.environ.get(
    "MODEL_OUTPUT_DATA_DIR",
    r"C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data",
)

# Report whether the directory is present; the following cells list its contents.
if os.path.exists(base_data_path):
    print("It exists")
else:
    print("Does not exist")

It exists


In [60]:
# Full paths of every sub-directory directly under base_data_path.
# Each listed name is joined onto base_data_path so the result can be handed
# straight to os.path functions; plain files are dropped by the isdir filter.
folders = list(
    filter(
        os.path.isdir,
        map(
            lambda entry_name: os.path.join(base_data_path, entry_name),
            os.listdir(base_data_path),
        ),
    )
)


In [61]:
# Guard against an empty output directory: max() on an empty list raises a bare
# "max() arg is an empty sequence" ValueError that hides the real cause.
if not folders:
    raise ValueError(
        f"No run folders found under {base_data_path!r}; nothing to process."
    )

# Pick the folder with the largest ctime.
# NOTE: os.path.getctime is the creation time on Windows but the last
# metadata-change time on Unix, so "latest" is platform dependent.
latest_folder = max(folders, key=os.path.getctime)
print("Latest folder:", latest_folder)


Latest folder: C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data\research_on_java


## Temporary list to store the chat messages

In [62]:
# Chat-message buffer; starts empty and is rebuilt for each task by the
# retrieval loop in a later cell.
messages: list = []

In [63]:
# Load the task list for the most recent run folder.
tasks_path = os.path.join(latest_folder, "tasks.json")

# Read as UTF-8 explicitly: without `encoding`, open() falls back to the
# platform default (e.g. cp1252 on Windows), which can mis-decode or reject
# non-ASCII characters in the JSON file.
with open(tasks_path, "r", encoding="utf-8") as f:
    tasks = json.load(f)

In [64]:
# Client object used for the chat-completion calls in the retrieval loop.
# NOTE(review): Groq() is called with no arguments — presumably the SDK picks
# up the API key from the environment; confirm against the Groq SDK docs.
retriever_agent = Groq()

In [65]:
# Hard-coded example search hits (example.com URLs). Each hit carries the
# title, URL and snippet that are serialized to JSON and sent to the model.
search_results = [
    dict(
        title="Java Programming Language Overview",
        url="https://example.com/java-overview",
        snippet="Java is a high level object oriented programming language developed by Sun Microsystems.",
    ),
    dict(
        title="Key Features of Java",
        url="https://example.com/java-features",
        snippet="Java provides platform independence, automatic memory management, and strong security.",
    ),
]


In [66]:
# System-prompt TEMPLATE for the retrieval agent.
# It must be rendered with str.format(task_description=...) before use:
#   - {task_description} is the only placeholder;
#   - the doubled braces {{ }} in the JSON example become literal { } after
#     formatting.
# The raw template is intentionally NOT appended to `messages` here: doing so
# would store a system message that still contains the literal
# "{task_description}" placeholder and doubled braces. The retrieval loop
# builds its own message list from the formatted prompt for every task.
retriever_system_prompt = """
You are a research retrieval agent.
Your job: Extract accurate facts from the provided search results for the following task:
Task: {task_description}

Rules:
- Use ONLY the given search results
- Do NOT invent information
- Keep summaries concise and factual
- Include the source URL for every item
- If results are insufficient, return an empty list

IMPORTANT: Your response must contain ONLY the <answer> block below. No extra text, no explanation, no preamble.

<answer>
[
  {{
    "source": "...",
    "title": "...",
    "summary": "...",
    "key_points": ["...", "..."]
  }}
]
</answer>

Do not write anything before or after the <answer> tags.
"""

In [67]:
# Run the retrieval agent once per task.
# The Groq client (`retriever_agent`) is created in an earlier cell and is
# reused here instead of being instantiated a second time.

# The search results are the same for every task, so serialize them once
# rather than on every iteration.
search_results_str = json.dumps(search_results, indent=2)

# One entry per task, so earlier completions are not lost when `reply` is
# overwritten by the next iteration.
task_replies = []

for task in tasks:

    task_description = task["description"]

    # Render the template for this task (fills {task_description} and turns
    # the doubled braces into literal braces).
    formatted_prompt = retriever_system_prompt.format(
        task_description=task_description
    )

    # Fresh message state per task
    messages = [
        {"role": "system", "content": formatted_prompt},
        {
            "role": "user",
            "content": f"Here are the search results:\n\n{search_results_str}"
        }
    ]

    completion = retriever_agent.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        stream=False
    )

    # `reply` is kept because later cells read the last task's output from it.
    reply = completion.choices[0].message.content
    task_replies.append({"task": task_description, "reply": reply})

    print("Task:", task_description)
    print(reply)
    print("=" * 50)

Task: Investigate the history and evolution of the Java programming language
<answer>
[
  {
    "source": "https://example.com/java-overview",
    "title": "Java Programming Language Overview",
    "summary": "Java is a high-level object-oriented programming language developed by Sun Microsystems.",
    "key_points": ["Platform independence", "Automatic memory management", "Strong security"]
  },
  {
    "source": "https://example.com/java-features",
    "title": "Key Features of Java",
    "summary": "Java provides platform independence, automatic memory management, and strong security.",
    "key_points": []
  }
]
</answer>
Task: Gather information on the design goals, features, and philosophy of Java
<answer>
[
  {
    "source": "https://example.com/java-overview",
    "title": "Java Programming Language Overview",
    "summary": "Java is a high-level object-oriented programming language developed by Sun Microsystems.",
    "key_points": []
  },
  {
    "source": "https://example.co

In [68]:
from pprint import pprint

In [69]:
# Inspect the message list. The retrieval loop rebinds `messages` on every
# iteration, so this shows the system + user messages of the last task only.
pprint(messages)

[{'content': '\n'
             'You are a research retrieval agent.\n'
             'Your job: Extract accurate facts from the provided search '
             'results for the following task:\n'
             'Task: Explore the most popular libraries, frameworks, and tools '
             'used in Java development\n'
             '\n'
             'Rules:\n'
             '- Use ONLY the given search results\n'
             '- Do NOT invent information\n'
             '- Keep summaries concise and factual\n'
             '- Include the source URL for every item\n'
             '- If results are insufficient, return an empty list\n'
             '\n'
             'IMPORTANT: Your response must contain ONLY the <answer> block '
             'below. No extra text, no explanation, no preamble.\n'
             '\n'
             '<answer>\n'
             '[\n'
             '  {\n'
             '    "source": "...",\n'
             '    "title": "...",\n'
             '    "summary": "...",\n'
  

In [70]:
# `reply` is overwritten on every loop iteration, so this captures only the
# completion for the LAST task; earlier tasks' replies are not carried forward.
agent_output = reply
# Bare expression so the notebook displays the raw reply string.
agent_output

'<answer>\n[\n  {\n    "source": "https://example.com/java-overview",\n    "title": "Libraries for Java",\n    "summary": "There is no specific information in the given search results.",\n    "key_points": []\n  },\n  {\n    "source": "https://example.com/java-features",\n    "title": "Popular Java Frameworks",\n    "summary": "There is no specific information in the given search results.",\n    "key_points": []\n  },\n  {\n    "source": "https://example.com/java-tools",\n    "title": "Most popular Java Development Tools",\n    "summary": "Eclipse, NetBeans, and IntelliJ IDEA are popular Integrated Development Environments (IDEs) for Java development. Maven and Gradle are popular build tools. Java Development Kit (JDK) is a collection of tools for developing, testing, and running Java applications. JUnit is a unit testing framework for Java. ",\n    "key_points": [\n      "Eclipse",\n      "NetBeans",\n      "IntelliJ IDEA",\n      "Maven",\n      "Gradle",\n      "Java Development Kit

In [71]:
# Extract the JSON payload wrapped in <answer>...</answer>.
# str.find() returns -1 when a tag is missing (e.g. a truncated model reply),
# which would silently produce a wrong slice; a regex match makes the failure
# explicit. Non-greedy matching stops at the first closing tag, and DOTALL
# lets the payload span multiple lines.
answer_match = re.search(r"<answer>(.*?)</answer>", agent_output, re.DOTALL)
if answer_match is None:
    raise ValueError(
        "Model reply does not contain a complete <answer>...</answer> block"
    )

json_text = answer_match.group(1).strip()

# Surface malformed JSON with context instead of a bare decoder error.
try:
    search_result = json.loads(json_text)
except json.JSONDecodeError as exc:
    raise ValueError(f"<answer> block is not valid JSON: {exc}") from exc

# Persist the parsed result next to the run's other artifacts; UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(
    os.path.join(latest_folder, "retrieval_results.json"), "w", encoding="utf-8"
) as f:
    json.dump(search_result, f, indent=4)

print(search_result)

[{'source': 'https://example.com/java-overview', 'title': 'Libraries for Java', 'summary': 'There is no specific information in the given search results.', 'key_points': []}, {'source': 'https://example.com/java-features', 'title': 'Popular Java Frameworks', 'summary': 'There is no specific information in the given search results.', 'key_points': []}, {'source': 'https://example.com/java-tools', 'title': 'Most popular Java Development Tools', 'summary': 'Eclipse, NetBeans, and IntelliJ IDEA are popular Integrated Development Environments (IDEs) for Java development. Maven and Gradle are popular build tools. Java Development Kit (JDK) is a collection of tools for developing, testing, and running Java applications. JUnit is a unit testing framework for Java. ', 'key_points': ['Eclipse', 'NetBeans', 'IntelliJ IDEA', 'Maven', 'Gradle', 'Java Development Kit (JDK)', 'JUnit']}]
