In [37]:
from groq import Groq
from dotenv import load_dotenv
import os
import pprint
import json
import re

load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")

In [38]:
base_data_path = r"C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data"


if os.path.exists(base_data_path):
    print("It exists")
else:
    print("Does not exist")

It exists


In [39]:
folders = [
    os.path.join(base_data_path, f) # Gives me the path so I can run datetime on it and get the most recent folder 
    for f in os.listdir(base_data_path) # Grabs all the folders in it
    if os.path.isdir(os.path.join(base_data_path, f)) # Validates that the path actually exists
]


In [40]:
latest_folder = max(folders, key=os.path.getctime)
print("Latest folder:", latest_folder)


Latest folder: C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data\research_on_python


In [41]:
synthesis_system_prompt = """ 
You are a research synthesis agent.

You are given:
1. A research task description.
2. Structured retrieval results in JSON format.

Your job is to synthesize the retrieved evidence.

Strict requirements:

- Combine overlapping ideas across sources.
- Identify recurring themes.
- Do NOT summarize each source individually.
- Do NOT invent new information.
- Only use facts present in the retrieval data.
- If evidence is weak or repetitive, state that clearly.
- If retrieval results are empty, mark insufficient evidence.

Return ONLY structured JSON inside <answer> tags.

Output format:

<answer>
{
  "task": "{task_description}",
  "synthesized_summary": "...",
  "core_concepts": ["...", "..."],
  "strongly_supported_points": ["...", "..."],
  "weak_or_missing_areas": ["..."]
}
</answer>

No explanation outside the <answer> block.
Be analytical, concise, and structured.
"""

In [42]:
sources_path = os.path.join(latest_folder, "retrieval_results.json")

with open(sources_path, "r") as f:
    retrieval_data = json.load(f)

In [43]:
synthesized_results = {}

In [44]:
synthesis_agent = Groq()

for task_description, task_sources in retrieval_data.items():
    formatted_prompt = synthesis_system_prompt.replace("{task_description}", task_description)
    
    messages = [
        {"role": "system", "content": formatted_prompt},
        {"role": "user", "content": json.dumps(task_sources, indent=2)}
    ]
    
    completion = synthesis_agent.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        stream=False
    )
    
    reply = completion.choices[0].message.content

In [45]:
start = reply.find("<answer>") + len("<answer>")
end = reply.find("</answer>")
json_text = reply[start:end].strip()

try:
    search_result = json.loads(json_text)
except json.JSONDecodeError as e:
    print(f"Parse failed for '{task_description}': {e}")
    search_result = {"error": "parse_failed", "raw": json_text}

synthesized_results[task_description] = search_result  # store each task result

with open(os.path.join(latest_folder, "synthesis_results.json"), "w") as f:
    json.dump(synthesized_results, f, indent=4)

print(synthesized_results)

{'Explore use cases and applications of Python in various industries (e.g., data science, web development, automation)': {'task': 'Explore use cases and applications of Python in various industries (e.g., data science, web development, automation)', 'synthesized_summary': 'Python has a wide range of applications across various industries, including AI and machine learning solutions, business process automation, data engineering and analytics, traditional programming tasks, data analysis, AI development, data-driven decision-making, system automation, web scraping and data extraction, game development, scientific computing, and data visualization. It is used in web development, data science, and Artificial Intelligence.', 'core_concepts': ['Python', 'AI and machine learning solutions', 'Business process automation', 'Data engineering and analytics', 'Traditional programming tasks', 'Data analysis', 'AI development', 'Web development', 'Data science', 'Artificial Intelligence'], 'strongl