In [16]:
from groq import Groq
from dotenv import load_dotenv
import os
import pprint
import json
import re

# Run python-dotenv's loader first so the environment lookup below happens after it.
load_dotenv()

# Holds None when GROQ_API_KEY is absent from the environment.
groq_api_key = os.environ.get("GROQ_API_KEY")

In [17]:
# Directory whose sub-folders are scanned further down to find the most recent
# output folder. The default is the original local checkout path; set the
# MODEL_OUTPUT_DATA_DIR environment variable to point the notebook somewhere
# else without editing this cell.
base_data_path = os.getenv(
    "MODEL_OUTPUT_DATA_DIR",
    r"C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data",
)

# Quick visual sanity check: report whether the configured path is present.
if os.path.exists(base_data_path):
    print("It exists")
else:
    print("Does not exist")

It exists


In [18]:
# Full paths of the immediate children of base_data_path that are directories.
# Full paths (not bare names) are kept so timestamps can be read from them
# when choosing the most recent folder.
folders = list(
    filter(
        os.path.isdir,  # keep directories only; plain files are dropped
        (os.path.join(base_data_path, entry) for entry in os.listdir(base_data_path)),
    )
)


In [19]:
# Fail with an explicit message instead of the generic error max() raises
# when it is handed an empty sequence.
if not folders:
    raise ValueError(f"No sub-folders found in {base_data_path!r}; nothing to analyse")

# Pick the folder with the largest ctime.
# NOTE: os.path.getctime is the creation time on Windows but the last
# metadata-change time on Unix, so "latest" is platform dependent.
latest_folder = max(folders, key=os.path.getctime)
print("Latest folder:", latest_folder)


Latest folder: C:\Users\Wayne\Documents\GitHub\MultiAgentResearcher\model_output_data\research_on_iphones


In [20]:
# System prompt for the gap-detection request. It tells the model to analyse
# the synthesized task outputs together and to reply with a single JSON object
# wrapped in <answer>...</answer> tags. The parsing cell further down locates
# those tags in the reply, so the tag names here and there must stay in sync.
gap_system_prompt = """
You are a research gap detection agent.

You are given synthesized research outputs across multiple tasks in JSON format.

Your job:

- Analyze all task syntheses together.
- Identify recurring weaknesses.
- Detect missing dimensions across the overall research.
- Evaluate coverage breadth and depth.
- Suggest new high-value research tasks.

STRICT RULES:
- Output MUST be valid JSON.
- Use double quotes for all property names.
- No trailing commas.
- No comments.
- No explanation outside the <answer> block.
- If uncertain, return empty arrays instead of guessing.

Return ONLY this structure inside <answer> tags:

<answer>
{
  "global_gaps": [],
  "cross_task_weaknesses": [],
  "coverage_assessment": {
    "breadth": "",
    "depth": "",
    "balance": ""
  },
  "suggested_new_tasks": [
    {
      "description": "",
      "priority": 5,
      "type": "research"
    }
  ]
}
</answer>
"""

In [21]:
# Location of the synthesis output inside the most recent folder.
sources_path = os.path.join(latest_folder, "synthesis_results.json")

# Decode explicitly as UTF-8: without an encoding argument open() falls back to
# the locale's preferred encoding (e.g. cp1252 on Windows), which can mis-decode
# or reject non-ASCII characters in the JSON file.
with open(sources_path, "r", encoding="utf-8") as f:
    synthesized_data = json.load(f)

In [22]:
# Client is constructed with no explicit arguments.
gap_agent = Groq()

# Build the conversation once and hand the same list to the API call, so the
# request cannot drift from what `messages` shows and the synthesized data is
# serialized only once.
messages = [
    {"role": "system", "content": gap_system_prompt},
    {"role": "user", "content": json.dumps(synthesized_data, indent=2)},
]

# Single non-streaming chat completion.
completion = gap_agent.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=messages,
    stream=False,
)

# Text of the first returned choice; parsed in the next cell.
reply = completion.choices[0].message.content


In [23]:

# Locate the first <answer>...</answer> block in the model reply. A regex match
# is used instead of str.find because find() returns -1 for a missing tag, which
# would silently produce a wrong slice and a confusing JSON error downstream.
answer_match = re.search(r"<answer>(.*?)</answer>", reply, flags=re.DOTALL)
if answer_match is None:
    # ValueError is also the base class of json.JSONDecodeError, so callers that
    # guard the parsing step with `except ValueError` catch both failure modes.
    raise ValueError("Model reply does not contain an <answer>...</answer> block")

# Offsets of the text between the tags (same meaning as the original start/end).
start, end = answer_match.span(1)
json_text = reply[start:end].strip()

# Raises json.JSONDecodeError if the model did not return valid JSON.
gap_results = json.loads(json_text)

# Persist the parsed result next to the synthesis file it was derived from.
with open(os.path.join(latest_folder, "gap_results.json"), "w", encoding="utf-8") as f:
    json.dump(gap_results, f, indent=4)

print(gap_results)

{'global_gaps': [], 'cross_task_weaknesses': ['Labor practices and human rights issues'], 'coverage_assessment': {'breadth': 'Medium to High', 'depth': 'Medium', 'balance': 'Environmental and social implications are covered, but labor and political impacts are underrepresented'}, 'suggested_new_tasks': [{'description': 'Examine the effects of iPhone ownership on individuals and communities in low-income and marginalized ecosystems', 'priority': 4, 'type': 'research'}, {'description': 'Investigate the specific labor practices and human rights issues in iPhone supply chains', 'priority': 5, 'type': 'research'}, {'description': 'Evaluate the political and economic impacts of iPhone production and usage on countries and regions', 'priority': 3, 'type': 'research'}]}
