In [6]:
from agents.initiator import Initiator
from agents.planner import Planner
from agents.actor import Actor
from agents.critic import Critic
from toolbox.toolbox import ToolManager
import json
# Shared configuration: one model name and one ToolManager instance are
# handed to every agent below.
model = 'qwen2.5-coder'
tool_manager = ToolManager()

# Instantiate the agents in the order initiator, planner, actor, critic;
# each receives the same tool manager and model name.
initiator, planner, actor, critic = (
    agent_cls(tool_manager, model=model)
    for agent_cls in (Initiator, Planner, Actor, Critic)
)

# Retry budgets never change between tasks, so define them once up front.
max_iterations = 3      # How many times to attempt the entire plan
max_attempts = 3        # How many times to attempt each subtask

# Main driver: repeatedly ask the initiator for a task, plan it, and run each
# subtask through an actor/critic retry loop. When a subtask exhausts its
# attempts the planner is asked for a revised plan, which is then retried from
# the start (up to max_iterations passes). The outcome is always reported back
# to the initiator via conclude(). This loop has no exit condition of its own.
while True:
    task_info = initiator.generate_task()
    plan = planner.create_plan(task_info)

    # clean_artifacts: subtask -> {'output', 'critic_report'} for the accepted
    #                  attempt (empty dict while the subtask is unsolved).
    # full_artifacts:  subtask -> list with one record per attempt.
    clean_artifacts = {}
    full_artifacts = {}
    is_finished = False

    for iteration in range(max_iterations):
        completed_all_subtasks = True  # Assume success until a subtask fails

        for subtask in plan:
            subtask_key = subtask['subtask']
            # Reset this subtask's artifacts at the start of every plan pass
            clean_artifacts[subtask_key] = {}
            full_artifacts[subtask_key] = []

            critic_comment = None
            subtask_done = False

            for _ in range(max_attempts):
                actor_output = actor.perform_subtask(subtask, clean_artifacts, critic_comment)
                critic_output = critic.evaluate(subtask, actor_output)

                # Read the critic's verdict once, tolerating missing keys, so
                # that every use below agrees on the same values.
                is_correct = critic_output.get("is_correct", False)
                report = critic_output.get("report")

                # Keep a record of every attempt in full_artifacts
                full_artifacts[subtask_key].append({
                    'completed': is_correct,
                    'output': actor_output['output'],
                    'critic_report': report,
                    'chosen_tool': actor_output['chosen_tool'],
                    'created_tool': actor_output['created_tool'],
                })

                if is_correct:
                    print(f"Task {subtask_key} completed. Critic Report: {report}")
                    # Store a "clean" artifact for this subtask
                    clean_artifacts[subtask_key] = {
                        'output': actor_output['output'],
                        'critic_report': report,
                    }
                    subtask_done = True
                    break  # Subtask is done, move on to the next subtask

                print(f"Task {subtask_key} failed. Critic Report: {report}")
                # Feed the critic's report into the actor's next attempt
                critic_comment = report

            if not subtask_done:
                print(f"Task {subtask_key} is not finished after {max_attempts} attempts.")
                completed_all_subtasks = False
                # Only re-plan when another pass remains. Re-planning after the
                # final pass would cost a planner call for a plan that is never
                # run, and conclude() would receive a plan that does not match
                # the recorded artifacts.
                if iteration < max_iterations - 1:
                    plan = planner.create_plan(task_info, artifacts=full_artifacts, previous_plan=plan)
                    print(f'New plan:\n{plan}')
                break  # Abandon this pass; retry with the updated plan

        if completed_all_subtasks:
            # No subtask failed during this pass, so the plan is complete
            is_finished = True
            break

    # After trying up to max_iterations times:
    if is_finished:
        print("All subtasks completed successfully!")
    else:
        print(f"Could not complete plan after {max_iterations} iterations.")

    initiator.conclude(succeeded=is_finished, task_info=task_info, plan=plan, artifacts=full_artifacts)


Tool 'open_email_url' has been created at ./generated_tools/open_email_url.py.
open_email_url deleted
Task Identify the email URL failed. Critic Report: The chosen tool and approach do not correctly solve the subtask. The output is 'mailto:', which is incomplete and lacks an email address, making it unusable for accessing an actual email. The tool's purpose is to generate a mailto URL based on an email address, but the implementation fails to incorporate the required email address argument.
Tool 'generate_mailto_url' has been created at ./generated_tools/generate_mailto_url.py.
generate_mailto_url deleted
Task Identify the email URL failed. Critic Report: The actor used the correct tool to generate a mailto URL for accessing an email. The tool is well-defined with clear documentation of its purpose and parameters. However, the success criteria requires identifying a URL to open in the default web browser, not just generating a URL that could be clicked manually. The output 'mailto:exam