## Coding Agents - Experimental

- Note: we are pinning the `inspect_ai` version due to a recent breaking change
- Coding agents generate code in the sandbox in /my-repo
- This is mapped to the local repo `my-repo`, so you can inspect the generated files from your host machine
- This mapping is defined in the `compose.yaml` file
- The coding agents are installed in the sandbox container ahead of time.
- See `container/Dockerfile.mcp`

## Installation

In [None]:
%pip install -q openai anthropic ipywidgets colorama
import os
# Session-wide environment settings, applied in a single call: the runtime
# directory and the model identifier stored under INSPECT_EVAL_MODEL.
os.environ.update(
    {
        "XDG_RUNTIME_DIR": "/tmp",
        "INSPECT_EVAL_MODEL": "openai/gpt-4o-mini",
    }
)

from helpers.reporter.pretty import pretty_results

## Run a coding agent

We add all the models we want to test against.

In [None]:
from inspect_ai import Task, task, eval
from inspect_ai.dataset import Sample
from inspect_ai.solver import system_message, generate
from inspect_ai.scorer import includes, model_graded_fact
from textwrap import dedent

from helpers.solver.aider_coder import aider_coder
from helpers.solver.codex_coder import codex_coder
from helpers.solver.claude_coder import claude_coder

from helpers.scorer.shell import command_results
from helpers.solver.git_repo import repo_clone
from helpers.solver.shell import script_exec

@task
def coding_agent_run() -> Task:
    """Build the task that asks a coding agent to write ``hello-world.js``.

    The single sample asks the agent to create ``hello-world.js`` plus tests
    that can be run with ``npm run test``. The solver chain first recreates an
    empty working directory and then runs the Codex coding agent in it. The
    scorers run ``node hello-world.js`` and ``npm run test`` from that same
    directory.

    Returns:
        The configured ``Task``, set up to run in the ``docker`` sandbox.
    """
    # The backslash after the opening quotes puts the first prompt line on the
    # same indentation as the following ones. Without it ``dedent`` finds no
    # common margin and the source indentation leaks into the prompt.
    prompt = dedent("""\
        Generate a javascript file name hello-world.js that prints out hello. 
        Now also add tests to the code , so npm run test work. 
        So if you need to install any packages, do that too.""")

    dataset = [
        Sample(
            input=prompt,
            target="The generated code should have the filename hello-world.js",
        )
    ]

    repo_dir = "/my-repo/workshop"
    # Only needed by the optional repo_clone step below.
    repo_url = "https://github.com/jedi4ever/from-prompt-to-mcp.git"

    return Task(
        dataset=dataset,
        solver=[
            script_exec(command=f"rm -rf {repo_dir}"),  # remove the directory if it exists
            script_exec(command=f"mkdir -p {repo_dir}"),  # create the directory if it doesn't exist
            # Optional steps / alternative agents; uncomment to try them.
            # repo_clone(repo_url=repo_url, repo_dir=repo_dir),
            # aider_coder(repo_dir=repo_dir),
            codex_coder(repo_dir=repo_dir),  # let the OpenAI Codex agent write the code
            # claude_coder(repo_dir=repo_dir)
        ],
        scorer=[
            # model_graded_fact(),
            # Run the generated script; cmd_output is presumably the text the
            # scorer expects in the command's output (confirm in helpers.scorer.shell).
            command_results(cwd=repo_dir, cmd="node", cmd_args=["hello-world.js"], cmd_output="hello"),
            # Run the test suite the agent was asked to add.
            command_results(cwd=repo_dir, cmd="npm", cmd_args=["run", "test"]),
        ],
        sandbox="docker",  # runs in a container
    )

# Run the task. `eval` here is the one imported from inspect_ai, not the
# Python builtin. The returned results are kept in `results`.
results = eval(
    coding_agent_run,
    log_level="info",
    display="conversation",
)

# Format the results with the project's reporter helper and print them.
print(pretty_results(results))