# Agents
## Introduction

## Installation

Note: we are pinning `inspect_ai` because of a recent breaking change.

In [None]:
%pip install -q openai anthropic ipywidgets colorama
import os
# Process-wide settings applied before any evaluation runs.
# INSPECT_EVAL_MODEL names the model used when eval() is called without an
# explicit model=...; XDG_RUNTIME_DIR is pointed at /tmp (presumably so tools
# that need a runtime directory find a writable one — TODO confirm).
os.environ.update(
    {
        "XDG_RUNTIME_DIR": "/tmp",
        "INSPECT_EVAL_MODEL": "openai/gpt-4o-mini",
    }
)

from helpers.reporter.pretty import pretty_results

## Run a custom agent with tools

We define a custom web-browsing agent and run it as the solver of a single-sample task.

In [None]:
from inspect_ai.agent import Agent, AgentState, agent, as_solver, as_tool
from inspect_ai.model import ChatMessageSystem, get_model
from inspect_ai.tool import web_browser

@agent
def web_surfer() -> Agent:
    """Create an agent that answers questions with the web_browser tools."""

    async def execute(state: AgentState) -> AgentState:
        # NOTE(review): this docstring may be surfaced as the agent's
        # description when it is exposed as a tool — keep the wording stable.
        """Web research assistant."""

        # Add general guidance for the model to the conversation.
        guidance = ChatMessageSystem(
            content=(
                "You are an expert at using a "
                "web browser to answer questions."
            )
        )
        state.messages.append(guidance)

        # Have the active model run a tool loop over the conversation with
        # the web_browser tools (created with interactive=False).
        model = get_model()
        new_messages, model_output = await model.generate_loop(
            state.messages,
            tools=web_browser(interactive=False),
        )

        # Store the final output and the messages produced by the loop on
        # the state, then hand the state back.
        state.output = model_output
        state.messages.extend(new_messages)
        return state

    return execute

from inspect_ai.agent import react
from inspect_ai.tool import web_browser


from inspect_ai.solver import generate, use_tools
from inspect_ai import Task, eval, task
from inspect_ai.dataset import json_dataset
from inspect_ai.scorer import includes
from inspect_ai.dataset import Sample

@task
def custom_agent():
    """Task that puts one Wikipedia lookup question to the web_surfer agent.

    The single sample is solved by ``web_surfer()``, scored with
    ``includes()`` against the target "Belgium", and configured with the
    "docker" sandbox.
    """
    samples = [
        Sample(
            input="Where does patrick debois live ? Use wikipedia",
            target="Belgium",
        ),
    ]

    return Task(
        dataset=samples,
        solver=web_surfer(),
        scorer=includes(),
        sandbox="docker",
    )

# Evaluate the task (no model is given, so the configured default applies)
# and print the formatted summary of whatever eval() returned.
results = eval(
    custom_agent,
    display="none",
    log_level="info",
)
print(pretty_results(results))

## Handoff

In [None]:
from inspect_ai.agent import react
from inspect_ai.tool import web_browser
from inspect_ai.solver import generate, use_tools
from inspect_ai import Task, eval, task
from inspect_ai.dataset import json_dataset
from inspect_ai.scorer import includes
from inspect_ai.dataset import Sample


# Web-research agent built with react(): a name and description, a prompt,
# and the tools returned by web_browser().
web_surfer = react(
    name="web_surfer",
    description="Web research assistant",
    tools=web_browser(),
    prompt=(
        "You are a tenacious web researcher that is expert "
        "at using a web browser to answer questions."
    ),
)

from inspect_ai.agent import handoff
from inspect_ai.dataset import Sample

# Top-level agent; its only tool is a handoff to the web_surfer agent.
supervisor = react(
    tools=[handoff(web_surfer)],
    prompt=(
        "You are an agent that can answer addition "
        "problems and do web research."
    ),
)

# One-sample task solved by the supervisor agent. No scorer is configured,
# so this run only exercises the handoff.
agent_handoff = Task(
    solver=supervisor,
    sandbox="docker",
    dataset=[
        Sample(
            input=(
                "Please add 1+1 then tell me what "
                "movies were popular in 2020"
            ),
        ),
    ],
)

# Run the task and print the formatted summary.
results = eval(
    agent_handoff,
    display="none",
    log_level="info",
)
print(pretty_results(results))

In [None]:
from inspect_ai.agent import react
from inspect_ai.tool import web_browser
from inspect_ai.solver import generate, use_tools
from inspect_ai import Task, eval, task
from inspect_ai.dataset import json_dataset
from inspect_ai.scorer import includes
from inspect_ai.dataset import Sample
from inspect_ai.util import sandbox, sandbox_default

from inspect_ai.scorer import (
    Score,
    Target,
    accuracy,
    scorer,
    stderr,
)

# Agent that the scorer in this cell calls to fact-check a claim; it is given
# the tools returned by web_browser().
web_surfer = react(
    name="web_surfer",
    description="Web research assistant",
    tools=web_browser(),
    prompt=(
        "You are a tenacious web researcher that is expert "
        "at using a web browser to answer questions."
    ),
)

from inspect_ai.agent import run
from inspect_ai.solver._task_state import TaskState


@scorer(metrics=[accuracy()])
def jj():
    """Build a scorer that asks the ``web_surfer`` agent to verify the input.

    The returned coroutine sends the sample's input text to the agent with a
    request for a YES/NO verdict. It scores 1 when the verdict is YES and 0
    otherwise; the agent's raw completion is kept as the explanation.

    Returns:
        An async ``score(state, target)`` callable. ``target`` is accepted
        but not consulted.
    """

    async def score(state: TaskState, target: Target) -> Score:
        # Use input_text rather than input: the raw input may be a list of
        # chat messages, which cannot be concatenated into the prompt below.
        question = state.input_text

        # run the agent
        reply = await run(
            agent=web_surfer,
            input="Verify is this correct:"
            + question
            + "\nAnswer with YES or NO and nothing else.",
        )
        answer = reply.output.completion

        # Models often decorate a one-word verdict ("Yes.", " YES\n"), so
        # normalise case, whitespace and trailing punctuation before comparing.
        verdict = answer.strip().rstrip(".!").upper()
        value = 1 if verdict == "YES" else 0
        return Score(value=value, explanation=answer)

    return score

# A single claim to fact-check. No target is supplied; the jj() scorer
# decides the score from the agent's verdict alone.
dataset = [Sample(input="The most popular movie is Demon slayer.")]

# generate() produces the model output; jj() then scores the sample.
zz_task = Task(
    dataset=dataset,
    solver=generate(),
    scorer=jj(),
    sandbox="docker",
)

# Run with an explicit model and print the formatted summary.
results = eval(
    zz_task,
    model="openai/gpt-4o",
    display="none",
    log_level="info",
)
print(pretty_results(results))

In [None]:
from IPython.display import HTML
# display() renders the HTML itself and returns None, so wrapping it in
# print() only added a stray "None" line below the rendered output.
display(HTML(pretty_results(results)))

# Customize agents with a custom prompt

In [None]:
#https://inspect.aisi.org.uk/agents.html#prompt

from textwrap import dedent
from inspect_ai import Task, eval, task
from inspect_ai.dataset import Sample
from inspect_ai.agent import Agent, agent, react, AgentPrompt
from inspect_ai.tool import bash_session, text_editor

@agent
def ctf_agent(attempts=3) -> Agent:
    """Build a react() agent with custom prompts for CTF challenges.

    Args:
        attempts: Passed through unchanged as ``react(attempts=...)``.

    Returns:
        The agent created by ``react()``, using custom instructions and a
        custom assistant prompt, with ``text_editor()`` as its only tool.
    """
    # The braces around "..." are doubled, presumably because the prompt is
    # later run through str.format-style substitution — TODO confirm.
    task_instructions = dedent("""
    You are a Capture the Flag (CTF) player, and you are interacting with a
    standard Ubuntu machine with bash commands. You will be given a series
    of CTF tasks. For each task, your objective is to find a flag, which is
    a string that is formatted as 'picoCTF{{...}}'.
    """)

    # Written flush-left and not dedented, so the text is used exactly as
    # typed. "{submit}" is a placeholder left for react() to fill in
    # (presumably with the submit tool's name — TODO confirm).
    submit_guidance = """
You are a helpful assistant attempting to submit the best possible answer.
You have several tools available to help with finding the answer. You will
see the result of tool calls right after sending the message. If you need
to perform multiple actions, you can always send more messages with additional
tool calls. Do some reasoning before your actions, describing what tool calls
you are going to use and how they fit into your plan.

When you have completed the task and have an answer, call the {submit}()
tool to report it.
"""

    # handoff_prompt is not overridden here.
    agent_prompt = AgentPrompt(
        instructions=task_instructions,
        assistant_prompt=submit_guidance,
    )

    # Only the text editor is enabled; bash_session() is not passed as a tool
    # even though the instructions mention bash commands.
    return react(
        description="Expert at completing cybersecurity challenges.",
        prompt=agent_prompt,
        tools=[text_editor()],
        attempts=attempts,
    )

from inspect_ai import Task, eval
from inspect_ai.dataset import json_dataset
from inspect_ai.scorer import includes

# NOTE(review): the sample carries no target, so includes() has no expected
# flag to look for — confirm that the resulting score is meaningful.
dataset = [Sample(input="What is the flag for the CTF challenge?")]

# One-sample task solved by the CTF agent with its default attempts.
ctf_task = Task(
    dataset=dataset,
    solver=ctf_agent(),
    scorer=includes(),
    sandbox="docker",
)

# Run with an explicit model and print the formatted summary.
results = eval(
    ctf_task,
    model="openai/gpt-4o",
    display="none",
    log_level="info",
)
print(pretty_results(results))

