# Developing LLM agents

In [None]:
%%capture
!pip install -q condacolab
import condacolab
condacolab.install() #get conda for colab

!git clone https://github.com/andrea-gariboldi/agents_seminar #clone seminar repository

import sys
!{sys.executable} -m pip install -q pydantic-ai==1.7.0 pandas==2.3.1 scikit-learn==1.7.1 #install necessary packages

sys.path.append('/content/agents_seminar') #add repo to path for main script

!conda env create -f /content/agents_seminar/env.yaml #create conda env for the agent

!curl -fsSL https://ollama.com/install.sh | sh #install Ollama

!nohup ollama serve > ollama.log 2>&1 &
!sleep 5 #start Ollama and make it listen

!!ollama pull gpt-oss:20b #pull llm model

os.chdir('/content/agents_seminar') #set current working dir inside the repository

In [4]:
from pydantic_ai import Agent, ModelRetry
import os

from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.ollama import OllamaProvider

from utils.agent_utils import run_agent
from utils.workspace_utils import cleanup_workspace
from eval.evaluate_result import evaluate_clustering
from utils.agent_output import AgentOutput

# Directory under the current working directory that is handed to
# cleanup_workspace and quoted to the agent in the user prompt.
workspace_dir = os.getcwd() + "/agents_workspace/"

# Start from a clean slate: delete files produced by previous runs.
cleanup_workspace(workspace_dir=workspace_dir)

# gpt-oss:20b, reached through the Ollama provider on localhost:11434.
ollama_model = OpenAIChatModel(
    provider=OllamaProvider(base_url='http://localhost:11434/v1'),
    model_name='gpt-oss:20b',
)

# The agent's final answer is typed as AgentOutput.
# NOTE(review): retries=5 is presumably the per-tool / output-validation retry
# budget of pydantic-ai -- confirm against the library docs.
agent = Agent(
    model=ollama_model,
    output_type=AgentOutput,
    retries=5,
    system_prompt="You are an expert agent with scripting skills. Use the tools available to answer the user's requests.",
)

@agent.tool_plain
def list_files_in_directory(directory_path: str) -> str:
    """List the names of all entries (files and sub-directories) in a directory.

    Args:
        directory_path: Path of the directory to list.

    Returns:
        The entry names in alphabetical order, one per line.

    Raises:
        ModelRetry: If the directory cannot be listed (missing, not a
            directory, permission denied), so the model can correct the path.
    """
    try:
        entries = os.listdir(directory_path)
    except OSError as err:
        # The path is chosen by the model; report the problem back to it
        # instead of letting a raw OSError abort the whole agent run.
        raise ModelRetry(f"Could not list directory {directory_path}: {err}") from err
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    return "\n".join(sorted(entries))

@agent.tool_plain
def read_dataset(dataset_path: str) -> str:
    """Read the first 10 rows of a CSV dataset and return them as a string.

    Args:
        dataset_path: Path of the CSV file to preview.

    Returns:
        The first 10 rows (fewer if the file is shorter), rendered as text.

    Raises:
        ModelRetry: If the file cannot be opened or parsed as CSV, so the
            model can correct the path.
    """
    import pandas as pd

    try:
        preview = pd.read_csv(dataset_path, nrows=10)
    except (OSError, ValueError) as err:
        # OSError covers missing/unreadable files; ValueError is the base class
        # of pandas' ParserError/EmptyDataError and of UnicodeDecodeError.
        raise ModelRetry(f"Could not read dataset {dataset_path}: {err}") from err
    return preview.to_string()

@agent.tool_plain
def write_python_file(code: str, file_path: str) -> str:
    """Write content to a Python file at the specified (full) path.

    An existing file at that path is overwritten.

    Args:
        code: Full source code to store in the file.
        file_path: Full path of the file to create.

    Returns:
        A confirmation message containing the path that was written.

    Raises:
        ModelRetry: If the file cannot be written (e.g. the parent directory
            does not exist), so the model can correct the path.
    """
    # NOTE(review): file_path is chosen by the model and is not restricted to
    # the workspace directory -- consider validating it before writing.
    try:
        # Explicit UTF-8 so the result does not depend on the platform's locale.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(code)
    except OSError as err:
        raise ModelRetry(f"Could not write file {file_path}: {err}") from err
    return f"File written to {file_path}"

@agent.output_validator  # registered as the agent's output validator
async def validate_output(output: AgentOutput) -> AgentOutput:
    """Accept the agent's output only if its script path points to an existing file.

    Returns the output unchanged when ``output.script_path`` is a regular file;
    otherwise raises ``ModelRetry`` with a message naming the missing path.
    """
    if os.path.isfile(output.script_path):
        return output
    raise ModelRetry(f"The script path {output.script_path} does not exist.")

# Task description sent to the agent. Built from adjacent string literals so
# that source-code indentation does not leak into the prompt as runs of spaces.
# Both paths are derived from workspace_dir so they cannot drift apart.
user_prompt = (
    f"Your final deliverable is to cluster a dataset. The original dataset is in the directory: {workspace_dir}data/. "
    "Take decisions on how to cluster based on the data provided.\n"
    "You should provide a python script that takes two command line arguments: "
    "--input for the input dataset path and --output for the output submission file path.\n"
    "The script should read the dataset from the input path, perform clustering, "
    "and save a submission file that contains the original columns, "
    "plus a new column 'cluster_id' indicating the cluster assignment for each row.\n"
    f"Save the script as cluster.py in the current working directory ({workspace_dir}).\n"
)

async def main():
    agent_output = await run_agent( #this runs the agent loop
        agent = agent,
        user_prompt=user_prompt,
        max_steps=10
    )

    evaluate_clustering(script_path=agent_output.script_path) #fixed programmatic evaluation after agent run

await main()

[95m[SYSTEM PROMPT] You are an expert agent with scripting skills. Use the tools available to answer the user's
requests.[0m
[95m[USER PROMPT] Your final deliverable is to cluster a dataset. The original dataset is in the directory:
/home/agari01/agents_seminar/agents_workspace/data/. Take decisions on how to cluster based on the data provided.
You should provide a python script that takes two commnad line arguments: --input for the input dataset path and
--output for the output submission file path.             The script should read the dataset from the input path,
perform clustering, and save a submission file that contains the original columns,             plus a new column
'cluster_id' indicating the cluster assignment for each row.             Save the script as cluster.py in the current
working directory (/home/agari01/agents_seminar/agents_workspace/).             [0m
[92m💭 [Thinking][0m
[92mWe need to inspect dataset. Let's list files.[0m
[38;5;208mðŸ”§ [Calling: l