## Imports

In [1]:
import os

from pydantic import BaseModel, Field

from multi_agent_llm import AIOT, OpenAILLM

# Tool Runner

In [2]:
import subprocess
import sys
import tempfile
import shutil


def tool_runner(code: str, pip_dependencies: list, timeout: "float | None" = None) -> str:
    """Run ``code`` in a throwaway virtual environment and return its output.

    A fresh venv is created in a temporary directory, ``pip_dependencies`` are
    installed into it, and ``code`` is executed with ``python -c``. The
    temporary directory is always removed afterwards.

    Args:
        code: Python source to execute.
        pip_dependencies: Package specifiers to ``pip install`` before running
            the code. May be empty or ``None``, in which case pip is not run.
        timeout: Optional limit, in seconds, for the script itself (not for
            environment setup). ``None`` (the default) means no limit.

    Returns:
        The script's stdout and stderr joined by a blank line and stripped, or
        a string starting with ``"ERROR: "`` if the environment could not be
        prepared, a dependency failed to install, or the script timed out.
        A script that exits non-zero is not treated as an error here; its
        stdout/stderr are returned as-is.
    """
    stdout = ""
    stderr = ""
    error = ""

    # Create a temporary directory for the virtual environment
    temp_dir = tempfile.mkdtemp()
    venv_dir = os.path.join(temp_dir, "venv")

    # venv places the interpreter under Scripts\ on Windows and bin/ elsewhere
    if os.name == "nt":
        venv_python = os.path.join(venv_dir, "Scripts", "python.exe")
    else:
        venv_python = os.path.join(venv_dir, "bin", "python")

    try:
        # Create the virtual environment; capture output so a failure can be
        # reported back to the caller instead of being lost
        subprocess.run(
            [sys.executable, "-m", "venv", venv_dir],
            check=True,
            capture_output=True,
            text=True,
        )

        # Install dependencies
        if pip_dependencies:
            # Upgrading pip is best-effort: a failure here must not abort the run
            subprocess.run(
                [venv_python, "-m", "pip", "install", "--upgrade", "--quiet", "pip"],
                capture_output=True,
                text=True,
            )
            # Unpacking accepts any sequence of specifiers, not only lists
            subprocess.run(
                [venv_python, "-m", "pip", "install", "--quiet", *pip_dependencies],
                check=True,
                capture_output=True,
                text=True,
            )

        # Run the script
        process = subprocess.run(
            [venv_python, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
        )
        stdout = process.stdout
        stderr = process.stderr

    except subprocess.TimeoutExpired as e:
        error = f"Timeout error: script exceeded the {e.timeout}-second limit"
    except subprocess.CalledProcessError as e:
        error = f"Subprocess error: {str(e)}"
        # Include the failing command's own output (e.g. pip's explanation),
        # otherwise the caller only sees the exit status
        details = (e.stderr or e.stdout or "").strip()
        if details:
            error += "\n" + details
    except Exception as e:
        error = f"General error: {str(e)}"
    finally:
        # Clean up the virtual environment
        shutil.rmtree(temp_dir, ignore_errors=True)

    if error:
        return "ERROR: " + error

    return "\n\n".join([stdout, stderr]).strip()

## Set OpenAI API Key

In [3]:
# Read the API key from a local key file (kept outside the notebook so the
# secret is never embedded in it) and expose it through the environment.
# The `with` block closes the file handle as soon as the key has been read.
with open('../openai_token.key') as key_file:
    os.environ['OPENAI_API_KEY'] = key_file.read().strip()

## Initialize an LLM client

In [4]:
# Model used by every experiment below; "gpt-4o-mini" is the alternative
# that can be swapped in here.
MODEL_NAME = "gpt-4o"

llm = OpenAILLM(model_name=MODEL_NAME)

## Define an output schema

In [5]:
class QueryAnswer(BaseModel):
    # Output schema handed to AIOT as `answer_schema` in the cells below.
    # Documented with comments rather than a docstring so the schema that
    # pydantic generates from this class is left exactly as it was.

    # Reasoning behind the answer.
    explanation: str = Field(
        description="Explanation of the answer",
    )
    # The answer itself.
    answer: str = Field(
        description="Final Answer",
    )

# Attempt #1 -- tool to get weather data

**current outcome**: The IDA (Inner Dialogue Agent) tries over and over to generate a working tool, but its requests to external APIs keep failing. The result is a loop that eventually has to be terminated manually (no hard exit is implemented yet).

**outcome with 4o**: Similar result; the tool generator eventually creates mock or fake data to avoid errors.

NOTE: bulb (💡) printouts are temporary for debugging

In [6]:
# Question for attempt #1 (weather data).
PROMPT = "Which city in California has had the most rainfall this year? (2024)"

# Fresh agent for this attempt, wired to the sandboxed tool runner.
aiot = AIOT(llm=llm, iterations=5, answer_schema=QueryAnswer, tool_runner=tool_runner)
result = aiot.run(PROMPT)

# Last expression of the cell: display the structured answer.
result

💡 Received tool request from the brain. {'name': 'RainfallDataFetcher', 'description': 'A tool that fetches the latest rainfall statistics for all cities in California for the year 2024. This tool should access recent data sources such as weather APIs or databases that maintain the latest weather records, including cumulative rainfall amounts for each city.', 'input': None}
💡💡💡 Current tool context: {}
💡 Tool Generated:
 import requests
import json

# Specify the endpoint and the parameters for the API request
API_ENDPOINT = "https://example-weather-api.com/rainfall"
PARAMS = {
    'region': 'California',
    'year': '2024'
}

# Function to fetch the latest rainfall data

def fetch_rainfall_data():
    try:
        # Making a GET request to the API
        response = requests.get(API_ENDPOINT, params=PARAMS)
        
        # Raise an exception if the request was unsuccessful
        response.raise_for_status()
        
        # Parse the JSON response from the API
        data = res

# Attempt #2 -- tool that does calculations

**current outcome**: The IDA prefers to solve and check solutions without initiating tool generation, despite rather explicit instructions to do so in the prompt below and in the system prompt written in `iteration_of_thought.py`.

**outcome with 4o**: Similar result: the IDA prefers to solve and check solutions without initiating tool generation. Repeated attempts do eventually show tool use; however, AIOT is occasionally seen to stall here. The tool correctly identifies the roots, but the LLMA (LLM Agent) does not clearly state them in the output.

In [8]:
# Question for attempt #2 (calculation). Adjacent string literals are
# concatenated, so the prompt is a single line of text.
PROMPT = (
    "Find and verify solutions for the roots of the function "
    "f(x) = 3x^5 - 7x^4 + 2x^3 - 5x^2 + 11sqrt(x) - 6/x + 2"
)

# Fresh agent for this attempt, wired to the sandboxed tool runner.
aiot = AIOT(llm=llm, iterations=5, answer_schema=QueryAnswer, tool_runner=tool_runner)
result = aiot.run(PROMPT)

# Last expression of the cell: display the structured answer.
result

💡 Received tool request from the brain. {'name': 'plot_function', 'description': 'A tool to plot the given function f(x) = 3*x**5 - 7*x**4 + 2*x**3 - 5*x**2 + 11*math.sqrt(x) - 6/x + 2 for x > 0, to visually analyze potential roots and get an understanding of its behavior over a domain with multiple points so that possible zero crossings can be identified.', 'input': {'function': '3*x**5 - 7*x**4 + 2*x**3 - 5*x**2 + 11*math.sqrt(x) - 6/x + 2', 'domain': {'start': 0.01, 'end': 10, 'num_points': 1000}}}
💡💡💡 Current tool context: {}
💡 Tool Generated:
 import numpy as np
import matplotlib.pyplot as plt
import math

# Define the function based on the input expression
function_expression = '3*x**5 - 7*x**4 + 2*x**3 - 5*x**2 + 11*math.sqrt(x) - 6/x + 2'

def eval_function(x):
    return 3*x**5 - 7*x**4 + 2*x**3 - 5*x**2 + 11*math.sqrt(x) - 6/x + 2

# Define the domain
start = 0.01
end = 10
num_points = 1000

# Generate the x values
x_values = np.linspace(start, end, num_points)

# Evaluate th

# Attempt #3 -- tool performs google searches

**current outcome**: Somewhat paradoxically, in iteration 2 the IDA tells the LLMA that direct access to Google Search is not possible. It then proceeds to answer the question without it, never requesting any tools.

**outcome with 4o**: Tool generator opts to generate mock data instead of a working tool, similar to outcomes seen in attempt #1 with 4o.

In [9]:
# Question for attempt #3 (web search). Adjacent string literals are
# concatenated, so the prompt is a single line of text.
PROMPT = (
    "How many of the top 5 google search results for 'things to do in mexico' "
    "are related to food?"
)

# Fresh agent for this attempt, wired to the sandboxed tool runner.
aiot = AIOT(llm=llm, iterations=5, answer_schema=QueryAnswer, tool_runner=tool_runner)
result = aiot.run(PROMPT)

# Last expression of the cell: display the structured answer.
result

💡 Received tool request from the brain. {'name': 'GoogleSearchSimulator', 'description': "Simulates a Google search for the term 'things to do in Mexico' and returns the titles and summaries of the top 5 search results. This can help determine how many results are related to food.", 'input': {'query': 'things to do in mexico', 'top_n': 5}}
💡💡💡 Current tool context: {}
💡 Tool Generated:
 from bs4 import BeautifulSoup
import requests


def google_search_simulator(query: str, top_n: int):
    search_url = f"https://www.google.com/search?q={query}&num={top_n}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(search_url, headers=headers)
    response.raise_for_status()  # Raise an error for bad responses

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    for g in soup.find_all('div', class_='tF2Cxc')[:top_n]:
        title = g