## Run just a single tool, not the whole agent

In [None]:
# Run Code Writer & Executor
# Exercises the code-writing tools directly (EDA code -> run -> task code -> run -> short answer)
# on a hand-written task and resource list, without going through the whole agent.
from generalist.tools.summarisers import construct_short_answer
from generalist.tools.code import write_python_eda, run_code, write_python_task

# The task description. Usually would be a specific class, but either way ends up being a string
activity = "Calculate the total time in hours it would take Eliud Kipchoge to run a distance of approximately 356,870 km using his marathon pace of roughly 2.0167 hours per 42.2 kilometers"
# Resource list written out as a JSON-formatted string: a single entry with the keys
# "provided_by", "content", "link" and "metadata".
resources = """
[
  {
    "provided_by": "user",
    "content": "Wikipedia page for the Moon with minimum perigee value",
    "link": "https://en.wikipedia.org/wiki/Moon",
    "metadata": {}
  }
]
"""
# Step 1: generate EDA code from the resources and run it.
# NOTE(review): presumably write_python_* return Python source that run_code executes — confirm
# against generalist.tools.code.
eda_code = write_python_eda(resources)
eda_result = run_code(eda_code)
# Step 2: generate the task code from the task, the EDA output and the resources, then run it.
task_code = write_python_task(task=activity, eda_results=eda_result, resources=resources)
result = run_code(task_code)
# Step 3: build a short answer from the task and the run result; kept as a one-element list.
short_answers = [construct_short_answer(activity, result)]

In [1]:
from generalist.tools.data_model import ContentResource
import os
import logging
from dotenv import load_dotenv

import mlflow
from huggingface_hub import snapshot_download
from datasets import load_dataset

# Root logger at INFO, then pull environment variables from the .env file.
logging.getLogger().setLevel(logging.INFO)
load_dotenv()

# Local folder holding the GAIA benchmark; local_files_only=True is passed, so the
# snapshot is resolved from files already on disk.
gaia_path = os.getenv("HUGGING_FACE_GAIA_FOLDER_PATH")
data_dir = snapshot_download(
    repo_id="gaia-benchmark/GAIA",
    repo_type="dataset",
    local_dir=gaia_path,
    local_files_only=True,
)

# Validation split of the "2023_level1" configuration.
dataset = load_dataset(data_dir, "2023_level1", split="validation")

# Sample fields that get printed for every evaluated task.
gaia_keys = [
    "task_id",
    "Question",
    "Level",
    "Final answer",
    "file_name",
    "file_path",
    "Annotator Metadata",
]

# Task ids of individual GAIA samples, named after what the question is about.
just_running_python_task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
calc_sales_xlsx_task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733"
sosa_many_studio_albums_task_id = "8e867cd7-cff9-4e6c-867a-ff5ddc2550be"
running_to_the_moon_task_id = "e1fc63a2-da7a-432f-be78-7c4a95598703"
# Defined for convenience but not part of evaluation_tasks below.
dr_who_season_9_eps_11_location_task_id = "4b6bb5f7-f634-410e-815d-e673ab7f8632"

# Tasks to evaluate, in the order they are processed.
evaluation_tasks = [
    just_running_python_task_id,
    calc_sales_xlsx_task_id,
    sosa_many_studio_albums_task_id,
    running_to_the_moon_task_id,
]

# Initialised empty here; nothing in this cell appends to it.
results = []
# Index the split by task id so each evaluation task is a direct lookup.
dataset_questions = {sample["task_id"]: sample for sample in dataset}
for sample_task_id in evaluation_tasks:
    sample = dataset_questions[sample_task_id]
    # Echo the GAIA fields of the selected sample for inspection.
    for k in gaia_keys:
        print(k, "=", sample[k])

    mlflow.langchain.autolog()                                                 # this is needed to register traces within the experiment
    # Experiment name is the task id with dashes turned into underscores. Written without
    # nesting double quotes inside the f-string, which is a SyntaxError before Python 3.12.
    experiment_name = f"gaia_{sample['task_id'].replace('-', '_')}"
    # NOTE(review): re-applied on every iteration; presumably guards against the log level
    # being changed after autolog() — confirm whether this is still needed.
    logging.getLogger().setLevel(logging.INFO)

    question = sample["Question"]
    # Resources handed over together with the question; only populated when the sample has a file.
    resources = []
    if sample["file_path"]:
        resource = ContentResource(
            provided_by="user",
            content="file provided with the main task",
            # file_path is relative to the GAIA folder. Reuse the already-resolved folder and
            # fall back to the snapshot directory when the environment variable is unset
            # (os.path.join(None, ...) would raise TypeError).
            link=os.path.join(gaia_path or data_dir, sample["file_path"]),
            metadata={"note":"the file is already in the list of available resources"}
        )
        print(resource.link)
        resources.append(resource)


Returning existing local_dir `/Users/maksim.rostov/pdev/freelectron/free-generalist/evaluation/gaia` as remote repo cannot be accessed in `snapshot_download` (None).


task_id = f918266a-b3e0-4914-865d-4faa564f1aef
Question = What is the final numeric output from the attached Python code?
Level = 1
Final answer = 0
file_name = f918266a-b3e0-4914-865d-4faa564f1aef.py
file_path = 2023/validation/f918266a-b3e0-4914-865d-4faa564f1aef.py
Annotator Metadata = {'Steps': '1. Run the attached Python code', 'Number of steps': '1', 'How long did this take?': '30 seconds', 'Tools': '1. Python', 'Number of tools': '1'}
/Users/maksim.rostov/pdev/freelectron/free-generalist/evaluation/gaia/2023/validation/f918266a-b3e0-4914-865d-4faa564f1aef.py
task_id = 7bd855d8-463d-4ed5-93ca-5fe35145f733
Question = The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.
Level = 1
Final answer = 89706.00
file_name = 7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx
file_path = 2023/validation/7bd855d8-463d-4ed5-93ca-5fe35145f733