## Run just a single tool, not the whole agent

In [None]:
# Run Code Writer & Executer
from generalist.tools.summarisers import construct_short_answer
from generalist.tools.code import write_python_eda, run_code, write_python_task

# Usually would be a specific class, but either way ends up being a string
activity = "Calculate the total time in hours it would take Eliud Kipchoge to run a distance of approximately 356,870 km using his marathon pace of roughly 2.0167 hours per 42.2 kilometers"
resources = """
[
  {
    "provided_by": "user",
    "content": "Wikipedia page for the Moon with minimum perigee value",
    "link": "https://en.wikipedia.org/wiki/Moon",
    "metadata": {}
  }
]
"""
eda_code = write_python_eda(resources)
eda_result = run_code(eda_code)
task_code = write_python_task(task=activity, eda_results=eda_result, resources=resources)
result = run_code(task_code)
short_answers = [construct_short_answer(activity, result)]

In [None]:
from generalist.tools.data_model import ContentResource
import os
import logging
from dotenv import load_dotenv

import mlflow
from huggingface_hub import snapshot_download
from datasets import load_dataset

logging.getLogger().setLevel(logging.INFO)
load_dotenv()

gaia_path = os.environ.get("HUGGING_FACE_GAIA_FOLDER_PATH")
data_dir = snapshot_download(local_dir=gaia_path,  local_files_only=True, repo_id="gaia-benchmark/GAIA", repo_type="dataset")

dataset = load_dataset(data_dir, "2023_level1", split="validation")
gaia_keys = ['task_id', 'Question', 'Level', 'Final answer', 'file_name', 'file_path', 'Annotator Metadata']

sosa_many_studio_albums_task_id = "8e867cd7-cff9-4e6c-867a-ff5ddc2550be"
running_to_the_moon_task_id = "e1fc63a2-da7a-432f-be78-7c4a95598703"
dr_who_season_9_eps_11_location_task_id = "4b6bb5f7-f634-410e-815d-e673ab7f8632"
calc_sales_xlsx_task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733"
just_running_python_task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
evaluation_tasks = [
    just_running_python_task_id,
    calc_sales_xlsx_task_id,
    sosa_many_studio_albums_task_id,
    running_to_the_moon_task_id,
]

results = []
dataset_questions = { sample["task_id"]:sample for sample in dataset }
for sample_task_id in evaluation_tasks:
    sample = dataset_questions[sample_task_id]
    [ print(k, "=", sample[k]) for k in gaia_keys]

    mlflow.langchain.autolog()                                                 # this is needed to register traces within the experiment
    experiment_name = f"gaia_{"_".join(sample["task_id"].split("-"))}"
    logging.getLogger().setLevel(logging.INFO)

    question = sample["Question"]
    resources = []
    if sample["file_path"]:
        resource = ContentResource(
            provided_by="user",
            content="file provided with the main task",
            link=os.path.join(os.environ.get("HUGGING_FACE_GAIA_FOLDER_PATH"), sample["file_path"]),
            metadata={"note":"the file is already in the list of available resources"}
        )
        print(resource.link)
        resources.append(resource)


In [2]:
from generalist.tools.text_processing import parse_resource

parse_resource("hello, use youtube to search for video related to distributed transformer training")

2026-01-18 15:36:56,745 - generalist.tools.text_processing - parse_resource:78 - INFO - - parse_resource -- JSON to parse to determine resources: Out:
{
 "provided_by": "user_task",
 "content": "user requested to search for videos related to distributed transformer training on YouTube",
 "link": "https://www.youtube.com"
} 

Note: The link provided is the general URL of YouTube since no specific video or resource was given. If a direct link to the relevant content was expected, then further clarification from the user would be needed as none was provided in the task description.


JSONDecodeError: Expecting value: line 1 column 1 (char 0)