In [1]:
import os

import datasets
from dotenv import load_dotenv
from pprint import pp

# Populate the process environment from a local .env file (if one exists)
# before the Hugging Face token is looked up below.
load_dotenv()

# Loader keyword options, gathered in one place so they are easy to tune.
_gaia_load_options = {
    "cache_dir": "../data/gaia-benchmark",
    # The credential is read from the environment rather than hard-coded.
    "token": os.getenv("HF_API_TOKEN"),
    # NOTE(review): this flag permits the dataset repository's own loading
    # script to run locally - confirm the source is trusted.
    "trust_remote_code": True,
}
gaia_benchmark = datasets.load_dataset(
    "gaia-benchmark/GAIA", '2023_all', **_gaia_load_options
)

print(f'GAIA loaded successfully.\n{gaia_benchmark}\nPreview: ')
# Show the first record of each split, validation first.
for _split in ('validation', 'test'):
    pp(gaia_benchmark[_split][0])

GAIA loaded successfully.
DatasetDict({
    test: Dataset({
        features: ['task_id', 'Question', 'Level', 'Final answer', 'file_name', 'file_path', 'Annotator Metadata'],
        num_rows: 301
    })
    validation: Dataset({
        features: ['task_id', 'Question', 'Level', 'Final answer', 'file_name', 'file_path', 'Annotator Metadata'],
        num_rows: 165
    })
})
Preview: 
{'task_id': 'c61d22de-5f6c-4958-a7f6-5e9707bd3466',
 'Question': 'A paper about AI regulation that was originally submitted to '
             'arXiv.org in June 2022 shows a figure with three axes, where '
             'each axis has a label word at both ends. Which of these words is '
             'used to describe a type of society in a Physics and Society '
             'article submitted to arXiv.org on August 11, 2016?',
 'Level': '2',
 'Final answer': 'egalitarian',
 'file_name': '',
 'file_path': '',
 'Annotator Metadata': {'Steps': '1. Go to arxiv.org and navigate to the '
                       

In [None]:
import json
import time

from autono import get_openai_model

def _parse_tool_list(response):
    """Extract tool names from one raw model reply.

    The model does not stick to a single output shape: the replies seen so far
    include a fenced json code block holding a bare list, and an object of the
    form {"tools": [...]}. Both contain a single JSON array, so the text
    between the first '[' and the last ']' is decoded.

    Args:
        response: the raw text returned by the model.

    Returns:
        A list of the string items found in the array. Empty when the reply
        has no '[' ... ']' span or when that span is not valid JSON. Non-string
        items are dropped so the result can always be added to a set.
    """
    start = response.find('[')
    end = response.rfind(']')
    if start == -1 or end < start:
        return []
    try:
        parsed = json.loads(response[start:end + 1])
    except json.JSONDecodeError:
        return []
    return [item for item in parsed if isinstance(item, str)]


model = get_openai_model(temp=0.1, top_p=0.8)

# Query every question of both splits, validation first.
gaia_all = gaia_benchmark['validation'].to_list()
gaia_all.extend(gaia_benchmark['test'].to_list())

# Union of all tool names proposed by the model across all questions.
tools = set()

# Prompt template; the 'problem' key is filled in per question inside the loop.
prompt = {
    'objective': 'find out the ai-agent-tools needed to solve the <problem>',
    'outputs': 'a list of tools may be needed.',
    'output_format': 'a list in json format',
    'output_examples': ['web_surfer', 'bash']
}

try:
    for i, gaia_item in enumerate(gaia_all):
        prompt['problem'] = gaia_item['Question']
        response = model.invoke(json.dumps(prompt, ensure_ascii=False)).content
        print(f'Question {i}, Tools: {response}')
        _tools = _parse_tool_list(response)
        if not _tools:
            # One malformed reply must not abort the whole (long) run.
            print(f'Question {i}: no tool list could be parsed, skipping.')
        tools.update(_tools)
finally:
    # Written even if the loop is interrupted or a model call fails, so the
    # answers gathered so far are not lost. Sorted for a stable file content.
    with open(f'./tools-{time.time()}.json', 'w') as f:
        json.dump(sorted(tools), f)

Question 0, Tools: ```json
[
    "web_surfer",
    "arxiv_api",
    "text_analysis_tool",
    "data_extraction_tool",
    "document_search_tool"
]
```
Question 1, Tools: {
  "tools": [
    "web_surfer",
    "data_scraper",
    "text_analyzer",
    "database_query_tool"
  ]
}
Question 2, Tools: ```json
[
    "web_surfer",
    "python",
    "statistical_analysis_tool",
    "data_visualization_tool",
    "calculator"
]
```
Question 3, Tools: ```json
["code_analyzer", "text_editor", "unlambda_interpreter"]
```
Question 4, Tools: ```json
[
    "web_surfer",
    "calculator",
    "python_script",
    "data_extractor"
]
```
Question 5, Tools: ```json
[
    "data_extractor",
    "spreadsheet_analyzer",
    "text_parser",
    "query_tool"
]
```
Question 6, Tools: {
  "tools": [
    "web_surfer",
    "wikipedia_api",
    "data_extractor"
  ]
}
Question 7, Tools: ```json
[
    "web_surfer",
    "text_analyzer",
    "database_query_tool",
    "research_article_extractor"
]
```
Question 8, Tools: `