# Demo for launching an experiment set with tooling

Results, setup, and analysis notes: https://pad.numerique.gouv.fr/IktyaLVESTWyadnL0Q-0sw#


In [1]:
import os
import sys
import time

import dotenv
from IPython.display import HTML
import numpy as np
import pandas as pd
import requests

# Notebook configuration: load secrets from the environment and define the API
# endpoints used by the cells below.

# Read the API keys from the .env file located in the parent directory.
dotenv.load_dotenv("../.env")

# Make the parent directory importable (presumably where the `eg1` package
# lives -- confirm). Guarded so that re-running this cell does not append a
# duplicate entry to sys.path each time.
if ".." not in sys.path:
    sys.path.append("..")
from eg1.utils import log_and_raise_for_status

# EG1 API endpoint; uncomment the localhost line to target a local instance.
#EG1_API_URL = "http://localhost:8000/v1"
EG1_API_URL = "https://eg1.dev.etalab.gouv.fr/v1"
EG1_API_KEY = os.getenv("EG1_API_KEY")
ALBERT_API_URL = "https://albert.api.etalab.gouv.fr/v1"
ALBERT_API_KEY = os.getenv("ALBERT_API_KEY")
OPENAI_URL = "https://api.openai.com/v1"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# os.getenv returns None for an unset variable, which would silently turn the
# Authorization header below into the literal string "Bearer None". Warn
# instead of raising: a key may legitimately be absent for some setups.
_missing_keys = [
    key_name
    for key_name in ("EG1_API_KEY", "ALBERT_API_KEY", "OPENAI_API_KEY")
    if not os.getenv(key_name)
]
if _missing_keys:
    print(
        f"Warning: environment variable(s) not set: {', '.join(_missing_keys)} "
        "(expected in ../.env or in the process environment).",
        file=sys.stderr,
    )

# Bearer-token header sent with every request to the EG1 API.
headers = {"Authorization": f"Bearer {EG1_API_KEY}"}

In [24]:
# Experiment design
# --
# Name and description attached to the experiment set.
expset_name = "mfs_tooling_v2"
expset_readme = "Evaluating tooling capabilities."

# System prompt placed in the shared model parameters below.
prompt_system = "Tu es un agent de l'état Français qui répond en langue française aux questions des usagers des services publiques de manière claire, concrète et concise."

# Metrics requested for the experiments.
metrics = [
    "judge_exactness",
    "judge_notator",
    "output_length",
    "generation_time",
    "nb_tool_calls",
    "contextual_relevancy",
    "faithfulness",
]

# Parameters shared by every experiment of the set.
common_params = {
    "dataset": "MFS_questions_v01",
    "model": {
        "sampling_params": {"temperature": 0.2},
        "prompt_system": prompt_system,
    },
    "metrics": metrics,
    "judge_model": "gpt-4o",
}

# Grid axis: one entry per model name, each pointing at the Albert API
# endpoint with its key.
albert_model_names = (
    "meta-llama/Llama-3.1-8B-Instruct",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
)
grid_params = {
    "model": [
        {"name": model_name, "base_url": ALBERT_API_URL, "api_key": ALBERT_API_KEY}
        for model_name in albert_model_names
    ],
}


# Add tool-enabled variants of every model
# --
# Sentence appended to the system prompt of the tool-enabled variants.
prompt_suffixe = " Utilise la recherche dans les contextes du service publique systématiquement."

# Tool configurations to combine with each base model (v0 is disabled).
tools = [
    #{"_tools_": ["search_albert_collections_v0"]},
    {"_tools_": ["search_albert_collections_v1"]},  # tool_choice=required is not supported by albert-api at the moment
    {"_tools_": ["search_albert_collections_v2"]},
]

# Snapshot the models present before extending, so the variants appended
# below are not themselves combined with the tools again.
base_models = list(grid_params["model"])
tooled_prompt_system = prompt_system + prompt_suffixe

# For each base model, in order, append one copy per tool configuration that
# carries the tool settings and the extended system prompt.
grid_params["model"].extend(
    {**base_model, "extra_params": tool_config, "prompt_system": tooled_prompt_system}
    for base_model in base_models
    for tool_config in tools
)

# Launching the experiment set
# --
# Request payload: the set's name and readme, plus a "cv" section carrying the
# shared parameters, the parameter grid, and a repeat count of 4.
expset = {
    "name": expset_name,
    "readme": expset_readme,
    "cv": {"common_params": common_params, "grid_params": grid_params, "repeat": 4},
}

# Forget any id left in the kernel by a previous run of this cell, so that a
# failed launch cannot make later cells silently operate on an old set.
expset_id = None

# (connect, read) timeouts in seconds: without a timeout, requests waits
# indefinitely on an unresponsive server.
response = requests.post(
    f"{EG1_API_URL}/experiment_set",
    json=expset,
    headers=headers,
    timeout=(10, 120),
)

# Error responses are not guaranteed to be JSON (e.g. a gateway error page);
# fall back to the status code and raw body rather than raising here.
try:
    resp = response.json()
except ValueError:
    resp = {"status_code": response.status_code, "detail": response.text}

# A response containing an "id" is treated as a successful creation; anything
# else is printed as-is for inspection.
if isinstance(resp, dict) and "id" in resp:
    expset_id = resp["id"]
    print(f'Created expset: {resp["name"]} ({resp["id"]})')
else:
    print(resp)

Created expset: mfs_tooling_v2 (40)
