In [4]:
import os
import sys
import time

import dotenv
from IPython.display import HTML
import numpy as np
import pandas as pd
import requests

dotenv.load_dotenv("../.env")
sys.path.append("..")
from evalap.utils import log_and_raise_for_status

# --- Service endpoints -------------------------------------------------------
# EvalAP (EG1) API; the commented line is the localhost alternative.
# EG1_API_URL = "http://localhost:8000/v1"
EG1_API_URL = "https://evalap.etalab.gouv.fr/v1"
ALBERT_API_URL = "https://albert.api.etalab.gouv.fr/v1"
ALBERT_API_URL_DEV = "https://albert.api.dev.etalab.gouv.fr/v1"
MFS_API_URL = "https://franceservices.etalab.gouv.fr/api/v1"

# --- Credentials -------------------------------------------------------------
# Read from the environment (the import cell loads "../.env" beforehand).
# A key that is not set comes back as None.
EG1_API_KEY = os.environ.get("EVALAP_API_KEY")
ALBERT_API_KEY = os.environ.get("ALBERT_API_KEY")
ALBERT_API_KEY_DEV = os.environ.get("ALBERT_API_KEY_DEV")
MFS_API_KEY = os.environ.get("MFS_API_KEY")

# Bearer-token header sent with every request to the EvalAP API.
headers = {"Authorization": f"Bearer {EG1_API_KEY}"}

In [2]:
# Designing my experiments
# --
# Name and readme of the experiment set, and the metrics requested for each experiment.
expset_name = "albert-api-rag-mfs-v2"
expset_readme = "Evaluating hybrid search on MFS dataset."
metrics = ["judge_precision", "output_length", "generation_time"]

# Settings shared by every entry of the grid.
sampling_params = {"temperature": 0.2}
common_params = {
    "dataset": "MFS_questions_v01",
    "model": {"sampling_params": sampling_params},
    "metrics": metrics,
    "judge_model": "gpt-4.1",
}

# First variant: main Albert endpoint, semantic search over collections 784 and 785.
qdrant_model = {
    "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
    "aliased_name": "albert-large-semantic-qdrant",
    "extra_params": {
        "search": True,
        "search_args": {"method": "semantic", "collections": [784, 785], "k": 10},
    },
    "base_url": ALBERT_API_URL,
    "api_key": ALBERT_API_KEY,
}

# Remaining variants: dev Albert endpoint, collections 20 and 21; they differ only
# by alias and search method, so they are generated from (alias, method) pairs.
elastic_variants = [
    ("albert-large-semantic-elastic", "semantic"),
    ("albert-large-lexical-elastic", "lexical"),
    ("albert-large-hybrid-elastic", "hybrid"),
]
elastic_models = [
    {
        "name": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "aliased_name": alias,
        "extra_params": {
            "search": True,
            "search_args": {"method": method, "collections": [20, 21], "k": 10},
        },
        "base_url": ALBERT_API_URL_DEV,
        "api_key": ALBERT_API_KEY_DEV,
    }
    for alias, method in elastic_variants
]

grid_params = {"model": [qdrant_model, *elastic_models]}



# Launching the experiment set
# Payload for POST {EG1_API_URL}/experiment_set: the set's name and readme, plus a
# "cv" section holding the shared parameters, the model grid and a repeat count of 5.
expset = {
    "name": expset_name,
    "readme": expset_readme,
    "cv": {"common_params": common_params, "grid_params": grid_params, "repeat": 5},
}
response = requests.post(f'{EG1_API_URL}/experiment_set', json=expset, headers=headers)
if not response.ok:
    # Show the API's error body, then stop here: carrying on would leave `expset_id`
    # undefined (or stale from an earlier run) for the patch cells that follow.
    print(response.text)
    response.raise_for_status()
resp = response.json()
if "id" not in resp:
    # A successful status without an id is unusable downstream, so fail loudly.
    raise RuntimeError(f"Experiment set creation returned no id: {resp}")
expset_id = resp["id"]
print(f'Created expset: {resp["name"]} ({resp["id"]})')

Created expset: albert-api-rag-mfs-v2 (68)


In [23]:
# Patching the experiment set

# Shared settings for the added experiments (`metrics` is defined in an earlier cell).
sampling_params = {"temperature": 0.2}
common_params = {
    "dataset": "MFS_questions_v01",
    "model": {"sampling_params": sampling_params},
    "metrics": metrics,
    "judge_model": "gpt-4.1",
}

# Two variants with no "extra_params" at all, one per Albert endpoint:
# (model name, alias, base URL, API key).
dry_variants = [
    (
        "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
        "albert-large-dry-3.2",
        ALBERT_API_URL,
        ALBERT_API_KEY,
    ),
    (
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "albert-large-dry-3.1",
        ALBERT_API_URL_DEV,
        ALBERT_API_KEY_DEV,
    ),
]
grid_params = {
    "model": [
        {
            "name": model_name,
            "aliased_name": alias,
            "base_url": base_url,
            "api_key": api_key,
        }
        for model_name, alias, base_url, api_key in dry_variants
    ],
}

# Payload for PATCH {EG1_API_URL}/experiment_set/{expset_id}: only the "cv" section
# (shared parameters, model grid, repeat count of 5) is sent.
expset = {
    "cv": {"common_params": common_params, "grid_params": grid_params, "repeat": 5},
}
response = requests.patch(f'{EG1_API_URL}/experiment_set/{expset_id}', json=expset, headers=headers)
if not response.ok:
    # Show the API's error body, then stop: a failed patch must not look like a
    # completed step when the notebook is run top to bottom.
    print(response.text)
    response.raise_for_status()
resp = response.json()
if "id" not in resp:
    # A successful status without an id is unusable downstream, so fail loudly.
    raise RuntimeError(f"Experiment set patch returned no id: {resp}")
expset_id = resp["id"]
print(f'Patched expset: {resp["name"]} ({resp["id"]})')

Patched expset: albert-api-rag-mfs-v2 (68)


In [13]:
# Name, readme and metric list for the "v3" iteration of the experiment set.
expset_name = "albert-api-rag-mfs-v3"
expset_readme = "Evaluating hybrid search on MFS dataset."
metrics = ["judge_precision", "output_length", "generation_time"]

# Settings shared by every entry of the grid.
sampling_params = {"temperature": 0.2}
common_params = {
    "dataset": "MFS_questions_v01",
    "model": {"sampling_params": sampling_params},
    "metrics": metrics,
    "judge_model": "gpt-4.1",
}

In [15]:
# Patching the experiment set

# Shared settings for the added experiments (`metrics` is defined in an earlier cell).
sampling_params = {"temperature": 0.2}
common_params = {
    "dataset": "MFS_questions_v01",
    "model": {"sampling_params": sampling_params},
    "metrics": metrics,
    "judge_model": "gpt-4.1",
}

# Two variants with "web_search" enabled and no collection list:
# (model name, alias, search method, base URL, API key).
web_variants = [
    (
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "albert-large-hybrid-web-elastic",
        "hybrid",
        ALBERT_API_URL_DEV,
        ALBERT_API_KEY_DEV,
    ),
    (
        "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
        "albert-large-semantic-web-qdrant",
        "semantic",
        ALBERT_API_URL,
        ALBERT_API_KEY,
    ),
]
grid_params = {
    "model": [
        {
            "name": model_name,
            "aliased_name": alias,
            "extra_params": {
                "search": True,
                "search_args": {"method": method, "web_search": True, "k": 10},
            },
            "base_url": base_url,
            "api_key": api_key,
        }
        for model_name, alias, method, base_url, api_key in web_variants
    ],
}

# Payload for PATCH {EG1_API_URL}/experiment_set/{expset_id}: only the "cv" section
# (shared parameters, model grid, repeat count of 5) is sent.
# NOTE(review): `expset_name` / `expset_readme` are redefined in an earlier cell but
# are not part of this payload — confirm whether a rename was meant to be sent here.
expset = {
    "cv": {"common_params": common_params, "grid_params": grid_params, "repeat": 5},
}
response = requests.patch(f'{EG1_API_URL}/experiment_set/{expset_id}', json=expset, headers=headers)
if not response.ok:
    # Show the API's error body, then stop: a failed patch must not look like a
    # completed step when the notebook is run top to bottom.
    print(response.text)
    response.raise_for_status()
resp = response.json()
if "id" not in resp:
    # A successful status without an id is unusable downstream, so fail loudly.
    raise RuntimeError(f"Experiment set patch returned no id: {resp}")
expset_id = resp["id"]
print(f'Patched expset: {resp["name"]} ({resp["id"]})')

Patched expset: albert-api-rag-mfs-v3 (68)
