# Albert RAG Model Evals

Models with RAG on service-public and travail-emploi sheets.

In [None]:
import os
import sys
import time

import dotenv
from IPython.display import HTML
import numpy as np
import pandas as pd
import requests

# Load environment variables from the .env file one directory up, and add the
# parent directory to the module search path.
dotenv.load_dotenv("../.env")
sys.path.append("..")

# Evaluation API: the experiment set requests in this notebook are sent here.
EG1_API_URL = "http://localhost:8000/v1"
EG1_API_KEY = os.getenv("EVALAP_API_KEY") 
# Albert API running on localhost.
ALBERT_API_URL_LOCAL = "http://localhost:8002/v1"
# NOTE(review): the variable name "ALBERT_API_KEY_" ends with an underscore,
# unlike the other keys — confirm it matches the entry in the .env file.
ALBERT_API_KEY_LOCAL = os.getenv("ALBERT_API_KEY_")
# Hosted Albert API.
ALBERT_API_URL = "https://albert.api.etalab.gouv.fr/v1"
ALBERT_API_KEY = os.getenv("ALBERT_API_KEY")
# Hosted Albert API, dev instance.
ALBERT_API_URL_DEV = "https://albert.api.dev.etalab.gouv.fr/v1"
ALBERT_API_KEY_DEV = os.getenv("ALBERT_API_KEY_DEV")
# MFS (franceservices) API.
MFS_API_URL = "https://franceservices.etalab.gouv.fr/api/v1"
MFS_API_KEY = os.getenv("MFS_API_KEY")
# Authorization header for the evaluation API. os.getenv returns None for an
# unset variable, in which case the header value would read "Bearer None".
headers = {"Authorization": f"Bearer {EG1_API_KEY}"}

In [None]:
# Judge model settings. They are forwarded as the "judge_model" entry of the
# experiment set's common parameters when the set is created.
judge_name = "gpt-4.1"
judge_api_url = "https://api.openai.com/v1"
# Read from the environment; None when OPENAI_API_KEY is not set.
judge_api_key = os.getenv("OPENAI_API_KEY")

In [None]:
# Design and run the experiments
# --
expset_name = "Albert - RAG (lexical boosting) v29-10-25"
expset_readme = "Comparing Albert RAG Models on french administration Q/A"
metrics = ["judge_precision", "output_length", "generation_time"]

# Parameters sent once and shared by the whole experiment set.
common_params = {
    "model": {"sampling_params": {"temperature": 0.2}},
    "metrics": metrics,
    "judge_model": {
        "name": judge_name,
        "base_url": judge_api_url,
        "api_key": judge_api_key,
    },
}

# Parameters given as lists of alternatives.
# NOTE(review): presumably expanded by the evaluation API into one experiment
# per (dataset, model) combination — confirm against the API documentation.
grid_params = {
    "dataset": ["MFS_questions_v01", "Assistant IA - QA"],
    "model": [
        # Hosted Albert API, semantic search, k=7.
        {
            "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "aliased_name": "albert-large-semantic-elastic-prod",
            "extra_params": {
                "search": True,
                "search_args": {
                    "method": "semantic",
                    "collections": [783, 784, 785],
                    "k": 7,
                },
            },
            "base_url": ALBERT_API_URL,
            "api_key": ALBERT_API_KEY,
        },
        # Hosted Albert API, hybrid search, k=5.
        {
            "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "aliased_name": "albert-large-hybrid-elastic-prod",
            "extra_params": {
                "search": True,
                "search_args": {
                    "method": "hybrid",
                    "collections": [783, 784, 785],
                    "k": 5,
                },
            },
            "base_url": ALBERT_API_URL,
            "api_key": ALBERT_API_KEY,
        },
        # Local Albert API, hybrid search, k=5, on its own collection ids.
        {
            "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "aliased_name": "albert-large-hybrid-elastic-boost",
            "extra_params": {
                "search": True,
                "search_args": {
                    "method": "hybrid",
                    "collections": [5, 6, 8],
                    "k": 5,
                },
            },
            "base_url": ALBERT_API_URL_LOCAL,
            "api_key": ALBERT_API_KEY_LOCAL,
        },
        # Raw/baseline: same model without any search parameters.
        {
            "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "base_url": ALBERT_API_URL,
            "api_key": ALBERT_API_KEY,
        },
    ],
}

# Launching the experiment set
expset = {
    "name": expset_name,
    "readme": expset_readme,
    "cv": {
        "common_params": common_params,
        "grid_params": grid_params,
        "repeat": 2,
    },
}
response = requests.post(f"{EG1_API_URL}/experiment_set", json=expset, headers=headers)
try:
    resp = response.json()
except ValueError:
    # The body is not JSON (for instance an HTML error page): report the raw
    # status and text instead of failing with an opaque decoding error.
    resp = {"status_code": response.status_code, "detail": response.text}

if "id" in resp:
    expset_id = resp["id"]
    print(f'Created expset: {resp["name"]} ({resp["id"]})')
else:
    # NOTE: expset_id is not assigned on this path, so it is either undefined
    # or still holds the id of a previous run. Do not run the patch cell
    # before the creation has succeeded.
    print(resp)

In [None]:
# Patching the experiment set
# Sends an additional grid (one more model configuration) to the experiment
# set identified by expset_id, which must already be defined by a successful
# creation.

common_params = {
    "model": {"sampling_params": {"temperature": 0.2}},
    "metrics": metrics,
    # Reuse the notebook-wide judge name so the patched experiments cannot
    # drift from the judge used at creation time.
    # NOTE(review): only the name is sent here, whereas the creation request
    # sends name, base_url and api_key — confirm the evaluation API resolves a
    # bare model name as intended.
    "judge_model": judge_name,
}
grid_params = {
    "dataset": ["MFS_questions_v01", "Assistant IA - QA"],
    "model": [
        # Local Albert API, hybrid search, k=20.
        {
            "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "aliased_name": "albert-large-hybrid-elastic-boost-v4-20chunk",
            "extra_params": {
                "search": True,
                "search_args": {
                    "method": "hybrid",
                    "collections": [16, 17, 18],
                    "k": 20,
                },
            },
            "base_url": ALBERT_API_URL_LOCAL,
            "api_key": ALBERT_API_KEY_LOCAL,
        },
    ],
}

expset = {
    "cv": {
        "common_params": common_params,
        "grid_params": grid_params,
        "repeat": 2,
    },
}
response = requests.patch(f"{EG1_API_URL}/experiment_set/{expset_id}", json=expset, headers=headers)
try:
    resp = response.json()
except ValueError:
    # The body is not JSON (for instance an HTML error page): report the raw
    # status and text instead of failing with an opaque decoding error.
    resp = {"status_code": response.status_code, "detail": response.text}

if "id" in resp:
    expset_id = resp["id"]
    print(f'Patched expset: {resp["name"]} ({resp["id"]})')
else:
    print(resp)