# Demo for launching a dataset experiment in EVALAP

- code: https://github.com/etalab-ia/evalap
- api documentation: https://evalap.etalab.gouv.fr/redoc

In [20]:
import os
import sys
import time

import dotenv
from IPython.display import HTML
import numpy as np
import pandas as pd
import requests

# Load environment variables (the API keys read with os.getenv below) from the .env file
# located one directory above this notebook.
dotenv.load_dotenv("../.env")
# Add the parent directory to the import path so that the `evalap` import below resolves
# (presumably the package lives in the repository root — confirm against the repo layout).
sys.path.append("..")
from evalap.utils import log_and_raise_for_status

# Base URL of the EVALAP API that every request in this notebook is sent to.
API_URL = "http://localhost:8000"
# Endpoints and credentials of external services. They are not referenced by the cells
# visible in this demo; the keys are None when the variable is absent from the environment.
ALBERT_API_URL = "https://albert.api.etalab.gouv.fr/v1"
ALBERT_API_KEY = os.getenv("ALBERT_API_KEY")
MFS_API_URL = "https://franceservices.staging.etalab.gouv.fr/api/v1"
MFS_API_KEY = os.getenv("MFS_API_KEY")
OPENAI_URL = "https://api.openai.com/v1"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

## List of datasets


In [21]:
# Show the available datasets
response = requests.get(f'{API_URL}/v1/datasets')
# Fail fast on an HTTP error rather than displaying an error payload as if it were
# the dataset list.
response.raise_for_status()
resp = response.json()
resp

[{'name': 'mfs_dataset',
  'id': 1,
  'has_query': True,
  'has_output': False,
  'has_output_true': True,
  'size': 2},
 {'name': 'MFS_questions_v01',
  'id': 2,
  'has_query': True,
  'has_output': False,
  'has_output_true': True,
  'size': 39}]

## List of metrics available for analysing datasets and their information 


- the `require` field indicates which fields are required in the dataset for this metric to operate.
- the `type` field is ignored for now. It will later be associated with the type of the observation you get in the result output.

In [38]:
# Show the available metrics, restricted to those whose type is "dataset".
# - every field listed in a metric's `require` must exist in the dataset used with the metric.
# - output metric can be generated from the query (see below)
response = requests.get(f'{API_URL}/v1/metrics')
# Fail fast on an HTTP error: an error payload would otherwise reach pandas and fail
# later with an unrelated KeyError on the "type" column.
response.raise_for_status()
all_metrics = response.json()
df = pd.DataFrame(all_metrics).sort_values(by=["type", "name"])
df = df[df["type"] == "dataset"]
# Render as an HTML table without the index column.
HTML(df.to_html(index=False))

name,description,type,require
judge_complexity,"[0-10] score complexity of query, thematic...",dataset,[query]


## Launching an experiment on the data in a dataset

In [39]:
# Run an experiment on a dataset to obtain the metrics linked to the complexity of the question

# Designing my experiments
dataset = "MFS_questions_v01"
metrics = ["judge_complexity"]

# Launching the experiments
# Ids of the experiments created by this cell; read back by the results cell.
experiment_ids = []
experiment = {
    # Plain string: the name has no placeholder, so no f-string is needed.
    "name": "MFS_dataset_complexity_analyze",
    "dataset": dataset,
    "metrics": metrics,
}

response = requests.post(f'{API_URL}/v1/experiment', json=experiment)
resp = response.json()

if "id" in resp:
    experiment_ids.append(resp["id"])
    print(f'Created experiment: {resp["name"]} ({resp["id"]}), status: {resp["experiment_status"]}')
else:
    # No id in the payload: show the raw response so the reason is visible.
    print(resp)

Created experiment: MFS_dataset_complexity_analyze (57), status: running_metrics


## Reading and showing results

In [40]:
import json
import pandas as pd
import numpy as np
import requests

# BASELINE CODE FOR DATASET METRIC
# Flatten the per-metric observation tables of every experiment into one row per
# (experiment, dataset line), then arrange the columns for display.
observations_dict = {}
metric_names = set()

# `experiment_ids` is the list filled by the experiment-creation cell. It is reused as is
# instead of being rebuilt from `resp["id"]`, which raised a KeyError whenever the
# creation request had not returned an id.

for exp_id in experiment_ids:
    response = requests.get(f'{API_URL}/v1/experiment/{exp_id}?with_results=true')
    # Fail fast on an HTTP error instead of a KeyError on "results" below.
    response.raise_for_status()
    experiment = response.json()
    results = experiment["results"]

    model_name = f"Experiment_{exp_id}"

    for metric_results in results:
        metric_name = metric_results["metric_name"]
        metric_names.add(metric_name)
        for idx, obs in enumerate(metric_results["observation_table"]):
            key = (model_name, idx)
            # One shared row per (experiment, line); each metric adds its own columns.
            row = observations_dict.setdefault(key, {"model": model_name, "num_line": idx})

            if metric_name != "judge_complexity":
                row[metric_name] = obs.get("score")
                continue

            # Always keep the score, even when the detailed observation is missing or
            # unreadable; it used to be dropped in both of those cases.
            row["judge_complexity_score"] = obs.get("score")

            raw_observation = obs.get("observation")
            if raw_observation is None:
                continue

            try:
                # The observation is a JSON document holding the judge's detailed answer.
                observation_data = json.loads(raw_observation)
                scores = observation_data.get("scores", {})
                row.update({
                    "thematique": observation_data.get("thematique", ""),
                    "demande_score": scores.get("demande", 0),
                    "administration_score": scores.get("administration", 0),
                    "procedure_score": scores.get("procedure", 0),
                    "global_score": scores.get("global", 0),
                    "answer": observation_data.get("answer", "")
                })
            except json.JSONDecodeError:
                print(f"Erreur de décodage JSON pour {model_name}, judge_complexity, ligne {idx}")
            except Exception as e:
                # Best effort: report the line and keep processing the other observations.
                print(f"Erreur inattendue pour {model_name}, judge_complexity, ligne {idx}: {e}")

observations_df = pd.DataFrame(list(observations_dict.values()))

columns_order = ["model", "num_line"]
# Sorted so the column order does not depend on set iteration order.
metric_columns = sorted(metric_names - {"judge_complexity"})
columns_order.extend(metric_columns)
judge_complexity_columns = ["judge_complexity_score", "thematique", "demande_score", "administration_score", "procedure_score", "global_score", "answer"]
columns_order.extend(col for col in judge_complexity_columns if col in observations_df.columns)

# reindex selects the columns in the wanted order and creates any missing one filled with NaN.
observations_df = observations_df.reindex(columns=columns_order)

observations_df.head(2)

Unnamed: 0,model,num_line,judge_complexity_score,thematique,demande_score,administration_score,procedure_score,global_score,answer
0,Experiment_57,0,2.0,ANTS,1.0,1.0,2.0,2.0,Critère Demande : La question est claire et pr...
1,Experiment_57,1,3.0,ANTS,1.0,2.0,3.0,3.0,Critère Demande : La question est claire et pr...


### Dataviz

In [41]:
import plotly.graph_objects as go
import pandas as pd
import plotly.graph_objects as go

def create_scatter_plot(df, var1, var2):
    """Build a scatter plot of two columns of ``df``.

    Args:
        df: Object indexable by column name (``df[var1]``, ``df[var2]``).
        var1: Name of the column plotted on the x axis.
        var2: Name of the column plotted on the y axis.

    Returns:
        The ``go.Figure`` holding a single markers-only trace, with the title
        and both axis titles derived from the two column names.
    """
    # Dark red, slightly transparent dots with a black outline.
    marker_style = {
        "size": 10,
        "color": 'rgba(152, 0, 0, .8)',
        "line": {"width": 2, "color": 'rgb(0, 0, 0)'},
    }
    points = go.Scatter(
        x=df[var1],
        y=df[var2],
        mode='markers',
        marker=marker_style,
    )

    figure = go.Figure(data=[points])
    figure.update_layout(
        title=f'Nuage de points : {var1} vs {var2}',
        xaxis_title=var1,
        yaxis_title=var2,
        showlegend=False,
    )
    return figure


In [42]:
# Scatter plot of the collected observations: `judge_complexity_score` on the x axis
# against `demande_score` on the y axis. The returned figure is the cell's output.
create_scatter_plot(observations_df, "judge_complexity_score", "demande_score")