In [None]:
# Execute portugal_functions.py inside this notebook's namespace.
# Later cells call helpers that are not defined in this notebook (e.g.
# assign_clusters_with_min_size, run_analysis_portugal, build_X_) — presumably they come from this script.
%run portugal_functions.py

In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from ctgan import CTGAN
# Standard library
from typing import List, Dict

# Scikit-learn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.base import clone

# Conformal prediction (MAPIE + Mondrian CP)
from mapie.classification import MapieClassifier
from mapie.metrics import classification_coverage_score, classification_mean_width_score
from mapie.mondrian import MondrianCP

In [None]:
from ucimlrepo import fetch_ucirepo

# Download the UCI repository entry with id 697.
predict_students_dropout_and_academic_success = fetch_ucirepo(id=697)

# Feature frame and target frame exposed by the fetched entry.
_uci_data = predict_students_dropout_and_academic_success.data
df1, y_uci = _uci_data.features, _uci_data.targets

In [None]:
# Binary target: 1 for 'Dropout', 0 for 'Graduate' and 'Enrolled'.
mapping = {'Dropout': 1, 'Graduate': 0, 'Enrolled': 0}

# Build a NEW frame instead of writing columns into the object handed back by
# ucimlrepo: in-place column assignment on that frame can raise
# SettingWithCopyWarning and silently mutates the fetched dataset object.
# Re-running this cell stays idempotent (existing columns are simply replaced).
df1 = df1.assign(
    email=df1.index,                       # row label reused as an identifier
    source='real',                         # provenance flag (synthetic rows get 'synth' later)
    dropout=y_uci['Target'].map(mapping),  # aligned on the index, like the original assignment
)

In [None]:
n_clusters = 20

# Feature matrix for clustering: everything except identifier / target / provenance columns.
X = df1.drop(columns=['email', 'dropout', 'source']).fillna(0)

# Standardise, then cluster.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X_scaled)
clusters = kmeans.labels_

# Copy of df1 carrying the cluster label, followed by the cluster sizes.
df2 = df1.assign(cluster=clusters)
print(pd.Series(clusters).value_counts().sort_index())

In [None]:
# Minimum number of rows requested per cluster.
min_cluster_size = 150

# Reuse the notebook-level `n_clusters` (set in the KMeans cell above) instead of repeating
# the literal 20, so both clusterings cannot drift apart when the value is tuned.
df3 = assign_clusters_with_min_size(
    df1,
    n_clusters=n_clusters,
    min_cluster_size=min_cluster_size,
)

In [None]:
# Augment the KMeans-clustered frame (df2) with the external helper
# augment_minority_clusters; the result is reported below under the label "with SMOTE".
# NOTE(review): the helper's exact behaviour is not visible in this notebook — confirm it is SMOTE-based.
df4 = augment_minority_clusters(df2)

In [None]:
# Number of synthetic rows to generate.
n_samples = 200

ctgan1 = CTGAN(
    epochs=300,
    batch_size=100,
    generator_dim=(256, 256),
    discriminator_dim=(256, 256),
    verbose=True
)

# 4. Training
# The model learns the joint distribution of the real data (identifier and provenance
# columns are removed first).
X = df1.copy()
X = X.drop(columns=['email', 'source']).fillna(0)

# The binary target must be declared as discrete: otherwise CTGAN models it as a continuous
# variable and sampled rows are not constrained to the observed labels, which would corrupt
# the classification target downstream.
# NOTE(review): other integer-coded categorical features probably belong in this list too.
ctgan1.fit(X, discrete_columns=['dropout'])

synthetic_data = ctgan1.sample(n_samples)
synthetic_data['source'] = 'synth'

# Real rows followed by synthetic rows (synthetic rows have no 'email' value).
df5 = pd.concat([df1, synthetic_data], ignore_index=True)

In [None]:
# Each task: (label, input frame, extra keyword arguments for run_analysis_portugal).
tasks = [
    ("without cluster", df1, {}),
    ("without enrichissement", df3, {}),
    ("with SMOTE", df4, {}),
    ("with GAN", df5, {}),
]

summary_records = []
res = []
for name, dfk, kwargs in tasks:
    # 1) Run the analysis; the per-task kwargs are now actually forwarded
    #    (they are all empty here, so results are unchanged).
    df_detail, df_agg, y_all, trained_clfs = run_analysis_portugal(
        df=dfk.drop(columns=['dropout']),
        y=dfk['dropout'],
        alpha=0.05,
        nan_fill=0,
        do_plot=False,
        **kwargs,
    )
    # 2) Mean coverage / width per (method, model, cluster, n_projects).
    r = (
        df_detail
        .groupby(["method", "model", "cluster", "n_projects"])
        .agg(
            mean_coverage=("coverage", "mean"),
            mean_width=("width", "mean"),
        )
        .reset_index()
    )
    # 3) Keep the rows with cluster == -1 and tag them with the task label so the
    #    printed / stored tables can be told apart.
    res.append(r[r['cluster'] == -1].assign(task=name))
    print(res[-1])

In [None]:
# Column-name prefixes of the successive curriculum stages, in the order they are consumed below.
curri_cols = ['Previous qualification', "Curricular units 1st sem", "Curricular units 2nd sem"]

# curri_cols = ['Previous qualification','Admission grade', "Curricular units 1st sem", "Curricular units 2nd sem"]
# Dynamic columns: every df1 column whose name starts with one of the stage prefixes.
dyn_cols = [
        col for col in df1.columns
        if any(col.startswith(pref) for pref in curri_cols)
        ]
# Static columns: all remaining columns, minus identifier / target / bookkeeping names.
static_cols = [
c for c in df1.columns
if c not in dyn_cols + ["student_id", "email", "dropout", "source", "cluster"]
]

# 3. Base DataFrame, which is never modified in place
base_df = df1[static_cols].copy()

# 4. Cumulative construction of the Xt dictionary:
#    Xt["t0"] holds the static columns only; each Xt["t{idx}"] adds the columns of one more prefix.
Xt = {"t0": base_df.copy()}
cum_df = base_df.copy()
for idx, prefix in enumerate(curri_cols, start=1):
    print(prefix)
    cum_df = cum_df.copy()
    cols = [c for c in df1.columns if c.startswith(prefix)]
    cum_df[cols] = df1[cols]
    Xt[f"t{idx}"] = cum_df

# 5. Build y for horizon H
H = 1
keys = list(Xt.keys())
y = {}

# target for t0: LAST column of the frame H steps ahead (or of the final frame if out of range)
if H < len(keys):
    y["t0"] = Xt[keys[H]].iloc[:, -1].copy()
else:
    y["t0"] = Xt[keys[-1]].iloc[:, -1].copy()

# targets for t1..: SECOND-TO-LAST column of the frame H steps ahead (clamped to the final frame).
# NOTE(review): the -1 / -2 positions rely on the column order of df1 — presumably they select a
# grade column; confirm against the dataset schema.
for i, key in enumerate(keys[1:], start=1):
    tgt = i + H
    if tgt < len(keys):
        df_tgt = Xt[keys[tgt]]
    else:
        df_tgt = Xt[keys[-1]]
    y[key] = df_tgt.iloc[:, -2].copy()

# 6. Build X keeping only the last w grades (sliding window)
w = 1
X = {}

# iterate over the same keys as for y, skipping those where i < w
for i, key in enumerate(keys):
    if i < w:
        continue
    # take the w item columns preceding position i in dyn_cols
    # NOTE(review): dyn_cols lists every column matching a prefix, yet it is sliced here by the
    # stage index i, so the window holds the column(s) at positions i-w..i-1 of dyn_cols rather
    # than all columns of stage i — confirm this is intended.
    window_item_cols = dyn_cols[i-w : i]
    X[key] = df1[static_cols + window_item_cols].copy()

# 7. Conversion to NumPy arrays (aligned on the same keys)
valid_keys = keys[w:]  # start at t{w}
X_array_hori = [X[k].values for k in valid_keys]
y_array_hori = [y[k].values for k in valid_keys]


In [None]:
# Online evaluation of the two-sided SPCI regressor: for each window i >= 1, train on
# windows 0..i-1 and evaluate the predicted interval [L, U] on window i.
covs, ecs, models2sp, witdh = [], [], [], []
X_array = X_array_hori
k = len(X_array)
keys = list(Xt.keys())
last_key_pos = len(keys) - 1

for i in tqdm(range(1, k), desc="Fenêtres en ligne"):

    # Horizon used for this window; the targets are rebuilt accordingly.
    H = len(keys) - i - w
    y = {}

    # target for t0: last column of the frame H steps ahead (clamped to the final frame)
    y["t0"] = Xt[keys[H if H < len(keys) else -1]].iloc[:, -1].copy()

    # targets for t1..: second-to-last column of the frame H steps ahead (clamped likewise)
    for j, key in enumerate(keys[1:], start=1):
        tgt = min(j + H, last_key_pos)
        y[key] = Xt[keys[tgt]].iloc[:, -2].copy()
    y_array = [y[vk].values for vk in valid_keys]

    # --- 1) Training set built from the past windows 0..i-1 ---
    X_train = np.vstack(X_array[:i])
    y_train = np.concatenate(y_array[:i])

    # --- 2) Fit a fresh model (kept in models2sp for the combined evaluation later) ---
    model = TwoSidedSPCI_RFQuant_Offline(alpha=0.05, w=200, random_state=0)
    model.fit(X_train, y_train)
    models2sp.append(model)

    # --- 3) Evaluation on the current window i ---
    X_i, y_i = X_array[i], y_array[i]
    # One predict_interval call per sample (the previous version called it twice, once for
    # each bound, doubling the prediction cost).
    # assumes predict_interval returns a (lower, upper) pair — TODO confirm
    intervals = [model.predict_interval(x.reshape(1, -1)) for x in X_i]
    L = np.array([bounds[0] for bounds in intervals])
    U = np.array([bounds[1] for bounds in intervals])

    covs.append(np.mean((U >= y_i) & (L <= y_i)))
    ecs.append(np.mean(np.maximum(0, y_i - U)) + np.mean(np.maximum(0, L - y_i)))
    witdh.append(np.mean(U - L))
    print(f"Fenêtre {i}: cov={covs[-1]:.4f}, excès={ecs[-1]:.4f}, witdh={witdh[-1]:.4f}")

# 4) Final report: coverage, excess (distance outside the interval) and interval width.
#    The width was computed but never reported before, and the excess was labelled "(width)".
report = (
    pd.DataFrame({
        "fenêtre": np.arange(1, k),
        "cov"    : covs,
        "excess" : ecs,
        "width"  : witdh,
    })
    .round(4)
)

print(report)
print(f"\nCouverture moyenne  : {report['cov'].mean():.4f}")
print(f"Excès moyen         : {report['excess'].mean():.4f}")
print(f"Largeur moyenne     : {report['width'].mean():.4f}")

In [None]:
# Online evaluation of the one-sided SPCI regressor (upper bound only), using the
# horizon-1 targets built earlier (y_array_hori).
covs, ecs = [], []
X_array = X_array_hori
y_array = y_array_hori
U_t = []  # upper bounds per evaluated window, reused later as extra features

# i runs from 1 to len(X_array)-1 (window 0 has no past to train on)
for i in tqdm(range(1, len(X_array)), desc="Fenêtres en ligne"):
    # --- 1) Training set built from the past windows 0..i-1 ---
    X_train = np.vstack(X_array[:i])
    y_train = np.concatenate(y_array[:i])

    # --- 2) Fit a fresh model ---
    model_one = OneSidedSPCI_LGBM_Offline(alpha=0.1, w=200, random_state=0)
    model_one.fit(X_train, y_train)

    # --- 3) Evaluation on the current window i ---
    X_i, y_i = X_array[i], y_array[i]
    # upper bound U for every sample of X_i (second element of predict_interval's result)
    U = np.array([
        model_one.predict_interval(x.reshape(1, -1))[1]
        for x in X_i
    ])
    U_t.append(U)

    # coverage & excess, computed once and reused for the log line
    covs.append(np.mean(U >= y_i))
    ecs.append(np.mean(np.maximum(0, y_i - U)))
    print("RESULTAT :", covs[-1], ecs[-1])

# --- 4) Final report ---
report = (
    pd.DataFrame({
        "fenêtre": np.arange(1, len(X_array)),
        "cov"    : covs,
        "excess" : ecs,
    })
    .round(4)
)

print(report)
print(f"\nCouverture moyenne  : {report['cov'].mean():.4f}")
# The excess is the mean amount by which y exceeds the upper bound; it is not a width,
# so the former "(width)" label was dropped.
print(f"Excès moyen         : {report['excess'].mean():.4f}")

In [None]:
# Copy of df1 enriched with the one-sided SPCI upper bounds ("next grade" columns).
dfng = df1.copy()

# Upper bounds of the second evaluated window go right after the 1st-semester
# "(without evaluations)" column.
idx2 = dfng.columns.get_loc('Curricular units 1st sem (without evaluations)')
dfng.insert(loc=idx2 + 1, column="Curricular units 1st sem (next grade)", value=U_t[1])

# Upper bounds of the first evaluated window go right after the previous-qualification grade.
idx1 = dfng.columns.get_loc('Previous qualification (grade)')
dfng.insert(loc=idx1 + 1, column="Previous qualification (next grade)", value=U_t[0])


In [None]:
# Same clustering recipe as for df1, applied to the enriched frame dfng.
X = dfng.drop(columns=['email', 'dropout', 'source']).fillna(0)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Use the notebook-level `n_clusters` rather than a second hard-coded 20, so both
# clusterings always share the same number of clusters.
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Copy of dfng carrying the cluster label, followed by the cluster sizes.
dfng1 = dfng.copy()
dfng1['cluster'] = clusters
print(pd.Series(clusters).value_counts().sort_index())

In [None]:
# Size-constrained clustering of the enriched frame; reuses the notebook-level `n_clusters`
# and `min_cluster_size` instead of repeating the literal 20.
dfng2 = assign_clusters_with_min_size(
    dfng,
    n_clusters=n_clusters,
    min_cluster_size=min_cluster_size,
)

In [None]:
# Augment the KMeans-clustered enriched frame (dfng1) with the external helper
# augment_minority_clusters; the result is reported below under the label "with SMOTE".
# NOTE(review): the helper's exact behaviour is not visible in this notebook — confirm it is SMOTE-based.
dfng3 = augment_minority_clusters(dfng1)

In [None]:
# Number of synthetic rows to generate.
n_samples = 200

ctgan2 = CTGAN(
    epochs=300,
    batch_size=100,
    generator_dim=(256, 256),
    discriminator_dim=(256, 256),
    verbose=True
)

# 4. Training
# The model learns the joint distribution of the enriched, clustered data (identifier and
# provenance columns are removed first).
X = dfng1.copy()
X = X.drop(columns=['email', 'source']).fillna(0)

# Both the binary target and the cluster id are labels, not quantities: declare them as
# discrete so that sampled rows only carry observed values. Left undeclared, CTGAN models
# them as continuous variables.
# NOTE(review): other integer-coded categorical features probably belong in this list too.
ctgan2.fit(X, discrete_columns=['dropout', 'cluster'])

synthetic_data = ctgan2.sample(n_samples)
synthetic_data['source'] = 'synth'

# Real rows followed by synthetic rows (synthetic rows have no 'email' value).
dfng4 = pd.concat([dfng1, synthetic_data], ignore_index=True)

In [None]:
# Each task: (label, input frame, extra keyword arguments for run_analysis_portugal).
tasks = [
    ("without cluster", dfng, {}),
    ("without enrichment", dfng2, {}),
    ("with SMOTE", dfng3, {}),
    ("avec GAN", dfng4, {}),
]

summary_records = []
res = []
for name, dfk, kwargs in tasks:
    # 1) Run the analysis; the per-task kwargs are now actually forwarded
    #    (they are all empty here, so results are unchanged).
    df_detail, df_agg, y_all, trained_clfs = run_analysis_portugal(
        df=dfk.drop(columns=['dropout']),
        y=dfk['dropout'],
        alpha=0.05,
        nan_fill=0,
        do_plot=False,
        **kwargs,
    )
    # 2) Mean coverage / width per (method, model, cluster, n_projects).
    r = (
        df_detail
        .groupby(["method", "model", "cluster", "n_projects"])
        .agg(
            mean_coverage=("coverage", "mean"),
            mean_width=("width", "mean"),
        )
        .reset_index()
    )
    # 3) Keep the rows with cluster == -1 and tag them with the task label so the
    #    printed / stored tables can be told apart.
    res.append(r[r['cluster'] == -1].assign(task=name))
    print(res[-1])

## COMBINE

In [None]:
def split_train_cal_with_real_priority(
        idx_tmp: np.ndarray,
        y_tmp: np.ndarray,
        src_tmp: np.ndarray,
        cal_fraction: float = 0.25,  # 0.20/0.80 = 0.25, as before
        random_state: int = 42,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Split a pool of rows into a training part and a calibration part.

    The calibration part is class-stratified on ``y_tmp`` and is filled with rows whose
    source is ``'real'`` first; other rows are used only when a class does not have
    enough real rows.

    Parameters
    ----------
    idx_tmp : row identifiers of the pool (array-like or ``pd.Index``).
    y_tmp : labels, positionally parallel to ``idx_tmp``; must support boolean-mask indexing.
    src_tmp : provenance of each row, positionally parallel to ``idx_tmp``.
    cal_fraction : fraction of the pool to reserve for calibration.
    random_state : seed of the sampling.

    Returns
    -------
    (idx_tr, idx_cal, y_tr, y_cal)
        All four outputs follow the order of ``idx_tmp``, so ``idx_cal[i]`` and ``y_cal[i]``
        (and ``idx_tr[i]`` / ``y_tr[i]``) always describe the same row.
    """
    rng = np.random.RandomState(random_state)
    n_total = len(idx_tmp)
    n_cal = int(round(n_total * cal_fraction))

    idx_arr = np.asarray(idx_tmp)
    # Plain arrays are used on purpose: the three inputs are parallel by position, and a
    # pandas Series here would be re-aligned on its own index by the DataFrame constructor.
    pool = pd.DataFrame({
        "idx": idx_arr,
        "y": np.asarray(y_tmp),
        "source": np.asarray(src_tmp),
    })

    picked = []

    # Per-class allocation, proportional to the class frequency.
    for _, grp in pool.groupby("y"):
        quota = int(round(n_cal * len(grp) / n_total))

        grp_real = grp[grp["source"] == "real"]
        grp_other = grp[grp["source"] != "real"]

        # Real rows first. A zero quota must select nothing (it previously selected
        # every real row of the class).
        take_real = min(len(grp_real), quota)
        if take_real > 0:
            picked.append(grp_real.sample(n=take_real, random_state=rng)["idx"].to_numpy())

        # Then complete with non-real rows if the class quota is not met.
        take_other = min(quota - take_real, len(grp_other))
        if take_other > 0:
            picked.append(grp_other.sample(n=take_other, random_state=rng)["idx"].to_numpy())

    chosen = np.concatenate(picked) if picked else idx_arr[:0]

    # Rounding may leave the calibration set slightly short: top it up, real rows first and
    # other rows only if the real ones run out (the former version sampled from both at once).
    shortfall = n_cal - len(chosen)
    if shortfall > 0:
        leftover = pool[~pool["idx"].isin(chosen)]
        for subset in (leftover[leftover["source"] == "real"],
                       leftover[leftover["source"] != "real"]):
            take = min(shortfall, len(subset))
            if take > 0:
                extra = subset.sample(n=take, random_state=rng)["idx"].to_numpy()
                chosen = np.concatenate([chosen, extra])
                shortfall -= take

    # Express both parts through masks over idx_tmp so identifiers and labels stay aligned
    # (idx_cal used to be sorted while y_cal kept the idx_tmp order).
    mask_cal = np.isin(idx_arr, chosen)
    mask_tr = ~mask_cal

    idx_tr = idx_tmp[mask_tr]
    idx_cal = idx_arr[mask_cal]
    y_tr = y_tmp[mask_tr]
    y_cal = y_tmp[mask_cal]

    return idx_tr, idx_cal, y_tr, y_cal

In [None]:
# -----------------------------------------------------------------------------
# Configuration & constants
# -----------------------------------------------------------------------------
# --- Configuration & constants ------------------------------------------------
RANDOM_STATE: int = 42            # seed shared by every split and estimator below
ALPHA: float = 0.05               # target mis-coverage level
W: int = 1                        # sliding-window size
nan_fill = 0                      # replacement value for missing entries
threshold = 10.001                # cut-off compared with the SPCI interval bounds
loaded_models = models2sp         # two-sided SPCI models fitted earlier in the notebook

# --- Data preparation ---------------------------------------------------------
# Work on a fresh, NaN-free frame with a 0..n-1 index.
DF = dfng2.fillna(nan_fill).reset_index(drop=True)

curri_cols = [
    'Previous qualification',
    'Curricular units 1st sem',
    'Curricular units 2nd sem'
]
# NOTE(review): curri_cols holds name prefixes, so this exact-name filter only removes a
# column named exactly like a prefix — confirm this is what build_X_ expects.
_excluded_names = curri_cols + ["student_id", "email", "dropout", "source", "cluster"]
static_cols = [c for c in DF.columns if c not in _excluded_names]

# Target is separated from the feature frame.
Y_TARGET = DF['dropout'].astype(int)
DF = DF.drop(columns='dropout')

# --- Base models --------------------------------------------------------------
MODELS: Dict[str, object] = dict(
    RF=RandomForestClassifier(
        n_estimators=1000,
        min_samples_leaf=2,
        class_weight="balanced",
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    LR=LogisticRegression(
        max_iter=1000,
        class_weight="balanced",
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    GB=GradientBoostingClassifier(random_state=RANDOM_STATE),
)

# -----------------------------------------------------------------------------
# Conformal prediction evaluation loop
# -----------------------------------------------------------------------------
from tqdm import tqdm

# One metrics table per base model, collected here.
res_fin: List[pd.DataFrame] = []

for name, base_clf in MODELS.items():
    covs_MCP, width_MCP = [], []
    covs_SPCI, width_SPCI = [], []
    covs_comb, width_comb = [], []
    covs_union,  width_union  = [], []
    for n in tqdm(range(W, len(curri_cols)), desc=name):
        # Gate classifier choosing, per sample, between the MCP set, the SPCI set or their union.
        gate_clf = RandomForestClassifier(
            n_estimators=300,
            max_depth=None,
            class_weight="balanced",
            random_state=RANDOM_STATE,
            n_jobs=-1,
        )
        # 1) split (train + calibration) / test
        idx_tmp, idx_test, y_tmp, y_test, cl_tmp, cl_test = train_test_split(
            DF.index, Y_TARGET, DF["cluster"],
            test_size=0.20,
            stratify=Y_TARGET,
            random_state=RANDOM_STATE,
        )
        src_all = DF["source"].values
        src_tmp = src_all[idx_tmp]
        idx_tr, idx_cal, y_tr, y_cal = split_train_cal_with_real_priority(
            idx_tmp=idx_tmp,
            y_tmp=y_tmp,
            src_tmp=src_tmp,
            cal_fraction=0.40 / 0.80,  # keeps the same calibration size as before
            random_state=42,
        )
        # Labels and clusters are looked up BY ROW LABEL from idx_tr / idx_cal, so they are
        # aligned with the identifiers whatever order the splitter returns them in.
        # Previously idx_cal could be sorted while y_cal / cl_cal followed the idx_tmp order,
        # so the positional split below paired calibration features with the labels and
        # clusters of other rows.
        y_tr = Y_TARGET.loc[idx_tr]
        y_cal = Y_TARGET.loc[idx_cal]
        cl_tr = DF.loc[idx_tr, "cluster"]
        cl_cal = DF.loc[idx_cal, "cluster"]

        # Calibration is halved: one half for conformal calibration, one half to train the gate.
        idx_cal_cp, idx_cal_gate, y_cal_cp, y_cal_gate, cl_cal_cp, cl_cal_gate = train_test_split(
            idx_cal, y_cal, cl_cal,
            test_size=0.5,
            stratify=y_cal,
            random_state=RANDOM_STATE,
        )
        y_cal_cp   = np.array(y_cal_cp)
        y_cal_gate = np.array(y_cal_gate)
        y_test     = np.array(y_test)
        # mask of the "real" rows, applied to every evaluation
        mask_real = (DF.loc[idx_test, "source"] == "real").to_numpy()

        # 2) build features
        X_tr        = build_X_(DF.loc[idx_tr],       curri_cols, static_cols, n)
        X_cal_cp    = build_X_(DF.loc[idx_cal_cp],   curri_cols, static_cols, n)
        X_cal_gate  = build_X_(DF.loc[idx_cal_gate], curri_cols, static_cols, n)
        X_test      = build_X_(DF.loc[idx_test],     curri_cols, static_cols, n)
        # 3) train base clf + calibrate for MCP
        clf = clone(base_clf)
        clf.fit(X_tr, y_tr)
        calib = CalibratedClassifierCV(clf, cv="prefit", method="sigmoid") \
                    .fit(X_cal_cp, y_cal_cp)

        base_mapie = MapieClassifier(estimator=calib, method="lac", cv="prefit")
        mond_mapie = MondrianCP(mapie_estimator=base_mapie) \
                        .fit(X_cal_cp, y_cal_cp, partition=cl_cal_cp)

        # ---- MCP on TEST ----
        _, yps_van_test = mond_mapie.predict(X_test, alpha=ALPHA, partition=cl_test)
        pset_van_test = yps_van_test[:, :, 0]
        cov_van = classification_coverage_score(y_test[mask_real], pset_van_test[mask_real])
        wid_van = classification_mean_width_score(pset_van_test[mask_real])
        covs_MCP.append(cov_van)
        width_MCP.append(wid_van)
        print("MCP", cov_van, wid_van)
        # ---- SPCI on TEST ----
        # NOTE(review): X_array_hori was built from df1; indexing it with positions taken from
        # DF assumes both frames hold the same rows in the same order — confirm.
        model_spci = loaded_models[n - W]
        pos_test   = DF.index.get_indexer(idx_test)
        X_spci_test = X_array_hori[n - W + 1][pos_test]
        intervals = [model_spci.predict_interval(x.reshape(1, -1))
                     for x in X_spci_test]
        L_preds, U_preds = zip(*intervals)

        # Turn each interval into a 2-class prediction set by comparing it with `threshold`:
        # entirely below -> class 1 only, entirely above -> class 0 only, straddling -> both.
        y_pred_bool_SPCI = np.zeros((len(intervals), 2), dtype=bool)
        for i, (L, U) in enumerate(zip(L_preds, U_preds)):
            if threshold > U:
                y_pred_bool_SPCI[i, 1] = True
            elif threshold < L:
                y_pred_bool_SPCI[i, 0] = True
            else:
                y_pred_bool_SPCI[i, :] = True

        cov_spci = classification_coverage_score(y_test[mask_real],
                                                y_pred_bool_SPCI[mask_real])
        wid_spci = classification_mean_width_score(y_pred_bool_SPCI[mask_real])
        covs_SPCI.append(cov_spci)
        width_SPCI.append(wid_spci)
        print("SPCI", cov_spci, wid_spci)
        ## UNION of the two prediction sets
        y_pred_bool_MCP = pset_van_test.astype(bool)
        y_bool_union = y_pred_bool_MCP | y_pred_bool_SPCI
        cov_union = classification_coverage_score(
            y_test[mask_real],
            y_bool_union[mask_real]
        )
        wid_union = classification_mean_width_score(
            y_bool_union[mask_real]
        )
        covs_union.append(cov_union)
        width_union.append(wid_union)
        print("UNION :", cov_union, wid_union)

        # ---- build the gate on CAL_GATE ----
        #  a) MCP predictions on X_cal_gate
        _, yps_van_gate = mond_mapie.predict(
            X_cal_gate, alpha=ALPHA, partition=cl_cal_gate
        )
        pset_cal_cls = yps_van_gate[:, :, 0]

        #  b) SPCI predictions on X_cal_gate
        pos_cal_gate  = DF.index.get_indexer(idx_cal_gate)
        X_spci_cal    = X_array_hori[n - W + 1][pos_cal_gate]
        intervals_cal = [model_spci.predict_interval(x.reshape(1, -1))
                         for x in X_spci_cal]
        L_cal, U_cal  = zip(*intervals_cal)

        pset_cal_spc = np.zeros_like(pset_cal_cls, dtype=bool)
        for i, (L, U) in enumerate(zip(L_cal, U_cal)):
            if threshold > U:
                pset_cal_spc[i, 1] = True
            elif threshold < L:
                pset_cal_spc[i, 0] = True
            else:
                pset_cal_spc[i, :] = True

        #  c) meta-features & labels for the gate
        #     gate label: 0 -> use MCP, 1 -> use SPCI, 2 -> use the union (both sets missed)
        # NOTE(review): err_cls / err_spc are derived from the true label and fully determine
        # part of the gate label, yet they are fed as features here and fixed to 0 at test
        # time — confirm this train/test feature mismatch is intended.
        df_sel_arr = []
        labels_g   = []

        for i in range(len(idx_cal_gate)):
            feat_vec = X_cal_gate[i]
            w_cls    = pset_cal_cls[i].sum()
            w_spc    = pset_cal_spc[i].sum()
            diff     = w_cls - w_spc
            err_cls  = int(y_cal_gate[i] not in np.where(pset_cal_cls[i])[0])
            err_spc  = int(y_cal_gate[i] not in np.where(pset_cal_spc[i])[0])
            if   err_cls == 0 and err_spc == 1:
                gate_y = 0
            elif err_spc == 0 and err_cls == 1:
                gate_y = 1
            elif err_cls == 0 and err_spc == 0:
                gate_y = 0 if w_cls < w_spc else 1
            else:
                gate_y = 2
            labels_g.append(gate_y)

            meta_vec = np.concatenate([
                 feat_vec,
                 [w_cls, w_spc, diff, err_cls, err_spc]
            ])
            df_sel_arr.append(meta_vec)

        X_gate_train = np.vstack(df_sel_arr)
        gate_clf.fit(X_gate_train, np.array(labels_g))
        # ---- apply the gate on TEST ----
        meta_test_arr = []
        for i in range(len(idx_test)):
            # feat_vec is the i-th row of the test feature matrix
            feat_vec = X_test[i]
            w_cls = pset_van_test[i].sum()
            w_spc = y_pred_bool_SPCI[i].sum()
            diff = w_cls - w_spc
            # concatenate feat_vec with the 5 meta-features
            meta_vec = np.concatenate([
                feat_vec,
                [w_cls, w_spc, diff, 0, 0]    # err_cls=0, err_spc=0 (unknown at test time)
            ])
            meta_test_arr.append(meta_vec)

        # stack into a matrix (n_test x n_meta_features)
        X_gate_test = np.vstack(meta_test_arr)

        # predict the gate's choice and assemble the final prediction sets
        choices = gate_clf.predict(X_gate_test)
        pset_final = np.zeros_like(pset_van_test, dtype=bool)
        for i, choice in enumerate(choices):
            if choice == 0:
                pset_final[i] = y_pred_bool_MCP[i]
            elif choice == 1:
                pset_final[i] = y_pred_bool_SPCI[i]
            else:
                pset_final[i] = y_pred_bool_MCP[i] | y_pred_bool_SPCI[i]

        cov_c = classification_coverage_score(y_test[mask_real],
                                             pset_final[mask_real])
        wid_c = classification_mean_width_score(pset_final[mask_real])
        covs_comb.append(cov_c)
        width_comb.append(wid_c)
        print("COMBINED", cov_c, wid_c)
    # aggregate the metrics of this base model, one row per n
    n_vals = list(range(W, W + len(covs_MCP)))
    df_metrics = pd.DataFrame({
        "model":             [name] * len(n_vals),
        "n":                 n_vals,
        "coverage_MCP":      covs_MCP,
        "width_MCP":         width_MCP,
        "coverage_SPCI":     covs_SPCI,
        "width_SPCI":        width_SPCI,
        "coverage_union":    covs_union, 
        "width_union":       width_union,
        "coverage_combined": covs_comb,
        "width_combined":    width_comb,
    })
    res_fin.append(df_metrics)
