In [6]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import optuna
import optuna.visualization as vis
import pandas as pd
from optuna.samplers import TPESampler
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import KFold

# Project modules live in ../scripts; extend the path before importing them.
sys.path.append("../scripts")

import data
import utils
import models
from hyperparam_spaces import PARAM_SPACES

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Raise Optuna's log threshold to WARNING so routine study/trial messages
# do not flood the notebook output during the searches below.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [8]:
def run_trial(trial, X_train, Y_train, A_train, model_class, param_space, seed, n_splits=4):
    """Evaluate one Optuna trial with shuffled K-fold cross-validation.

    Hyper-parameters are sampled from ``param_space`` through ``trial``, a
    model is fitted on each training fold, and the validation predictions
    (positive-class probability thresholded at 0.5) are scored with
    ``utils.min_accuracy``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample each hyper-parameter.
    X_train, Y_train, A_train : array-like
        Features, labels and group membership. They are indexed with integer
        index arrays, so they must support NumPy-style fancy indexing.
    model_class : callable
        Called as ``model_class(**params)``; the result must expose
        ``fit(X, Y, A)`` and ``predict_proba(X)``.
    param_space : dict
        Maps a parameter name to a spec dict. ``spec["type"]`` must be
        ``"int"``, ``"categorical"`` or ``"float"``; every other key is
        forwarded as a keyword argument to the matching ``trial.suggest_*``.
    seed : int
        ``random_state`` of the ``KFold`` splitter.
    n_splits : int, default 4
        Number of cross-validation folds.

    Returns
    -------
    float
        Mean of ``utils.min_accuracy`` over the validation folds.

    Raises
    ------
    ValueError
        If a spec declares a ``"type"`` that is not supported. Skipping it
        silently would leave that parameter untuned without any warning.
    """
    suggesters = {
        "int": trial.suggest_int,
        "categorical": trial.suggest_categorical,
        "float": trial.suggest_float,
    }

    params = {}
    for name, spec in param_space.items():
        kind = spec["type"]
        if kind not in suggesters:
            raise ValueError(
                f"Unsupported parameter type {kind!r} for {name!r}; "
                f"expected one of {sorted(suggesters)}."
            )
        suggest_kwargs = {key: value for key, value in spec.items() if key != "type"}
        params[name] = suggesters[kind](name, **suggest_kwargs)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    val_metrics = []

    for train_index, val_index in kf.split(X_train):
        # Fresh estimator per fold so no fitted state can carry over between folds.
        model = model_class(**params)
        model.fit(X_train[train_index], Y_train[train_index], A_train[train_index])
        Y_pred = model.predict_proba(X_train[val_index])[:, 1] > 0.5
        val_metrics.append(
            utils.min_accuracy(Y_train[val_index], Y_pred, A_train[val_index])
        )

    return np.mean(val_metrics)


def optimize_model(
    model_name,
    X_train,
    Y_train,
    A_train,
    X_test,
    Y_test,
    A_test,
    model_class,
    param_space,
    n_trials,
    seed,
    n_jobs=10,
):
    """Tune a model with Optuna, refit the best configuration and score it.

    The search maximizes the cross-validated value returned by ``run_trial``.
    The best parameters are then used to fit a model on the full training
    data, and predictions (positive-class probability thresholded at 0.5)
    are scored on both the training and the test data.

    Parameters
    ----------
    model_name : str
        Label stored under ``"model"`` in the returned dict.
    X_train, Y_train, A_train : array-like
        Training features, labels and group membership.
    X_test, Y_test, A_test : array-like
        Held-out features, labels and group membership.
    model_class : callable
        Called as ``model_class(**params)`` to build an estimator.
    param_space : dict
        Search-space description forwarded to ``run_trial``.
    n_trials : int
        Number of Optuna trials.
    seed : int
        Seeds the TPE sampler and the cross-validation splitter.
    n_jobs : int, default 10
        Number of parallel Optuna workers. With more than one worker the
        order in which trials complete is not deterministic, so the search
        is only fully reproducible with ``n_jobs=1``.

    Returns
    -------
    dict
        Keys ``"model"``, ``"acc_train"``, ``"acc_test"``,
        ``"min_acc_train"`` and ``"min_acc_test"``.
    """
    # Seed the sampler explicitly: the default sampler is unseeded, which made
    # the search ignore `seed` entirely.
    study = optuna.create_study(
        direction="maximize",
        sampler=TPESampler(seed=seed),
    )

    def objective(trial):
        return run_trial(trial, X_train, Y_train, A_train, model_class, param_space, seed)

    study.optimize(
        objective,
        n_trials=n_trials,
        n_jobs=n_jobs,
        show_progress_bar=True,
    )

    # Refit on all training data with the best hyper-parameters found.
    model = model_class(**study.best_params)
    model.fit(X_train, Y_train, A_train)
    Y_pred_train = model.predict_proba(X_train)[:, 1] > 0.5
    Y_pred_test = model.predict_proba(X_test)[:, 1] > 0.5

    result_dict = {
        "model": model_name,
        "acc_train": accuracy_score(Y_train, Y_pred_train),
        "acc_test": accuracy_score(Y_test, Y_pred_test),
        "min_acc_train": utils.min_accuracy(Y_train, Y_pred_train, A_train),
        "min_acc_test": utils.min_accuracy(Y_test, Y_pred_test, A_test),
    }

    return result_dict

def run_ablation_study(args):
    """Tune and evaluate M2FGBClassifier and LGBMClassifier on one data split.

    ``args`` is a dict with the keys ``"dataset"``, ``"n_groups"``,
    ``"n_trials"`` and ``"seed"``. The split comes from
    ``data.get_strat_split``; its train and validation parts are merged
    because model selection is done by cross-validation inside
    ``optimize_model``.

    Returns a list with one result dict per model, M2FGBClassifier first and
    LGBMClassifier second.
    """
    seed = args["seed"]

    # NOTE(review): the meaning of the literal 20 is not visible from this
    # notebook; confirm against data.get_strat_split.
    splits = data.get_strat_split(args["dataset"], args["n_groups"], 20, seed)
    X_fit, A_fit, Y_fit, X_val, A_val, Y_val, X_test, A_test, Y_test = splits

    # join train and validation
    X_train = np.concatenate((X_fit, X_val))
    A_train = np.concatenate((A_fit, A_val))
    Y_train = np.concatenate((Y_fit, Y_val))

    def seeded_factory(class_name):
        """Return a constructor for ``models.<class_name>`` with ``random_state`` fixed to ``seed``."""
        def model_class(**params):
            return getattr(models, class_name)(random_state=seed, **params)
        return model_class

    result = []
    # The class name doubles as the PARAM_SPACES key and the reported label.
    for class_name in ("M2FGBClassifier", "LGBMClassifier"):
        result.append(
            optimize_model(
                class_name,
                X_train,
                Y_train,
                A_train,
                X_test,
                Y_test,
                A_test,
                seeded_factory(class_name),
                PARAM_SPACES[class_name],
                args["n_trials"],
                seed,
            )
        )

    return result


In [9]:
# One independent repetition of every experiment per seed: 0 through 9.
seed_list = list(range(10))

In [None]:
# Ablation on the "german" dataset: pool the per-model rows of every seed.
results = []

for seed in seed_list:
    results.extend(
        run_ablation_study(
            dict(dataset="german", n_groups=4, n_trials=100, seed=seed)
        )
    )

In [13]:
# Mean and standard deviation of every metric per model over the seeds.
# The summary gets its own name instead of overwriting `results`, so this
# cell can be re-run without first repeating the expensive search above.
#
# NOTE(review): the saved output below reports min_acc_* above acc_* for
# M2FGBClassifier (0.712 vs 0.339). A minimum per-group accuracy cannot exceed
# the overall accuracy, so confirm what utils.min_accuracy returns and whether
# this output is stale before relying on these numbers.
german_summary = (
    pd.DataFrame(results)
    .groupby("model")
    .agg({
        metric: ["mean", "std"]
        for metric in ("acc_train", "acc_test", "min_acc_train", "min_acc_test")
    })
    .round(3)
)

german_csv = Path("../results_aaai/experiment/ablation/german_ablation.csv")
# to_csv does not create missing directories.
german_csv.parent.mkdir(parents=True, exist_ok=True)
german_summary.to_csv(german_csv)
german_summary

Unnamed: 0_level_0,acc_train,acc_train,acc_test,acc_test,min_acc_train,min_acc_train,min_acc_test,min_acc_test
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
LGBMClassifier,0.755,0.063,0.729,0.03,0.334,0.078,0.378,0.037
M2FGBClassifier,0.339,0.127,0.342,0.125,0.712,0.106,0.707,0.102


In [None]:
# Ablation on the "enem" dataset: pool the per-model rows of every seed.
results = []

for seed in seed_list:
    results.extend(
        run_ablation_study(
            dict(dataset="enem", n_groups=8, n_trials=100, seed=seed)
        )
    )

In [19]:
# Mean and standard deviation of every metric per model over the seeds.
# The summary gets its own name instead of overwriting `results`, so this
# cell can be re-run without first repeating the expensive search above.
enem_summary = (
    pd.DataFrame(results)
    .groupby("model")
    .agg({
        metric: ["mean", "std"]
        for metric in ("acc_train", "acc_test", "min_acc_train", "min_acc_test")
    })
    .round(3)
)

enem_csv = Path("../results_aaai/experiment/ablation/enem_ablation.csv")
# to_csv does not create missing directories.
enem_csv.parent.mkdir(parents=True, exist_ok=True)
enem_summary.to_csv(enem_csv)
enem_summary

Unnamed: 0_level_0,acc_train,acc_train,acc_test,acc_test,min_acc_train,min_acc_train,min_acc_test,min_acc_test
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
LGBMClassifier,0.883,0.093,0.642,0.01,0.141,0.105,0.421,0.026
M2FGBClassifier,0.829,0.109,0.645,0.007,0.221,0.112,0.417,0.018


In [None]:
# Ablation on the "compas" dataset: pool the per-model rows of every seed.
results = []

for seed in seed_list:
    results.extend(
        run_ablation_study(
            dict(dataset="compas", n_groups=4, n_trials=100, seed=seed)
        )
    )

In [17]:
# Mean and standard deviation of every metric per model over the seeds.
# The summary gets its own name instead of overwriting `results`, so this
# cell can be re-run without first repeating the expensive search above.
compas_summary = (
    pd.DataFrame(results)
    .groupby("model")
    .agg({
        metric: ["mean", "std"]
        for metric in ("acc_train", "acc_test", "min_acc_train", "min_acc_test")
    })
    .round(3)
)

compas_csv = Path("../results_aaai/experiment/ablation/compas_ablation.csv")
# to_csv does not create missing directories.
compas_csv.parent.mkdir(parents=True, exist_ok=True)
compas_summary.to_csv(compas_csv)
compas_summary

Unnamed: 0_level_0,acc_train,acc_train,acc_test,acc_test,min_acc_train,min_acc_train,min_acc_test,min_acc_test
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
LGBMClassifier,0.565,0.033,0.563,0.028,0.489,0.054,0.491,0.05
M2FGBClassifier,0.558,0.093,0.554,0.089,0.483,0.138,0.501,0.127
