In [None]:
#requirements
import numpy as np
import time
import sklearn as sk
import MLRNN as MLR
from copy import deepcopy
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import roc_auc_score as auc
from sklearn.preprocessing import StandardScaler as normalize
from sklearn.model_selection import train_test_split as tts

#also torch, xgboost, catboost, lightgbm, fastai, see below

# User inputs (check this section then run all cells)

In [None]:
import importlib
import importlib.util #explicit: a bare `import importlib` is not guaranteed to expose the `util` submodule

#torch is a hard requirement: fail early with an installation hint
if importlib.util.find_spec('torch') is None:
    raise ImportError("MLR is implemented in torch here! => conda install -c pytorch pytorch")
import torch #explicit import: later cells call torch.cuda.empty_cache()

#optional competitors: each flag is True when the package can be found in the current environment
xgboost_available = importlib.util.find_spec('xgboost') is not None #conda install -c conda-forge xgboost
catboost_available = importlib.util.find_spec('catboost') is not None #conda install -c conda-forge catboost
lgbm_available = importlib.util.find_spec('lightgbm') is not None #conda install -c conda-forge lightgbm
mars_available = importlib.util.find_spec('pyearth') is not None #conda install -c conda-forge sklearn-contrib-py-earth
fastai_available = importlib.util.find_spec('fastai') is not None #conda install -c fastai fastai
#else excluded from the benchmark

In [None]:
#folder holding the preprocessed datasets; files are read as <repository><task name><dataset id>.npy
input_repository = "../preprocessed_datasets/"
#folder receiving every csv written by this notebook (paths are built by plain string concatenation, hence the trailing "/")
output_repository = "outputs/"

#raw result files, one per experiment family
regression_benchmark_output_file = "regression_benchmark.csv"
classification_benchmark_output_file = "classification_benchmark.csv"
ablation_study_output_file = "ablation_study.csv"
dependance_study_output_file = "dependance_study.csv"

In [None]:
benchmark_datasets = np.arange(16) #dataset ids used by the benchmarks: 16 for regression, 16 for classification
study_datasets = [-1, 0, 1] #dataset ids used by the ablation and dependence studies: Seoul Bike Sharing Demand (without target log-rescale), Concrete Slump Test-3, QSAR aquatic toxicity
benchmark_seeds = 10 #number of seeds (0, 1, ...) run per dataset in the benchmarks
study_seeds = 100 #number of seeds run per dataset in the ablation and dependence studies

ensemble_components = ["MLR1","MLR2"] #architectures used for meta-models
benchmark_bagging_reps = 10 #number of estimators in each bagging model
benchmark_top_valid_cut = 5 #number of estimators aggregated when sorted by validation-set performance

In [None]:
#switches: each flag enables both the corresponding experiment cell and the cell processing its results
run_regression_benchmark = True
run_classification_benchmark = True
run_ablation_study = True
run_dependance_study = True

# Load datasets

In [None]:
def dataset_loader(dataset_id, name, repository):
    """Load the array saved as ``<repository><name><dataset_id>.npy``."""
    file_path = "".join([repository, name, str(dataset_id), ".npy"])
    return np.load(file_path)
def prepare_dataset(dataset, train_size = 0.8, seed= False):
    """Standardize the features and split a dataset into train and test parts.

    Parameters
    ----------
    dataset : 2-D array; the last column is the target, all other columns are features.
    train_size : forwarded to ``train_test_split``.
    seed : ``False`` (default) gives an unseeded split. Any truthy value or any
        integer (including 0 and numpy integers) is used as ``random_state``.

    Returns
    -------
    X_train, X_test, y_train, y_test
    """
    kwargs = {}
    #an integer zero must count as a seed; bools are excluded so that False keeps meaning "no seed"
    #(numpy integers are accepted too: np.int64(0) is falsy and is not an exact `int`)
    is_integer_seed = isinstance(seed, (int, np.integer)) and not isinstance(seed, bool)
    if seed or is_integer_seed:
        kwargs["random_state"] = seed
    X, y = dataset[:, :-1], dataset[:, -1]
    #NOTE(review): the scaler is fitted on all rows before the split, so test rows contribute to the scaling statistics
    X = normalize().fit_transform(X)
    X_train, X_test, y_train, y_test = tts(X, y, train_size = train_size, **kwargs)
    return X_train, X_test, y_train, y_test
def get_dataset(dataset_id, name, repository, train_size = 0.8, seed = False):
    """Load one dataset from disk and return its train/test split (see prepare_dataset)."""
    raw_dataset = dataset_loader(dataset_id, name, repository)
    return prepare_dataset(raw_dataset, train_size = train_size, seed = seed)

# Write results

In [None]:
def write_results(results, output_file, output_repository, metrics = ["R2"]):
    """Append one result line to a csv file, creating the file (and its folder) when needed.

    Parameters
    ----------
    results : iterable of values; written comma-separated after conversion with ``str``.
    output_file : file name, concatenated as-is to ``output_repository``.
    output_repository : folder prefix (expected to end with a path separator).
    metrics : names of the metric columns appended to the fixed header
        ``id,dataset,seed,category,method,time``; only used when the file is created.
    """
    import os
    output_path = output_repository + output_file
    #create the output folder on first use instead of failing on a fresh checkout
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok = True)
    #the header is written once, when the file does not exist yet
    if not os.path.isfile(output_path):
        with open(output_path, "w") as file:
            file.write(",".join(["id","dataset","seed","category", "method","time"]+list(metrics)))
    #each record starts with a newline, so the file never ends with an empty line
    with open(output_path, "a") as file:
        file.write("\n"+",".join(map(str,results)))

# Method evaluation

In [None]:
def run_experiment(methods, datasets, input_name, input_repository, output_file, output_repository, seeds = 10, regression = True):
    """Evaluate every method on every (dataset, seed) pair and append one csv line per run.

    Parameters
    ----------
    methods : iterable of ``(category, method name, runner name)`` triples; the runner name
        is the name of a function defined in this notebook (e.g. ``"run_sklearn"``).
    datasets : iterable of dataset ids.
    input_name, input_repository : forwarded to ``get_dataset``.
    output_file, output_repository : forwarded to ``write_results``.
    seeds : number of seeds; seeds ``0 .. seeds-1`` are used.
    regression : selects the metric columns, ``["R2"]`` or ``["ACC","AUC"]``.
    """
    metrics = ["R2"] if regression else ["ACC","AUC"]
    for dataset_id in datasets:
        for seed in range(seeds):
            #the split is seeded: it is reproducible and identical to the split run_ensemble uses
            #for the same (dataset, seed) pair, which result processing groups together
            X_train, X_test, y_train, y_test = get_dataset(dataset_id, input_name, input_repository, train_size = 0.8, seed = seed)

            for method_category, method_name, function in methods:
                exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)
                start_time = time.time()
                #`function` is a runner name taken from the method lists of this notebook (trusted input)
                results = eval(function)(X_train, X_test, y_train, y_test, method_name, seed, regression = regression)
                end_time = time.time() - start_time #elapsed seconds, covers both fit and evaluation

                result_line = [exp_id, dataset_id, seed, method_category, method_name, end_time]+results
                write_results(result_line, output_file, output_repository, metrics = metrics)

# Method categories

In [None]:
#category labels, written in the "category" column of the result files
baseline_name, lm_name, QDA_name, tree_name, ensemble_name = "Baseline", "GLM", "QDA", "TREE", "RF"
spline_name, svm_name, nn_name, xgb_name, mlr_name = "MARS", "SVM", "NN", "GBDT", "MLR"

#names of the runner functions, resolved by name in run_experiment
fastai_experiment, xgb_experiment = "run_fastai", "run_xgb"
sklearn_experiment, mlr_experiment = "run_sklearn", "run_mlr"

#each entry is (category, method name, runner name)
regressor_methods = [
    (baseline_name, "Intercept", sklearn_experiment),
    (lm_name, "Ridge", sklearn_experiment),
    (lm_name, "Lasso", sklearn_experiment),
    (lm_name, "Enet", sklearn_experiment),
    (tree_name, "CART", sklearn_experiment),
    (ensemble_name, "RF", sklearn_experiment),
    (ensemble_name, "XRF", sklearn_experiment),
    (xgb_name, "xgb_sklearn", sklearn_experiment),
    (svm_name, "Kernel", sklearn_experiment),
    (svm_name, "NuSVM", sklearn_experiment),
    (nn_name, "MLP_sklearn", sklearn_experiment),
    (mlr_name, "MLR3", mlr_experiment),
    (mlr_name, "MLR4", mlr_experiment),
]
#optional packages: a method is listed only when its package is installed
if mars_available:
    regressor_methods.append((spline_name, "MARS", sklearn_experiment))
if xgboost_available:
    regressor_methods.append((xgb_name, "XGBoost", xgb_experiment))
if catboost_available:
    regressor_methods.append((xgb_name, "CAT", xgb_experiment))
if lgbm_available:
    regressor_methods.append((xgb_name, "LGBM", xgb_experiment))
if fastai_available:
    regressor_methods.append((nn_name, "fastai", fastai_experiment))

classifier_methods = [
    (baseline_name, "Intercept", sklearn_experiment),
    (lm_name, "Ridge", sklearn_experiment),
    (lm_name, "LinearRidge", sklearn_experiment),
    (lm_name, "Lasso", sklearn_experiment),
    (lm_name, "Enet", sklearn_experiment),
    (QDA_name, "QDA", sklearn_experiment),
    (tree_name, "CART", sklearn_experiment),
    (tree_name, "XCART", sklearn_experiment),
    (ensemble_name, "RF", sklearn_experiment),
    (ensemble_name, "XRF", sklearn_experiment),
    (xgb_name, "xgb_sklearn", sklearn_experiment),
    (xgb_name, "ADABoost", sklearn_experiment),
    (nn_name, "MLP_sklearn", sklearn_experiment),
    (mlr_name, "MLR3", mlr_experiment),
    (mlr_name, "MLR4", mlr_experiment),
]
if xgboost_available:
    classifier_methods.append((xgb_name, "XGBoost", xgb_experiment))
if catboost_available:
    classifier_methods.append((xgb_name, "CAT", xgb_experiment))
if lgbm_available:
    classifier_methods.append((xgb_name, "LGBM", xgb_experiment))
if fastai_available:
    classifier_methods.append((nn_name, "fastai", fastai_experiment))

## Sklearn

In [None]:
#sklearn
#methods that run_sklearn instantiates without a `random_state` argument
#every entry needs its own comma: adjacent string literals are silently concatenated
#(a missing comma used to turn "LinearRidge" and "CART" into the single entry "LinearRidgeCART")
deterministic_methods = ["Intercept",
                         "Ridge",
                         "Lasso",
                         "Enet",
                         "LinearRidge",
                         "CART",
                         "XCART",
                         "Kernel",
                         "NuSVM",
                         "MARS",
                         "QDA"]


def run_sklearn(X_train, X_test, y_train, y_test, method_name, seed, regression = True):
    """Fit one scikit-learn style estimator with default hyper-parameters and score it on the test set.

    Parameters
    ----------
    X_train, X_test, y_train, y_test : train/test split.
    method_name : alias of the estimator, one of the names imported below.
    seed : passed as ``random_state`` unless ``method_name`` is listed in ``deterministic_methods``.
    regression : selects regressors (True) or classifiers (False).

    Returns
    -------
    ``[score]`` for regression, ``[score, AUC]`` for classification, where ``score`` is
    the estimator's own ``score`` method evaluated on the test set.
    """
    if method_name in deterministic_methods: hyper_parameters = {}
    else: hyper_parameters = {"random_state" : seed}   
        
    #rename sklearn classes such that classifier and regressor have the same name
    if regression:
        from sklearn.dummy import DummyRegressor as Intercept
        from sklearn.linear_model import RidgeCV as Ridge
        from sklearn.linear_model import LassoCV as Lasso
        from sklearn.linear_model import ElasticNetCV as Enet
        from sklearn.tree import DecisionTreeRegressor as CART
        from sklearn.ensemble import RandomForestRegressor as RF
        from sklearn.ensemble import ExtraTreesRegressor as XRF
        from sklearn.ensemble import GradientBoostingRegressor as xgb_sklearn
        from sklearn.kernel_ridge import KernelRidge as Kernel
        from sklearn.svm import NuSVR as NuSVM
        from sklearn.neural_network import MLPRegressor as MLP_sklearn
        if method_name == "MARS":
            #pyearth is optional: importing it unconditionally would break every regression run when it is absent
            from pyearth import Earth as MARS
        
    else:                   
        from functools import partial
        from sklearn.calibration import CalibratedClassifierCV
        from sklearn.dummy import DummyClassifier as Intercept
        from sklearn.linear_model import LogisticRegressionCV as LogitCV
        Ridge = partial(LogitCV,penalty = "l2")
        Lasso = partial(LogitCV,penalty = "l1", solver = 'liblinear')
        Enet = partial(LogitCV,penalty = "elasticnet", l1_ratios = [0.5,0.9,.95], solver = 'saga')
        from sklearn.linear_model import RidgeClassifierCV
        from sklearn.utils.extmath import softmax
        class LinearRidge(RidgeClassifierCV):
            #RidgeClassifierCV is given a predict_proba here so that an AUC can be computed below
            def predict_proba(self, X):
                d = self.decision_function(X)
                d_2d = np.c_[-d, d] #two columns built from the decision values (binary case)
                return softmax(d_2d)
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
        from sklearn.tree import DecisionTreeClassifier as CART
        from sklearn.tree import ExtraTreeClassifier as XCART
        from sklearn.ensemble import RandomForestClassifier as RF
        from sklearn.ensemble import ExtraTreesClassifier as XRF
        from sklearn.ensemble import BaggingClassifier as Bagging
        from sklearn.ensemble import AdaBoostClassifier as ADABoost
        from sklearn.ensemble import GradientBoostingClassifier as xgb_sklearn
        from sklearn.neural_network import MLPClassifier as MLP_sklearn 
        
    #method_name is one of the aliases imported above (it comes from the method lists of this notebook)
    if regression:
        result = eval(method_name)(**hyper_parameters).fit(X_train, y_train).score(X_test, y_test)
        return [result]
    else:
        model = eval(method_name)(**hyper_parameters).fit(X_train,y_train)
        result = model.score(X_test, y_test)
        #AUC is computed from column 1 of predict_proba
        roc = auc(y_test, model.predict_proba(X_test)[:,1])
        return [result, roc]

## Fastai

In [None]:
#fastai
#fastai is optional (see fastai_available): a missing package must not abort "run all cells"
try:
    from fastai.tabular.all import *
except ImportError:
    pass #run_fastai is only listed in the method lists when fastai_available is True
import torch #explicit import: other cells call torch.cuda.empty_cache() and must not rely on the star import above
import pandas as pd
from scipy.special import expit as logistic_func
def run_fastai(X_train, X_test, y_train, y_test, method_name, seed, regression = True):
    """Train a fastai tabular learner and score it on the test set.

    ``method_name`` and ``seed`` are accepted for interface compatibility with the other
    runners but are not used (see the note on random state below). 20% of the training
    rows are held out by ``RandomSplitter`` as a validation set.

    Returns ``[R2]`` for regression, ``[accuracy, AUC]`` for classification.
    """
    #no simple way to set random state
    #forums.fast.ai/t/solved-reproducibility-where-is-the-randomness-coming-in/31628
    df = pd.DataFrame(X_train)
    df["target"] = y_train
    dls = TabularPandas(df, procs=[],
                       cat_names = [],
                       cont_names = range(len(df.columns)-1),
                       y_names='target',
                       splits=RandomSplitter(valid_pct=0.2)(range_of(df))).dataloaders(bs=64)
    if regression:
        learn = tabular_learner(dls, metrics=rmse)
        learn.cbs = [learn.cbs[0]] #keep only the first callback of the learner
        learn.fit_one_cycle(200)
        result = r2_score(y_test, learn.get_preds(dl=learn.dls.test_dl(pd.DataFrame(X_test)))[0].numpy())
        del learn
        return [result]
    else:
        learn = tabular_learner(dls, metrics=accuracy)
        learn.cbs = [learn.cbs[0]] #keep only the first callback of the learner
        learn.fit_one_cycle(200)
        decision = learn.get_preds(dl=learn.dls.test_dl(pd.DataFrame(X_test)))[0].numpy()
        #the raw output is thresholded at 0 for the class and passed through a sigmoid for the AUC
        preds = (decision>0).astype(int)
        result, roc = acc(y_test,preds), auc(y_test, logistic_func(decision))
        return [result, roc]

## GBDT

In [None]:
#xgboost, catboost, lightgbm
def run_xgb(X_train, X_test, y_train, y_test, method_name, seed, regression = True):
    """Fit XGBoost ("XGBoost"), CatBoost ("CAT") or LightGBM ("LGBM") and score it on the test set.

    Returns ``[score]`` for regression and ``[score, AUC]`` for classification.
    """
    if method_name == "XGBoost":
        if regression:
            from xgboost import XGBRegressor as XGB
            model = XGB(random_state = seed, objective ='reg:squarederror', verbose = False).fit(X_train,y_train)
        else:
            from xgboost import XGBClassifier as XGB
            model = XGB(random_state = seed, verbose = False).fit(X_train,y_train)
        result = model.score(X_test, y_test)

    elif method_name == "CAT":
        if regression:
            from catboost import CatBoostRegressor as CAT
        else:
            from catboost import CatBoostClassifier as CAT
        model = CAT(random_seed=seed, logging_level='Silent').fit(X_train,y_train)
        result = model.score(X_test, y_test)

    elif method_name == "LGBM":
        #for LightGBM the score is computed from the predictions with the sklearn metric
        if regression:
            from lightgbm.sklearn import LGBMRegressor as LGBM
            model = LGBM(random_state=seed).fit(X_train,y_train)
            result = r2_score(y_test, model.predict(X_test))
        else:
            from lightgbm.sklearn import LGBMClassifier as LGBM
            model = LGBM(random_state=seed).fit(X_train,y_train)
            result = acc(y_test, model.predict(X_test))

    if regression:
        del model
        return [result]

    #classification: AUC from column 1 of predict_proba
    roc = auc(y_test, model.predict_proba(X_test)[:,1])
    del model
    return [result, roc]

## MLR

In [None]:
#set architectures hyper-parameters
#hyper-parameters shared by every MLR architecture
common_parameters = {"max_runtime" : 900, "width":1024}

#one dictionary per depth (1 to 4); each one starts from the common parameters
MLR1_parameters = dict(common_parameters, depth = 1, learning_rate = 1e-2, max_iter = 200)
MLR2_parameters = dict(common_parameters, depth = 2, learning_rate = 1e-3, max_iter = 200)
MLR3_parameters = dict(common_parameters, depth = 3, learning_rate = 1e-3 /3, max_iter = 400)
MLR4_parameters = dict(common_parameters, depth = 4, learning_rate = 1e-4, max_iter = 400)

In [None]:
def run_mlr(X_train, X_test, y_train, y_test, method_name, seed, regression = True):
    """Fit one MLR network with the hyper-parameters named ``<method_name>_parameters``.

    Returns ``[score]`` for regression and ``[score, AUC]`` for classification.
    """
    estimator_class = MLR.MLRNNRegressor if regression else MLR.MLRNNClassifier
    model = estimator_class(random_state = seed, **eval(method_name+"_parameters")).fit(X_train,y_train)

    scores = [model.score(X_test, y_test)]
    if not regression:
        scores.append(auc(y_test, model.predict_proba(X_test)[:,1]))

    #drop the model and its weights before the next experiment
    model.delete_model_weights()
    del model
    torch.cuda.empty_cache()
    return scores

## MLR Ensemble

In [None]:
#compute aggregated models simultaneously, based on the same set of predictions
#in our experiments, ensemble_components = ["MLR1","MLR2"]

def get_MLR_prediction(X_train, X_test, y_train, y_test, method_name, seed, regression = True):
    """Fit one MLR estimator and return ``(test-set predictions, validation performance)``.

    Predictions are the flattened output of ``predict`` for regression and column 1 of
    ``predict_proba`` for classification. The validation performance is the maximum of
    ``model.record["validation"]``.
    """
    hyper_parameters = eval(method_name+"_parameters")
    if regression:
        model = MLR.MLRNNRegressor(random_state = seed, **hyper_parameters).fit(X_train,y_train)
        prediction = model.predict(X_test).reshape(-1)
    else:
        model = MLR.MLRNNClassifier(random_state = seed, **hyper_parameters).fit(X_train,y_train)
        prediction = model.predict_proba(X_test)[:,1]

    validation_performance = np.max(model.record["validation"])

    #drop the model and its weights before the next estimator is trained
    model.delete_model_weights()
    del model
    torch.cuda.empty_cache()
    return prediction, validation_performance

def evaluate_MLR_prediction(y_test, prediction, exp_id, dataset_id, seed, method_category, method_name, end_time,output_file, output_repository, regression = True):
    """Score an (aggregated) prediction vector and append the result line to the output file.

    Regression writes R2; classification writes the accuracy of ``prediction >= 0.5``
    followed by the AUC of the raw prediction.
    """
    line_prefix = [exp_id, dataset_id, seed, method_category, method_name, end_time]
    if regression:
        metric_names = ["R2"]
        scores = [r2_score(y_test, prediction)]
    else:
        metric_names = ["ACC","AUC"]
        accuracy = acc(y_test, prediction >= 0.5)
        scores = [accuracy, auc(y_test, prediction)]
    write_results(line_prefix + scores, output_file, output_repository, metrics = metric_names)
    
def run_ensemble(ensemble_components, 
                            datasets, 
                            input_name, 
                            input_repository, 
                            output_file, 
                            output_repository, 
                            seeds = 10, 
                            bagging_reps = 10,
                            top_valid_cut = 5,
                            regression = True):
    """Train several MLR estimators per architecture and evaluate their aggregations.

    For every (dataset, seed) pair and every architecture in ``ensemble_components``,
    ``bagging_reps`` estimators are trained. The following lines are written:

    * ``<architecture>``: mean score of the individual estimators, with the mean run time;
    * ``Bagging_<architecture>``: score of the averaged predictions of that architecture;
    * ``ensemble``: score of the average of the per-architecture averaged predictions;
    * ``Best-MLR``: the single estimator with the highest validation performance;
    * ``Top<top_valid_cut>-MLR``: average of the ``top_valid_cut`` estimators with the
      highest validation performance.

    Aggregated lines reuse existing predictions and are written with a time of 0.

    Parameters
    ----------
    ensemble_components : names of the architectures (``<name>_parameters`` must exist).
    datasets, input_name, input_repository : datasets to load with ``get_dataset``.
    output_file, output_repository : forwarded to ``write_results``.
    seeds : number of train/test splits per dataset (seeds ``0 .. seeds-1``).
    bagging_reps : number of estimators trained per architecture and split.
    top_valid_cut : number of estimators kept by the validation-based selection.
    regression : regression (R2) or classification (ACC, AUC) task.
    """
    method_category = "MLR"
    for dataset_id in datasets:
        for seed in range(seeds):
            X_train, X_test, y_train, y_test = get_dataset(dataset_id, input_name, input_repository, seed = seed)
            predictions = {}
            validation_performances = {}
            for method_name in ensemble_components:
                predictions[method_name] = []
                validation_performances[method_name] = []
                exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)

                start_time = time.time()
                for rep in range(bagging_reps):
                    #model seed: distinct for every (seed, rep) pair
                    #the task type is forwarded (it used to be hard-coded to regression, so classification runs trained regressors)
                    prediction, validation_performance = get_MLR_prediction(X_train, X_test, y_train, y_test, method_name, rep + seed * bagging_reps, regression = regression)
                    predictions[method_name].append(prediction)
                    validation_performances[method_name].append(validation_performance)

                #Mean performance across several models
                if regression:
                    result = np.mean([r2_score(y_test, pred) for pred in predictions[method_name]])
                    end_time = (time.time()-start_time)/bagging_reps
                    result_line = [exp_id, dataset_id, seed, method_category, method_name, end_time]+[result]
                    write_results(result_line, output_file, output_repository, metrics = ["R2"])
                else:
                    result = np.mean([acc(y_test, pred >= 0.5) for pred in predictions[method_name]])
                    roc = np.mean([auc(y_test, pred) for pred in predictions[method_name]])
                    end_time = (time.time()-start_time)/bagging_reps
                    result_line = [exp_id, dataset_id, seed, method_category, method_name, end_time]+[result, roc]
                    write_results(result_line, output_file, output_repository, metrics = ["ACC","AUC"])

                #Bagging aggregation
                prediction = np.mean(predictions[method_name], axis = 0)
                bagging_name = "Bagging_" + method_name
                end_time = 0
                exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(bagging_name)
                evaluate_MLR_prediction(y_test, prediction, exp_id, dataset_id, seed, method_category, bagging_name, end_time,output_file, output_repository, regression = regression)

            #Ensemble aggregation
            prediction = np.mean([np.mean(predictions[method_name], axis = 0) for method_name in ensemble_components], axis = 0)
            method_name = "ensemble"
            end_time = 0
            exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)
            evaluate_MLR_prediction(y_test, prediction, exp_id, dataset_id, seed, method_category, method_name, end_time,output_file, output_repository, regression = regression)

            #Valid model selection: flatten all estimators of all architectures, in the same order for both lists
            all_predictions = [prediction for method_name in ensemble_components for prediction in predictions[method_name]]
            all_validation_performances = [validation_performance for method_name in ensemble_components for validation_performance in validation_performances[method_name]]
            #indices of the top_valid_cut largest validation performances, best one last
            top_valid_performances = np.argsort(all_validation_performances)[-top_valid_cut:]

            #Best model
            method_name = "Best-MLR"  
            end_time = 0
            exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)
            prediction = all_predictions[top_valid_performances[-1]]
            evaluate_MLR_prediction(y_test, prediction, exp_id, dataset_id, seed, method_category, method_name, end_time,output_file, output_repository, regression = regression)

            #Top top_valid_cut models
            method_name = "Top"+str(top_valid_cut)+"-MLR"  
            end_time = 0
            exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)
            prediction = np.mean([all_predictions[index] for index in top_valid_performances], axis = 0)
            evaluate_MLR_prediction(y_test, prediction, exp_id, dataset_id, seed, method_category, method_name, end_time,output_file, output_repository, regression = regression)


# Run experiments

## Benchmark

### Regression

In [None]:
if run_regression_benchmark:
    task_name, regression = "regression", True

    #single estimators first, then the MLR aggregations; both append to the same csv
    run_experiment(
        regressor_methods,
        benchmark_datasets,
        task_name,
        input_repository,
        regression_benchmark_output_file,
        output_repository,
        seeds = benchmark_seeds,
        regression = True,
    )
    run_ensemble(
        ensemble_components,
        benchmark_datasets,
        task_name,
        input_repository,
        regression_benchmark_output_file,
        output_repository,
        seeds = benchmark_seeds,
        bagging_reps = benchmark_bagging_reps,
        top_valid_cut = benchmark_top_valid_cut,
        regression = True,
    )

### Classification

In [None]:
if run_classification_benchmark:
    task_name, regression = "classification", False

    #single estimators first, then the MLR aggregations; both append to the same csv
    run_experiment(
        classifier_methods,
        benchmark_datasets,
        task_name,
        input_repository,
        classification_benchmark_output_file,
        output_repository,
        seeds = benchmark_seeds,
        regression = False,
    )
    run_ensemble(
        ensemble_components,
        benchmark_datasets,
        task_name,
        input_repository,
        classification_benchmark_output_file,
        output_repository,
        seeds = benchmark_seeds,
        bagging_reps = benchmark_bagging_reps,
        top_valid_cut = benchmark_top_valid_cut,
        regression = False,
    )

## Ablation study

In [None]:
#ablation variants of MLR2: each one copies the MLR2 hyper-parameters and overrides
#ridge_init, n_permut and target_rotation_scale (False is the value used to turn a component off)
NN2_parameters = dict(deepcopy(MLR2_parameters), ridge_init = False, n_permut = False, target_rotation_scale = False)

NN2_Ridge_parameters = dict(deepcopy(MLR2_parameters), ridge_init = "max_variation", n_permut = False, target_rotation_scale = False)

NN2_Ridge_SD_parameters = dict(deepcopy(MLR2_parameters), ridge_init = "max_variation", n_permut = False, target_rotation_scale = 0.5)

NN2_Ridge_Permut_parameters = dict(deepcopy(MLR2_parameters), ridge_init = "max_variation", n_permut = 16, target_rotation_scale = False)

#architectures compared in the ablation study ("MLR2" is the unmodified reference)
ablation_compared_architectures = ["NN2", "NN2_Ridge", "NN2_Ridge_Permut", "NN2_Ridge_SD", "MLR2"]

In [None]:
if run_ablation_study:
    task_name = "regression"
    regression = task_name == "regression"

    ablation_bagging_reps = 10
    #performs useless but costless aggregation between the different architectures (discarded when processing results)
    ablation_top_valid_cut = 2

    run_ensemble(
        ablation_compared_architectures,
        study_datasets,
        task_name,
        input_repository,
        ablation_study_output_file,
        output_repository,
        seeds = study_seeds,
        bagging_reps = ablation_bagging_reps,
        top_valid_cut = ablation_top_valid_cut,
        regression = regression,
    )

## Dependence study

In [None]:
def run_dependance_mlr(method_name, parameter_name, values, datasets, input_name, input_repository, output_file, output_repository, seeds = 10):
    """Measure how one hyper-parameter affects an MLR regressor.

    For every (dataset, seed) pair and every value in ``values``, an ``MLRNNRegressor`` is
    trained with the hyper-parameters ``<method_name>_parameters`` in which
    ``parameter_name`` is replaced by the value. One csv line is written per run with the
    columns ``parameter_name, value, R2, best_iter, valid_max, lambda_init``.

    Parameters
    ----------
    method_name : architecture name; ``<method_name>_parameters`` must exist.
    parameter_name : hyper-parameter being varied.
    values : values tested for that hyper-parameter.
    datasets, input_name, input_repository : datasets to load with ``get_dataset``.
    output_file, output_repository : forwarded to ``write_results``.
    seeds : number of seeds (``0 .. seeds-1``).
    """
    method_category = "MLR"
    for dataset_id in datasets:
        for seed in range(seeds):
            #seeded split: reproducible, and every value is compared on the same train/test split
            X_train, X_test, y_train, y_test = get_dataset(dataset_id, input_name, input_repository, train_size = 0.8, seed = seed)    
            for value in values:
                #note: the id does not contain the value; rows are told apart by the "value" column
                exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)+"_"+str(parameter_name)
                parameters = deepcopy(eval(method_name+"_parameters"))
                parameters.update({parameter_name : value})
                
                start_time = time.time()
                model = MLR.MLRNNRegressor(random_state = seed, **parameters).fit(X_train, y_train)
                result = model.score(X_test, y_test)
                best_iter = model.best_iter
                valid_max = model.record["validation"][model.best_iter] #validation record at the best iteration
                lambda_init = model.record["lambda"][0] #first recorded value of lambda
                model.delete_model_weights()
                del model
                torch.cuda.empty_cache()
                end_time = time.time() - start_time
                
                result_line = [exp_id, dataset_id, seed, method_category, method_name, end_time] + [parameter_name, value, result, best_iter, valid_max, lambda_init]
                write_results(result_line, output_file, output_repository, metrics = ["parameter_name", "value", "R2", "best_iter", "valid_max", "lambda_init"])

In [None]:
if run_dependance_study:
    task_name = "regression"
    regression = task_name == "regression"
    method_name = "MLR2"

    #hyper-parameter -> values tested, one hyper-parameter at a time
    dependance_loops = {
        "target_rotation_scale" : [0.,1e-1,0.5,1,1.5], #the actual value is twice this (for legacy code reasons)
        "n_permut" : [0,1,2,4,16,256,1024],
        "ridge_init" : [1e-3,1e-1,1e1,1e3,1e5,1e7,1e9, "max_variation"],
        "label_noise_scale" : [0.,1e-2, 1e-2*3, 1e-1,1e-1*3],
        "width": [16,64,256,1024,4096],
    }

    for parameter_name, values in dependance_loops.items():
        run_dependance_mlr(
            method_name,
            parameter_name,
            values,
            study_datasets,
            task_name,
            input_repository,
            dependance_study_output_file,
            output_repository,
            seeds = study_seeds,
        )

## Batch size dependence

In [None]:
def run_dependance_batchsize_mlr(method_name, values, datasets, input_name, input_repository, output_file, output_repository, seeds = 10):
    """Measure how ``batch_size`` affects an MLR regressor.

    Same protocol and output columns as ``run_dependance_mlr``, except that the tested
    batch sizes are capped by the number of training rows ``n``: when ``n`` is smaller
    than the largest requested value, the values below ``n`` are tested, followed by
    ``n`` itself.

    Parameters
    ----------
    method_name : architecture name; ``<method_name>_parameters`` must exist.
    values : batch sizes to test.
    datasets, input_name, input_repository : datasets to load with ``get_dataset``.
    output_file, output_repository : forwarded to ``write_results``.
    seeds : number of seeds (``0 .. seeds-1``).
    """
    method_category = "MLR"
    parameter_name = "batch_size"
    for dataset_id in datasets:
        for seed in range(seeds):
            #seeded split: reproducible, and every batch size is compared on the same train/test split
            X_train, X_test, y_train, y_test = get_dataset(dataset_id, input_name, input_repository, train_size = 0.8, seed = seed)    
            n = X_train.shape[0]
            if n < np.max(values):
                dataset_values = [value for value in values if value < n] + [n]
            else: 
                dataset_values = [value for value in values]
            #iterate over the capped list (the loop used to run over `values`, ignoring the cap computed above)
            for value in dataset_values:
                exp_id = str(dataset_id)+'_'+str(seed)+"_"+str(method_category)+"_"+str(method_name)+"_"+str(parameter_name)
                parameters = deepcopy(eval(method_name+"_parameters"))
                parameters.update({parameter_name : value})
                
                start_time = time.time()
                model = MLR.MLRNNRegressor(random_state = seed, **parameters).fit(X_train, y_train)
                result = model.score(X_test, y_test)
                best_iter = model.best_iter
                valid_max = model.record["validation"][model.best_iter] #validation record at the best iteration
                lambda_init = model.record["lambda"][0] #first recorded value of lambda
                model.delete_model_weights()
                del model
                torch.cuda.empty_cache()
                end_time = time.time() - start_time
                
                result_line = [exp_id, dataset_id, seed, method_category, method_name, end_time] + [parameter_name, value, result, best_iter, valid_max, lambda_init]
                write_results(result_line, output_file, output_repository, metrics = ["parameter_name", "value", "R2", "best_iter", "valid_max", "lambda_init", ])

In [None]:
if run_dependance_study:
    task_name = "regression"
    regression = task_name == "regression"
    method_name = "MLR2"
    #batch sizes to test
    values = [1,16,32,64,128,256,512,1024,2048,4096,8192, 16384]
    run_dependance_batchsize_mlr(
        method_name,
        values,
        study_datasets,
        task_name,
        input_repository,
        dependance_study_output_file,
        output_repository,
        seeds = study_seeds,
    )

# Print results

In [None]:
import pandas as pd
#method name -> method class used to group methods when processing the benchmark results
class_dic = {
    **dict.fromkeys(['fastai', "MLP_sklearn"], "NN"),
    **dict.fromkeys(['MLR1', 'Bagging_MLR1', 'MLR2', 'Bagging_MLR2', 'ensemble', 'Best-MLR', 'Top5-MLR', 'MLR3', 'MLR4'], "MLR"),
    **dict.fromkeys(['XGBoost', 'CAT', 'LGBM'], "GBDT"),
    **dict.fromkeys(['Ridge', 'Lasso', 'Enet', 'LinearRidge'], "LM"),
    **dict.fromkeys(['QDA'], "QDA"),
    **dict.fromkeys(['CART', 'XCART'], "TREE"),
    **dict.fromkeys(['RF', 'XRF', 'Bagging'], "RF"),
    **dict.fromkeys(['ADABoost', 'xgb_sklearn'], "GBDT"),
    **dict.fromkeys(['Intercept'], "Baseline"),
    **dict.fromkeys(['MARS'], "MARS"),
    **dict.fromkeys(["NuSVM", "Kernel"], "SVM"),
}


def get_result_table(result_file, metric, reference = "ensemble"):
    """Summarize a raw benchmark csv into one row of statistics per method class.

    Parameters
    ----------
    result_file : path of a csv written by ``write_results``.
    metric : name of the metric column to summarize (e.g. "R2", "ACC", "AUC").
    reference : method whose score on the same (dataset, seed) is subtracted from every
        score before computing the standard deviation across runs.

    Returns
    -------
    DataFrame with the columns ``class``, ``<metric>``, ``<metric>_std``, ``<metric>_rank``,
    ``<metric>_rank_std``, ``<metric>_PMA``, ``<metric>_PMA_std`` and ``<metric>_P<q>``
    for q in 0.9, 0.95, 0.98. PMA is the score divided by the best score obtained by any
    method on the same (dataset, seed).

    Aggregations select their columns (or use ``numeric_only``) explicitly: averaging
    text columns such as ``id`` or ``method`` raises a TypeError with pandas >= 2.0.
    """
    q_values = [0.90,0.95,0.98]
    kept_columns = ["class"] + [metric+col for col in ["","_std", "_rank","_rank_std","_PMA","_PMA_std"]]+[metric+"_P"+str(q) for q in q_values]
    
    df2 = pd.read_csv(result_file)
    df2["class"] = [class_dic[method] for method in df2["method"].values]
    
    #Compute PMA
    df_max = df2.groupby(["dataset","seed"])[metric].max()
    df2.set_index(["dataset","seed"],inplace = True)
    df2[metric+"_max"] = df_max #aligned on the (dataset, seed) index
    df2.reset_index(inplace = True)
    df2[metric+"_PMA"] = df2[metric]/df2[metric+"_max"]

    #Compute P90, P95, P98: share of (dataset, seed) pairs where the best method of the class exceeds q
    p_columns = [metric+"_P"+str(q) for q in q_values]
    df_p = df2.groupby(["dataset","seed","class"])[metric+"_PMA"].max().reset_index()
    for q, p_column in zip(q_values, p_columns):
        df_p[p_column] = (df_p[metric+"_PMA"] > q).astype(int)
    df_p = df_p.groupby('class')[p_columns].mean()

    #Use ensemble(MLR1+MLR2) results across all seeds as a baseline to measure standard deviation for all methods
    ensemble_ref = df2[df2["method"]==reference].set_index(["dataset","seed"])
    df2.set_index(["dataset","seed"], inplace = True)
    df2[metric+"_ref"] = ensemble_ref[metric]
    df2[metric+"_std"] = df2[metric].values-df2[metric+"_ref"].values
    df2.reset_index(inplace = True)
    df_mean_seed = df2.groupby(["dataset","method"]).mean(numeric_only = True).reset_index()
    df_mean_seed["class"] = [class_dic[method] for method in df_mean_seed["method"].values]
    #after sorting, the last row of each (dataset, class) group is the method with the highest mean metric
    df_mean_seed.sort_values(["dataset","class",metric],inplace = True)
    df_mean_seed_max_class = df_mean_seed.groupby(["dataset","class"]).last().reset_index()
    df_mean_seed_max_class.set_index("method",inplace = True)
    #per-method statistics, aligned on the method index
    df_mean_seed_max_class[metric + "_std"] = df2.groupby("method")[metric+"_std"].std()
    df_mean_seed_max_class[metric+"_PMA"] = df2.groupby("method")[metric+"_PMA"].mean()
    df_mean_seed_max_class[metric+"_PMA_std"] = df2.groupby("method")[metric+"_PMA"].std()
    df_mean_seed_max_class.reset_index(inplace = True)
    df_mean_seed_max_class_mean_ds = df_mean_seed_max_class.groupby(["class"]).mean(numeric_only = True).reset_index()
    
    #Compute Friedman Rank
    #the modulo-based rank assumes that every (dataset, seed) pair contains every class exactly once after the groupby
    df_rank = df2.groupby(["dataset","seed","class"])[metric].max().reset_index().sort_values(["dataset","seed",metric],ascending = False)
    df_rank[metric+"_rank"]= np.arange(len(df_rank))%len(df_rank["class"].unique()) +1
    df_mean_seed_max_class_mean_ds.set_index("class", inplace = True)
    df_mean_seed_max_class_mean_ds[metric+"_rank"] = df_rank.groupby("class")[metric+"_rank"].mean()
    df_mean_seed_max_class_mean_ds[metric+"_rank_std"] = df_rank.groupby("class")[metric+"_rank"].std()
    for p_column in p_columns:
        df_mean_seed_max_class_mean_ds[p_column] = df_p[p_column]
    df_mean_seed_max_class_mean_ds.reset_index(inplace = True)
    
    #return results with only useful columns
    return df_mean_seed_max_class_mean_ds[kept_columns]

## Regression Benchmark results

In [None]:
if run_regression_benchmark:
    metric, result_file = "R2", regression_benchmark_output_file #"regression_benchmark.csv"
    #processed table is written next to the raw results
    output_file = "_".join(("processed", metric, result_file))
    get_result_table(output_repository + result_file, metric).to_csv(output_repository + output_file)

## Classification Benchmark results

In [None]:
if run_classification_benchmark:
    metric, result_file = "ACC", classification_benchmark_output_file #"classification_benchmark.csv"
    #processed table is written next to the raw results
    output_file = "_".join(("processed", metric, result_file))
    get_result_table(output_repository + result_file, metric).to_csv(output_repository + output_file)

In [None]:
if run_classification_benchmark:
    metric, result_file = "AUC", classification_benchmark_output_file #"classification_benchmark.csv"
    #processed table is written next to the raw results
    output_file = "_".join(("processed", metric, result_file))
    get_result_table(output_repository + result_file, metric).to_csv(output_repository + output_file)

## Ablation results

In [None]:
def process_ablation_results(result_file, metric, reference = 'Bagging_MLR2'):
    """Summarize the ablation study: mean metric and run-to-run variation per method.

    Parameters
    ----------
    result_file : path (or file-like object) of the raw ablation csv.
    metric : name of the metric column (e.g. "R2").
    reference : method whose score on the same (dataset, seed) is subtracted from every
        score before computing the standard deviation.

    Returns
    -------
    DataFrame with the columns ``method``, ``<metric>`` (mean over all runs) and ``std``
    (standard deviation of the difference to the reference).
    """
    df = pd.read_csv(result_file)
    
    #keep only single and bagging estimators (i.e. no ensemble)
    kept_methods = [prefix + method_name  for method_name in ["NN2", "NN2_Ridge", "NN2_Ridge_Permut", "NN2_Ridge_SD", "MLR2"] for prefix in ["", "Bagging_"]]
    #explicit copy: the filtered frame is modified in place below
    df = df[np.isin(df["method"].values,kept_methods)].copy()
    
    #use the reference method to compute result variation across all seeds
    df.set_index(["dataset","seed"], inplace=True)
    df["ref"] = df[df["method"] == reference][metric] #aligned on the (dataset, seed) index
    df.reset_index(inplace=True)
    df['std'] = df[metric] - df["ref"]
    
    #Average across all seeds
    #only the two numeric columns of interest are aggregated: averaging text columns (id, category) raises a TypeError with pandas >= 2.0
    per_method = df.groupby("method")[[metric, "std"]]
    mean_df = per_method.mean()
    mean_df["std"] = per_method.std()["std"]
    return mean_df.reset_index()[["method", metric, "std"]]

In [None]:
if run_ablation_study:
    metric, result_file = "R2", ablation_study_output_file #"ablation_study.csv"
    #processed table is written next to the raw results
    output_file = "".join(("processed_", result_file))
    process_ablation_results(output_repository + result_file, metric).to_csv(output_repository + output_file)

## Dependence results

In [None]:
if run_dependance_study:
    result_file = dependance_study_output_file #"dependance_study.csv"
    output_file = "processed_" + result_file
    kept_columns = ["R2", "time", "best_iter", "valid_max", "lambda_init"]
    dependance_results = pd.read_csv(output_repository+result_file,delimiter = ",")
    #average over seeds for every (parameter, dataset, value) combination
    #columns are selected before averaging: text columns (id, category, method) raise a TypeError with pandas >= 2.0
    dependance_results.groupby(["parameter_name","dataset","value"])[kept_columns].mean().to_csv(output_repository+output_file)