# Experiment 4 on real datasets.
**Authors:**
* José Ángel Martín-Baos
* Julio Alberto López-Gomez
* Luis Rodríguez-Benítez
* Tim Hillel
* Ricardo García-Ródenas

## Imports and function definitions

In [1]:
%load_ext autoreload
%autoreload 2
## Import packages
import pandas as pd  # For file input/output
from scipy import optimize
from scipy.optimize._numdiff import approx_derivative
import sys

# append a new directory to sys.path
sys.path.append('../rumboost and RUMs/')

import time
import numpy as np
import pickle
import copy
import gc
import matplotlib.pyplot as plt
#import shap
from rumbooster import rum_train
from utils import bio_to_rumboost
from datasets import load_preprocess_LPMC, load_preprocess_SwissMetro
from models import LPMC, LPMC_normalised, LPMC_nested_normalised, SwissMetro, SwissMetro_normalised
from benchmarks import return_dataset, prepare_model, estimate_models, prepare_labels, predict_test, predict_proba
import lightgbm as lgb
import torch

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, recall_score, accuracy_score

from sklearn.model_selection import StratifiedShuffleSplit

# Load common functions for the experiments
from expermients_functions import *

In [2]:
## Import the Classification models
# from Models.MNL import MNL
# from Models.SVM import SVM
# from Models.RandomForest import RandomForest
from Models.LightGBM import LightGBM
from Models.NN import NN
from Models.DNN import DNN
from Models.ResLogit import ResLogit
#from Models.CNN import CNN
# from Models.ResNet import ResNet




In [3]:
# Customize matplotlib
tex_fonts = dict([
    # Render every piece of text through LaTeX, with a serif face
    ("text.usetex", True),
    ("font.family", "serif"),
    # 14pt in the plots, chosen to match the 10pt font of the document
    ("axes.labelsize", 14),
    ("font.size", 14),
    # Legend and tick labels are slightly smaller than the body text
    ("legend.fontsize", 12),
    ("xtick.labelsize", 12),
    ("ytick.labelsize", 12),
])

# Apply the overrides to the global matplotlib configuration
plt.rcParams.update(tex_fonts)

## Experiment initialization

### Experiment parameters

In [4]:
## Experiment parameters
# --- Locations of the input data and of the stored results ---
data_dir = "../Data/Datasets/preprocessed/"
adjusted_hyperparms_dir = "../Data/adjusted-hyperparameters/"
partial_results_dir = "../Data/Results-RealDatasets/"
train_suffix = "_train.csv"
test_suffix = "_test.csv"

# --- Run control ---
reset_crossval_indices = 0  # Set to 0 for reproducibility of the experiment over multiple executions
recompute_Experiment_4 = True  # True forces every model to be re-estimated

# --- Evaluation settings ---
rounding = 2  # Decimals kept in the summary tables
CV = 5  # Number of cross-validation folds
n_iter = 20  # Number of iterations used in the random search
average_tech = "macro"  # "micro"

# The hyperparameter file name encodes the size of the random search
hyperparameters_suffix = "_hyperparameters_{}.csv".format(n_iter)

# ResLogit is mapped to its name only: it is built by hand in the experiment loop
model_type_to_class = dict(LightGBM=LightGBM, NN=NN, DNN=DNN, ResLogit="ResLogit")

# Parameters added to every model's hyperparameters
STATIC_PARAMS = {'n_jobs': -1}

### Load the data

In [63]:
# Description of every dataset used in the experiment, keyed by dataset ID.
# The ID is also the file-name prefix used when the CSV files are loaded.
# NOTE: the key "scaled_fetures" is misspelled, but it is read under exactly
# this name by the experiment loop, so it must not be renamed in isolation.
datasets = {"LPMC": {
                  "name": "LPMC",
                  # Column holding the chosen alternative (the target variable)
                  "mode_var": "travel_mode",
                  # Column used to group observations when building the CV folds
                  "individual_id": "household_id",
                  # Columns standardised with StandardScaler before fitting
                  "scaled_fetures": ['day_of_week', 'start_time_linear', 'age', 'car_ownership',
                                     'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access', 'dur_pt_rail',
                                     'dur_pt_bus', 'dur_pt_int_waiting', 'dur_pt_int_walking', 'pt_n_interchanges',
                                     'dur_driving', 'cost_transit', 'cost_driving_fuel'],
                  # Display names of the alternatives (presumably ordered by class label - TODO confirm)
                  "alt_names": ["Walk", "Bike", "Public transport", "Car"]
                }
}

In [64]:
## Load the data
def load_data(dataset_id, dataset):
    """Load the train/test CSV files and the tuned hyperparameters of a dataset.

    Parameters
    ----------
    dataset_id : str
        File-name prefix of the dataset inside ``data_dir``.
    dataset : dict
        Dataset description; the keys ``"mode_var"`` (target column) and
        ``"individual_id"`` (grouping column) are read here.

    Returns
    -------
    tuple
        ``(X, y, final_test_X, final_test_y, alts, groups, hyperparameters)``.
        ``X``/``final_test_X`` exclude both the target and the individual ID
        columns, ``alts`` lists the distinct targets seen in the training set,
        ``groups`` holds the individual ID of every training row and
        ``hyperparameters`` is the hyperparameter CSV converted with
        ``DataFrame.to_dict()``. ``hyperparameters`` is an empty dict when the
        hyperparameter file cannot be read.
    """
    train = pd.read_csv(data_dir + dataset_id + train_suffix, sep=',')
    final_test = pd.read_csv(data_dir + dataset_id + test_suffix, sep=',')

    target_col = dataset["mode_var"]
    id_col = dataset["individual_id"]

    # Divide the dataset into characteristics and target variable
    X = train.loc[:, train.columns != target_col]
    y = train[target_col]
    final_test_X = final_test.loc[:, final_test.columns != target_col]
    final_test_y = final_test[target_col]

    alts = list(y.unique())  # List containing all the modes (alternatives) in the dataset

    # Extract the individual ID to later group observations using it
    groups = np.array(X[id_col].values)
    X = X.drop(columns=id_col)
    final_test_X = final_test_X.drop(columns=id_col)

    # Load the hyperparameters. The default keeps the return statement valid
    # when the file is missing; previously that case ended in UnboundLocalError.
    hyperparameters = {}
    try:
        adjusted_hyperparameters_file = pd.read_csv(adjusted_hyperparms_dir + dataset_id + hyperparameters_suffix, index_col=0)
        hyperparameters = adjusted_hyperparameters_file.to_dict()
    except (OSError, IOError):
        print("Error while loading best_hyperparameters for dataset {} - {}...".format(dataset_id, n_iter))

    return (X, y, final_test_X, final_test_y, alts, groups, hyperparameters)

In [65]:
## Pre-compute all the data for the experiments
def pre_compute_data(X, v1_name, v2_name, d_1, d_2):
    """Build the shifted copies of ``X`` needed for central finite differences.

    The step of each variable is its population standard deviation in ``X``
    multiplied by the corresponding factor (``d_1`` for ``v1_name``, ``d_2``
    for ``v2_name``). ``X`` itself is left untouched.

    Parameters
    ----------
    X : pandas.DataFrame
        Data containing the columns ``v1_name`` and ``v2_name``.
    v1_name, v2_name : str
        Names of the two columns to perturb.
    d_1, d_2 : float
        Step sizes expressed as a fraction of each column's standard deviation.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_v1_minus_d, X_v1_plus_d, X_v2_minus_d, X_v2_plus_d)``.
    """
    # Ask pandas for the per-column population std (ddof=0) explicitly.
    # np.std(DataFrame) reaches the same method with axis=None, which pandas
    # deprecates in favour of a scalar reduction over both axes.
    X_std = X[[v1_name, v2_name]].std(axis=0, ddof=0).to_numpy()
    v1_delta = d_1 * X_std[0]
    v2_delta = d_2 * X_std[1]

    # Numerical differentiation for v_1
    X_v1_minus_d, X_v1_plus_d = X.copy(), X.copy()
    X_v1_minus_d[v1_name] = X_v1_minus_d[v1_name] - v1_delta
    X_v1_plus_d[v1_name] = X_v1_plus_d[v1_name] + v1_delta

    # Numerical differentiation for v_2
    X_v2_minus_d, X_v2_plus_d = X.copy(), X.copy()
    X_v2_minus_d[v2_name] = X_v2_minus_d[v2_name] - v2_delta
    X_v2_plus_d[v2_name] = X_v2_plus_d[v2_name] + v2_delta

    return (X_v1_minus_d, X_v1_plus_d, X_v2_minus_d, X_v2_plus_d)

In [66]:
def alt_spec_data(dataset):
    """Arrange a flat feature table into one feature matrix per alternative.

    Every alternative gets the same 32-feature layout: 17 alternative-specific
    slots followed by 15 socio-economic columns that are shared by all the
    alternatives. A slot that does not apply to an alternative is filled with
    zeros. Judging by the column names, the four alternatives are walking,
    cycling, public transport and driving, in that order.

    The input frame is not modified: the helper ``zeros`` column is added to a
    copy, so the caller's feature matrix keeps its original columns.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table containing every column referenced below.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_observations, 32, 4)``.
    """
    # Columns shared by all the alternatives (appended after the 17 slots)
    socio_economic = ['age', 'female', 'start_time_linear', 'day_of_week', 'car_ownership',
                      'driving_license', 'purpose_B', 'purpose_HBE', 'purpose_HBO', 'purpose_HBW',
                      'purpose_NHBO', 'fueltype_Average', 'fueltype_Diesel', 'fueltype_Hybrid',
                      'fueltype_Petrol']

    # slot index -> column, one mapping per alternative; unlisted slots are zero
    n_slots = 17
    alt_specific = [
        {0: 'distance', 4: 'dur_walking'},
        {1: 'distance', 5: 'dur_cycling'},
        {2: 'distance', 6: 'dur_pt_access', 8: 'dur_pt_rail', 9: 'dur_pt_bus',
         10: 'dur_pt_int_waiting', 11: 'dur_pt_int_walking', 12: 'pt_n_interchanges',
         13: 'cost_transit'},
        {3: 'distance', 7: 'dur_driving', 14: 'cost_driving_fuel', 15: 'congestion_charge',
         16: 'driving_traffic_percent'},
    ]

    # assign() returns a new frame, so the caller's data is left untouched
    padded = dataset.assign(zeros=np.zeros_like(dataset['distance']))

    per_alternative = [
        padded[[slots.get(i, 'zeros') for i in range(n_slots)] + socio_economic].values
        for slots in alt_specific
    ]

    # (alternatives, observations, features) -> (observations, features, alternatives)
    return np.transpose(np.stack(per_alternative), (1, 2, 0))

In [67]:
## Create the classifier
def create_classifier(classifier, hyperparameters, dataset, X, y, for_CV=False):
    """Turn the stored hyperparameters of ``classifier`` into a model (or its parameters).

    The stored values are cleaned up as follows: entries whose name starts
    with ``_`` or whose value is missing are dropped, integer- and
    float-valued parameters are cast to the matching Python type, and
    parameters listed in ``choice_params`` are stored as an index that is
    mapped back to the actual option. ``STATIC_PARAMS`` is merged on top.

    Parameters
    ----------
    classifier : str
        Key of the model in ``hyperparameters`` (and in ``model_type_to_class``).
    hyperparameters : dict
        ``{classifier: {parameter name: stored value}}``; it is not modified.
    dataset : dict
        Unused; kept for interface compatibility.
    X : pandas.DataFrame
        Training features; only its number of columns is read (``input_dim``).
    y : pandas.Series
        Training targets; only the number of distinct values is read (``output_dim``).
    for_CV : bool, optional
        Unused; kept for interface compatibility.

    Returns
    -------
    object or dict
        The instantiated model, or the plain parameter dictionary when
        ``classifier`` is ``"RUMBoost"`` or ``"ResLogit"``.
    """
    integer_params = ['max_bin', 'min_data_in_leaf', 'num_leaves', 'num_iterations', 'hidden_layer_sizes', 'epochs', 'batch_size']
    float_params = ['bagging_fraction', 'feature_fraction', 'lambda_l1', 'lambda_l2', 'min_gain_to_split', 'min_sum_hessian_in_leaf']
    choice_params = {"learning_rate": ["adaptive"],  # NN
                     "max_iter": [10000000],  # NN
                     "tol": [1e-3],  # NN
                     "input_dim": [X.shape[1]],  # DNN, CNN, ResNet
                     "output_dim": [y.nunique()],  # DNN, CNN, ResNet
                     #"depth": [2,3,4,5,6,7,8,9,10], # DNN
                     "depth": [4, 8, 16, 32],
                     #"drop": hyperopt.hp.choice('drop', [0.5, 0.3, 0.1]),
                     # TODO: Consider adding the activation functions for the hidden layers (tanh, ReLU, LeakyReLU, etc.)
                     "epochs": [200],
                     "width": [25, 50, 100, 150, 200],  # DNN, ResNet
                     "drop": [0.1, 0.01, 1e-5],  # DNN, ResNet
                     "activation": ["tanh"],  # NN
                     "solver": ["lbfgs", "sgd", "adam"],  # NN
                     "batch_size": [64, 128, 256],  # NN, DNN, CNN,
                     "bagging_freq": [1, 5, 10]
                     }

    # Only this classifier's entry is needed; copy it so the caller's dict is untouched
    clf_params = copy.deepcopy(hyperparameters[classifier])

    for k in list(clf_params):
        value = clf_params[k]
        # pd.isna also copes with non-numeric cells, where np.isnan raises TypeError
        if k.startswith('_') or pd.isna(value):
            del clf_params[k]
            continue
        if k in integer_params:
            value = int(value)
        if k in float_params:
            value = float(value)
        if k in choice_params:
            # The stored value is the position of the selected option
            value = choice_params[k][int(value)]
        clf_params[k] = value

    params = {**clf_params, **copy.deepcopy(STATIC_PARAMS)}
    if classifier not in ["RUMBoost", "ResLogit"]:
        return model_type_to_class[classifier](**params)

    # These models are built by the caller from the raw parameters
    return params

## Experiment 4.1: Which is the best model?

In [68]:
## Construct Experiment 4 - Accuracy, GMPCA and Time Tables
def construct_experiment_4_accuracy_table(Experiment_4_CV_scores, Experiment_4_test_scores):
    """Summarise the stored scores into CV, test and estimation-time tables.

    For every classifier of ``model_type_to_class`` that has stored results,
    the per-fold CV values of ``Accuracy``, ``GMPCA`` and ``Estimation time``
    are averaged and rounded to ``rounding`` decimals; the test values are
    only rounded. Classifiers without stored results are reported and left
    out instead of aborting the whole table with a ``KeyError``.

    Parameters
    ----------
    Experiment_4_CV_scores : dict
        ``{classifier: {dataset: {score name: per-fold values}}}``.
    Experiment_4_test_scores : dict
        ``{classifier: {dataset: {score name: value}}}``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(CV table, test table, time table)``. The time table holds the mean
        estimation time over the CV folds. The inputs are not modified.
    """
    columns = ["Accuracy", "GMPCA"]
    time_column = 'Estimation time'

    # Compute the mean of all the stored results for all the models and construct the final table
    train_scores_df = {}
    test_scores_df = {}
    time_scores_df = {}

    # Work on copies so the raw scores held by the caller stay intact
    Experiment_4_CV_scores_mean = copy.deepcopy(Experiment_4_CV_scores)
    Experiment_4_test_scores_round = copy.deepcopy(Experiment_4_test_scores)
    for k_clf in model_type_to_class.keys():
        if k_clf not in Experiment_4_CV_scores_mean or k_clf not in Experiment_4_test_scores_round:
            print("No stored results for {}; it is left out of the tables.".format(k_clf))
            continue

        for k_dataset in Experiment_4_CV_scores_mean[k_clf].keys():
            for k_score in Experiment_4_CV_scores_mean[k_clf][k_dataset].keys():
                if k_score in columns + [time_column]:
                    Experiment_4_CV_scores_mean[k_clf][k_dataset][k_score] = np.round(np.mean(Experiment_4_CV_scores_mean[k_clf][k_dataset][k_score]), rounding)
                    Experiment_4_test_scores_round[k_clf][k_dataset][k_score] = np.round(Experiment_4_test_scores_round[k_clf][k_dataset][k_score], rounding)

        # Rows are datasets after the transpose; keep only the reported columns
        train_scores_df[k_clf] = pd.DataFrame(Experiment_4_CV_scores_mean[k_clf]).T[columns]
        test_scores_df[k_clf] = pd.DataFrame(Experiment_4_test_scores_round[k_clf]).T[columns]
        time_scores_df[k_clf] = pd.DataFrame(Experiment_4_CV_scores_mean[k_clf]).T[time_column]

    Experiment_4_CV_table = pd.concat(train_scores_df, axis=1)
    Experiment_4_test_table = pd.concat(test_scores_df, axis=1)
    Experiment_4_time_table = pd.concat(time_scores_df, axis=1)

    return (Experiment_4_CV_table, Experiment_4_test_table, Experiment_4_time_table)

## Execute the experiment

In [69]:
## Execute experiments

## Initialize dictionaries to store partial results
# Load the previous experiment data (deserialize)
# A missing, truncated or corrupt results file means "start from scratch".
# Any other failure (for instance a class that cannot be imported while
# unpickling) is left to propagate: the results files are rewritten later in
# this cell, so silently falling back to {} would wipe the stored results.
try:
    with open(partial_results_dir + '/Experiment_4_CV_scores.pickle', 'rb') as handle:
        Experiment_4_CV_scores = pickle.load(handle)
except (OSError, EOFError, pickle.UnpicklingError):
    Experiment_4_CV_scores = {}
try:
    with open(partial_results_dir + '/Experiment_4_test_scores.pickle', 'rb') as handle:
        Experiment_4_test_scores = pickle.load(handle)
except (OSError, EOFError, pickle.UnpicklingError):
    Experiment_4_test_scores = {}

# Number of epochs ResLogit needed in the CV folds. It is reset for every
# (dataset, classifier) run below so that one run cannot inflate the next.
n_epochs = 0

for dataset_id, dataset in datasets.items():
    dataset_name = dataset["name"]
    print("\n--- {} (ID: {})".format(dataset_name, dataset_id))

    # Load the data and the hyperparameters
    X, y, final_test_X, final_test_y, alts, groups, hyperparameters = load_data(dataset_id, dataset)

    # Obtain datasets for K-Fold cross validation (the same fold splits are used across all the iterations for all models)
    train_indices = []
    test_indices = []
    crossval_pickle_file = data_dir+dataset_id+"_crossval.pickle"
    try:
        if reset_crossval_indices == 1:
            # Forced reset: skip the stored folds altogether. Loading them first
            # and then recomputing would append the new folds to the old ones.
            raise FileNotFoundError
        with open(crossval_pickle_file, "rb") as handle:
            train_indices, test_indices = pickle.load(handle)
    except (OSError, IOError):
        print("Recomputing Cross-val indices...")
        train_indices, test_indices = [], []
        for (train_index, test_index) in stratified_group_k_fold(X, y, groups, k=CV, seed=1):
            train_indices.append(train_index)
            test_indices.append(test_index)
        with open(crossval_pickle_file, "wb") as handle:
            pickle.dump([train_indices, test_indices], handle)

    # Get results for the selected classifier
    for classifier in model_type_to_class.keys():
        print("\n\t--- {}".format(classifier))
        sys.stdout.flush()
        it_time_init = time.perf_counter()

        # Create dictionary to store the results
        Experiment_4_CV_scores.setdefault(classifier, {})
        Experiment_4_test_scores.setdefault(classifier, {})

        if recompute_Experiment_4==True or not (dataset_name in Experiment_4_CV_scores[classifier].keys()) or not (dataset_name in Experiment_4_test_scores[classifier].keys()):
            # Create dictionary to store the results
            Experiment_4_CV_scores[classifier][dataset_name] = {
                'Accuracy': [],
                'F1': [],
                'Recall': [],
                'GMPCA': [],
                'Estimation time': [],
            }
            Experiment_4_test_scores[classifier][dataset_name] = {}
            cv_scores = Experiment_4_CV_scores[classifier][dataset_name]
            test_scores = Experiment_4_test_scores[classifier][dataset_name]

            # Start the epoch count of this run from zero
            n_epochs = 0

            ## Applying k-Fold Cross Validation over training set
            for iteration, (fold_train, fold_test) in enumerate(zip(train_indices, test_indices)):
                print("\t\t CV it: {}".format(iteration))
                sys.stdout.flush()

                # Create the classifier (ResLogit only gets its parameters here and is built below)
                if classifier == "ResLogit":
                    params = create_classifier(classifier, hyperparameters, dataset, X, y, for_CV=True)
                else:
                    clf = create_classifier(classifier, hyperparameters, dataset, X, y, for_CV=True)

                # Obtain training and testing data for this iteration (split of the k-Fold).
                # Explicit copies: the frames are scaled in place just below.
                X_train, X_test = X.loc[fold_train].copy(), X.loc[fold_test].copy()
                y_train, y_test = y.loc[fold_train], y.loc[fold_test]

                # Scale the data (the scaler is fitted on the training fold only)
                scaler = StandardScaler()
                scaler.fit(X_train[dataset["scaled_fetures"]])
                X_train.loc[:, dataset["scaled_fetures"]] = scaler.transform(X_train[dataset["scaled_fetures"]])
                X_test.loc[:, dataset["scaled_fetures"]] = scaler.transform(X_test[dataset["scaled_fetures"]])

                # Balance dataset
                #X_train, y_train = balance(X_train, y_train, X_train.shape[0], len(dataset["alt_names"]))

                time_ini = time.perf_counter()
                if classifier == "ResLogit":
                    X_train_res = alt_spec_data(X_train)
                    X_test_res = alt_spec_data(X_test)

                    clf = ResLogit(X_train_res, y_train, X_train_res.shape[1], params["output_dim"], n_layers=params['depth'], batch_size=params['batch_size'], epochs=params['epochs'], device=torch.device('cpu'))
                    #clf = ResLogit(X_train, y_train, params["input_dim"], params["output_dim"], n_layers=4, batch_size=264, epochs=params['epochs'], device=torch.device('cuda'))
                    _, epochs, _ = clf.train(X_train_res, y_train, X_test_res, y_test, valid_iter=1)
                    proba = clf.predict_validate(X_test_res)
                    n_epochs += epochs

                    #proba = np.where(proba < 1e-5, 1e-5, proba)
                    y_score = np.argmax(proba, axis=1)
                else:
                    clf.fit(X_train, y_train)
                    y_score = clf.predict(X_test)
                    proba = clf.predict_proba(X_test)
                elapsed_time = time.perf_counter() - time_ini

                # Compute the accuracy results
                cv_scores['Accuracy'] = np.append(cv_scores['Accuracy'], accuracy_score(y_test, y_score)*100)
                cv_scores['F1'] = np.append(cv_scores['F1'], f1_score(y_test, y_score, average=average_tech)*100)
                cv_scores['Recall'] = np.append(cv_scores['Recall'], recall_score(y_test, y_score, average=average_tech)*100)
                cv_scores['GMPCA'] = np.append(cv_scores['GMPCA'], GMPCA(proba, y_test.values)*100)
                cv_scores['Estimation time'] = np.append(cv_scores['Estimation time'], elapsed_time)

                del clf
                gc.collect()

            ## Out-of-sample results
            # Create the classifier
            if classifier == "ResLogit":
                params = create_classifier(classifier, hyperparameters, dataset, X, y)
                # Train the final model for the average number of epochs used in the folds
                n_epochs = n_epochs // CV
            else:
                clf = create_classifier(classifier, hyperparameters, dataset, X, y)

            # Scale the data (the scaler is fitted on the whole training set)
            scaler = StandardScaler()
            scaler.fit(X[dataset["scaled_fetures"]])
            X_scaled = X.copy()
            final_test_X_scaled = final_test_X.copy()
            X_scaled.loc[:, dataset["scaled_fetures"]] = scaler.transform(X_scaled[dataset["scaled_fetures"]])
            final_test_X_scaled.loc[:, dataset["scaled_fetures"]] = scaler.transform(final_test_X_scaled[dataset["scaled_fetures"]])

            # Balance dataset
            #X_scaled_balanced, y_balanced = balance(X_scaled, y, X_scaled.shape[0], len(dataset["alt_names"]))

            # Fit the classifier on training set
            time_ini = time.perf_counter()
            if classifier == "ResLogit":
                X_scaled_res = alt_spec_data(X_scaled)
                final_test_X_scaled_res = alt_spec_data(final_test_X_scaled)
                clf = ResLogit(X_scaled_res, y, X_scaled_res.shape[1], params["output_dim"], n_layers=params['depth'], batch_size=params['batch_size'], epochs=n_epochs, device=torch.device('cpu'))
                _, _, _ = clf.train(X_scaled_res, y, None, None)
                proba = clf.predict_validate(final_test_X_scaled_res)
                #proba = np.where(proba < 1e-5, 1e-5, proba)
                y_score = np.argmax(proba, axis=1)
            else:
                clf.fit(X_scaled, y)
                y_score = clf.predict(final_test_X_scaled)
                proba = clf.predict_proba(final_test_X_scaled)

            elapsed_time = time.perf_counter() - time_ini
            fitted = True

            # Compute the accuracy results
            test_scores['Accuracy'] = accuracy_score(final_test_y, y_score)*100
            test_scores['F1'] = f1_score(final_test_y, y_score, average=average_tech)*100
            test_scores['Recall'] = recall_score(final_test_y, y_score, average=average_tech)*100
            test_scores['GMPCA'] = GMPCA(proba, final_test_y.values)*100
            test_scores['Estimation time'] = elapsed_time

            # ## Market shares
            # Experiment_4_CV_scores[classifier][dataset_name]['Market_shares'] = np.round(np.sum(clf.predict_proba(X_scaled), axis=0)/X_scaled.shape[0] * 100, 3)
            # Experiment_4_test_scores[classifier][dataset_name]['Market_shares'] = np.round(np.sum(clf.predict_proba(final_test_X_scaled), axis=0)/final_test_X_scaled.shape[0] * 100, 3)

            # ## WTP
            # Experiment_4_CV_scores[classifier][dataset_name]["WTP_history"] = None
            # Experiment_4_test_scores[classifier][dataset_name]["WTP_history"] = None
            # if dataset["WTP"] is not None:
            #     Experiment_4_CV_scores[classifier][dataset_name]["WTP_history"] = {}
            #     Experiment_4_CV_scores[classifier][dataset_name]["n_WTP_nan"] = 0
            #     Experiment_4_CV_scores[classifier][dataset_name]["n_WTP_inf"] = 0
            #     Experiment_4_test_scores[classifier][dataset_name]["WTP_history"] = {}
            #     Experiment_4_test_scores[classifier][dataset_name]["n_WTP_nan"] = 0
            #     Experiment_4_test_scores[classifier][dataset_name]["n_WTP_inf"] = 0

            #     for alt in dataset["WTP"].keys():
            #         v1_name, v2_name, d_1, d_2 = dataset["WTP"][alt]

            #         # WTP over training set 
            #         filtered_WTP, n_WTP_nan, n_WTP_inf = compute_WTP(clf, dataset, X, v1_name, v2_name, d_1, d_2, scaler)
            #         Experiment_4_CV_scores[classifier][dataset_name]["n_WTP_nan"] += n_WTP_nan
            #         Experiment_4_CV_scores[classifier][dataset_name]["n_WTP_inf"] += n_WTP_inf
            #         Experiment_4_CV_scores[classifier][dataset_name]["WTP_history"][dataset["alt_names"][alt]] = filtered_WTP

            #         # WTP over test set 
            #         filtered_WTP, n_WTP_nan, n_WTP_inf = compute_WTP(clf, dataset, final_test_X, v1_name, v2_name, d_1, d_2, scaler)
            #         Experiment_4_test_scores[classifier][dataset_name]["n_WTP_nan"] += n_WTP_nan
            #         Experiment_4_test_scores[classifier][dataset_name]["n_WTP_inf"] += n_WTP_inf
            #         Experiment_4_test_scores[classifier][dataset_name]["WTP_history"][dataset["alt_names"][alt]] = filtered_WTP

            del clf 
            gc.collect()

        # Store the partial experiment data (serialize) after every classifier,
        # so an interrupted run keeps the models that were already evaluated
        with open(partial_results_dir + 'Experiment_4_CV_scores.pickle', 'wb') as handle:
            pickle.dump(Experiment_4_CV_scores, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(partial_results_dir + 'Experiment_4_test_scores.pickle', 'wb') as handle:
            pickle.dump(Experiment_4_test_scores, handle, protocol=pickle.HIGHEST_PROTOCOL)

        # The decimals belong to np.round; they used to be passed to format() and ignored
        print("\t    + Elapsed: {} seconds".format(np.round(time.perf_counter()-it_time_init, 2)))



--- LPMC (ID: LPMC)

	--- ResLogit
		 CV it: 0
Epoch 1: valid_loss = 0.7972640144569566
          train_loss = 0.9620987111121247
Epoch 2: valid_loss = 0.7269777059413449
          train_loss = 0.7635602600430759
Epoch 3: valid_loss = 0.7018058236436087
          train_loss = 0.7258568796197554
Epoch 4: valid_loss = 0.6935461562854836
          train_loss = 0.7119344251738464
Epoch 5: valid_loss = 0.6862455663606766
          train_loss = 0.7041301078232852
Epoch 6: valid_loss = 0.6813018753691024
          train_loss = 0.6984554352766105
Epoch 7: valid_loss = 0.6778471896654652
          train_loss = 0.6949305066688698
Epoch 8: valid_loss = 0.6763336133859664
          train_loss = 0.6923468506865209
Epoch 9: valid_loss = 0.6757879939840455
          train_loss = 0.6904557271874698
Epoch 10: valid_loss = 0.6734899252645855
          train_loss = 0.6889812533514382
Epoch 11: valid_loss = 0.67288687342384
          train_loss = 0.6875375285138196
Epoch 12: valid_loss = 0.67316196619562

## Export LaTeX tables/figures

In [5]:
# Reload the stored results. Only a missing, truncated or corrupt file falls
# back to an empty dictionary; any other failure is left to propagate so that
# it is not mistaken for "no results yet".
try:
    with open(partial_results_dir + '/Experiment_4_CV_scores.pickle', 'rb') as handle:
        Experiment_4_CV_scores = pickle.load(handle)
except (OSError, EOFError, pickle.UnpicklingError):
    Experiment_4_CV_scores = {}
try:
    with open(partial_results_dir + '/Experiment_4_test_scores.pickle', 'rb') as handle:
        Experiment_4_test_scores = pickle.load(handle)
except (OSError, EOFError, pickle.UnpicklingError):
    Experiment_4_test_scores = {}

In [6]:
Experiment_4_test_scores

{'LightGBM': {'LPMC': {'Accuracy': 74.76443768996961,
   'F1': 55.77533290152894,
   'Recall': 55.86976689126527,
   'GMPCA': 52.00953889879196,
   'Estimation time': 5.394302400000015,
   'Market_shares': array([17.001,  2.857, 36.225, 43.917]),
   'WTP_history': None}},
 'NN': {'LPMC': {'Accuracy': 74.22492401215806,
   'F1': 55.252585435855494,
   'Recall': 55.510437716485306,
   'GMPCA': 51.34357600821821,
   'Estimation time': 11.689457000000175,
   'Market_shares': array([16.941,  2.9  , 36.089, 44.069]),
   'WTP_history': None}},
 'DNN': {'LPMC': {'Accuracy': 74.36170212765958,
   'F1': 55.48164394231152,
   'Recall': 55.92791501690747,
   'GMPCA': 50.99265789019044,
   'Estimation time': 4.8106472999998005,
   'Market_shares': array([17.711,  2.784, 35.031, 44.474], dtype=float32),
   'WTP_history': None}},
 0: {'RUMBoost': {'SwissMetro': {'Accuracy': 67.99116997792495,
    'F1': 49.43119812699326,
    'Recall': 49.125855481671614,
    'GMPCA': 46.97805026510724,
    'Estimatio

In [70]:
# Build the CV, test and estimation-time tables from the stored scores
(
    Experiment_4_CV_table,
    Experiment_4_test_table,
    Experiment_4_time_table,
) = construct_experiment_4_accuracy_table(
    Experiment_4_CV_scores=Experiment_4_CV_scores,
    Experiment_4_test_scores=Experiment_4_test_scores,
)

# Obtain market shares table over the test set
#Experiment_4_MS_table = construct_experiment_4_market_shares_table(Experiment_4_test_scores, datasets)

In [71]:
Experiment_4_CV_table

Unnamed: 0_level_0,ResLogit,ResLogit
Unnamed: 0_level_1,Accuracy,GMPCA
LPMC,73.64,50.99


In [72]:
Experiment_4_test_table

Unnamed: 0_level_0,ResLogit,ResLogit
Unnamed: 0_level_1,Accuracy,GMPCA
LPMC,73.25,50.4


In [73]:
Experiment_4_time_table

Unnamed: 0,ResLogit
LPMC,95.72


In [None]:
# Reload the combined ("_all") results. Only a missing, truncated or corrupt
# file falls back to an empty dictionary; any other failure is left to
# propagate so that it is not mistaken for "no results yet".
try:
    with open(partial_results_dir + '/Experiment_4_CV_scores_all.pickle', 'rb') as handle:
        Experiment_4_CV_scores = pickle.load(handle)
except (OSError, EOFError, pickle.UnpicklingError):
    Experiment_4_CV_scores = {}
try:
    with open(partial_results_dir + '/Experiment_4_test_scores_all.pickle', 'rb') as handle:
        Experiment_4_test_scores = pickle.load(handle)
except (OSError, EOFError, pickle.UnpicklingError):
    Experiment_4_test_scores = {}

In [None]:
Experiment_4_test_scores

{'LightGBM': {'LPMC': {'Accuracy': 74.76443768996961,
   'F1': 55.77533290152894,
   'Recall': 55.86976689126527,
   'GMPCA': 52.00953889879196,
   'Estimation time': 5.394302400000015,
   'Market_shares': array([17.001,  2.857, 36.225, 43.917]),
   'WTP_history': None}},
 'NN': {'LPMC': {'Accuracy': 74.22492401215806,
   'F1': 55.252585435855494,
   'Recall': 55.510437716485306,
   'GMPCA': 51.34357600821821,
   'Estimation time': 11.689457000000175,
   'Market_shares': array([16.941,  2.9  , 36.089, 44.069]),
   'WTP_history': None}},
 'DNN': {'LPMC': {'Accuracy': 74.36170212765958,
   'F1': 55.48164394231152,
   'Recall': 55.92791501690747,
   'GMPCA': 50.99265789019044,
   'Estimation time': 4.8106472999998005,
   'Market_shares': array([17.711,  2.784, 35.031, 44.474], dtype=float32),
   'WTP_history': None}},
 0: {'RUMBoost': {'SwissMetro': {'Accuracy': 67.99116997792495,
    'F1': 49.43119812699326,
    'Recall': 49.125855481671614,
    'GMPCA': 46.97805026510724,
    'Estimatio

In [None]:
# Build the CV, test and estimation-time tables from the stored scores
(
    Experiment_4_CV_table,
    Experiment_4_test_table,
    Experiment_4_time_table,
) = construct_experiment_4_accuracy_table(
    Experiment_4_CV_scores=Experiment_4_CV_scores,
    Experiment_4_test_scores=Experiment_4_test_scores,
)

# Obtain market shares table over the test set
#Experiment_4_MS_table = construct_experiment_4_market_shares_table(Experiment_4_test_scores, datasets)

In [None]:
Experiment_4_CV_table

Unnamed: 0_level_0,ResLogit,ResLogit
Unnamed: 0_level_1,Accuracy,GMPCA
LPMC,73.64,50.99


In [None]:
Experiment_4_test_table

Unnamed: 0_level_0,ResLogit,ResLogit
Unnamed: 0_level_1,Accuracy,GMPCA
LPMC,73.25,50.4


In [None]:
Experiment_4_time_table

Unnamed: 0,ResLogit
LPMC,95.72
