# Step 2: Preprocessing & Classification model
This section loads the predefined dataset settings from the pickles directory, runs the machine-learning pipeline with the help of the `pycaret` library, and saves the respective results.

In [1]:
# importing all packages needed in this section
import pandas as pd
import os
import sys 

from sklearn.metrics import classification_report
from pycaret.classification import *

# utility functions for the experiment
sys.path.append('../src')

from mlflow_manager import MLFlowManager
from utils import getPicklesFromDir, getExperimentConfig, run_pycaret_setup, translate_model_name

# Load the global experiment settings; this notebook indexes the result by
# 'folders' here and by 'clf' (model list and tuning parameters) in the training loop.
config = getExperimentConfig()
# Directory settings; 'settings_dir' is read here and 'real_dir' in the training loop
folders = config['folders']
# Get a list of all settings for the datasets prepared beforehand (one entry per dataset)
dataset_settings = getPicklesFromDir(folders['settings_dir'])  

In [2]:
import time

# Take a reference reading of the high-resolution performance counter.
# NOTE(review): start_time is not read anywhere in the visible part of the notebook --
# presumably meant for reporting the elapsed run time; confirm or remove.
start_time = time.perf_counter()

Each pickle loaded into `dataset_settings` is a dictionary that is saved as follows:
```
"meta": meta_dataset,       # contains information about the dataset, including path
"setup_param": setup_param, # contains all the setup parameters for pycaret setup() function
"sdg_param": sdg_param,     # contains all sdg parameters for the CTGAN() function

```

In [3]:
# Train, tune and log every configured classifier on each real dataset.
# Run layout per dataset: one parent run ("Original data models") with one nested
# run per model; the best nested run (by 'Accuracy') is summarised on the parent.
for settings in dataset_settings:
    # Path to the real dataset described by this settings entry
    dataset_path = f"{folders['real_dir']}{settings['meta']['filename']}"
    # Run the pycaret setup function with the dataset-specific parameters
    s = run_pycaret_setup(dataset_path, settings['setup_param'])

    # 'USI' value of the pycaret experiment; attached as a tag to every run below
    USI = s.get_config('USI')

    # Init experiment logging
    experiment_name = f"{settings['meta']['id']}-{settings['meta']['name']}"
    mlflow = MLFlowManager(experiment_name)

    logg_tags = {
        'USI': USI,
        'Dataset ID': settings['meta']['id'],
        'Dataset Type': 'original'
    }

    # Parent run that groups the per-model runs of this dataset
    mlflow.start_run("Original data models", tags=logg_tags)

    # For each model defined in the global config: create it, tune it, evaluate it
    for ml_model in config['clf']['ml_models']:

        model_name = f"{settings['meta']['id']}-{translate_model_name(ml_model)}"
        logg_tags['model'] = ml_model

        mlflow.start_run(model_name, tags=logg_tags, nested=True)

        # Create & tune model
        model = s.create_model(ml_model)
        tuned_model = s.tune_model(model, **config['clf']['tuning_param'])

        # Cross-validation results: pull() returns the last displayed score grid,
        # so it has to be called directly after tune_model.
        # NOTE(review): when pycaret reports "Original model was better than the tuned
        # model", the displayed grid is for the tuned candidate while `tuned_model`
        # is the original estimator, so these scores may not describe the logged model.
        val_df = s.pull()
        val_score = {
            'Accuracy': val_df['Accuracy']['Mean'],
            'F1-score': val_df['F1']['Mean'],
            'AUC': val_df['AUC']['Mean'],
            'Kappa': val_df['Kappa']['Mean'],
            'MCC': val_df['MCC']['Mean'],
        }

        # Test the model on the holdout data. predict_model returns the per-row
        # predictions; the holdout *metrics* are the score grid it displays, which
        # is retrieved with pull(). Previously the predictions frame itself was
        # logged as the "Holdout" score report.
        s.predict_model(estimator=tuned_model)
        holdout_score = s.pull()

        # Log parameters
        mlflow.log_params(tuned_model.get_params())
        # Log performance
        mlflow.log_metrics(val_score)
        mlflow.log_score_report_to_html(val_df, "Validation")
        mlflow.log_score_report_to_html(holdout_score, "Holdout")
        # Log model
        mlflow.log_model(model=tuned_model)

        # Close the nested per-model run
        mlflow.end_run()

    # Save the details of the model with the best accuracy on the
    # 'Original data models' parent run
    best_run = mlflow.get_best_run_by_metric(metric_name='Accuracy')

    mlflow.log_params(best_run.data.params)
    mlflow.log_metrics(best_run.data.metrics)
    mlflow.log_tag('model run name', best_run.data.tags['mlflow.runName'])
    mlflow.log_tag('model', best_run.data.tags['model'])
    mlflow.log_tag('model run id', best_run.info.run_id)

    # Close the parent run of this dataset
    mlflow.end_run()

Unnamed: 0,Description,Value
0,Session id,6544
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8818,0.6364,0.7778,0.7,0.5592,0.5654
1,0.7258,0.7807,0.5455,0.6316,0.5854,0.3822,0.3845
2,0.8226,0.8875,0.6364,0.8235,0.7179,0.5916,0.6021
3,0.7097,0.7773,0.4545,0.625,0.5263,0.3245,0.333
4,0.7213,0.7762,0.5238,0.6111,0.5641,0.3611,0.3634
5,0.8033,0.8726,0.6667,0.7368,0.7,0.5542,0.5557
6,0.7869,0.8417,0.4762,0.8333,0.6061,0.4745,0.5094
7,0.7705,0.7429,0.5238,0.7333,0.6111,0.4547,0.4676
8,0.7705,0.8762,0.5714,0.7059,0.6316,0.4676,0.4731
9,0.7705,0.8393,0.7143,0.6522,0.6818,0.5029,0.5042


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8818,0.6364,0.7778,0.7,0.5592,0.5654
1,0.7258,0.783,0.5455,0.6316,0.5854,0.3822,0.3845
2,0.8226,0.8875,0.6364,0.8235,0.7179,0.5916,0.6021
3,0.7097,0.775,0.4545,0.625,0.5263,0.3245,0.333
4,0.7213,0.7798,0.5238,0.6111,0.5641,0.3611,0.3634
5,0.8033,0.8726,0.6667,0.7368,0.7,0.5542,0.5557
6,0.7869,0.8393,0.4762,0.8333,0.6061,0.4745,0.5094
7,0.7705,0.7429,0.5238,0.7333,0.6111,0.4547,0.4676
8,0.7869,0.8762,0.619,0.7222,0.6667,0.5114,0.5147
9,0.7705,0.8393,0.7143,0.6522,0.6818,0.5029,0.5042


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7258,0.7869,0.5455,0.6316,0.5854,0.3822,0.3845
1,0.6774,0.7614,0.5,0.55,0.5238,0.2807,0.2815
2,0.7581,0.8455,0.5,0.7333,0.5946,0.4308,0.4469
3,0.7097,0.7812,0.5455,0.6,0.5714,0.3527,0.3536
4,0.6557,0.6798,0.4286,0.5,0.4615,0.2107,0.2121
5,0.7541,0.8155,0.4762,0.7143,0.5714,0.4085,0.425
6,0.6721,0.7512,0.2857,0.5455,0.375,0.1812,0.1986
7,0.7705,0.7518,0.6667,0.6667,0.6667,0.4917,0.4917
8,0.7705,0.8036,0.6667,0.6667,0.6667,0.4917,0.4917
9,0.8197,0.8214,0.7143,0.75,0.7317,0.596,0.5964


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7742,0.8727,0.4545,0.8333,0.5882,0.4506,0.4899
1,0.7419,0.7693,0.4545,0.7143,0.5556,0.3861,0.4057
2,0.7903,0.8909,0.4545,0.9091,0.6061,0.484,0.538
3,0.7097,0.8295,0.4545,0.625,0.5263,0.3245,0.333
4,0.7213,0.7286,0.381,0.6667,0.4848,0.3128,0.3358
5,0.7377,0.8107,0.381,0.7273,0.5,0.345,0.3781
6,0.7541,0.8262,0.3333,0.875,0.4828,0.3615,0.434
7,0.7049,0.7429,0.4762,0.5882,0.5263,0.3155,0.3192
8,0.7541,0.8423,0.4286,0.75,0.5455,0.3936,0.4226
9,0.7541,0.8119,0.5238,0.6875,0.5946,0.4227,0.4308


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6935,0.0,0.3636,0.6154,0.4571,0.2628,0.2805
1,0.7097,0.0,0.4091,0.6429,0.5,0.3094,0.3251
2,0.7742,0.0,0.5,0.7857,0.6111,0.4629,0.4863
3,0.7258,0.0,0.5455,0.6316,0.5854,0.3822,0.3845
4,0.6066,0.0,0.5238,0.44,0.4783,0.1663,0.1679
5,0.7049,0.0,0.6667,0.56,0.6087,0.3747,0.3784
6,0.8033,0.0,0.6667,0.7368,0.7,0.5542,0.5557
7,0.6885,0.0,0.2857,0.6,0.3871,0.2121,0.2383
8,0.8033,0.0,0.6667,0.7368,0.7,0.5542,0.5557
9,0.7049,0.0,0.7143,0.5556,0.625,0.388,0.3963


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7419,0.0,0.8182,0.6,0.6923,0.479,0.4961
1,0.7258,0.0,0.7727,0.5862,0.6667,0.4411,0.4533
2,0.8226,0.0,0.7273,0.7619,0.7442,0.6085,0.6089
3,0.7097,0.0,0.7727,0.5667,0.6538,0.4139,0.4287
4,0.5738,0.0,0.5714,0.4138,0.48,0.1343,0.1393
5,0.7049,0.0,0.7143,0.5556,0.625,0.388,0.3963
6,0.7377,0.0,0.7619,0.5926,0.6667,0.456,0.4658
7,0.7213,0.0,0.7143,0.5769,0.6383,0.4158,0.4221
8,0.7705,0.0,0.8571,0.6207,0.72,0.5338,0.5539
9,0.7377,0.0,0.8571,0.5806,0.6923,0.4781,0.5057


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7742,0.8318,0.6818,0.6818,0.6818,0.5068,0.5068
1,0.7903,0.7932,0.6364,0.7368,0.6829,0.5275,0.5307
2,0.8065,0.8989,0.5909,0.8125,0.6842,0.5496,0.5641
3,0.7903,0.8085,0.7273,0.6957,0.7111,0.5467,0.547
4,0.7541,0.7238,0.5714,0.6667,0.6154,0.4362,0.439
5,0.8033,0.8375,0.6667,0.7368,0.7,0.5542,0.5557
6,0.7869,0.856,0.5238,0.7857,0.6286,0.4874,0.5071
7,0.6557,0.7173,0.4762,0.5,0.4878,0.2288,0.2289
8,0.7705,0.8554,0.5238,0.7333,0.6111,0.4547,0.4676
9,0.7705,0.8524,0.6667,0.6667,0.6667,0.4917,0.4917


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7581,0.8477,0.7727,0.6296,0.6939,0.4973,0.5044
1,0.8065,0.8091,0.6364,0.7778,0.7,0.5592,0.5654
2,0.8226,0.8909,0.6364,0.8235,0.7179,0.5916,0.6021
3,0.7258,0.8648,0.5455,0.6316,0.5854,0.3822,0.3845
4,0.7541,0.7202,0.619,0.65,0.6341,0.4491,0.4494
5,0.7705,0.8548,0.619,0.6842,0.65,0.4799,0.4812
6,0.8361,0.8667,0.5714,0.9231,0.7059,0.6008,0.634
7,0.6721,0.7464,0.4762,0.5263,0.5,0.257,0.2577
8,0.7869,0.8702,0.5238,0.7857,0.6286,0.4874,0.5071
9,0.7541,0.819,0.7143,0.625,0.6667,0.4732,0.4759


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7742,0.85,0.7273,0.6667,0.6957,0.5167,0.5179
1,0.7903,0.8193,0.6818,0.7143,0.6977,0.5373,0.5377
2,0.7903,0.8648,0.5455,0.8,0.6486,0.5067,0.5256
3,0.7258,0.7875,0.6364,0.6087,0.6222,0.4072,0.4074
4,0.7049,0.7583,0.619,0.5652,0.5909,0.3609,0.3618
5,0.8361,0.8679,0.7143,0.7895,0.75,0.6285,0.6302
6,0.8361,0.8488,0.5714,0.9231,0.7059,0.6008,0.634
7,0.6885,0.7393,0.4762,0.5556,0.5128,0.2859,0.2877
8,0.7705,0.8452,0.5238,0.7333,0.6111,0.4547,0.4676
9,0.7705,0.8095,0.7143,0.6522,0.6818,0.5029,0.5042


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.8568,0.0,0.0,0.0,0.0,0.0
1,0.6452,0.8227,0.0,0.0,0.0,0.0,0.0
2,0.6452,0.8455,0.0,0.0,0.0,0.0,0.0
3,0.6452,0.8426,0.0,0.0,0.0,0.0,0.0
4,0.6557,0.6815,0.0,0.0,0.0,0.0,0.0
5,0.6557,0.8077,0.0,0.0,0.0,0.0,0.0
6,0.6557,0.8,0.0,0.0,0.0,0.0,0.0
7,0.6557,0.7452,0.0,0.0,0.0,0.0,0.0
8,0.6557,0.8827,0.0,0.0,0.0,0.0,0.0
9,0.6557,0.8083,0.0,0.0,0.0,0.0,0.0


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [11]:
# Show the leaderboard of the pycaret experiment `s`.
# NOTE(review): `s` is the variable assigned inside the training loop above, so this
# only covers the last dataset processed and depends on that cell having been run.
s.get_leaderboard()

---

### Notice 
The following cells, until the end of this section (i.e. up to section 3.0), contain experimental code that will not be run.

In [16]:
### The following shows which models are natively available in the pycaret library
# It is possible to add estimators
# NOTE(review): `models` is not imported by name in this notebook -- presumably it is
# provided by `from pycaret.classification import *` in the first cell; confirm.
all_models = models()
display(all_models)

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Logistic Regression,sklearn.linear_model._logistic.LogisticRegression,True
knn,K Neighbors Classifier,sklearn.neighbors._classification.KNeighborsClassifier,True
nb,Naive Bayes,sklearn.naive_bayes.GaussianNB,True
dt,Decision Tree Classifier,sklearn.tree._classes.DecisionTreeClassifier,True
svm,SVM - Linear Kernel,sklearn.linear_model._stochastic_gradient.SGDClassifier,True
rbfsvm,SVM - Radial Kernel,sklearn.svm._classes.SVC,False
gpc,Gaussian Process Classifier,sklearn.gaussian_process._gpc.GaussianProcessClassifier,False
mlp,MLP Classifier,sklearn.neural_network._multilayer_perceptron.MLPClassifier,False
ridge,Ridge Classifier,sklearn.linear_model._ridge.RidgeClassifier,True
rf,Random Forest Classifier,sklearn.ensemble._forest.RandomForestClassifier,True


In [17]:
##### How to get logs, can pass experiment name to get_logs()
#get_logs()[[
#    'tags.mlflow.runName',
#    'metrics.Accuracy',
#    'metrics.F1',
#    'metrics.Prec',
#    'metrics.Recall',
#    'tags.Source']].sort_values('metrics.Accuracy', ascending=False)

In [18]:
# Need to define param that should be explored, define which method e.g. grid_search vs random vs optuna
# default search method: random grid search
# Todo: lookup default search range parameters

# uses the best model to optimize
#tuned = tune_model(clf, optimize='Accuracy', n_iter=10)

---

# Step 3: SDG

In [None]:
# necessary imports for the section
from sdv.tabular import CTGAN

import pickle
import pandas as pd 
import os 
import sys

sys.path.append('../src')
from utils import (getPicklesFromDir, 
                   getExperimentConfig, 
                   extract_loss_info_from_stdout, 
                   create_loss_plot)

from mlflow_manager import MLFlowManager

# Get global experiment settings; the SDG cell below reads config['ctgan_param'] from it
config = getExperimentConfig()
# Get folders ('settings_dir' is read here; 'real_dir' and 'sd_dir' in the SDG cell below)
folders = config['folders']
# Get dataset specific settings (one entry per dataset, loaded from the settings directory)
dataset_settings = getPicklesFromDir(folders['settings_dir'])

Pseudocode:

```
for each pickle (setting):
    for each varying quality:
        create model with sdg_param and quality
        train model with original_data
        generate num_SD synthetic datasets with:
            num_rows=SD_size_by_factor * len(original_dataset)
        
        save synthetic dataset in dataset folder
        save SDG
```

In [None]:
%%capture stdout_loss

# Get the CTGAN settings from the global config:
#   quality_params - mapping of quality key -> extra CTGAN keyword arguments
#   sd_size_factor - size of each synthetic dataset relative to the original data
#   num_SD         - number of synthetic datasets to sample per trained generator
quality_params = config['ctgan_param']['quality_params']
sd_size_factor = config['ctgan_param']['sd_size_factor']
num_SD = config['ctgan_param']['num_sd']


# Run SDG generation for each dataset-specific settings entry.
# The "#START#", <sdg_name> and "#END#" prints delimit the training output of each
# generator in the captured stdout, which the next cell parses for the loss values,
# so their order relative to CTGAN(...) and fit(...) must not change.
for settings in dataset_settings:
    # Init experiment logging
    experiment_name = f"{settings['meta']['id']}-SDG-models"
    mlflow = MLFlowManager(experiment_name)

    # Load original dataset
    original_data = pd.read_csv(folders['real_dir'] + settings['meta']['filename'])

    # Number of rows to generate per synthetic dataset. Cast to int so that a
    # fractional sd_size_factor still yields a valid row count for sample().
    original_data_size = len(original_data)
    sd_size = int(original_data_size * sd_size_factor)

    logg_tags = {'Source': settings['meta']['id']}

    # Loop through the different quality parameters for the SDG
    for quality in quality_params:

        sdg_name = f"S{settings['meta']['id']}{quality}"
        log_run = mlflow.start_run(sdg_name, tags=logg_tags)
        mlflow.log_params(quality_params[quality])

        # Create the model with sdg_param and quality_param as parameters
        print("#START#")   # for capturing loss info

        model = CTGAN(**settings['sdg_param'], **quality_params[quality])

        print(sdg_name)   # for capturing loss info
        model.fit(original_data)

        # Save the fitted SDG model to a temporary local file, attach it to the run
        # as an artifact, and always remove the local copy afterwards -- also when
        # saving or logging fails.
        model_file = f"{sdg_name}.pkl"
        try:
            model.save(model_file)
            mlflow.log_artifact(model_file)
        finally:
            if os.path.isfile(model_file):
                os.remove(model_file)

        print("#END#")   # for capturing loss info
        mlflow.end_run()

        # Sample num_SD synthetic datasets from the single fitted generator
        # (used for validating results)
        for itr in range(num_SD):

            # Synthetic dataset name: dataset id, quality key and copy number,
            # e.g. SD1Q1_2 means SDG trained on dataset D1 with quality Q1, copy num 2
            SD_name = f"S{settings['meta']['id']}{quality}_{itr}"

            # Relative file path for the synthetic dataset
            sd_path = f"{folders['sd_dir']}{SD_name}.csv"

            # Generate synthetic data
            synthetic_data = model.sample(num_rows=sd_size)

            # Save the synthetic dataset
            synthetic_data.to_csv(sd_path, index=False)

    # NOTE(review): every per-quality run is already ended inside the loop above, so
    # no run opened in this cell is active here -- kept unchanged; confirm it is needed.
    mlflow.end_run()

In [None]:
# The loss values captured from the standard output of the cell above are used to
# create the generator vs. discriminator loss plots, one per trained SDG.
from urllib.parse import urlparse
from urllib.request import url2pathname

loss_values = extract_loss_info_from_stdout(stdout_loss.stdout)

for sdg_id in loss_values:
    # Loss records of this SDG only
    sdg_loss_values = loss_values[sdg_id]
    fig = create_loss_plot(sdg_id, sdg_loss_values)

    # Locate the artifact directory of the run that trained this SDG.
    # NOTE(review): `mlflow` is the manager left over from the last iteration of the
    # SDG cell, i.e. the last dataset's experiment -- confirm load_run_by_name can
    # find runs that belong to the other datasets' experiments.
    run = mlflow.load_run_by_name(sdg_id)
    artifact_uri = run.info.artifact_uri
    if artifact_uri.startswith("file:"):
        # Decode the file URI into a local path. Stripping the "file:///" prefix by
        # hand drops the leading "/" on POSIX and produces a relative path.
        path = url2pathname(urlparse(artifact_uri).path)
    else:
        path = artifact_uri

    # Save plot
    fig.savefig(f"{path}/{sdg_id}_loss_plot.png")
    # Save the loss data of this SDG next to its plot. Previously to_csv was called
    # on the whole `loss_values` container for every sdg_id.
    # Assumes each entry is a pandas object exposing to_csv -- TODO confirm.
    sdg_loss_values.to_csv(f"{path}/{sdg_id}.csv", index=False)


---

# Step 4: Create models with SD

In [None]:
import pandas as pd 
import os 
import sys 
import pickle


from sklearn.metrics import (classification_report, 
                             roc_auc_score, 
                             matthews_corrcoef,
                             cohen_kappa_score)
sys.path.append('../src')
from utils import (getExperimentConfig, 
                   getPicklesFromDir, 
                   run_pycaret_setup, 
                   translate_model_name,
                   get_synthetic_filepaths_from_original_data_id,
                   convert_and_clean_dict)

from mlflow_manager import MLFlowManager

# Get global variables for the experiment; config['clf'] is read by the training cell below
config = getExperimentConfig()
# Get folders ('settings_dir' is read here; 'log_dir', 'real_dir' and 'sd_dir' in the cell below)
folders = config['folders']
# Load dataset specific settings (from the real-data)
dataset_settings = getPicklesFromDir(folders['settings_dir'])

In [23]:
# TODO: consider testing, and consider extracting hyperparameters, consider how to summarize


def _positive_class_score(y_pred, y_pred_score, positive_label):
    """Convert pycaret's ``prediction_score`` into a positive-class score.

    pycaret reports the probability of the *predicted* class, whereas
    ``roc_auc_score`` expects the score of the positive class for a binary target.
    Rows predicted as the positive class keep their score; all other rows get
    ``1 - score``.

    Args:
        y_pred: Series of predicted labels.
        y_pred_score: Series of scores of the predicted labels, aligned with ``y_pred``.
        positive_label: Label treated as the positive class.

    Returns:
        Series of positive-class scores with the index of ``y_pred_score``.
    """
    return y_pred_score.where(y_pred == positive_label, 1 - y_pred_score)


def _evaluate_and_log(experiment, tracker, fitted_model, x_test, y_test):
    """Score ``fitted_model`` on the real data and log the results to the active run.

    Must be called directly after the ``create_model``/``tune_model`` call that
    produced ``fitted_model``: the validation scores are read with ``pull()``,
    which returns the most recently displayed score grid.

    Args:
        experiment: pycaret experiment the model was trained in.
        tracker: MLFlowManager with an active run for this model.
        fitted_model: Estimator returned by ``create_model``/``tune_model``.
        x_test: Real-data features (target column removed).
        y_test: Real-data target values, aligned with ``x_test``.
    """
    # Cross-validation results of the preceding create/tune call
    val_df = experiment.pull()
    val_score = {
        'val_Accuracy': val_df['Accuracy']['Mean'],
        'val_F1-score': val_df['F1']['Mean'],
    }

    # Predict on the real data
    pred_model = experiment.predict_model(estimator=fitted_model, data=x_test)
    y_pred = pred_model['prediction_label']

    metrics = classification_report(y_true=y_test, y_pred=y_pred, output_dict=True, digits=4)
    test_score = pd.DataFrame.from_dict(metrics).transpose()

    test_metrics = {
        "Accuracy": metrics['accuracy'],
        "F1-score": metrics['macro avg']['f1-score'],
        "MCC": matthews_corrcoef(y_true=y_test, y_pred=y_pred),
        "Kappa": cohen_kappa_score(y1=y_test, y2=y_pred)
    }

    # predict_model only adds 'prediction_score' for some estimators (sometimes there isn't one)
    if 'prediction_score' in pred_model.columns:
        y_pred_score = pred_model['prediction_score']
        if y_test.nunique() > 2:
            # Multiclass: one-vs-one AUC.
            # NOTE(review): roc_auc_score expects one score column per class for
            # multiclass targets, but 'prediction_score' is a single column --
            # this branch is kept as in the original; verify it on a multiclass dataset.
            test_metrics['AUC'] = roc_auc_score(y_true=y_test, y_score=y_pred_score, multi_class='ovo')
        else:
            # Binary: roc_auc_score treats the greater label as the positive class
            positive_label = sorted(y_test.unique())[-1]
            y_score = _positive_class_score(y_pred, y_pred_score, positive_label)
            test_metrics['AUC'] = roc_auc_score(y_true=y_test, y_score=y_score)

    # Log parameters
    tracker.log_params(fitted_model.get_params())
    # Log performance
    tracker.log_metric_report(test_metrics)
    tracker.log_metrics(val_score)
    tracker.log_score_report_to_html(val_df, "Validation")
    tracker.log_score_report_to_html(test_score, "Test_score")
    # Log model
    tracker.log_model(model=fitted_model)


# Train models on every synthetic dataset and test them on the real data.
# Run layout per real dataset: 'Synthetic data models' > '<sd_id>-models' > one run per model.
for settings in dataset_settings:
    # Update system_log name
    settings['setup_param']['system_log'] = folders['log_dir']+"Step4_SD"
    # Disable saving train-test split data (to save space)
    settings['setup_param']['log_data'] = False

    # Get experiment logging
    experiment_name = f"{settings['meta']['id']}-{settings['meta']['name']}"
    mlflow = MLFlowManager(experiment_name)

    # Model id ('model') and hyperparameters ('params') of the best model on the real data
    best_model_from_original_data = mlflow.get_best_model_hyperparameters()

    # The complete real dataset serves as the test set for the synthetic-data models
    original_data = pd.read_csv(f"{folders['real_dir']}{settings['meta']['filename']}")
    y_test = original_data[settings['meta']['target']]
    x_test = original_data.drop(columns=[settings['meta']['target']])

    logg_tags = {'Dataset Type': 'synthetic'}
    mlflow.start_run('Synthetic data models', tags=logg_tags)

    synthetic_datasets = get_synthetic_filepaths_from_original_data_id(settings['meta']['id'])

    for sd_filename in synthetic_datasets:

        sd_id = os.path.splitext(sd_filename)[0]
        sd_path = folders['sd_dir']+sd_filename

        s = run_pycaret_setup(sd_path, settings['setup_param'])
        USI = s.get_config('USI')

        # Add custom tags to the log, defining dataset type and id
        logg_tags = {
            'Dataset Type': 'synthetic',
            'Dataset ID': sd_id,
            'USI': USI
        }
        run_name = f"{sd_id}-models"
        mlflow.start_run(run_name, tags=logg_tags, nested=True)

        # --- Model with the best hyperparameters found on the original dataset ---
        ml_model = best_model_from_original_data['model']
        hyperparameters = best_model_from_original_data['params']
        hyperparameters = convert_and_clean_dict(hyperparameters)
        model_name = f"{sd_id}-Orignal_{translate_model_name(ml_model)}"
        logg_tags['model'] = model_name

        mlflow.start_run(model_name, tags=logg_tags, nested=True)

        # Create the estimator, apply the stored hyperparameters, then let pycaret
        # train and cross-validate the configured estimator (no tuning here)
        tmp_model = s.create_model(ml_model)
        model = tmp_model.set_params(**hyperparameters)
        tuned_model = s.create_model(model)

        _evaluate_and_log(s, mlflow, tuned_model, x_test, y_test)
        # End logging run for the model
        mlflow.end_run()

        # --- All configured models, tuned on the synthetic dataset ---
        for ml_model in config['clf']['ml_models']:
            # Start log run
            logg_tags['model'] = ml_model
            model_name = f"{sd_id}-{translate_model_name(ml_model)}"
            mlflow.start_run(model_name, tags=logg_tags, nested=True)

            # Create & tune model
            model = s.create_model(ml_model)
            tuned_model = s.tune_model(model, **config['clf']['tuning_param'])

            _evaluate_and_log(s, mlflow, tuned_model, x_test, y_test)
            # End logging run for the model
            mlflow.end_run()

        # TODO: save best model for each sd_id-models, i.e. save the details of the
        # model with the best accuracy on the '<sd_id>-models' run:
        #best_run = mlflow.get_best_run_by_metric(metric_name='Accuracy')

        #mlflow.log_params(best_run.data.params)
        #mlflow.log_metrics(best_run.data.metrics)
        #mlflow.log_tag('model run name', best_run.data.tags['mlflow.runName'])
        #mlflow.log_tag('model', best_run.data.tags['model'])
        #mlflow.log_tag('model run id', best_run.info.run_id)

        # End logging run for SD_id
        mlflow.end_run()

    # End logging for the synthetic datasets based on original id
    mlflow.end_run()

Unnamed: 0,Description,Value
0,Session id,559
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(922, 9)"
4,Transformed data shape,"(1230, 9)"
5,Transformed train set shape,"(922, 9)"
6,Transformed test set shape,"(308, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.6994,0.5435,0.6757,0.6024,0.2888,0.2944
1,0.6452,0.6574,0.5556,0.6579,0.6024,0.2861,0.2895
2,0.6413,0.6624,0.5111,0.6765,0.5823,0.2785,0.2869
3,0.5761,0.6274,0.5111,0.575,0.5412,0.1498,0.1507
4,0.6957,0.7745,0.7333,0.6735,0.7021,0.3922,0.3937
5,0.6957,0.7447,0.7111,0.6809,0.6957,0.3916,0.392
6,0.587,0.6411,0.6667,0.566,0.6122,0.1766,0.1794
7,0.6304,0.6842,0.6,0.6279,0.6136,0.2598,0.2601
8,0.7065,0.8156,0.7333,0.6875,0.7097,0.4136,0.4145
9,0.6304,0.6903,0.7111,0.6038,0.6531,0.2633,0.2674


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.6994,0.5435,0.6757,0.6024,0.2888,0.2944
1,0.6559,0.6588,0.5556,0.6757,0.6098,0.3073,0.312
2,0.6413,0.6629,0.5111,0.6765,0.5823,0.2785,0.2869
3,0.5761,0.6274,0.5111,0.575,0.5412,0.1498,0.1507
4,0.6957,0.774,0.7333,0.6735,0.7021,0.3922,0.3937
5,0.6957,0.7452,0.7111,0.6809,0.6957,0.3916,0.392
6,0.587,0.6411,0.6667,0.566,0.6122,0.1766,0.1794
7,0.6304,0.6837,0.6,0.6279,0.6136,0.2598,0.2601
8,0.7065,0.8156,0.7333,0.6875,0.7097,0.4136,0.4145
9,0.6304,0.6908,0.7111,0.6038,0.6531,0.2633,0.2674


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.6526,0.6882,0.5693,0.619,0.5932,0.291,0.2917


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7527,0.7479,0.6304,0.8286,0.716,0.5041,0.5189
1,0.6667,0.7301,0.5778,0.6842,0.6265,0.3294,0.3332
2,0.6196,0.6771,0.5556,0.625,0.5882,0.237,0.2384
3,0.5978,0.6191,0.5111,0.6053,0.5542,0.1926,0.1949
4,0.663,0.6842,0.6667,0.6522,0.6593,0.3261,0.3262
5,0.6522,0.7012,0.6444,0.6444,0.6444,0.304,0.304
6,0.5761,0.5844,0.6667,0.5556,0.6061,0.1554,0.1584
7,0.6087,0.6274,0.5778,0.6047,0.5909,0.2163,0.2165
8,0.6848,0.7456,0.6222,0.7,0.6588,0.3678,0.37
9,0.6413,0.6738,0.6444,0.6304,0.6374,0.2826,0.2827


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7312,0.8205,0.6304,0.7838,0.6988,0.4612,0.4701
1,0.7097,0.7486,0.6222,0.7368,0.6747,0.4159,0.4208
2,0.6957,0.7504,0.6,0.7297,0.6585,0.3887,0.3948
3,0.6522,0.6771,0.6222,0.6512,0.6364,0.3034,0.3036
4,0.6848,0.7239,0.7778,0.6481,0.7071,0.3719,0.3792
5,0.6739,0.7579,0.6667,0.6667,0.6667,0.3475,0.3475
6,0.6522,0.6714,0.6889,0.6327,0.6596,0.3053,0.3065
7,0.6413,0.7069,0.7111,0.6154,0.6598,0.2846,0.288
8,0.6848,0.7584,0.6222,0.7,0.6588,0.3678,0.37
9,0.6522,0.6818,0.6667,0.6383,0.6522,0.3047,0.305


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6237,0.0,0.413,0.7037,0.5205,0.2439,0.2675
1,0.5376,0.0,0.8444,0.5135,0.6387,0.0926,0.1171
2,0.6196,0.0,0.4222,0.6786,0.5205,0.2326,0.2507
3,0.5543,0.0,0.6444,0.537,0.5859,0.1121,0.1142
4,0.5109,0.0,0.7556,0.5,0.6018,0.0318,0.0366
5,0.4891,0.0,0.8,0.4865,0.605,-0.0084,-0.0107
6,0.5543,0.0,0.6444,0.537,0.5859,0.1121,0.1142
7,0.6087,0.0,0.6,0.6,0.6,0.217,0.217
8,0.6413,0.0,0.8222,0.5968,0.6916,0.288,0.3096
9,0.5543,0.0,0.7556,0.5312,0.6239,0.1162,0.1274


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6237,0.0,0.587,0.6279,0.6067,0.2467,0.2472
1,0.6452,0.0,0.5778,0.65,0.6118,0.2871,0.2888
2,0.6522,0.0,0.4667,0.7241,0.5676,0.2987,0.319
3,0.5761,0.0,0.5111,0.575,0.5412,0.1498,0.1507
4,0.7065,0.0,0.7778,0.6731,0.7216,0.4147,0.4196
5,0.6848,0.0,0.7333,0.66,0.6947,0.3708,0.373
6,0.6087,0.0,0.7111,0.5818,0.64,0.2207,0.2261
7,0.6522,0.0,0.6889,0.6327,0.6596,0.3053,0.3065
8,0.7065,0.0,0.7333,0.6875,0.7097,0.4136,0.4145
9,0.6522,0.0,0.7778,0.614,0.6863,0.3079,0.3189


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.6753,0.6662,0.5839,0.6504,0.6154,0.3359,0.3374


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6882,0.768,0.587,0.7297,0.6506,0.375,0.3822
1,0.6882,0.7255,0.5556,0.7353,0.6329,0.3709,0.3819
2,0.6957,0.7806,0.5778,0.7429,0.65,0.3881,0.3977
3,0.5978,0.6397,0.6,0.587,0.5934,0.1957,0.1957
4,0.7065,0.7664,0.7778,0.6731,0.7216,0.4147,0.4196
5,0.6739,0.7816,0.6,0.6923,0.6429,0.3457,0.3487
6,0.6304,0.7035,0.7556,0.5965,0.6667,0.2647,0.2741
7,0.7174,0.7608,0.7556,0.6939,0.7234,0.4356,0.4372
8,0.7826,0.8229,0.7333,0.8049,0.7674,0.5642,0.5663
9,0.6196,0.6887,0.6222,0.6087,0.6154,0.2391,0.2392


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6882,0.7283,0.6087,0.7179,0.6588,0.3753,0.3796
1,0.6882,0.6935,0.6222,0.7,0.6588,0.3735,0.3757
2,0.6739,0.7835,0.5333,0.7273,0.6154,0.3438,0.3563
3,0.6196,0.6494,0.6222,0.6087,0.6154,0.2391,0.2392
4,0.7283,0.8142,0.8222,0.6852,0.7475,0.4586,0.4675
5,0.7717,0.7872,0.8444,0.7308,0.7835,0.5448,0.5512
6,0.6522,0.7187,0.7556,0.6182,0.68,0.3073,0.3148
7,0.663,0.7489,0.8,0.6207,0.699,0.3299,0.3437
8,0.7065,0.7792,0.7333,0.6875,0.7097,0.4136,0.4145
9,0.5761,0.6624,0.6889,0.5536,0.6139,0.1562,0.1608


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6989,0.7562,0.587,0.75,0.6585,0.3964,0.4059
1,0.6882,0.7722,0.6222,0.7,0.6588,0.3735,0.3757
2,0.7065,0.7797,0.6222,0.7368,0.6747,0.4108,0.4157
3,0.5978,0.6203,0.5333,0.6,0.5647,0.1934,0.1945
4,0.7174,0.8024,0.7556,0.6939,0.7234,0.4356,0.4372
5,0.6739,0.7801,0.6667,0.6667,0.6667,0.3475,0.3475
6,0.663,0.7069,0.7333,0.6346,0.6804,0.328,0.3318
7,0.6739,0.7608,0.7333,0.6471,0.6875,0.3494,0.3524
8,0.7174,0.826,0.6667,0.7317,0.6977,0.4334,0.4351
9,0.6196,0.6969,0.7111,0.5926,0.6465,0.242,0.2467


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6774,0.7766,0.5652,0.7222,0.6341,0.3533,0.3618
1,0.6989,0.7537,0.5556,0.7576,0.641,0.3922,0.4062
2,0.6522,0.7612,0.5333,0.6857,0.6,0.3007,0.3082
3,0.6196,0.6402,0.5778,0.619,0.5977,0.2377,0.2382
4,0.75,0.8165,0.8667,0.6964,0.7723,0.5024,0.5172
5,0.6957,0.7853,0.6889,0.6889,0.6889,0.391,0.391
6,0.6196,0.6761,0.7111,0.5926,0.6465,0.242,0.2467
7,0.6739,0.7664,0.7778,0.6364,0.7,0.3506,0.3591
8,0.7609,0.8435,0.7111,0.7805,0.7442,0.5206,0.5226
9,0.6304,0.7092,0.6889,0.6078,0.6458,0.2626,0.2649


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.9481,0.9923,0.9635,0.9231,0.9429,0.8953,0.896


Unnamed: 0,Description,Value
0,Session id,4656
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(922, 9)"
4,Transformed data shape,"(1230, 9)"
5,Transformed train set shape,"(922, 9)"
6,Transformed test set shape,"(308, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6344,0.6153,0.3953,0.68,0.5,0.2425,0.2647
1,0.6559,0.693,0.5349,0.6571,0.5897,0.2988,0.3035
2,0.7065,0.7765,0.5814,0.7353,0.6494,0.4029,0.4111
3,0.6522,0.7038,0.5581,0.6486,0.6,0.2954,0.298
4,0.6413,0.7129,0.5581,0.6316,0.5926,0.2744,0.2761
5,0.7174,0.7986,0.619,0.7222,0.6667,0.4239,0.4277
6,0.663,0.7238,0.5952,0.641,0.6173,0.317,0.3177
7,0.7065,0.8167,0.7381,0.6596,0.6966,0.4142,0.4166
8,0.6957,0.6943,0.6667,0.6667,0.6667,0.3867,0.3867
9,0.6957,0.7867,0.6667,0.6667,0.6667,0.3867,0.3867


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6344,0.6158,0.3953,0.68,0.5,0.2425,0.2647
1,0.6559,0.6926,0.5349,0.6571,0.5897,0.2988,0.3035
2,0.6957,0.776,0.5814,0.7143,0.641,0.3817,0.3878
3,0.6522,0.7038,0.5581,0.6486,0.6,0.2954,0.298
4,0.6413,0.7119,0.5581,0.6316,0.5926,0.2744,0.2761
5,0.7174,0.7986,0.619,0.7222,0.6667,0.4239,0.4277
6,0.663,0.7238,0.5952,0.641,0.6173,0.317,0.3177
7,0.7065,0.8162,0.7381,0.6596,0.6966,0.4142,0.4166
8,0.6957,0.6943,0.6667,0.6667,0.6667,0.3867,0.3867
9,0.6957,0.7871,0.6667,0.6667,0.6667,0.3867,0.3867


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.6818,0.6903,0.4958,0.6082,0.5463,0.3052,0.309


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.6963,0.4186,0.6923,0.5217,0.266,0.2873
1,0.6882,0.7716,0.5116,0.7333,0.6027,0.3592,0.375
2,0.6522,0.6884,0.4884,0.6774,0.5676,0.2892,0.3001
3,0.6087,0.6502,0.4419,0.6129,0.5135,0.2004,0.2079
4,0.7065,0.7226,0.6047,0.7222,0.6582,0.4046,0.4095
5,0.75,0.8086,0.7619,0.7111,0.7356,0.4991,0.5001
6,0.6522,0.675,0.5238,0.6471,0.5789,0.2882,0.2929
7,0.7174,0.7514,0.7381,0.6739,0.7045,0.4348,0.4364
8,0.6087,0.611,0.4524,0.5938,0.5135,0.1961,0.2012
9,0.6196,0.6226,0.5714,0.5854,0.5783,0.2319,0.2319


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6344,0.6912,0.2791,0.8,0.4138,0.2295,0.297
1,0.6667,0.7572,0.4419,0.7308,0.5507,0.3105,0.3354
2,0.6739,0.7663,0.3953,0.8095,0.5312,0.3239,0.3729
3,0.6413,0.7259,0.4186,0.6923,0.5217,0.2617,0.2829
4,0.6848,0.7005,0.4884,0.75,0.5915,0.3531,0.3747
5,0.7174,0.7586,0.5476,0.7667,0.6389,0.4172,0.4331
6,0.6848,0.7226,0.4762,0.7407,0.5797,0.3461,0.3678
7,0.7391,0.7788,0.6429,0.75,0.6923,0.4682,0.4724
8,0.6196,0.636,0.4524,0.6129,0.5205,0.2169,0.2238
9,0.6413,0.7026,0.4762,0.6452,0.5479,0.2617,0.27


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,0.724,0.7747,0.4202,0.7576,0.5405,0.3657,0.3981


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6129,0.0,0.3488,0.6522,0.4545,0.1952,0.2182
1,0.6344,0.0,0.4419,0.6552,0.5278,0.2475,0.2603
2,0.7174,0.0,0.7674,0.6735,0.7174,0.4372,0.4409
3,0.587,0.0,0.3256,0.6087,0.4242,0.1461,0.1635
4,0.5217,0.0,0.2558,0.4783,0.3333,0.0112,0.0126
5,0.7065,0.0,0.619,0.7027,0.6582,0.4029,0.4054
6,0.6304,0.0,0.4286,0.6429,0.5143,0.2348,0.2474
7,0.6848,0.0,0.7619,0.6275,0.6882,0.3755,0.3827
8,0.6522,0.0,0.619,0.619,0.619,0.299,0.299
9,0.6087,0.0,0.619,0.5652,0.5909,0.2174,0.2182


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.0,0.3953,0.7083,0.5075,0.2635,0.291
1,0.6559,0.0,0.5116,0.6667,0.5789,0.2965,0.3039
2,0.6739,0.0,0.4884,0.7241,0.5833,0.3317,0.3491
3,0.6522,0.0,0.5349,0.6571,0.5897,0.2933,0.298
4,0.6957,0.0,0.6047,0.7027,0.65,0.3834,0.3868
5,0.7174,0.0,0.6429,0.7105,0.675,0.4261,0.4278
6,0.6739,0.0,0.5952,0.6579,0.625,0.3378,0.3391
7,0.7283,0.0,0.7143,0.6977,0.7059,0.4534,0.4535
8,0.6739,0.0,0.6429,0.6429,0.6429,0.3429,0.3429
9,0.6739,0.0,0.6429,0.6429,0.6429,0.3429,0.3429


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.6818,0.6396,0.4538,0.6207,0.5243,0.2938,0.3019


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6774,0.7247,0.4186,0.7826,0.5455,0.3293,0.3682
1,0.6774,0.7816,0.5349,0.697,0.6053,0.3404,0.349
2,0.7065,0.7617,0.5581,0.75,0.64,0.4012,0.4137
3,0.6848,0.7468,0.5814,0.6944,0.6329,0.3605,0.3649
4,0.6848,0.7523,0.5581,0.7059,0.6234,0.3587,0.366
5,0.7174,0.795,0.6667,0.7,0.6829,0.4283,0.4287
6,0.6848,0.7712,0.5476,0.697,0.6133,0.3537,0.361
7,0.7717,0.845,0.7857,0.7333,0.7586,0.5426,0.5438
8,0.7391,0.7243,0.6667,0.7368,0.7,0.4702,0.4721
9,0.7609,0.7919,0.6905,0.7632,0.725,0.5144,0.5164


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.7098,0.4186,0.6923,0.5217,0.266,0.2873
1,0.7312,0.7609,0.6047,0.7647,0.6753,0.4513,0.4603
2,0.7283,0.7803,0.6279,0.75,0.6835,0.4487,0.4541
3,0.7174,0.7297,0.6047,0.7429,0.6667,0.4258,0.4326
4,0.663,0.7352,0.5581,0.6667,0.6076,0.3164,0.3202
5,0.7391,0.801,0.6905,0.725,0.7073,0.4723,0.4727
6,0.7391,0.7921,0.619,0.7647,0.6842,0.4662,0.4737
7,0.7609,0.8138,0.8095,0.7083,0.7556,0.5235,0.528
8,0.7174,0.7676,0.6905,0.6905,0.6905,0.4305,0.4305
9,0.6957,0.7852,0.6667,0.6667,0.6667,0.3867,0.3867


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.8734,0.9489,0.7983,0.8636,0.8297,0.7292,0.7306


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6559,0.7219,0.4651,0.6897,0.5556,0.2918,0.3069
1,0.6989,0.8014,0.5349,0.7419,0.6216,0.3824,0.3965
2,0.7174,0.7935,0.6047,0.7429,0.6667,0.4258,0.4326
3,0.6739,0.7594,0.5581,0.6857,0.6154,0.3375,0.3429
4,0.6739,0.7375,0.5349,0.697,0.6053,0.3356,0.3441
5,0.7174,0.8062,0.619,0.7222,0.6667,0.4239,0.4277
6,0.6739,0.731,0.5238,0.6875,0.5946,0.3301,0.3386
7,0.75,0.8271,0.7143,0.7317,0.7229,0.4952,0.4953
8,0.7391,0.7419,0.619,0.7647,0.6842,0.4662,0.4737
9,0.75,0.8095,0.7143,0.7317,0.7229,0.4952,0.4953


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6237,0.647,0.4884,0.6176,0.5455,0.2318,0.2364
1,0.7097,0.7656,0.6047,0.7222,0.6582,0.4093,0.4142
2,0.7609,0.8173,0.6977,0.7692,0.7317,0.5169,0.519
3,0.6522,0.728,0.6512,0.6222,0.6364,0.3034,0.3036
4,0.6196,0.6972,0.5581,0.6,0.5783,0.2326,0.2331
5,0.7065,0.7671,0.6905,0.6744,0.6824,0.4097,0.4098
6,0.663,0.6929,0.5714,0.6486,0.6076,0.3144,0.3164
7,0.7609,0.839,0.7381,0.7381,0.7381,0.5181,0.5181
8,0.6196,0.679,0.5714,0.5854,0.5783,0.2319,0.2319
9,0.6957,0.7871,0.6429,0.675,0.6585,0.3843,0.3847


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.8864,0.9494,0.8067,0.8889,0.8458,0.7562,0.7584


Unnamed: 0,Description,Value
0,Session id,4162
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(922, 9)"
4,Transformed data shape,"(1230, 9)"
5,Transformed train set shape,"(922, 9)"
6,Transformed test set shape,"(308, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6882,0.6806,0.4878,0.7143,0.5797,0.3455,0.3615
1,0.6667,0.7261,0.5122,0.6562,0.5753,0.3078,0.3142
2,0.6522,0.6743,0.4634,0.6552,0.5429,0.2752,0.286
3,0.7174,0.7505,0.6,0.7059,0.6486,0.4149,0.4187
4,0.7065,0.7688,0.575,0.697,0.6301,0.3906,0.3955
5,0.6848,0.6938,0.5,0.6897,0.5797,0.3376,0.3488
6,0.7391,0.7183,0.525,0.8077,0.6364,0.4469,0.4721
7,0.6739,0.7062,0.625,0.625,0.625,0.3365,0.3365
8,0.663,0.6947,0.575,0.6216,0.5974,0.3084,0.3091
9,0.6522,0.6904,0.5,0.625,0.5556,0.2756,0.2802


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6882,0.6811,0.4878,0.7143,0.5797,0.3455,0.3615
1,0.6667,0.7261,0.5122,0.6562,0.5753,0.3078,0.3142
2,0.6522,0.6729,0.4634,0.6552,0.5429,0.2752,0.286
3,0.7174,0.7495,0.6,0.7059,0.6486,0.4149,0.4187
4,0.7065,0.7697,0.575,0.697,0.6301,0.3906,0.3955
5,0.6848,0.6938,0.5,0.6897,0.5797,0.3376,0.3488
6,0.7391,0.7188,0.525,0.8077,0.6364,0.4469,0.4721
7,0.6739,0.7062,0.625,0.625,0.625,0.3365,0.3365
8,0.663,0.6947,0.575,0.6216,0.5974,0.3084,0.3091
9,0.6522,0.6904,0.5,0.625,0.5556,0.2756,0.2802


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.6818,0.6953,0.4839,0.6383,0.5505,0.3114,0.3185


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.7303,0.5366,0.7333,0.6197,0.3939,0.4065
1,0.7527,0.822,0.5854,0.8,0.6761,0.4837,0.4992
2,0.5217,0.5285,0.2683,0.44,0.3333,-0.0065,-0.0069
3,0.663,0.7034,0.5,0.6452,0.5634,0.2962,0.3025
4,0.6739,0.6846,0.65,0.619,0.6341,0.3403,0.3407
5,0.5978,0.6514,0.475,0.5429,0.5067,0.1698,0.1708
6,0.5978,0.6269,0.35,0.56,0.4308,0.1447,0.1543
7,0.663,0.6526,0.55,0.6286,0.5867,0.3044,0.3063
8,0.6087,0.5993,0.5,0.5556,0.5263,0.1946,0.1953
9,0.6522,0.6529,0.475,0.6333,0.5429,0.2713,0.2786


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7419,0.7927,0.5122,0.84,0.6364,0.454,0.4874
1,0.6882,0.7908,0.4146,0.7727,0.5397,0.3349,0.3721
2,0.5761,0.6528,0.2439,0.5556,0.339,0.0921,0.1091
3,0.6957,0.7687,0.525,0.7,0.6,0.3624,0.3722
4,0.7609,0.7591,0.7,0.7368,0.7179,0.5106,0.5111
5,0.6522,0.7188,0.5,0.625,0.5556,0.2756,0.2802
6,0.6957,0.7067,0.425,0.7727,0.5484,0.3469,0.3822
7,0.6304,0.6913,0.45,0.6,0.5143,0.2257,0.2318
8,0.6848,0.6524,0.5,0.6897,0.5797,0.3376,0.3488
9,0.6196,0.674,0.475,0.5758,0.5205,0.21,0.2127


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6129,0.0,0.6585,0.551,0.6,0.2307,0.2341
1,0.6022,0.0,0.6098,0.5435,0.5747,0.2033,0.2045
2,0.5652,0.0,0.3415,0.5185,0.4118,0.0896,0.0945
3,0.5109,0.0,0.0,0.0,0.0,-0.107,-0.2103
4,0.7391,0.0,0.65,0.7222,0.6842,0.463,0.4649
5,0.6304,0.0,0.275,0.6875,0.3929,0.1921,0.2339
6,0.6739,0.0,0.25,1.0,0.4,0.2737,0.3982
7,0.5435,0.0,0.075,0.375,0.125,-0.0233,-0.0372
8,0.6087,0.0,0.3,0.6,0.4,0.1551,0.1757
9,0.5652,0.0,0.175,0.5,0.2593,0.0437,0.0557


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6667,0.0,0.4878,0.6667,0.5634,0.3041,0.3138
1,0.6882,0.0,0.561,0.6765,0.6133,0.3559,0.3602
2,0.6413,0.0,0.439,0.6429,0.5217,0.2507,0.2624
3,0.663,0.0,0.55,0.6286,0.5867,0.3044,0.3063
4,0.7174,0.0,0.675,0.675,0.675,0.425,0.425
5,0.6522,0.0,0.575,0.6053,0.5897,0.2882,0.2885
6,0.6848,0.0,0.575,0.6571,0.6133,0.3493,0.3515
7,0.6848,0.0,0.675,0.6279,0.6506,0.3642,0.3649
8,0.6196,0.0,0.625,0.5556,0.5882,0.237,0.2384
9,0.6304,0.0,0.45,0.6,0.5143,0.2257,0.2318


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.6948,0.6565,0.4597,0.6786,0.5481,0.3303,0.3446


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7849,0.8279,0.6098,0.8621,0.7143,0.5499,0.5711
1,0.7204,0.7892,0.5854,0.7273,0.6486,0.421,0.4278
2,0.5978,0.6284,0.4146,0.5667,0.4789,0.164,0.1694
3,0.7717,0.8094,0.7,0.7568,0.7273,0.5315,0.5327
4,0.7065,0.7736,0.725,0.6444,0.6824,0.4114,0.4138
5,0.7174,0.7726,0.75,0.6522,0.6977,0.4348,0.4385
6,0.6957,0.713,0.575,0.6765,0.6216,0.3699,0.3733
7,0.7391,0.8094,0.725,0.6905,0.7073,0.4723,0.4727
8,0.663,0.7284,0.525,0.6364,0.5753,0.3003,0.3041
9,0.6739,0.7514,0.575,0.6389,0.6053,0.3288,0.3301


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7849,0.8302,0.6829,0.8,0.7368,0.5569,0.5619
1,0.7634,0.8011,0.7561,0.7209,0.7381,0.5226,0.5231
2,0.7174,0.7135,0.6098,0.7143,0.6579,0.4197,0.4235
3,0.7065,0.8019,0.725,0.6444,0.6824,0.4114,0.4138
4,0.6957,0.8034,0.775,0.62,0.6889,0.3981,0.4077
5,0.7174,0.7897,0.725,0.6591,0.6905,0.4316,0.4332
6,0.6957,0.7875,0.55,0.6875,0.6111,0.3661,0.3723
7,0.7174,0.8038,0.825,0.6346,0.7174,0.4442,0.4596
8,0.6196,0.7111,0.625,0.5556,0.5882,0.237,0.2384
9,0.7065,0.7423,0.725,0.6444,0.6824,0.4114,0.4138


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.7825,0.8679,0.7581,0.7176,0.7373,0.5519,0.5525


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7957,0.8372,0.7073,0.8056,0.7532,0.5802,0.5838
1,0.7312,0.8021,0.6585,0.7105,0.6835,0.4505,0.4515
2,0.6304,0.6934,0.4634,0.6129,0.5278,0.2337,0.2399
3,0.6957,0.7938,0.65,0.65,0.65,0.3808,0.3808
4,0.6848,0.7837,0.7,0.6222,0.6588,0.3678,0.37
5,0.6522,0.7558,0.6,0.6,0.6,0.2923,0.2923
6,0.75,0.7726,0.575,0.7931,0.6667,0.4747,0.4904
7,0.7065,0.7952,0.625,0.6757,0.6494,0.3977,0.3986
8,0.6739,0.7197,0.55,0.6471,0.5946,0.3249,0.3279
9,0.6848,0.7832,0.575,0.6571,0.6133,0.3493,0.3515


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.7613,0.6341,0.6842,0.6582,0.4065,0.4074
1,0.6989,0.7824,0.561,0.697,0.6216,0.3764,0.3826
2,0.6848,0.7073,0.5122,0.7,0.5915,0.3448,0.356
3,0.6304,0.7139,0.475,0.5938,0.5278,0.2303,0.2342
4,0.7391,0.824,0.7,0.7,0.7,0.4692,0.4692
5,0.6196,0.7325,0.55,0.5641,0.557,0.2237,0.2238
6,0.7391,0.8005,0.575,0.7667,0.6571,0.4535,0.4657
7,0.6957,0.8106,0.625,0.6579,0.641,0.3772,0.3775
8,0.6957,0.7269,0.525,0.7,0.6,0.3624,0.3722
9,0.7391,0.7851,0.65,0.7222,0.6842,0.463,0.4649


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.8799,0.952,0.8065,0.885,0.8439,0.7466,0.7487


Unnamed: 0,Description,Value
0,Session id,3807
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(922, 9)"
4,Transformed data shape,"(1230, 9)"
5,Transformed train set shape,"(922, 9)"
6,Transformed test set shape,"(308, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.828,0.8784,0.8085,0.8444,0.8261,0.656,0.6566
1,0.8065,0.876,0.766,0.8372,0.8,0.6132,0.6155
2,0.7283,0.8162,0.6087,0.8,0.6914,0.4565,0.4702
3,0.8478,0.9121,0.9574,0.7895,0.8654,0.6941,0.7113
4,0.837,0.8955,0.7234,0.9444,0.8193,0.6754,0.6954
5,0.8696,0.9475,0.9149,0.8431,0.8776,0.7385,0.7413
6,0.8804,0.9541,0.8936,0.875,0.8842,0.7606,0.7608
7,0.8478,0.9333,0.8511,0.8511,0.8511,0.6955,0.6955
8,0.9022,0.9239,0.8936,0.913,0.9032,0.8043,0.8045
9,0.8696,0.9414,0.9362,0.8302,0.88,0.7383,0.7447


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8172,0.8784,0.7872,0.8409,0.8132,0.6346,0.6359
1,0.8065,0.8756,0.766,0.8372,0.8,0.6132,0.6155
2,0.7283,0.8162,0.6087,0.8,0.6914,0.4565,0.4702
3,0.837,0.9135,0.9362,0.7857,0.8544,0.6724,0.6857
4,0.837,0.8955,0.7234,0.9444,0.8193,0.6754,0.6954
5,0.8913,0.948,0.9149,0.8776,0.8958,0.7823,0.783
6,0.8804,0.9541,0.8936,0.875,0.8842,0.7606,0.7608
7,0.8478,0.9333,0.8511,0.8511,0.8511,0.6955,0.6955
8,0.913,0.9239,0.8936,0.9333,0.913,0.8262,0.827
9,0.8696,0.9418,0.9362,0.8302,0.88,0.7383,0.7447


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.7825,0.8583,0.7194,0.7812,0.7491,0.5577,0.5591


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7742,0.8446,0.7234,0.8095,0.764,0.5489,0.5521
1,0.7957,0.8525,0.7447,0.8333,0.7865,0.5918,0.5953
2,0.7283,0.7812,0.6304,0.7838,0.6988,0.4565,0.4655
3,0.8587,0.9298,0.9362,0.8148,0.8713,0.7163,0.7248
4,0.8043,0.8927,0.7021,0.8919,0.7857,0.6104,0.6252
5,0.837,0.8967,0.8298,0.8478,0.8387,0.6739,0.6741
6,0.8587,0.9392,0.8085,0.9048,0.8539,0.7179,0.7222
7,0.8043,0.8903,0.766,0.8372,0.8,0.6092,0.6116
8,0.8478,0.9338,0.8298,0.8667,0.8478,0.6958,0.6965
9,0.8587,0.9111,0.8936,0.84,0.866,0.7169,0.7184


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7849,0.8686,0.7447,0.814,0.7778,0.5702,0.5724
1,0.8065,0.8684,0.766,0.8372,0.8,0.6132,0.6155
2,0.7174,0.7817,0.6087,0.7778,0.6829,0.4348,0.4454
3,0.8261,0.9158,0.8723,0.8039,0.8367,0.6514,0.6538
4,0.8478,0.9168,0.7447,0.9459,0.8333,0.6969,0.7139
5,0.8696,0.9414,0.8511,0.8889,0.8696,0.7393,0.74
6,0.8804,0.9641,0.8085,0.95,0.8736,0.7615,0.7705
7,0.837,0.9234,0.7872,0.881,0.8315,0.6745,0.6785
8,0.8804,0.9447,0.8085,0.95,0.8736,0.7615,0.7705
9,0.8587,0.9385,0.8936,0.84,0.866,0.7169,0.7184


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.0,0.766,0.8372,0.8,0.6132,0.6155
1,0.7527,0.0,0.7021,0.7857,0.7416,0.5059,0.5088
2,0.6957,0.0,0.6522,0.7143,0.6818,0.3913,0.3928
3,0.8478,0.0,0.9149,0.8113,0.86,0.6946,0.7007
4,0.7935,0.0,0.7234,0.85,0.7816,0.5881,0.595
5,0.7935,0.0,0.8723,0.7593,0.8119,0.5854,0.5923
6,0.8261,0.0,0.7447,0.8974,0.814,0.6533,0.6634
7,0.8152,0.0,0.8298,0.8125,0.8211,0.6301,0.6302
8,0.7935,0.0,0.766,0.8182,0.7912,0.5873,0.5886
9,0.8587,0.0,0.8723,0.8542,0.8632,0.7171,0.7173


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.0,0.7872,0.8222,0.8043,0.613,0.6136
1,0.7849,0.0,0.766,0.8,0.7826,0.57,0.5706
2,0.7174,0.0,0.6087,0.7778,0.6829,0.4348,0.4454
3,0.8696,0.0,0.9574,0.8182,0.8824,0.738,0.7495
4,0.8804,0.0,0.8085,0.95,0.8736,0.7615,0.7705
5,0.8696,0.0,0.9149,0.8431,0.8776,0.7385,0.7413
6,0.8804,0.0,0.8936,0.875,0.8842,0.7606,0.7608
7,0.8587,0.0,0.8723,0.8542,0.8632,0.7171,0.7173
8,0.9022,0.0,0.8936,0.913,0.9032,0.8043,0.8045
9,0.8804,0.0,0.9362,0.8462,0.8889,0.7602,0.7647


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.7792,0.7758,0.741,0.763,0.7518,0.5531,0.5533


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8172,0.8943,0.8085,0.8261,0.8172,0.6345,0.6346
1,0.7742,0.8677,0.8298,0.75,0.7879,0.5478,0.551
2,0.7609,0.7973,0.7174,0.7857,0.75,0.5217,0.5237
3,0.8587,0.9035,0.9362,0.8148,0.8713,0.7163,0.7248
4,0.8478,0.9085,0.7447,0.9459,0.8333,0.6969,0.7139
5,0.8804,0.9574,0.9362,0.8462,0.8889,0.7602,0.7647
6,0.8913,0.9461,0.8723,0.9111,0.8913,0.7827,0.7835
7,0.8804,0.9518,0.8936,0.875,0.8842,0.7606,0.7608
8,0.8804,0.9499,0.8723,0.8913,0.8817,0.7609,0.761
9,0.8696,0.9478,0.9362,0.8302,0.88,0.7383,0.7447


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8172,0.8996,0.8085,0.8261,0.8172,0.6345,0.6346
1,0.7849,0.8825,0.8085,0.7755,0.7917,0.5696,0.5702
2,0.7609,0.8181,0.7174,0.7857,0.75,0.5217,0.5237
3,0.8478,0.8946,0.9362,0.8,0.8627,0.6944,0.7052
4,0.8261,0.9026,0.7872,0.8605,0.8222,0.6527,0.6551
5,0.9022,0.9504,0.9362,0.88,0.9072,0.804,0.8057
6,0.9239,0.9631,0.9362,0.9167,0.9263,0.8477,0.8479
7,0.8804,0.947,0.8723,0.8913,0.8817,0.7609,0.761
8,0.8696,0.9499,0.8936,0.8571,0.875,0.7388,0.7395
9,0.8804,0.9461,0.9362,0.8462,0.8889,0.7602,0.7647


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.8864,0.9672,0.8561,0.8881,0.8718,0.7698,0.7702


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8495,0.8839,0.8511,0.8511,0.8511,0.6989,0.6989
1,0.8065,0.877,0.8085,0.8085,0.8085,0.6129,0.6129
2,0.7391,0.8417,0.6957,0.7619,0.7273,0.4783,0.4801
3,0.8478,0.8983,0.9149,0.8113,0.86,0.6946,0.7007
4,0.8261,0.9073,0.7447,0.8974,0.814,0.6533,0.6634
5,0.8587,0.9466,0.8936,0.84,0.866,0.7169,0.7184
6,0.8804,0.9707,0.8511,0.9091,0.8791,0.7611,0.7627
7,0.837,0.9437,0.8723,0.82,0.8454,0.6733,0.6747
8,0.8913,0.9385,0.9149,0.8776,0.8958,0.7823,0.783
9,0.8478,0.9437,0.9149,0.8113,0.86,0.6946,0.7007


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8932,0.8723,0.7736,0.82,0.6123,0.6175
1,0.7634,0.8876,0.8298,0.7358,0.78,0.5262,0.5306
2,0.7826,0.8171,0.8043,0.7708,0.7872,0.5652,0.5658
3,0.8478,0.895,0.9574,0.7895,0.8654,0.6941,0.7113
4,0.8478,0.9012,0.8511,0.8511,0.8511,0.6955,0.6955
5,0.8696,0.9556,0.9362,0.8302,0.88,0.7383,0.7447
6,0.8913,0.9636,0.9362,0.8627,0.898,0.7821,0.7851
7,0.8804,0.9504,0.9149,0.86,0.8866,0.7604,0.762
8,0.8913,0.9504,0.9574,0.8491,0.9,0.7819,0.7887
9,0.837,0.9442,0.9362,0.7857,0.8544,0.6724,0.6857


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.8669,0.9596,0.8921,0.8267,0.8581,0.7331,0.735


Unnamed: 0,Description,Value
0,Session id,2436
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(922, 9)"
4,Transformed data shape,"(1230, 9)"
5,Transformed train set shape,"(922, 9)"
6,Transformed test set shape,"(308, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.7857,0.6136,0.7297,0.6667,0.4129,0.4178
1,0.6774,0.7797,0.6591,0.6591,0.6591,0.353,0.353
2,0.6413,0.7481,0.5909,0.6341,0.6118,0.2792,0.2798
3,0.6957,0.8282,0.6744,0.6744,0.6744,0.3887,0.3887
4,0.7717,0.8187,0.7442,0.7619,0.7529,0.5409,0.541
5,0.7283,0.8363,0.6744,0.725,0.6988,0.4519,0.4528
6,0.8152,0.8728,0.7674,0.825,0.7952,0.6273,0.6286
7,0.6848,0.813,0.6512,0.6667,0.6588,0.366,0.3661
8,0.7935,0.8657,0.814,0.7609,0.7865,0.587,0.5882
9,0.7283,0.8377,0.6977,0.7143,0.7059,0.4534,0.4535


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7204,0.7853,0.6364,0.7368,0.6829,0.4353,0.4391
1,0.6774,0.7792,0.6591,0.6591,0.6591,0.353,0.353
2,0.6413,0.7476,0.5909,0.6341,0.6118,0.2792,0.2798
3,0.6957,0.8282,0.6744,0.6744,0.6744,0.3887,0.3887
4,0.7717,0.8187,0.7442,0.7619,0.7529,0.5409,0.541
5,0.7283,0.8363,0.6744,0.725,0.6988,0.4519,0.4528
6,0.8152,0.8723,0.7674,0.825,0.7952,0.6273,0.6286
7,0.6848,0.8125,0.6512,0.6667,0.6588,0.366,0.3661
8,0.7935,0.8662,0.814,0.7609,0.7865,0.587,0.5882
9,0.7283,0.8367,0.6977,0.7143,0.7059,0.4534,0.4535


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.6883,0.7859,0.6142,0.624,0.619,0.3553,0.3554


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6774,0.7083,0.6591,0.6591,0.6591,0.353,0.353
1,0.7097,0.8003,0.5909,0.7429,0.6582,0.4115,0.4197
2,0.6848,0.7069,0.6136,0.6923,0.6506,0.3654,0.3676
3,0.7174,0.7558,0.7209,0.6889,0.7045,0.434,0.4344
4,0.6957,0.734,0.6279,0.6923,0.6585,0.3852,0.3867
5,0.7065,0.7352,0.5814,0.7353,0.6494,0.4029,0.4111
6,0.7717,0.8189,0.7209,0.775,0.747,0.5396,0.5407
7,0.6848,0.7197,0.5581,0.7059,0.6234,0.3587,0.366
8,0.7609,0.8531,0.7674,0.7333,0.75,0.5211,0.5216
9,0.7609,0.8318,0.7209,0.7561,0.7381,0.5183,0.5188


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6989,0.7811,0.5909,0.7222,0.65,0.3904,0.3965
1,0.7419,0.8163,0.6136,0.7941,0.6923,0.4763,0.4881
2,0.663,0.7396,0.5455,0.6857,0.6076,0.319,0.3254
3,0.75,0.8526,0.6744,0.7632,0.716,0.4943,0.4973
4,0.7717,0.8154,0.7209,0.775,0.747,0.5396,0.5407
5,0.6739,0.7736,0.5814,0.6757,0.625,0.3394,0.3424
6,0.75,0.859,0.6512,0.7778,0.7089,0.4928,0.4988
7,0.7283,0.7867,0.6279,0.75,0.6835,0.4487,0.4541
8,0.8043,0.8522,0.814,0.7778,0.7955,0.6081,0.6087
9,0.75,0.837,0.6744,0.7632,0.716,0.4943,0.4973


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,0.724,0.8035,0.5906,0.6944,0.6383,0.4175,0.4211


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6667,0.0,0.7727,0.6182,0.6869,0.3398,0.3495
1,0.7097,0.0,0.6591,0.7073,0.6824,0.4156,0.4165
2,0.6196,0.0,0.5227,0.6216,0.5679,0.2326,0.2354
3,0.7826,0.0,0.7442,0.7805,0.7619,0.5621,0.5627
4,0.6522,0.0,0.5116,0.6667,0.5789,0.2913,0.2987
5,0.6957,0.0,0.6744,0.6744,0.6744,0.3887,0.3887
6,0.6848,0.0,0.6744,0.6591,0.6667,0.3678,0.3679
7,0.7065,0.0,0.5814,0.7353,0.6494,0.4029,0.4111
8,0.6848,0.0,0.5116,0.7333,0.6027,0.3549,0.3708
9,0.6848,0.0,0.814,0.625,0.7071,0.3784,0.394


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6774,0.0,0.6136,0.675,0.6429,0.35,0.3513
1,0.7204,0.0,0.6818,0.7143,0.6977,0.4379,0.4383
2,0.6522,0.0,0.5682,0.6579,0.6098,0.299,0.3017
3,0.663,0.0,0.6512,0.6364,0.6437,0.3242,0.3242
4,0.7391,0.0,0.7209,0.7209,0.7209,0.476,0.476
5,0.6848,0.0,0.6279,0.675,0.6506,0.3642,0.3649
6,0.75,0.0,0.7209,0.7381,0.7294,0.4971,0.4973
7,0.7174,0.0,0.6744,0.7073,0.6905,0.4307,0.4312
8,0.7391,0.0,0.7674,0.7021,0.7333,0.479,0.4808
9,0.75,0.0,0.7442,0.7273,0.7356,0.4986,0.4987


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.7078,0.6985,0.6457,0.6457,0.6457,0.3971,0.3971


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6667,0.7846,0.6591,0.6444,0.6517,0.3322,0.3323
1,0.7419,0.8186,0.7273,0.7273,0.7273,0.4824,0.4824
2,0.6848,0.7498,0.6136,0.6923,0.6506,0.3654,0.3676
3,0.7717,0.8263,0.7209,0.775,0.747,0.5396,0.5407
4,0.7717,0.8336,0.7674,0.75,0.7586,0.5422,0.5423
5,0.7065,0.8047,0.6512,0.7,0.6747,0.408,0.4089
6,0.8478,0.8652,0.814,0.8537,0.8333,0.6935,0.6941
7,0.7174,0.8237,0.5814,0.7576,0.6579,0.4242,0.435
8,0.7283,0.8472,0.7907,0.68,0.7312,0.4596,0.4649
9,0.75,0.8384,0.6744,0.7632,0.716,0.4943,0.4973


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.7774,0.7273,0.6809,0.7033,0.4197,0.4206
1,0.7419,0.8224,0.75,0.7174,0.7333,0.4836,0.484
2,0.6522,0.7424,0.6136,0.6429,0.6279,0.3017,0.302
3,0.7174,0.7893,0.6977,0.6977,0.6977,0.4324,0.4324
4,0.8152,0.8234,0.8372,0.7826,0.809,0.6304,0.6318
5,0.6848,0.8049,0.6744,0.6591,0.6667,0.3678,0.3679
6,0.8043,0.8538,0.7674,0.8049,0.7857,0.6059,0.6065
7,0.7065,0.8178,0.5814,0.7353,0.6494,0.4029,0.4111
8,0.6957,0.8258,0.7674,0.6471,0.7021,0.3956,0.4016
9,0.7283,0.8244,0.6977,0.7143,0.7059,0.4534,0.4535


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7204,0.769,0.75,0.6875,0.7174,0.4418,0.4435
1,0.7312,0.8275,0.75,0.7021,0.7253,0.4627,0.4636
2,0.6957,0.7666,0.6364,0.7,0.6667,0.3878,0.3893
3,0.8043,0.8396,0.7907,0.7907,0.7907,0.607,0.607
4,0.7826,0.8505,0.7442,0.7805,0.7619,0.5621,0.5627
5,0.7717,0.8453,0.7209,0.775,0.747,0.5396,0.5407
6,0.8152,0.8533,0.814,0.7955,0.8046,0.6294,0.6295
7,0.6848,0.8116,0.5349,0.7188,0.6133,0.3568,0.3679
8,0.7391,0.8358,0.7674,0.7021,0.7333,0.479,0.4808
9,0.7826,0.8614,0.7209,0.7949,0.7561,0.5609,0.563


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.7699,0.7045,0.6889,0.6966,0.4183,0.4184
1,0.7204,0.7681,0.7727,0.68,0.7234,0.4431,0.4468
2,0.6522,0.7098,0.6136,0.6429,0.6279,0.3017,0.302
3,0.7391,0.7793,0.7209,0.7209,0.7209,0.476,0.476
4,0.7283,0.8367,0.6512,0.7368,0.6914,0.4503,0.453
5,0.7174,0.804,0.6977,0.6977,0.6977,0.4324,0.4324
6,0.7391,0.8282,0.6512,0.7568,0.7,0.4715,0.4757
7,0.6413,0.7048,0.5116,0.6471,0.5714,0.2702,0.2757
8,0.6413,0.7399,0.6977,0.6,0.6452,0.2867,0.29
9,0.7717,0.8249,0.7674,0.75,0.7586,0.5422,0.5423


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.8636,0.9441,0.8504,0.8244,0.8372,0.7199,0.7202


Unnamed: 0,Description,Value
0,Session id,1829
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(922, 9)"
4,Transformed data shape,"(1230, 9)"
5,Transformed train set shape,"(922, 9)"
6,Transformed test set shape,"(308, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8495,0.9191,0.8478,0.8478,0.8478,0.6989,0.6989
1,0.7634,0.8719,0.7174,0.7857,0.75,0.5264,0.5284
2,0.7609,0.8615,0.6889,0.7949,0.7381,0.5202,0.5247
3,0.7826,0.8355,0.8667,0.7358,0.7959,0.5667,0.5754
4,0.8261,0.9362,0.8913,0.7885,0.8367,0.6522,0.6578
5,0.8478,0.9301,0.8478,0.8478,0.8478,0.6957,0.6957
6,0.8261,0.914,0.8478,0.8125,0.8298,0.6522,0.6528
7,0.8478,0.9045,0.8043,0.881,0.8409,0.6957,0.6983
8,0.8478,0.9173,0.8696,0.8333,0.8511,0.6957,0.6963
9,0.8478,0.9452,0.7609,0.9211,0.8333,0.6957,0.7064


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8495,0.9195,0.8478,0.8478,0.8478,0.6989,0.6989
1,0.7634,0.8719,0.7174,0.7857,0.75,0.5264,0.5284
2,0.7609,0.8619,0.6889,0.7949,0.7381,0.5202,0.5247
3,0.7826,0.8355,0.8667,0.7358,0.7959,0.5667,0.5754
4,0.8261,0.9357,0.8913,0.7885,0.8367,0.6522,0.6578
5,0.8478,0.9301,0.8478,0.8478,0.8478,0.6957,0.6957
6,0.8261,0.914,0.8478,0.8125,0.8298,0.6522,0.6528
7,0.8478,0.905,0.8043,0.881,0.8409,0.6957,0.6983
8,0.8478,0.9173,0.8696,0.8333,0.8511,0.6957,0.6963
9,0.8478,0.9452,0.7609,0.9211,0.8333,0.6957,0.7064


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.7825,0.8712,0.7537,0.7481,0.7509,0.5579,0.5579


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7849,0.8543,0.7826,0.7826,0.7826,0.5698,0.5698
1,0.8065,0.8173,0.8043,0.8043,0.8043,0.6129,0.6129
2,0.7935,0.8418,0.7556,0.8095,0.7816,0.5862,0.5874
3,0.7609,0.8165,0.8222,0.7255,0.7708,0.5229,0.5274
4,0.8261,0.8937,0.8913,0.7885,0.8367,0.6522,0.6578
5,0.7935,0.8795,0.8043,0.7872,0.7957,0.587,0.5871
6,0.9022,0.9501,0.8913,0.9111,0.9011,0.8043,0.8045
7,0.8804,0.9286,0.8478,0.907,0.8764,0.7609,0.7625
8,0.8696,0.8897,0.8478,0.8864,0.8667,0.7391,0.7398
9,0.8478,0.9402,0.7391,0.9444,0.8293,0.6957,0.7127


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.9033,0.8261,0.7917,0.8085,0.613,0.6136
1,0.8387,0.8323,0.8478,0.8298,0.8387,0.6775,0.6776
2,0.8043,0.843,0.7778,0.814,0.7955,0.6081,0.6087
3,0.7826,0.8556,0.8667,0.7358,0.7959,0.5667,0.5754
4,0.8478,0.9376,0.913,0.8077,0.8571,0.6957,0.7016
5,0.8261,0.897,0.8261,0.8261,0.8261,0.6522,0.6522
6,0.8804,0.9475,0.8696,0.8889,0.8791,0.7609,0.761
7,0.8696,0.957,0.8261,0.9048,0.8636,0.7391,0.7419
8,0.8913,0.9277,0.8913,0.8913,0.8913,0.7826,0.7826
9,0.8804,0.953,0.7826,0.973,0.8675,0.7609,0.7759


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8172,0.0,0.7609,0.8537,0.8046,0.6339,0.6376
1,0.7742,0.0,0.7391,0.7907,0.764,0.548,0.5492
2,0.7174,0.0,0.6,0.7714,0.675,0.4318,0.4425
3,0.7935,0.0,0.7556,0.8095,0.7816,0.5862,0.5874
4,0.8587,0.0,0.8696,0.8511,0.8602,0.7174,0.7176
5,0.7826,0.0,0.7391,0.8095,0.7727,0.5652,0.5674
6,0.8152,0.0,0.8913,0.7736,0.8283,0.6304,0.6379
7,0.7935,0.0,0.7609,0.814,0.7865,0.587,0.5882
8,0.8261,0.0,0.8696,0.8,0.8333,0.6522,0.6547
9,0.8261,0.0,0.7826,0.8571,0.8182,0.6522,0.6547


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.828,0.0,0.8043,0.8409,0.8222,0.6557,0.6563
1,0.7957,0.0,0.7609,0.814,0.7865,0.5911,0.5923
2,0.7717,0.0,0.7333,0.7857,0.7586,0.5426,0.5438
3,0.7826,0.0,0.8444,0.7451,0.7917,0.5662,0.5711
4,0.8478,0.0,0.913,0.8077,0.8571,0.6957,0.7016
5,0.8587,0.0,0.8478,0.8667,0.8571,0.7174,0.7176
6,0.8587,0.0,0.8478,0.8667,0.8571,0.7174,0.7176
7,0.8478,0.0,0.7826,0.9,0.8372,0.6957,0.7016
8,0.8696,0.0,0.8913,0.8542,0.8723,0.7391,0.7398
9,0.8696,0.0,0.7826,0.9474,0.8571,0.7391,0.7506


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.7825,0.78,0.7612,0.7445,0.7528,0.5586,0.5587


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.9207,0.8478,0.78,0.8125,0.6132,0.6155
1,0.8065,0.8723,0.8043,0.8043,0.8043,0.6129,0.6129
2,0.7717,0.879,0.7333,0.7857,0.7586,0.5426,0.5438
3,0.7935,0.8813,0.8889,0.7407,0.8081,0.5885,0.6
4,0.8478,0.9409,0.8696,0.8333,0.8511,0.6957,0.6963
5,0.8478,0.9435,0.8478,0.8478,0.8478,0.6957,0.6957
6,0.8913,0.97,0.8913,0.8913,0.8913,0.7826,0.7826
7,0.8587,0.9468,0.8043,0.9024,0.8506,0.7174,0.7217
8,0.8261,0.9126,0.8261,0.8261,0.8261,0.6522,0.6522
9,0.8587,0.9601,0.7609,0.9459,0.8434,0.7174,0.7315


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7957,0.849,0.8696,0.7547,0.8081,0.592,0.5988
1,0.7742,0.778,0.8043,0.7551,0.7789,0.5486,0.5498
2,0.7283,0.7643,0.8444,0.6786,0.7525,0.4591,0.4727
3,0.7065,0.7764,0.9333,0.6364,0.7568,0.4185,0.4693
4,0.7935,0.866,0.8261,0.7755,0.8,0.587,0.5882
5,0.837,0.879,0.8478,0.8298,0.8387,0.6739,0.6741
6,0.8478,0.9031,0.9348,0.7963,0.86,0.6957,0.7064
7,0.7935,0.8355,0.8913,0.7455,0.8119,0.587,0.5985
8,0.8043,0.827,0.8696,0.7692,0.8163,0.6087,0.6139
9,0.7609,0.8308,0.6739,0.8158,0.7381,0.5217,0.5298


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7957,0.907,0.7826,0.8,0.7912,0.5913,0.5914
1,0.8172,0.8848,0.7826,0.8372,0.809,0.6341,0.6354
2,0.8261,0.8832,0.7778,0.8537,0.814,0.6514,0.6538
3,0.8261,0.8671,0.8667,0.7959,0.8298,0.6527,0.6551
4,0.8478,0.9395,0.8478,0.8478,0.8478,0.6957,0.6957
5,0.8804,0.9466,0.8478,0.907,0.8764,0.7609,0.7625
6,0.9239,0.9792,0.9783,0.8824,0.9278,0.8478,0.8529
7,0.8696,0.9627,0.8478,0.8864,0.8667,0.7391,0.7398
8,0.8043,0.9159,0.8043,0.8043,0.8043,0.6087,0.6087
9,0.8587,0.9575,0.7609,0.9459,0.8434,0.7174,0.7315


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8387,0.907,0.8261,0.8444,0.8352,0.6773,0.6775
1,0.7742,0.8696,0.7174,0.8049,0.7586,0.5478,0.551
2,0.8043,0.8816,0.7778,0.814,0.7955,0.6081,0.6087
3,0.8043,0.8837,0.8889,0.7547,0.8163,0.61,0.6194
4,0.8913,0.9405,0.9565,0.8462,0.898,0.7826,0.7894
5,0.8804,0.9499,0.8913,0.8723,0.8817,0.7609,0.761
6,0.8696,0.966,0.8913,0.8542,0.8723,0.7391,0.7398
7,0.8804,0.9442,0.8478,0.907,0.8764,0.7609,0.7625
8,0.837,0.923,0.8261,0.8444,0.8352,0.6739,0.6741
9,0.837,0.9612,0.7391,0.9189,0.8193,0.6739,0.6872


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.9156,0.9656,0.9179,0.8913,0.9044,0.8289,0.8291


---

# Step 5: Measure Population Fidelity (PF)

In [24]:
import pandas as pd 
import sys
import os

sys.path.append('../src')
from PF_metrics import compute_all_pf_measures
from utils import (getExperimentConfig, 
                   getPicklesFromDir, 
                   get_synthetic_filepaths_from_original_data_id)

# Load the global experiment settings and pull out the directory mapping
# used below ('settings_dir', 'real_dir', 'sd_dir').
config = getExperimentConfig()
folders = config['folders']

# One settings entry per prepared dataset, read from the pickles in the settings directory.
# NOTE(review): the first cell loads the same pickles under the name `dataset_settings`.
settings = getPicklesFromDir(folders['settings_dir'])

In [25]:
# Column order of the population-fidelity (PF) result table.
# Every measure is paired with a `<measure>_time` column
# (presumably the time taken to compute that measure -- confirm in PF_metrics).
pf_measure_columns = [
    'DatasetName',             # presumably the synthetic dataset id passed as SD_id -- confirm
    'pMSE',
    'pMSE_time',
    's_pMSE',
    's_pMSE_time',
    'Cluster_1',               # num of clusters = 1% of dataset_size
    'Cluster_1_time',          # timing column for the Cluster_1 measure
    #'Cluster_5',     # num of clusters = 5% of dataset_size
    #'Cluster_10',    # num of clusters = 10% of dataset_size
    'BNLogLikelihood',
    'BNLogLikelihood_time',
    'GMLogLikelihood',
    'GMLogLikelihood_time',
    'ContinousKLDivergence',
    'ContinousKLDivergence_time',
    'DiscreteKLDivergence',
    'DiscreteKLDivergence_time',
    'KSComplement',
    'KSComplement_time',
    'CSTest',
    'CSTest_time',
    'CrossClassification',       # Cross-classification
    'CrossClassification_time',  # Cross-classification
]

# Collect one result frame per synthetic dataset and concatenate once after the
# loops. Growing a DataFrame with pd.concat inside the loop re-copies all earlier
# rows on every iteration, and concatenating onto an empty frame is deprecated
# in recent pandas versions.
pf_frames = []

for dataset_setting in settings:
    dataset_meta = dataset_setting['meta']

    # os.path.join works whether or not the configured folder ends with a separator
    original_data = pd.read_csv(os.path.join(folders['real_dir'], dataset_meta['filename']))

    metadata = dataset_meta['meta_data']
    original_data_id = dataset_meta['id']

    # File names of the synthetic datasets generated from this original dataset
    synthetic_datasets = get_synthetic_filepaths_from_original_data_id(original_data_id)

    for sd_filename in synthetic_datasets:
        # The synthetic dataset id is the file name without its extension
        sd_id = os.path.splitext(sd_filename)[0]
        sd_path = os.path.join(folders['sd_dir'], sd_filename)

        synthetic_data = pd.read_csv(sd_path)

        # assumes compute_all_pf_measures returns a DataFrame whose columns match
        # pf_measure_columns -- TODO confirm against PF_metrics
        pf_measures = compute_all_pf_measures(original_data=original_data,
                                              synthetic_data=synthetic_data,
                                              metadata=metadata,
                                              SD_id=sd_id)

        pf_frames.append(pf_measures)

if pf_frames:
    result_df = pd.concat(pf_frames, axis=0, ignore_index=True)
    # Keep the declared column order first; any unexpected extra columns are
    # kept at the end, as the incremental concat used to do.
    extra_columns = [col for col in result_df.columns if col not in pf_measure_columns]
    result_df = result_df.reindex(columns=pf_measure_columns + extra_columns)
else:
    # No synthetic datasets found: still produce an empty table with the expected header
    result_df = pd.DataFrame(columns=pf_measure_columns)


# save the results
result_df.to_csv('../data/pf_measures.csv', index=False)
result_df.to_html('../data/pf_measures.html')

num samples data: 1536, num_klusters:15
(1536, 9)
<class 'pandas.core.frame.DataFrame'>
num samples data: 1536, num_klusters:15
(1536, 9)
<class 'pandas.core.frame.DataFrame'>
num samples data: 1536, num_klusters:15
(1536, 9)
<class 'pandas.core.frame.DataFrame'>
num samples data: 1536, num_klusters:15
(1536, 9)
<class 'pandas.core.frame.DataFrame'>
num samples data: 1536, num_klusters:15
(1536, 9)
<class 'pandas.core.frame.DataFrame'>
num samples data: 1536, num_klusters:15
(1536, 9)
<class 'pandas.core.frame.DataFrame'>


In [26]:
# Render the collected PF measures as a rich table (one row per synthetic dataset)
display(result_df)

Unnamed: 0,DatasetName,pMSE,pMSE_time,s_pMSE,s_pMSE_time,Cluster_1,Cluster_1_time,BNLogLikelihood,BNLogLikelihood_time,GMLogLikelihood,...,ContinousKLDivergence,ContinousKLDivergence_time,DiscreteKLDivergence,DiscreteKLDivergence_time,KSComplement,KSComplement_time,CSTest,CSTest_time,CrossClassification,CrossClassification_time
0,SD0Q1_0,0.230725,0.021544,223.564075,0.025691,22.642776,11.974487,-0.751548,0.144707,-28.667452,...,0.377548,0.082617,,0.000251,0.534993,0.005441,0.724537,0.000837,0.433939,0.094462
1,SD0Q1_1,0.225934,0.018198,210.366665,0.018104,21.753316,13.063949,-0.730436,0.03063,-32.838181,...,0.321741,0.083083,,0.001183,0.522461,0.009147,0.778423,0.00132,0.438625,0.120435
2,SD0Q1_2,0.23165,0.018921,219.655623,0.021202,21.88399,11.873773,-0.712572,0.034391,-32.815682,...,0.330569,0.080604,,0.000289,0.515625,0.007362,0.824878,0.001012,0.464286,0.101539
3,SD0Q2_0,0.030482,0.020946,26.331309,0.025975,1.566102,15.072511,-0.766164,0.039285,-27.012169,...,0.652129,0.079691,,0.000292,0.856934,0.00717,0.687998,0.001047,0.647746,0.12827
4,SD0Q3_0,0.054103,0.026947,45.735094,0.022852,4.88365,18.09395,-0.736932,0.030407,-32.410436,...,0.461794,0.08338,,0.000414,0.813802,0.007361,0.761715,0.001036,0.623423,0.101744
5,SD0Q4_0,0.020818,0.020946,21.065784,0.018255,1.546847,18.042187,-0.757232,0.032635,-27.525399,...,0.69772,0.084665,,0.000225,0.885254,0.00522,0.710248,0.000824,0.657321,0.104943


In [27]:
# `time` is already imported in the cell that sets start_time; re-importing is harmless.
import time
# Stop the wall-clock timer; start_time was taken with time.perf_counter() in an earlier cell,
# so this cell only works if that cell ran in the same kernel session.
end_time = time.perf_counter()

# Elapsed time in (fractional) seconds
timed = end_time-start_time

print(f"Experiment took {timed} seconds")
print(f"Which is {timed/60} minutes.")

Experiment took 3114.8467543 seconds
Which is 51.91411257166667 minutes.


#### TODO:
Fix the error raised when the cluster-analysis metric is run with a larger number of clusters: for example, `num_clusters=69` fails with `ValueError: Clustering algorithm could not initialize`.

In [28]:
# Reproduction of the cluster-metric failure, kept commented out so the notebook runs cleanly.
# The recorded output below shows the run with num_clusters=69 ending in
# "ValueError: Clustering algorithm could not initialize."
# NOTE(review): relies on `synthetic_data` left over from the PF-measure loop -- confirm before re-enabling.
#from PF_metrics import *
#clus = cluster_metric(pd.read_csv('../data/real/diabetes.csv'), 
#                      synthetic_data, 
#                      num_clusters=69, 
#                      metadata=settings[0]['meta']['meta_data'])

num samples data: 1536, num_klusters:69
(1536, 9)
<class 'pandas.core.frame.DataFrame'>


ValueError: Clustering algorithm could not initialize. Consider assigning the initial clusters manually.