In [1]:
import pandas as pd 
import os 
import sys 
import pickle
from pycaret.classification import *

sys.path.append('../src')
from utils import getExperimentConfig, getPicklesFromDir, run_pycaret_setup

In [2]:
# Experiment-wide configuration, shared by every cell below.
config = getExperimentConfig()
# Directory layout of the experiment (settings, logs, ...).
folders = config['folders']
# Dataset-specific settings derived from the real data, unpickled from the
# settings directory.
settings_dir = folders['settings_dir']
dataset_settings = getPicklesFromDir(settings_dir)

In [3]:
# Display the first loaded settings entry to inspect its 'meta' and
# 'setup_param' structure.
dataset_settings[0]

{'meta': {'name': 'Diabetes',
  'id': 'D0',
  'filename': 'diabetes.csv',
  'target': 'Outcome',
  'ordinal_features': None,
  'numeric_features': ['DiabetesPedigreeFunction',
   'BMI',
   'Insulin',
   'Glucose',
   'Age',
   'SkinThickness',
   'BloodPressure',
   'Pregnancies'],
  'text_features': None,
  'categorical_features': None,
  'sd_meta_list': [{'id': 'SD0Q1_0',
    'path': '../data/synthetic/SD0Q1_0.csv',
    'sdg_params': {'epochs': 300, 'batch_size': 50}},
   {'id': 'SD0Q1_1',
    'path': '../data/synthetic/SD0Q1_1.csv',
    'sdg_params': {'epochs': 300, 'batch_size': 50}}]},
 'setup_param': {'target': 'Outcome',
  'train_size': 0.8,
  'fold_strategy': 'stratifiedkfold',
  'fold': 10,
  'ordinal_features': None,
  'numeric_features': ['DiabetesPedigreeFunction',
   'BMI',
   'Insulin',
   'Glucose',
   'Age',
   'SkinThickness',
   'BloodPressure',
   'Pregnancies'],
  'text_features': None,
  'categorical_features': None,
  'imputation_type': 'simple',
  'numeric_imputa

Pseudocode for how to test the synthetic datasets:


```
for each real_dataset in sd_meta:
    Get real_dataset settings and parameters
    
    in setup_param, set 'log_data' = false, to disable saving the train test split, no need to save it
    in setup_param update 'system_log' to S + current-value
    in setup_param, set 'experiment_custom_tags' to 'Synthetic'
    
    for each sd_meta_info in sd_meta[real_dataset]:
    
        update 'experiment_name' in setup_param to the current sd_id
        
        run setup()
        for each ml in ml_models:
        
            clf = run create_model(ml)
            run tune_model(clf, optimize='Accuracy', n_iter)
            
            test?
        
    
    
```

In [4]:
# Create and tune every configured classifier on every synthetic dataset.
#
# NOTE: settings['setup_param'] is updated in place, so the entries of
# dataset_settings keep the logging options set here after this cell has run.

# Loop-invariant lookups, resolved once up front.
ml_models = config['clf']['ml_models']
tuning_param = config['clf']['tuning_param']
# os.path.join yields the same path as plain concatenation when log_dir ends
# with a separator, and still produces a valid path when it does not.
system_log_path = os.path.join(folders['log_dir'], "Step4_SD")

for settings in dataset_settings:
    setup_param = settings['setup_param']
    # update system_log name
    setup_param['system_log'] = system_log_path
    # disable saving train-test split data (to save space)
    setup_param['log_data'] = False

    for sd_meta in settings['meta']['sd_meta_list']:
        # Add custom tags to the log, defining dataset type, ID and SDG hyperparameters
        setup_param['experiment_custom_tags'] = {
            'Dataset Type': 'Synthetic',
            'Dataset ID': sd_meta['id'],
            'SDG param': sd_meta['sdg_params']
        }

        # presumably loads the synthetic CSV and runs PyCaret's setup() with
        # these parameters -- confirm against utils.run_pycaret_setup
        run_pycaret_setup(sd_meta['path'], setup_param)

        for ml_model in ml_models:
            # create then tune the model
            model = create_model(ml_model)
            # NOTE(review): tuned_model is overwritten on every iteration and
            # never stored here; presumably the results are read back from the
            # experiment logs -- confirm.
            tuned_model = tune_model(model, **tuning_param)

Unnamed: 0,Description,Value
0,Session id,6988
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 10)"
4,Transformed data shape,"(768, 10)"
5,Transformed train set shape,"(614, 10)"
6,Transformed test set shape,"(154, 10)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.7778,0.4118,0.7778,0.5385,0.4303,0.4652
1,0.6935,0.6275,0.2778,0.4545,0.3448,0.1598,0.168
2,0.7742,0.7639,0.4444,0.6667,0.5333,0.3922,0.4062
3,0.7903,0.774,0.3889,0.7778,0.5185,0.403,0.4425
4,0.7541,0.7139,0.4118,0.5833,0.4828,0.3277,0.3363
5,0.8525,0.8489,0.5882,0.8333,0.6897,0.5966,0.6122
6,0.7869,0.6578,0.2941,0.8333,0.4348,0.3386,0.4086
7,0.7869,0.7861,0.2941,0.8333,0.4348,0.3386,0.4086
8,0.7705,0.7513,0.4706,0.6154,0.5333,0.3847,0.3908
9,0.8033,0.7928,0.4706,0.7273,0.5714,0.4513,0.4693


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.7791,0.4118,0.7778,0.5385,0.4303,0.4652
1,0.6935,0.6263,0.2778,0.4545,0.3448,0.1598,0.168
2,0.7742,0.7626,0.4444,0.6667,0.5333,0.3922,0.4062
3,0.7903,0.7727,0.3889,0.7778,0.5185,0.403,0.4425
4,0.7377,0.7126,0.4118,0.5385,0.4667,0.2968,0.3015
5,0.8525,0.8489,0.5882,0.8333,0.6897,0.5966,0.6122
6,0.7869,0.6564,0.2941,0.8333,0.4348,0.3386,0.4086
7,0.8033,0.7861,0.3529,0.8571,0.5,0.4029,0.4645
8,0.7705,0.7513,0.4706,0.6154,0.5333,0.3847,0.3908
9,0.8033,0.7928,0.4706,0.7273,0.5714,0.4513,0.4693


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.7235,0.4118,0.7,0.5185,0.3958,0.4186
1,0.6613,0.6465,0.1667,0.3333,0.2222,0.0356,0.039
2,0.8065,0.7001,0.4444,0.8,0.5714,0.4593,0.4924
3,0.7742,0.7923,0.3889,0.7,0.5,0.3692,0.3958
4,0.6721,0.5956,0.2941,0.3846,0.3333,0.121,0.123
5,0.7213,0.7574,0.3529,0.5,0.4138,0.2381,0.2443
6,0.7705,0.6664,0.2941,0.7143,0.4167,0.3034,0.3498
7,0.8033,0.7152,0.4118,0.7778,0.5385,0.4281,0.4631
8,0.7377,0.6932,0.4706,0.5333,0.5,0.3232,0.3243
9,0.7541,0.7861,0.3529,0.6,0.4444,0.2999,0.3173


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.7948,0.2941,1.0,0.4545,0.3769,0.4819
1,0.6774,0.6755,0.1111,0.3333,0.1667,0.0252,0.031
2,0.7419,0.7879,0.2778,0.625,0.3846,0.2508,0.2838
3,0.8065,0.8662,0.3333,1.0,0.5,0.4151,0.5118
4,0.7377,0.7005,0.1176,0.6667,0.2,0.127,0.1968
5,0.7869,0.8623,0.2941,0.8333,0.4348,0.3386,0.4086
6,0.7869,0.7099,0.2941,0.8333,0.4348,0.3386,0.4086
7,0.7869,0.8249,0.2353,1.0,0.381,0.3074,0.4262
8,0.7705,0.8075,0.3529,0.6667,0.4615,0.3328,0.36
9,0.7541,0.8449,0.1765,0.75,0.2857,0.2009,0.2785


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7419,0.7098,0.4118,0.5385,0.4667,0.3004,0.3051
1,0.6774,0.6263,0.3333,0.4286,0.375,0.1622,0.1645
2,0.7258,0.7576,0.3889,0.5385,0.4516,0.2751,0.2816
3,0.8387,0.7992,0.5556,0.8333,0.6667,0.5658,0.5861
4,0.7377,0.7126,0.5294,0.5294,0.5294,0.3476,0.3476
5,0.8033,0.8008,0.5882,0.6667,0.625,0.4924,0.4941
6,0.7213,0.6671,0.3529,0.5,0.4138,0.2381,0.2443
7,0.8033,0.7594,0.5294,0.6923,0.6,0.4726,0.4801
8,0.7541,0.7701,0.5294,0.5625,0.5455,0.3771,0.3775
9,0.8361,0.8155,0.6471,0.7333,0.6875,0.577,0.5791


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7581,0.715,0.4118,0.5833,0.4828,0.3309,0.3395
1,0.6613,0.6301,0.2778,0.3846,0.3226,0.1045,0.107
2,0.7258,0.7563,0.3889,0.5385,0.4516,0.2751,0.2816
3,0.8548,0.7967,0.5556,0.9091,0.6897,0.602,0.6331
4,0.7377,0.7126,0.5294,0.5294,0.5294,0.3476,0.3476
5,0.8197,0.8021,0.5294,0.75,0.6207,0.507,0.5202
6,0.7541,0.6644,0.3529,0.6,0.4444,0.2999,0.3173
7,0.8033,0.7607,0.4706,0.7273,0.5714,0.4513,0.4693
8,0.7541,0.7741,0.5294,0.5625,0.5455,0.3771,0.3775
9,0.8361,0.8155,0.5882,0.7692,0.6667,0.5605,0.5694


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7258,0.0,0.6471,0.5,0.5641,0.3689,0.3754
1,0.6452,0.0,0.0,0.0,0.0,-0.118,-0.168
2,0.629,0.0,0.0556,0.1429,0.08,-0.0986,-0.1159
3,0.7097,0.0,0.3333,0.5,0.4,0.2185,0.2263
4,0.7869,0.0,0.4118,0.7,0.5185,0.3933,0.4161
5,0.7869,0.0,0.3529,0.75,0.48,0.3671,0.4084
6,0.7377,0.0,0.4706,0.5333,0.5,0.3232,0.3243
7,0.7541,0.0,0.5294,0.5625,0.5455,0.3771,0.3775
8,0.7377,0.0,0.4706,0.5333,0.5,0.3232,0.3243
9,0.6885,0.0,0.0588,0.25,0.0952,-0.0122,-0.017


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.0,0.2941,0.8333,0.4348,0.3404,0.4103
1,0.6613,0.0,0.2222,0.3636,0.2759,0.0713,0.075
2,0.7742,0.0,0.4444,0.6667,0.5333,0.3922,0.4062
3,0.8226,0.0,0.4444,0.8889,0.5926,0.4948,0.5434
4,0.7213,0.0,0.3529,0.5,0.4138,0.2381,0.2443
5,0.8197,0.0,0.4706,0.8,0.5926,0.4866,0.5149
6,0.7869,0.0,0.2941,0.8333,0.4348,0.3386,0.4086
7,0.7869,0.0,0.4118,0.7,0.5185,0.3933,0.4161
8,0.7869,0.0,0.5294,0.6429,0.5806,0.4396,0.4433
9,0.7869,0.0,0.4118,0.7,0.5185,0.3933,0.4161


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8065,0.4118,0.7778,0.5385,0.4303,0.4652
1,0.6935,0.654,0.2222,0.4444,0.2963,0.1274,0.1399
2,0.7258,0.7323,0.2778,0.5556,0.3704,0.2193,0.2408
3,0.7581,0.8775,0.3889,0.6364,0.4828,0.3367,0.3541
4,0.6885,0.6872,0.2941,0.4167,0.3448,0.1484,0.1523
5,0.7705,0.7995,0.3529,0.6667,0.4615,0.3328,0.36
6,0.7869,0.6858,0.2941,0.8333,0.4348,0.3386,0.4086
7,0.7705,0.8155,0.2941,0.7143,0.4167,0.3034,0.3498
8,0.7541,0.7794,0.4706,0.5714,0.5161,0.3534,0.3563
9,0.8361,0.8463,0.4118,1.0,0.5833,0.5024,0.5792


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7419,0.7974,0.6471,0.5238,0.5789,0.3959,0.4005
1,0.6129,0.6692,0.3889,0.35,0.3684,0.0905,0.0907
2,0.7742,0.7588,0.7222,0.5909,0.65,0.4858,0.4911
3,0.7581,0.8396,0.6667,0.5714,0.6154,0.4404,0.4432
4,0.7213,0.738,0.7647,0.5,0.6047,0.4037,0.4254
5,0.7213,0.8155,0.7647,0.5,0.6047,0.4037,0.4254
6,0.7049,0.7059,0.5294,0.4737,0.5,0.2916,0.2925
7,0.7541,0.8222,0.6471,0.55,0.5946,0.4198,0.4226
8,0.7377,0.8209,0.6471,0.5238,0.5789,0.3915,0.3961
9,0.7541,0.8556,0.7647,0.5417,0.6341,0.457,0.4724


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8176,0.5294,0.6923,0.6,0.4753,0.4828
1,0.6452,0.6982,0.1667,0.3,0.2143,0.0087,0.0093
2,0.7903,0.8106,0.5,0.6923,0.5806,0.4457,0.4562
3,0.8387,0.8782,0.5,0.9,0.6429,0.5494,0.589
4,0.7377,0.7293,0.3529,0.5455,0.4286,0.2684,0.2791
5,0.8197,0.8189,0.5294,0.75,0.6207,0.507,0.5202
6,0.7705,0.7159,0.3529,0.6667,0.4615,0.3328,0.36
7,0.8197,0.8242,0.5294,0.75,0.6207,0.507,0.5202
8,0.7377,0.8175,0.4706,0.5333,0.5,0.3232,0.3243
9,0.8197,0.8155,0.4706,0.8,0.5926,0.4866,0.5149


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.8314,0.4706,0.6667,0.5517,0.4201,0.431
1,0.6774,0.7273,0.2778,0.4167,0.3333,0.1317,0.1364
2,0.7742,0.8056,0.5,0.6429,0.5625,0.4135,0.4194
3,0.8387,0.8434,0.5556,0.8333,0.6667,0.5658,0.5861
4,0.7869,0.7059,0.4706,0.6667,0.5517,0.4173,0.4282
5,0.8197,0.7861,0.5294,0.75,0.6207,0.507,0.5202
6,0.7541,0.7447,0.3529,0.6,0.4444,0.2999,0.3173
7,0.8361,0.8436,0.5882,0.7692,0.6667,0.5605,0.5694
8,0.7541,0.7901,0.5294,0.5625,0.5455,0.3771,0.3775
9,0.8197,0.8356,0.5294,0.75,0.6207,0.507,0.5202


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0,Description,Value
0,Session id,7472
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 10)"
4,Transformed data shape,"(768, 10)"
5,Transformed train set shape,"(614, 10)"
6,Transformed test set shape,"(154, 10)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8226,0.8652,0.5333,0.6667,0.5926,0.481,0.4859
1,0.7742,0.7702,0.2,0.6,0.3,0.2037,0.2476
2,0.7742,0.7292,0.2857,0.5,0.3636,0.2386,0.2524
3,0.7903,0.8185,0.4286,0.5455,0.48,0.351,0.3551
4,0.7377,0.769,0.2857,0.4,0.3333,0.1757,0.1795
5,0.7541,0.7416,0.1429,0.4,0.2105,0.1021,0.1211
6,0.8361,0.9103,0.4286,0.75,0.5455,0.4544,0.4809
7,0.8033,0.8313,0.5,0.5833,0.5385,0.4144,0.4164
8,0.7541,0.7492,0.1429,0.4,0.2105,0.1021,0.1211
9,0.7869,0.769,0.5,0.5385,0.5185,0.3819,0.3823


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8226,0.8652,0.5333,0.6667,0.5926,0.481,0.4859
1,0.7742,0.7702,0.2,0.6,0.3,0.2037,0.2476
2,0.7742,0.7292,0.2857,0.5,0.3636,0.2386,0.2524
3,0.7903,0.8185,0.4286,0.5455,0.48,0.351,0.3551
4,0.7377,0.769,0.2857,0.4,0.3333,0.1757,0.1795
5,0.7541,0.7416,0.1429,0.4,0.2105,0.1021,0.1211
6,0.8361,0.9103,0.4286,0.75,0.5455,0.4544,0.4809
7,0.8033,0.8298,0.5,0.5833,0.5385,0.4144,0.4164
8,0.7541,0.7492,0.1429,0.4,0.2105,0.1021,0.1211
9,0.7869,0.7675,0.5,0.5385,0.5185,0.3819,0.3823


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8226,0.8489,0.5333,0.6667,0.5926,0.481,0.4859
1,0.7419,0.6759,0.2667,0.4444,0.3333,0.1856,0.1949
2,0.8065,0.7091,0.2857,0.6667,0.4,0.306,0.3451
3,0.7581,0.7262,0.2857,0.4444,0.3478,0.2078,0.2155
4,0.7541,0.6725,0.4286,0.4615,0.4444,0.2868,0.2872
5,0.7869,0.614,0.2857,0.5714,0.381,0.2691,0.2927
6,0.7541,0.6884,0.3571,0.4545,0.4,0.2482,0.251
7,0.7705,0.7211,0.3571,0.5,0.4167,0.2787,0.2848
8,0.7541,0.693,0.2143,0.4286,0.2857,0.1567,0.1704
9,0.7705,0.715,0.3571,0.5,0.4167,0.2787,0.2848


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8752,0.3333,0.7143,0.4545,0.3553,0.3935
1,0.7742,0.7071,0.0667,1.0,0.125,0.0977,0.2266
2,0.8226,0.7128,0.3571,0.7143,0.4762,0.3834,0.4168
3,0.7742,0.7277,0.2143,0.5,0.3,0.1903,0.2147
4,0.7705,0.709,0.2857,0.5,0.3636,0.2361,0.2499
5,0.8033,0.6299,0.1429,1.0,0.25,0.2043,0.3373
6,0.7869,0.7698,0.2143,0.6,0.3158,0.2218,0.2633
7,0.8033,0.7462,0.2857,0.6667,0.4,0.3042,0.3434
8,0.7541,0.6467,0.1429,0.4,0.2105,0.1021,0.1211
9,0.8197,0.7751,0.3571,0.7143,0.4762,0.3816,0.4151


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8387,0.8227,0.6667,0.6667,0.6667,0.5603,0.5603
1,0.7903,0.7532,0.4667,0.5833,0.5185,0.3866,0.3905
2,0.7742,0.6979,0.2857,0.5,0.3636,0.2386,0.2524
3,0.7903,0.814,0.5,0.5385,0.5185,0.3847,0.3852
4,0.7541,0.7447,0.4286,0.4615,0.4444,0.2868,0.2872
5,0.7705,0.7219,0.2143,0.5,0.3,0.1882,0.2125
6,0.8361,0.8875,0.6429,0.6429,0.6429,0.5365,0.5365
7,0.7869,0.8191,0.5,0.5385,0.5185,0.3819,0.3823
8,0.7377,0.6884,0.2143,0.375,0.2727,0.127,0.1344
9,0.8197,0.7903,0.6429,0.6,0.6207,0.5026,0.5031


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8387,0.8227,0.6667,0.6667,0.6667,0.5603,0.5603
1,0.7903,0.7532,0.4667,0.5833,0.5185,0.3866,0.3905
2,0.7742,0.6979,0.2857,0.5,0.3636,0.2386,0.2524
3,0.7903,0.814,0.5,0.5385,0.5185,0.3847,0.3852
4,0.7541,0.7447,0.4286,0.4615,0.4444,0.2868,0.2872
5,0.7705,0.7219,0.2143,0.5,0.3,0.1882,0.2125
6,0.8361,0.8875,0.6429,0.6429,0.6429,0.5365,0.5365
7,0.7869,0.8191,0.5,0.5385,0.5185,0.3819,0.3823
8,0.7377,0.6884,0.2143,0.375,0.2727,0.127,0.1344
9,0.8197,0.7903,0.6429,0.6,0.6207,0.5026,0.5031


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7581,0.0,0.6667,0.5,0.5714,0.4076,0.4158
1,0.6774,0.0,0.5333,0.381,0.4444,0.226,0.2323
2,0.7258,0.0,0.2857,0.3636,0.32,0.1514,0.1531
3,0.7742,0.0,0.2857,0.5,0.3636,0.2386,0.2524
4,0.7377,0.0,0.3571,0.4167,0.3846,0.2192,0.2203
5,0.6885,0.0,0.0714,0.1429,0.0952,-0.0682,-0.0742
6,0.7213,0.0,0.5,0.4118,0.4516,0.2671,0.2694
7,0.8197,0.0,0.3571,0.7143,0.4762,0.3816,0.4151
8,0.6885,0.0,0.2143,0.2727,0.24,0.0477,0.0482
9,0.7869,0.0,0.0714,1.0,0.1333,0.106,0.2365


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8226,0.0,0.5333,0.6667,0.5926,0.481,0.4859
1,0.7903,0.0,0.5333,0.5714,0.5517,0.4151,0.4155
2,0.7903,0.0,0.2857,0.5714,0.381,0.2712,0.2949
3,0.7742,0.0,0.4286,0.5,0.4615,0.3197,0.3213
4,0.7869,0.0,0.2143,0.6,0.3158,0.2218,0.2633
5,0.7869,0.0,0.1429,0.6667,0.2353,0.1679,0.2364
6,0.8361,0.0,0.2857,1.0,0.4444,0.3813,0.4854
7,0.7705,0.0,0.3571,0.5,0.4167,0.2787,0.2848
8,0.7541,0.0,0.0714,0.3333,0.1176,0.0399,0.0562
9,0.7869,0.0,0.5,0.5385,0.5185,0.3819,0.3823


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.8851,0.2667,0.6667,0.381,0.2816,0.3246
1,0.7581,0.7674,0.1333,0.5,0.2105,0.121,0.1582
2,0.8065,0.7738,0.2857,0.6667,0.4,0.306,0.3451
3,0.8226,0.7188,0.4286,0.6667,0.5217,0.4191,0.4345
4,0.7377,0.7082,0.2143,0.375,0.2727,0.127,0.1344
5,0.7869,0.6641,0.2143,0.6,0.3158,0.2218,0.2633
6,0.8689,0.8359,0.4286,1.0,0.6,0.5361,0.6052
7,0.7705,0.7416,0.2857,0.5,0.3636,0.2361,0.2499
8,0.7705,0.6839,0.2857,0.5,0.3636,0.2361,0.2499
9,0.8033,0.766,0.3571,0.625,0.4545,0.3453,0.3654


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.844,0.4667,0.5833,0.5185,0.3866,0.3905
1,0.8065,0.5858,0.4,0.6667,0.5,0.3892,0.4087
2,0.8226,0.715,0.4286,0.6667,0.5217,0.4191,0.4345
3,0.7419,0.6339,0.4286,0.4286,0.4286,0.2619,0.2619
4,0.7213,0.6413,0.5,0.4118,0.4516,0.2671,0.2694
5,0.7377,0.6444,0.3571,0.4167,0.3846,0.2192,0.2203
6,0.7213,0.6748,0.5,0.4118,0.4516,0.2671,0.2694
7,0.7213,0.6535,0.2857,0.3636,0.32,0.1479,0.1496
8,0.7377,0.6672,0.5,0.4375,0.4667,0.2938,0.2949
9,0.7705,0.6854,0.5714,0.5,0.5333,0.3821,0.3836


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8199,0.3333,0.7143,0.4545,0.3553,0.3935
1,0.7903,0.756,0.2667,0.6667,0.381,0.2816,0.3246
2,0.8065,0.7463,0.2857,0.6667,0.4,0.306,0.3451
3,0.8226,0.7798,0.4286,0.6667,0.5217,0.4191,0.4345
4,0.7541,0.7325,0.4286,0.4615,0.4444,0.2868,0.2872
5,0.7377,0.6596,0.1429,0.3333,0.2,0.0722,0.0815
6,0.8525,0.8663,0.4286,0.8571,0.5714,0.494,0.5374
7,0.7541,0.7401,0.2857,0.4444,0.3478,0.205,0.2126
8,0.7869,0.7561,0.2143,0.6,0.3158,0.2218,0.2633
9,0.8361,0.8199,0.5,0.7,0.5833,0.4848,0.4954


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.8213,0.2,0.75,0.3158,0.2382,0.3116
1,0.7903,0.7674,0.2,0.75,0.3158,0.2382,0.3116
2,0.7903,0.7679,0.2143,0.6,0.3158,0.2235,0.2651
3,0.8065,0.7857,0.2857,0.6667,0.4,0.306,0.3451
4,0.7541,0.7599,0.3571,0.4545,0.4,0.2482,0.251
5,0.7869,0.6976,0.2143,0.6,0.3158,0.2218,0.2633
6,0.8361,0.9149,0.2857,1.0,0.4444,0.3813,0.4854
7,0.7705,0.7644,0.2143,0.5,0.3,0.1882,0.2125
8,0.8197,0.7234,0.2143,1.0,0.3529,0.2959,0.4167
9,0.8361,0.7979,0.5,0.7,0.5833,0.4848,0.4954


Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [6]:
# Disabled prototype, wrapped in a string literal so that it does not execute:
# it sets the logging options on the first dataset's setup_param and calls
# setup() on a single synthetic CSV.
# NOTE(review): the file name below is spelled "SD0q1_0.csv", whereas the paths
# listed in sd_meta_list use "SD0Q1_0.csv" -- confirm the casing before
# re-enabling this code.
"""
#update setup_param log setings
dataset_settings[0]['setup_param']['experiment_name'] = "SD0Q1_0"
dataset_settings[0]['setup_param']['system_log'] = folders['log_dir']+"Step4_SD"
# Do not save train test datasplit, to save resources
dataset_settings[0]['setup_param']['log_data'] = False
dataset_settings[0]['setup_param']['experiment_custom_tags'] = {'Dataset Type': 'Synthetic'}


pycaret_setup = setup(data=pd.read_csv(folders['sd_dir']+"SD0q1_0.csv"), 
                      **dataset_settings[0]['setup_param'])
"""

Unnamed: 0,Description,Value
0,Session id,6025
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 10)"
4,Transformed data shape,"(768, 10)"
5,Transformed train set shape,"(614, 10)"
6,Transformed test set shape,"(154, 10)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


In [7]:
# the dataset specific saved experiment
#config_filename = folders['setup_dir']+'D0.pkl'

# load the experiment but with the synthetic data, and test_data=original_data.csv
#load_experiment(config_filename, data=pd.read_csv(folders['sd_dir']+"SD0Q1_0.csv"))
# Update log experiment name
#set_config('exp_name_log', 'SD0Q1_0')

In [8]:
#log = get_logs(exp_name)
#log.columns

In [None]:
# Disabled sketch, wrapped in a string literal so that it does not execute:
# compares the configured models, tunes the best one for accuracy and loads a
# saved model.
# NOTE(review): this snippet reads config['ml_models'], whereas the tuning loop
# in this notebook reads config['clf']['ml_models'] -- confirm the key before
# re-enabling this code.
"""
best = compare_models(include=config['ml_models'])

# Dataset of performance with
tune_model(best, optimize='Accuracy', choose_better=True, n_iter=200)


model = load_model(folders['models_dir']+'D0')
"""

The following cell is example code showing how to extract information from the MLflow logs.

In [25]:
# Disabled example, wrapped in a string literal so that it does not execute:
# fetches the logs of the experiment named after the first dataset's id, keeps
# the run name, metric, source and run-id columns, and sorts the runs by
# accuracy in descending order.
"""
exp_name =dataset_settings[0]['meta']['id']
#Get logs
get_logs(exp_name)[[
    'tags.mlflow.runName',
    'metrics.Accuracy',
    'metrics.F1',
    'metrics.Prec',
    'metrics.Recall',
    'tags.Source',
    'run_id', 
]].sort_values('metrics.Accuracy', ascending=False)
"""

Unnamed: 0,tags.mlflow.runName,metrics.Accuracy,metrics.F1,metrics.Prec,metrics.Recall,tags.Source,run_id
7,Random Forest Classifier,0.79,0.6711,0.7465,0.6167,compare_models,a5d9b18b84d04269b0d11d3df3dde699
6,Logistic Regression,0.7754,0.6428,0.7319,0.584,compare_models,8790ff71a4374e30a6297cd62365584c
1,Random Forest Classifier,0.7671,0.6981,0.6416,0.7749,tune_model,79492964faa146c8aa95bb2bfd4a6e6c
5,SVM - Radial Kernel,0.7655,0.6296,0.7207,0.5742,compare_models,656021909f5f48de9dcba758c3065bb0
4,Naive Bayes,0.7639,0.6462,0.689,0.6203,compare_models,a1671c530fe144cea6b708ae50acaac6
3,K Neighbors Classifier,0.7558,0.6142,0.6864,0.5701,compare_models,c407633800fd46229a4cf6118f1334c7
2,SVM - Linear Kernel,0.7279,0.5689,0.6376,0.5238,compare_models,9e9d07ca556b452eb05165f67e599b98
0,Session Initialized 0dfa,,,,,setup,0ddcd068a0a24037971bfa78547b4270
8,Session Initialized 09bf,,,,,setup,8865811c66f74657b50760513f858724
