In [1]:
# How long the whole notebook took to run
import time

start_time = time.perf_counter()

# Step 2: Preprocessing & Classification model
This section will load up the defined settings from the pickles directory and run the machine learning pipeline with the help of the `pycaret` library and save respective data.

In [2]:
# importing all packages needed in this section
import pandas as pd
import os
import sys 

from sklearn.metrics import classification_report
from pycaret.classification import *

# utility functions for the experiment
sys.path.append('../src')

from mlflow_manager import MLFlowManager
from tuning_grids import Grids
from utils import getPicklesFromDir, getExperimentConfig, run_pycaret_setup, translate_model_name

# Get global experiment settings
config = getExperimentConfig()
folders = config['folders']
# get a list of all settings for the datasets prepared beforehand
dataset_settings = getPicklesFromDir(folders['settings_dir'])  

dataset_settings pickle is saved as follows:
```
"meta_data": meta_dataset,  # contains information about the dataset, including path
"setup_param": setup_param, # contains all the setup parameters for pycaret setup() function
"sdg_param": sdg_param,     # contains all sdg parameters for the CTGAN() function

```

In [3]:
run_dataset = config['run_dataset']

for settings in dataset_settings:
        
    if run_dataset is not None and settings['meta']['id'] not in run_dataset:
        # Checks if run_dataset contains dataset_id's
        # if it does, run the experiment only on specified datasets
        continue
        
    # get path
    dataset_path = f"{folders['real_dir']}{settings['meta']['filename']}"
    # run setup function
    s = run_pycaret_setup(dataset_path, settings['setup_param'])
    
    USI = s.get_config('USI')

    
    # Init experiment logging
    experiment_name = f"{settings['meta']['id']}-{settings['meta']['name']}"
    mlflow = MLFlowManager(experiment_name)
    
    logg_tags = {
        'USI': USI,
        'Dataset ID': settings['meta']['id'],
        'Dataset Type': 'original'
    }
    
    mlflow.start_run("Original data models", tags=logg_tags)
    
    # for each defined model in the global config
    # create specified model and tune it
    for ml_model in config['clf']['ml_models']:
        
        model_name = f"{settings['meta']['id']}-{translate_model_name(ml_model)}"
        logg_tags['model']=ml_model
        
        mlflow.start_run(model_name, tags=logg_tags, nested=True)

        # create & tune model
        model = s.create_model(ml_model)
        
        tune_grid = Grids.get_tuning_grid(ml_model)
        # Is buggy, use default tuning by pycaret
        #tuned_model = s.tune_model(model, **config['clf']['tuning_param'], custom_grid=tune_grid)
        tuned_model = s.tune_model(model, **config['clf']['tuning_param'])
        
        # get validation results
        val_df = s.pull()
        val_score = {}
        val_score['Accuracy'] = val_df['Accuracy']['Mean']
        val_score['F1-score'] = val_df['F1']['Mean']
        val_score['AUC']      = val_df['AUC']['Mean']
        val_score['Kappa']    = val_df['Kappa']['Mean']
        val_score['MCC']      = val_df['MCC']['Mean']
        
        # test the model on the holdout-data
        holdout_score = s.predict_model(estimator=tuned_model)
        #metrics =  classification_report(y_true=y_test, y_pred=y_pred, output_dict=True, digits=4)
        #metrics_df = pd.DataFrame(metrics).transpose()
        
        # log parameters     
        mlflow.log_params(tuned_model.get_params())
        # log performance
        mlflow.log_metrics(val_score)
        mlflow.log_score_report_to_html(val_df, "Validation")
        mlflow.log_score_report_to_html(holdout_score, "Holdout")
        # log model
        mlflow.log_model(model=tuned_model)
        
        mlflow.end_run()
        
    # Save model details on the model with best accurracy under the the 'Original data models' run
    best_run = mlflow.get_best_run_by_metric(metric_name='Accuracy')
    
    mlflow.log_params(best_run.data.params)
    mlflow.log_metrics(best_run.data.metrics)
    mlflow.log_tag('model run name', best_run.data.tags['mlflow.runName'])
    mlflow.log_tag('model', best_run.data.tags['model'])
    mlflow.log_tag('model run id', best_run.info.run_id)
    
    mlflow.end_run()          

Unnamed: 0,Description,Value
0,Session id,7079
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.933,0.5,0.8462,0.6286,0.4956,0.5289
1,0.7903,0.833,0.6364,0.7368,0.6829,0.5275,0.5307
2,0.6774,0.6705,0.2727,0.6,0.375,0.1969,0.2247
3,0.7903,0.8466,0.7273,0.6957,0.7111,0.5467,0.547
4,0.7377,0.7952,0.4762,0.6667,0.5556,0.3768,0.3875
5,0.8033,0.8524,0.5714,0.8,0.6667,0.5326,0.5477
6,0.8033,0.8679,0.6667,0.7368,0.7,0.5542,0.5557
7,0.7049,0.7857,0.4762,0.5882,0.5263,0.3155,0.3192
8,0.7541,0.8679,0.619,0.65,0.6341,0.4491,0.4494
9,0.8033,0.8167,0.6667,0.7368,0.7,0.5542,0.5557


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.933,0.5,0.8462,0.6286,0.4956,0.5289
1,0.7903,0.833,0.6364,0.7368,0.6829,0.5275,0.5307
2,0.6774,0.6693,0.2727,0.6,0.375,0.1969,0.2247
3,0.7903,0.8466,0.7273,0.6957,0.7111,0.5467,0.547
4,0.7377,0.7988,0.4762,0.6667,0.5556,0.3768,0.3875
5,0.8033,0.8512,0.5714,0.8,0.6667,0.5326,0.5477
6,0.8033,0.8667,0.6667,0.7368,0.7,0.5542,0.5557
7,0.7049,0.7845,0.4762,0.5882,0.5263,0.3155,0.3192
8,0.7541,0.8679,0.619,0.65,0.6341,0.4491,0.4494
9,0.8033,0.8155,0.6667,0.7368,0.7,0.5542,0.5557


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.7857,0.8289,0.5741,0.7561,0.6526,0.5019,0.5118


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7581,0.8744,0.5455,0.7059,0.6154,0.4431,0.451
1,0.7903,0.842,0.6818,0.7143,0.6977,0.5373,0.5377
2,0.7097,0.7193,0.4545,0.625,0.5263,0.3245,0.333
3,0.6935,0.7341,0.5909,0.5652,0.5778,0.3375,0.3377
4,0.7213,0.7661,0.4762,0.625,0.5405,0.3457,0.3523
5,0.7377,0.7857,0.4762,0.6667,0.5556,0.3768,0.3875
6,0.7541,0.7798,0.6667,0.6364,0.6512,0.4614,0.4617
7,0.6393,0.6804,0.2857,0.4615,0.3529,0.1217,0.1285
8,0.7049,0.744,0.7143,0.5556,0.625,0.388,0.3963
9,0.7705,0.7762,0.6667,0.6667,0.6667,0.4917,0.4917


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7742,0.8898,0.5,0.7857,0.6111,0.4629,0.4863
1,0.8065,0.8545,0.6364,0.7778,0.7,0.5592,0.5654
2,0.7419,0.7386,0.4545,0.7143,0.5556,0.3861,0.4057
3,0.7742,0.8188,0.5909,0.7222,0.65,0.4858,0.4911
4,0.6885,0.7488,0.3333,0.5833,0.4242,0.2319,0.249
5,0.7377,0.8149,0.4286,0.6923,0.5294,0.3613,0.3812
6,0.7049,0.803,0.4762,0.5882,0.5263,0.3155,0.3192
7,0.7869,0.7982,0.5238,0.7857,0.6286,0.4874,0.5071
8,0.8197,0.8554,0.6667,0.7778,0.7179,0.5866,0.5903
9,0.7377,0.7756,0.5714,0.6316,0.6,0.4056,0.4067


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,0.7727,0.8116,0.537,0.7436,0.6237,0.4669,0.4796


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8226,0.8977,0.6364,0.8235,0.7179,0.5916,0.6021
1,0.8065,0.833,0.7273,0.7273,0.7273,0.5773,0.5773
2,0.6774,0.6943,0.4091,0.5625,0.4737,0.2494,0.256
3,0.7581,0.8023,0.6364,0.6667,0.6512,0.4661,0.4664
4,0.7377,0.7786,0.5714,0.6316,0.6,0.4056,0.4067
5,0.7377,0.8381,0.4762,0.6667,0.5556,0.3768,0.3875
6,0.7541,0.8524,0.619,0.65,0.6341,0.4491,0.4494
7,0.6393,0.7345,0.4762,0.4762,0.4762,0.2012,0.2012
8,0.7377,0.7917,0.6667,0.6087,0.6364,0.4319,0.433
9,0.8197,0.8595,0.619,0.8125,0.7027,0.5767,0.5876


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8226,0.8977,0.6364,0.8235,0.7179,0.5916,0.6021
1,0.8065,0.833,0.7273,0.7273,0.7273,0.5773,0.5773
2,0.6935,0.6943,0.4091,0.6,0.4865,0.2791,0.2895
3,0.7581,0.8023,0.6364,0.6667,0.6512,0.4661,0.4664
4,0.7377,0.7786,0.5714,0.6316,0.6,0.4056,0.4067
5,0.7377,0.8381,0.4762,0.6667,0.5556,0.3768,0.3875
6,0.7541,0.8524,0.619,0.65,0.6341,0.4491,0.4494
7,0.6393,0.7345,0.4762,0.4762,0.4762,0.2012,0.2012
8,0.7377,0.7917,0.6667,0.6087,0.6364,0.4319,0.433
9,0.8197,0.8595,0.619,0.8125,0.7027,0.5767,0.5876


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Naive Bayes,0.7987,0.8378,0.6481,0.7447,0.6931,0.5444,0.5473


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7258,0.0,0.5909,0.619,0.6047,0.3949,0.3952
1,0.7258,0.0,0.6364,0.6087,0.6222,0.4072,0.4074
2,0.6613,0.0,0.5455,0.5217,0.5333,0.2677,0.2679
3,0.7419,0.0,0.5,0.6875,0.5789,0.3995,0.41
4,0.6721,0.0,0.7619,0.5161,0.6154,0.3476,0.3677
5,0.5902,0.0,0.5238,0.4231,0.4681,0.1408,0.143
6,0.8033,0.0,0.8095,0.68,0.7391,0.5831,0.5889
7,0.6721,0.0,0.5714,0.5217,0.5455,0.2899,0.2906
8,0.6721,0.0,0.3333,0.5385,0.4118,0.2016,0.2127
9,0.6721,0.0,0.5238,0.5238,0.5238,0.2738,0.2738


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.0,0.5,0.9167,0.6471,0.5291,0.5753
1,0.8065,0.0,0.6818,0.75,0.7143,0.5684,0.5699
2,0.6935,0.0,0.3182,0.6364,0.4242,0.2458,0.2733
3,0.7903,0.0,0.7273,0.6957,0.7111,0.5467,0.547
4,0.7377,0.0,0.4762,0.6667,0.5556,0.3768,0.3875
5,0.7705,0.0,0.4762,0.7692,0.5882,0.4411,0.4655
6,0.7705,0.0,0.619,0.6842,0.65,0.4799,0.4812
7,0.6885,0.0,0.4762,0.5556,0.5128,0.2859,0.2877
8,0.7705,0.0,0.619,0.6842,0.65,0.4799,0.4812
9,0.8033,0.0,0.6667,0.7368,0.7,0.5542,0.5557


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Linear Kernel,0.7792,0.732,0.5741,0.7381,0.6458,0.4891,0.4972


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.9023,0.5455,0.8571,0.6667,0.5396,0.567
1,0.7903,0.8398,0.6364,0.7368,0.6829,0.5275,0.5307
2,0.6935,0.708,0.3636,0.6154,0.4571,0.2628,0.2805
3,0.7097,0.8534,0.6364,0.5833,0.6087,0.3786,0.3795
4,0.7377,0.8286,0.5238,0.6471,0.5789,0.3915,0.3961
5,0.7377,0.8643,0.4762,0.6667,0.5556,0.3768,0.3875
6,0.7705,0.8202,0.7143,0.6522,0.6818,0.5029,0.5042
7,0.7049,0.781,0.4286,0.6,0.5,0.2989,0.3074
8,0.8197,0.8702,0.8095,0.7083,0.7556,0.6137,0.6171
9,0.7377,0.8083,0.5714,0.6316,0.6,0.4056,0.4067


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7419,0.8295,0.4545,0.7143,0.5556,0.3861,0.4057
1,0.7581,0.7869,0.5909,0.6842,0.6341,0.4549,0.4576
2,0.6935,0.7227,0.4091,0.6,0.4865,0.2791,0.2895
3,0.7419,0.7693,0.6818,0.625,0.6522,0.4477,0.4487
4,0.7541,0.8071,0.619,0.65,0.6341,0.4491,0.4494
5,0.7869,0.7738,0.5714,0.75,0.6486,0.4997,0.5092
6,0.7213,0.7512,0.6667,0.5833,0.6222,0.403,0.4052
7,0.7049,0.7405,0.4286,0.6,0.5,0.2989,0.3074
8,0.7213,0.7964,0.6667,0.5833,0.6222,0.403,0.4052
9,0.6721,0.7024,0.5714,0.5217,0.5455,0.2899,0.2906


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,SVM - Radial Kernel,0.7468,0.8365,0.5926,0.6531,0.6214,0.4318,0.4329


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7903,0.8773,0.5455,0.8,0.6486,0.5067,0.5256
1,0.8226,0.8352,0.6818,0.7895,0.7317,0.6002,0.6038
2,0.7419,0.7409,0.4545,0.7143,0.5556,0.3861,0.4057
3,0.7903,0.8239,0.7273,0.6957,0.7111,0.5467,0.547
4,0.7377,0.794,0.5714,0.6316,0.6,0.4056,0.4067
5,0.7213,0.775,0.5238,0.6111,0.5641,0.3611,0.3634
6,0.7705,0.7762,0.8095,0.6296,0.7083,0.524,0.5352
7,0.7541,0.8202,0.5238,0.6875,0.5946,0.4227,0.4308
8,0.7869,0.8619,0.8095,0.6538,0.7234,0.5532,0.5616
9,0.7049,0.7679,0.5714,0.5714,0.5714,0.3464,0.3464


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.9307,0.5909,0.8125,0.6842,0.5496,0.5641
1,0.8065,0.8409,0.6818,0.75,0.7143,0.5684,0.5699
2,0.6613,0.6659,0.3636,0.5333,0.4324,0.2032,0.2108
3,0.7581,0.8364,0.7273,0.64,0.6809,0.4873,0.4899
4,0.7213,0.8012,0.4762,0.625,0.5405,0.3457,0.3523
5,0.8197,0.8619,0.619,0.8125,0.7027,0.5767,0.5876
6,0.8197,0.8845,0.7619,0.7273,0.7442,0.6051,0.6055
7,0.6885,0.7857,0.4762,0.5556,0.5128,0.2859,0.2877
8,0.7869,0.8643,0.7619,0.6667,0.7111,0.5435,0.5465
9,0.7377,0.8179,0.7143,0.6,0.6522,0.4442,0.4485


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,MLP Classifier,0.7468,0.817,0.6111,0.6471,0.6286,0.4367,0.4371


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.8983,0.5909,0.8125,0.6842,0.5496,0.5641
1,0.7742,0.8375,0.5909,0.7222,0.65,0.4858,0.4911
2,0.7419,0.733,0.4091,0.75,0.5294,0.3722,0.4046
3,0.8226,0.842,0.7273,0.7619,0.7442,0.6085,0.6089
4,0.7377,0.7679,0.5714,0.6316,0.6,0.4056,0.4067
5,0.7869,0.8488,0.619,0.7222,0.6667,0.5114,0.5147
6,0.7541,0.8292,0.6667,0.6364,0.6512,0.4614,0.4617
7,0.7213,0.7792,0.4762,0.625,0.5405,0.3457,0.3523
8,0.7049,0.8196,0.5714,0.5714,0.5714,0.3464,0.3464
9,0.6885,0.769,0.619,0.5417,0.5778,0.3328,0.3346


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8065,0.9182,0.5909,0.8125,0.6842,0.5496,0.5641
1,0.8065,0.8682,0.6818,0.75,0.7143,0.5684,0.5699
2,0.7581,0.7534,0.4545,0.7692,0.5714,0.418,0.4461
3,0.8065,0.8443,0.7727,0.7083,0.7391,0.5857,0.5871
4,0.8033,0.8143,0.7143,0.7143,0.7143,0.5643,0.5643
5,0.8033,0.8583,0.6667,0.7368,0.7,0.5542,0.5557
6,0.7705,0.856,0.619,0.6842,0.65,0.4799,0.4812
7,0.7213,0.7548,0.4762,0.625,0.5405,0.3457,0.3523
8,0.7541,0.8476,0.5238,0.6875,0.5946,0.4227,0.4308
9,0.7049,0.7607,0.619,0.5652,0.5909,0.3609,0.3618


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.7662,0.8456,0.6667,0.6667,0.6667,0.4867,0.4867


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8387,0.9239,0.6818,0.8333,0.75,0.6327,0.6396
1,0.7903,0.8455,0.6364,0.7368,0.6829,0.5275,0.5307
2,0.6935,0.7136,0.4545,0.5882,0.5128,0.2946,0.2998
3,0.7742,0.8284,0.7273,0.6667,0.6957,0.5167,0.5179
4,0.6393,0.7643,0.3333,0.4667,0.3889,0.143,0.1471
5,0.8361,0.8857,0.6667,0.8235,0.7368,0.6197,0.627
6,0.7541,0.8226,0.6667,0.6364,0.6512,0.4614,0.4617
7,0.7213,0.8036,0.4762,0.625,0.5405,0.3457,0.3523
8,0.7705,0.8274,0.5714,0.7059,0.6316,0.4676,0.4731
9,0.6885,0.7929,0.7143,0.5357,0.6122,0.3607,0.3712


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7581,0.8875,0.5,0.7333,0.5946,0.4308,0.4469
1,0.7903,0.8443,0.6364,0.7368,0.6829,0.5275,0.5307
2,0.7581,0.7352,0.4091,0.8182,0.5455,0.4046,0.4497
3,0.8226,0.8591,0.7273,0.7619,0.7442,0.6085,0.6089
4,0.6885,0.7583,0.4286,0.5625,0.4865,0.2688,0.2739
5,0.7705,0.8774,0.5238,0.7333,0.6111,0.4547,0.4676
6,0.7541,0.8571,0.619,0.65,0.6341,0.4491,0.4494
7,0.7377,0.7786,0.4762,0.6667,0.5556,0.3768,0.3875
8,0.8033,0.8762,0.619,0.7647,0.6842,0.5436,0.55
9,0.7541,0.7917,0.5714,0.6667,0.6154,0.4362,0.439


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.7403,0.8374,0.537,0.6591,0.5918,0.4043,0.4088


In [4]:
#import numpy as np
tune_grid['C'] = np.logspace(-3,2,20).tolist()
tuned_model = s.tune_model(model, **config['clf']['tuning_param'], custom_grid=tune_grid)



NameError: name 'np' is not defined

In [None]:
end_time = time.perf_counter()

print(f"Time to run the whole notebook: {int(round(end_time-start_time, 0))} seconds")
print(f"Time to run the whole notebook: {round((end_time-start_time)/60, 1)} minutes")

In [None]:

best_run.data.tags['mlflow.runName']
mlflow.get_best_run_by_metric()

In [None]:
mlflow.log_tag('Best model run name', mlflow.get_run_name(best_run))
mlflow.log_tag('model', mlflow.get_model_tag(best_run))
mlflow.log_tag('Best model run id', best_run.info.run_id)

---

### Notice 
Following cells until end of section (i.e. section 3.0) contains experimental code that will not be run.

In [None]:
s.get_leaderboard()

In [None]:
### Following shows which are models are natively available in the pycaret library
# It is possible to add estimators
all_models = models()
display(all_models)

In [None]:
# Need to define param that should be explored, define which method e.g. grid_search vs random vs optuna
# default search method: random grid search
# Todo: lookup default search range parameters

# uses the best model to optimze
#tuned = tune_model(clf, optimize='Accuracy', n_iter=10)