# Step 4: Create models with SD

In [1]:
import pandas as pd 
import os 
import sys 
import pickle


from sklearn.metrics import (classification_report, 
                             roc_auc_score, 
                             matthews_corrcoef,
                             cohen_kappa_score)
sys.path.append('../src')
from utils import (getExperimentConfig, 
                   getPicklesFromDir, 
                   run_pycaret_setup, 
                   translate_model_name,
                   get_synthetic_filepaths_from_original_data_id)

from mlflow_manager import MLFlowManager

# Get global variables for the experiment
config = getExperimentConfig()
# Get folders
folders = config['folders']
# Load dataset specific settings (from the real-data)
dataset_settings = getPicklesFromDir(folders['settings_dir'])

In [2]:
settings = dataset_settings[0]

original_data = pd.read_csv(f"{folders['real_dir']}{settings['meta']['filename']}")

synthetic_datasets = get_synthetic_filepaths_from_original_data_id(settings['meta']['id'])

sd_filename  = synthetic_datasets[0]

sd_id = os.path.splitext(sd_filename)[0]
sd_path = folders['sd_dir']+sd_filename

s = run_pycaret_setup(sd_path, settings['setup_param'])

ml_model = 'lr'

display(f"Creating {ml_model}")
model = s.create_model(ml_model)
display(f"Tuning the model")
tuned_model = s.tune_model(model, **config['clf']['tuning_param']) 

# get validation results
val_df = s.pull()
val_score = {}
val_score['val_accuracy'] = val_df['Accuracy']['Mean']
val_score['val_f1-score'] = val_df['F1']['Mean']

display(f"Predict on the original data")
holdout_df = s.predict_model(estimator=tuned_model, data=original_data.drop(columns=['Outcome']))

# test the model on the holdout-data
#y_pred = tuned_model.predict(x_test_transformed)
#metrics =  classification_report(y_true=y_test, y_pred=y_pred, output_dict=True, digits=4)
display(holdout_df)

'Creating lr'

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7258,0.7677,0.6562,0.7778,0.7119,0.4539,0.4599
1,0.7419,0.8188,0.7188,0.7667,0.7419,0.4844,0.4854
2,0.7581,0.7417,0.8125,0.7429,0.7761,0.5141,0.5166
3,0.6613,0.7302,0.6875,0.6667,0.6769,0.3212,0.3213
4,0.6885,0.7575,0.7188,0.697,0.7077,0.3745,0.3747
5,0.6885,0.7834,0.75,0.6857,0.7164,0.3725,0.3743
6,0.6393,0.7931,0.75,0.6316,0.6857,0.2699,0.2754
7,0.6885,0.8473,0.6774,0.7,0.6885,0.3772,0.3774
8,0.6393,0.6903,0.5806,0.6667,0.6207,0.28,0.2825
9,0.5574,0.6376,0.4194,0.5909,0.4906,0.1188,0.1243


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

'Tuning the model'

Fitting 10 folds for each of 2 candidates, totalling 20 fits


'Predict on the original data'

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,prediction_label,prediction_score
0,6,148,72,35,0,33.599998,0.627,50,1,0.5937
1,1,85,66,29,0,26.600000,0.351,31,1,0.7490
2,8,183,64,0,0,23.299999,0.672,32,0,0.6384
3,1,89,66,23,94,28.100000,0.167,21,1,0.8755
4,0,137,40,35,168,43.099998,2.288,33,1,0.8989
...,...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.900002,0.171,63,1,0.8499
764,2,122,70,27,0,36.799999,0.340,27,1,0.7435
765,5,121,72,23,112,26.200001,0.245,30,1,0.8037
766,1,126,60,0,0,30.100000,0.349,47,1,0.5116


In [3]:
y_true = original_data[settings['meta']['target']]
x_test = original_data.drop(columns=[settings['meta']['target']])
y_pred = s.predict_model(estimator=tuned_model, data=x_test)
y_pred_score = y_pred['prediction_score']
y_pred = y_pred['prediction_label']

metrics =  classification_report(y_true=y_true, y_pred=y_pred, output_dict=True, digits=4)
display(metrics)

{'0': {'precision': 0.6028708133971292,
  'recall': 0.252,
  'f1-score': 0.3554301833568406,
  'support': 500},
 '1': {'precision': 0.33094812164579607,
  'recall': 0.6902985074626866,
  'f1-score': 0.44740024183796856,
  'support': 268},
 'accuracy': 0.4049479166666667,
 'macro avg': {'precision': 0.4669094675214627,
  'recall': 0.4711492537313433,
  'f1-score': 0.4014152125974046,
  'support': 768},
 'weighted avg': {'precision': 0.5079811240880703,
  'recall': 0.4049479166666667,
  'f1-score': 0.3875239016809842,
  'support': 768}}

In [4]:
thi = pd.DataFrame.from_dict(metrics).transpose()
display(thi)

from sklearn.metrics import roc_auc_score

auc = roc_auc_score(y_true=y_true, y_score=y_pred_score, multi_class='raise')
display(auc)

Unnamed: 0,precision,recall,f1-score,support
0,0.602871,0.252,0.35543,500.0
1,0.330948,0.690299,0.4474,268.0
accuracy,0.404948,0.404948,0.404948,0.404948
macro avg,0.466909,0.471149,0.401415,768.0
weighted avg,0.507981,0.404948,0.387524,768.0


0.5210671641791045

In [2]:
# TODO: consider testing, and consider extracting hyperparameters, consider how to summarize
for settings in dataset_settings:
    # update system_log name
    settings['setup_param']['system_log'] = folders['log_dir']+"Step4_SD"
    # disable saving train-test split data (to save space)
    settings['setup_param']['log_data'] = False
    
    # Get experiment logging
    experiment_name = f"{settings['meta']['id']}-{settings['meta']['name']}"
    mlflow = MLFlowManager(experiment_name)
    
    # no need to extract from mlflow the test_data, it is no longer relevant
    #test_data = mlflow.get_test_holdout_data()  
    original_data = pd.read_csv(f"{folders['real_dir']}{settings['meta']['filename']}")
    y_true = original_data[settings['meta']['target']]
    x_test = original_data.drop(columns=[settings['meta']['target']])

    logg_tags = {'Dataset Type': 'synthetic'}
    mlflow.start_run('Synthetic data models', tags=logg_tags)
    
    synthetic_datasets = get_synthetic_filepaths_from_original_data_id(settings['meta']['id'])

    for sd_filename in synthetic_datasets:
        
        sd_id = os.path.splitext(sd_filename)[0]
        sd_path = folders['sd_dir']+sd_filename
        
        s = run_pycaret_setup(sd_path, settings['setup_param'])
        USI = s.get_config('USI')
        
        # Add custom tags to the logg, defining dataset type, and Id
        logg_tags = {
            'Dataset Type': 'synthetic',
            'Dataset ID': sd_id,
            'USI': USI
        }
        run_name = f"{sd_id}-models"
        mlflow.start_run(run_name, tags=logg_tags, nested=True)
        
        
        for ml_model in config['clf']['ml_models']:
            #start log run
            model_name = f"{sd_id}-{translate_model_name(ml_model)}"
            mlflow.start_run(model_name, tags=logg_tags, nested=True)
            
            # create & tune model
            model = s.create_model(ml_model)
            tuned_model = s.tune_model(model, **config['clf']['tuning_param']) 
        
            # get validation results
            val_df = s.pull()
            val_score = {}
            val_score['val_Accuracy'] = val_df['Accuracy']['Mean']
            val_score['val_F1-score'] = val_df['F1']['Mean']


            pred_model = s.predict_model(estimator=tuned_model, data=x_test)
            y_pred = pred_model['prediction_label']

            metrics =  classification_report(y_true=y_true, y_pred=y_pred, output_dict=True, digits=4)
            test_score = pd.DataFrame.from_dict(metrics).transpose()
            
            test_metrics = {
                "Accuracy": metrics['accuracy'],
                "F1-score": metrics['macro avg']['f1-score'],
                "MCC": matthews_corrcoef(y_true=y_true, y_pred=y_pred),
                "Kappa": cohen_kappa_score(y1=y_true, y2=y_pred)
            }

            # If there is a prediction_score in the from predict_model
            if 'prediction_score' in pred_model.columns:
                y_pred_score = pred_model['prediction_score']
                # compute AUC
                if original_data[settings['meta']['target']].nunique() > 2:
                    m_class = 'ovo'
                else:
                    m_class = 'raise'
                test_metrics['AUC'] = roc_auc_score(y_true=y_true, y_score=y_pred_score, multi_class=m_class)


            # log parameters     
            mlflow.log_params(tuned_model.get_params())
            # log performance
            mlflow.log_metric_report(test_metrics)
            mlflow.log_metrics(val_score)
            mlflow.log_score_report_to_html(val_df, "Validation")
            mlflow.log_score_report_to_html(test_score, "Test_score")
            # log model
            mlflow.log_model(model=tuned_model)
            # end logging run for the model
            mlflow.end_run()
            
        # end logging run for SD_id
        mlflow.end_run()

    # end logging for the sd_meta synthetic dataset
    mlflow.end_run()          

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6935,0.7385,0.6562,0.7241,0.6885,0.3884,0.3902
1,0.7097,0.7292,0.7188,0.7188,0.7188,0.4188,0.4188
2,0.6129,0.699,0.5938,0.6333,0.6129,0.2266,0.2271
3,0.6129,0.7125,0.5938,0.6333,0.6129,0.2266,0.2271
4,0.7049,0.757,0.8387,0.6667,0.7429,0.4071,0.422
5,0.6393,0.7097,0.6452,0.6452,0.6452,0.2785,0.2785
6,0.7049,0.7441,0.6129,0.76,0.6786,0.4116,0.4197
7,0.6557,0.7371,0.6562,0.6774,0.6667,0.3109,0.3111
8,0.7049,0.7974,0.7188,0.7188,0.7188,0.4084,0.4084
9,0.6557,0.7381,0.6875,0.6667,0.6769,0.3087,0.3089


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6935,0.7385,0.6562,0.7241,0.6885,0.3884,0.3902
1,0.7097,0.7292,0.7188,0.7188,0.7188,0.4188,0.4188
2,0.6129,0.699,0.5938,0.6333,0.6129,0.2266,0.2271
3,0.6129,0.7125,0.5938,0.6333,0.6129,0.2266,0.2271
4,0.7049,0.757,0.8387,0.6667,0.7429,0.4071,0.422
5,0.6393,0.7097,0.6452,0.6452,0.6452,0.2785,0.2785
6,0.7049,0.7441,0.6129,0.76,0.6786,0.4116,0.4197
7,0.6557,0.7371,0.6562,0.6774,0.6667,0.3109,0.3111
8,0.7049,0.7974,0.7188,0.7188,0.7188,0.4084,0.4084
9,0.6557,0.7381,0.6875,0.6667,0.6769,0.3087,0.3089


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6935,0.7385,0.6562,0.7241,0.6885,0.3884,0.3902
1,0.7097,0.7292,0.7188,0.7188,0.7188,0.4188,0.4188
2,0.6129,0.7,0.5938,0.6333,0.6129,0.2266,0.2271
3,0.6129,0.7125,0.5938,0.6333,0.6129,0.2266,0.2271
4,0.7049,0.757,0.8387,0.6667,0.7429,0.4071,0.422
5,0.6393,0.7097,0.6452,0.6452,0.6452,0.2785,0.2785
6,0.7049,0.7441,0.6129,0.76,0.6786,0.4116,0.4197
7,0.6557,0.7371,0.6562,0.6774,0.6667,0.3109,0.3111
8,0.7049,0.7963,0.7188,0.7188,0.7188,0.4084,0.4084
9,0.6557,0.7381,0.6875,0.6667,0.6769,0.3087,0.3089


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Description,Value
0,Session id,3836
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Description,Value
0,Session id,3836
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5323,0.6719,0.6333,0.5135,0.5672,0.0703,0.0722
1,0.629,0.649,0.4667,0.6667,0.549,0.2503,0.2618
2,0.6774,0.8365,0.4333,0.8125,0.5652,0.3446,0.3878
3,0.6774,0.7156,0.5,0.75,0.6,0.3474,0.3675
4,0.6721,0.7194,0.6333,0.6786,0.6552,0.3434,0.3441
5,0.9016,0.9065,0.9333,0.875,0.9032,0.8034,0.8052
6,0.7213,0.8215,0.7,0.7241,0.7119,0.4422,0.4424
7,0.7541,0.8179,0.6207,0.8182,0.7059,0.5014,0.5155
8,0.6885,0.7177,0.5862,0.7083,0.6415,0.3705,0.3756
9,0.6721,0.7134,0.6207,0.6667,0.6429,0.3405,0.3413


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5323,0.6719,0.6333,0.5135,0.5672,0.0703,0.0722
1,0.629,0.65,0.4667,0.6667,0.549,0.2503,0.2618
2,0.6774,0.8396,0.4333,0.8125,0.5652,0.3446,0.3878
3,0.6774,0.7167,0.5,0.75,0.6,0.3474,0.3675
4,0.6721,0.7204,0.6333,0.6786,0.6552,0.3434,0.3441
5,0.9016,0.9065,0.9333,0.875,0.9032,0.8034,0.8052
6,0.7213,0.8204,0.7,0.7241,0.7119,0.4422,0.4424
7,0.7541,0.8179,0.6207,0.8182,0.7059,0.5014,0.5155
8,0.6885,0.7198,0.5862,0.7083,0.6415,0.3705,0.3756
9,0.6885,0.7123,0.6207,0.6923,0.6545,0.3725,0.3743


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Description,Value
0,Session id,5772
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6935,0.7048,0.5714,0.6957,0.6275,0.3714,0.3766
1,0.6613,0.7332,0.6071,0.6296,0.6182,0.314,0.3142
2,0.6613,0.6702,0.4643,0.6842,0.5532,0.2962,0.3107
3,0.7097,0.7658,0.5714,0.7273,0.64,0.4026,0.4108
4,0.7213,0.7723,0.6667,0.6923,0.6792,0.433,0.4333
5,0.623,0.6797,0.3929,0.6471,0.4889,0.2175,0.2346
6,0.7869,0.855,0.75,0.7778,0.7636,0.5697,0.57
7,0.7049,0.7911,0.6429,0.6923,0.6667,0.4026,0.4035
8,0.6885,0.7413,0.6429,0.6667,0.6545,0.3711,0.3713
9,0.6721,0.7143,0.6071,0.6538,0.6296,0.3362,0.337


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.629,0.7048,0.6071,0.5862,0.5965,0.2534,0.2535
1,0.6935,0.73,0.7143,0.6452,0.678,0.3871,0.3889
2,0.6613,0.6691,0.5357,0.6522,0.5882,0.3052,0.3095
3,0.7258,0.77,0.6429,0.72,0.6792,0.4411,0.4433
4,0.7213,0.7723,0.7778,0.6562,0.7119,0.4458,0.4518
5,0.6557,0.6818,0.4643,0.6842,0.5532,0.2895,0.304
6,0.8525,0.855,0.9286,0.7879,0.8525,0.7069,0.7165
7,0.6721,0.7922,0.7143,0.625,0.6667,0.3469,0.3499
8,0.7213,0.7446,0.75,0.6774,0.7119,0.4434,0.4455
9,0.6557,0.7186,0.7143,0.6061,0.6557,0.3161,0.3203


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Description,Value
0,Session id,320
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Description,Value
0,Session id,320
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8871,0.9582,0.8485,0.9333,0.8889,0.7747,0.7783
1,0.871,0.9342,0.9091,0.8571,0.8824,0.7398,0.7413
2,0.9032,0.977,0.9394,0.8857,0.9118,0.8048,0.8065
3,0.8387,0.926,0.8438,0.8438,0.8438,0.6771,0.6771
4,0.8525,0.9052,0.875,0.8485,0.8615,0.7037,0.7041
5,0.7377,0.8987,0.7188,0.7667,0.7419,0.4758,0.4769
6,0.8361,0.9116,0.8125,0.8667,0.8387,0.6724,0.6738
7,0.7869,0.8825,0.7812,0.8065,0.7937,0.5734,0.5737
8,0.8197,0.8675,0.8125,0.8387,0.8254,0.6391,0.6394
9,0.8689,0.9407,0.875,0.875,0.875,0.7371,0.7371


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Description,Value
0,Session id,3928
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Description,Value
0,Session id,3928
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.7771,0.6875,0.7333,0.7097,0.42,0.4208
1,0.6774,0.6844,0.7188,0.6765,0.697,0.3528,0.3536
2,0.8387,0.8312,0.8438,0.8438,0.8438,0.6771,0.6771
3,0.7581,0.8448,0.6875,0.8148,0.7458,0.5181,0.525
4,0.7049,0.814,0.6452,0.7407,0.6897,0.4109,0.4145
5,0.8361,0.9,0.871,0.8182,0.8438,0.6717,0.6731
6,0.6721,0.7946,0.6452,0.6897,0.6667,0.3448,0.3455
7,0.5902,0.6455,0.625,0.6061,0.6154,0.177,0.1771
8,0.7541,0.8502,0.8438,0.7297,0.7826,0.503,0.51
9,0.8033,0.8308,0.7812,0.8333,0.8065,0.6069,0.6082


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.776,0.6875,0.7333,0.7097,0.42,0.4208
1,0.6452,0.6802,0.6562,0.6562,0.6562,0.2896,0.2896
2,0.8387,0.8312,0.8438,0.8438,0.8438,0.6771,0.6771
3,0.7742,0.8448,0.6875,0.8462,0.7586,0.5507,0.5612
4,0.7213,0.814,0.6452,0.7692,0.7018,0.444,0.45
5,0.8361,0.8978,0.8387,0.8387,0.8387,0.672,0.672
6,0.6885,0.7957,0.6452,0.7143,0.678,0.3779,0.3797
7,0.5738,0.6466,0.5938,0.5938,0.5938,0.1455,0.1455
8,0.8033,0.8502,0.8438,0.7941,0.8182,0.6043,0.6056
9,0.8033,0.833,0.7812,0.8333,0.8065,0.6069,0.6082


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7097,0.776,0.6875,0.7333,0.7097,0.42,0.4208
1,0.6452,0.6802,0.6562,0.6562,0.6562,0.2896,0.2896
2,0.8387,0.8312,0.8438,0.8438,0.8438,0.6771,0.6771
3,0.7742,0.8448,0.6875,0.8462,0.7586,0.5507,0.5612
4,0.7213,0.814,0.6452,0.7692,0.7018,0.444,0.45
5,0.8361,0.8978,0.8387,0.8387,0.8387,0.672,0.672
6,0.6885,0.7957,0.6452,0.7143,0.678,0.3779,0.3797
7,0.5738,0.6466,0.5938,0.5938,0.5938,0.1455,0.1455
8,0.8033,0.8502,0.8438,0.7941,0.8182,0.6043,0.6056
9,0.8033,0.833,0.7812,0.8333,0.8065,0.6069,0.6082


Fitting 10 folds for each of 2 candidates, totalling 20 fits


Unnamed: 0,Description,Value
0,Session id,5133
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Description,Value
0,Session id,5133
1,Target,Outcome
2,Target type,Binary
3,Original data shape,"(768, 9)"
4,Transformed data shape,"(768, 9)"
5,Transformed train set shape,"(614, 9)"
6,Transformed test set shape,"(154, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8871,0.9458,0.875,0.9032,0.8889,0.7742,0.7746
1,0.8871,0.9531,0.8438,0.931,0.8852,0.7747,0.7783
2,0.8871,0.9365,0.9062,0.8788,0.8923,0.7737,0.7741
3,0.8065,0.9115,0.8438,0.7941,0.8182,0.6117,0.613
4,0.9016,0.9753,0.9677,0.8571,0.9091,0.8028,0.8098
5,0.8689,0.943,0.9032,0.8485,0.875,0.7374,0.7389
6,0.9344,0.9752,0.9375,0.9375,0.9375,0.8685,0.8685
7,0.8852,0.958,0.9688,0.8378,0.8986,0.7681,0.7788
8,0.9016,0.9677,0.875,0.9333,0.9032,0.8034,0.8052
9,0.8361,0.9537,0.7188,0.9583,0.8214,0.6755,0.6995


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Fitting 10 folds for each of 2 candidates, totalling 20 fits


In [None]:
df=s.predict_model(estimator=tuned_model, data=x_test)
display(df)
if 'prediction_score' in df.columns:
    print("yes")
if 'prediction_label' in df.columns:
    print("yesyes")


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,prediction_label
0,6,148,72,35,0,33.599998,0.627,50,1
1,1,85,66,29,0,26.600000,0.351,31,1
2,8,183,64,0,0,23.299999,0.672,32,0
3,1,89,66,23,94,28.100000,0.167,21,1
4,0,137,40,35,168,43.099998,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.900002,0.171,63,1
764,2,122,70,27,0,36.799999,0.340,27,1
765,5,121,72,23,112,26.200001,0.245,30,1
766,1,126,60,0,0,30.100000,0.349,47,1


yesyes
