# Ensemble learning
Notebook to explore improvement in performance of building custom-made ensemble learners. The workflow is the following:
1. Build-up of the individual learners and performance evaluation
- LighGBM
- XGBoost
- Random forest
- (Maybe) lasso regression
In this part, we will also include the resampling of data performing upsampling + downsampling

2. Study on how to ensemble them together for performance optimization
- Hard voting (including predictive threshold performance optimization for all of them)
- Soft voting (with a posterior predictive threshold optimization)
- Stacking



In [50]:
import os
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBClassifier, plot_importance
from tqdm import tqdm
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
from sklearn.ensemble import VotingClassifier
import lightgbm as lgbm

from auxFuns.EDA import *
from auxFuns.modelling import *


In [49]:
import auxFuns.modelling
importlib.reload(auxFuns.modelling)

<module 'auxFuns.modelling' from 'c:\\Users\\angel\\Documents\\VSCode\\rsv_modelling_transfer_learning\\auxFuns\\modelling.py'>

In [4]:
raw_datasets_path = os.getcwd() + '/datasets/raw'
processed_datasets_path = os.getcwd() + '/datasets/processed'

rsv_predictors_df_v2 = pd.read_csv(processed_datasets_path + '/rsv_predictors_phase1_daysDedup_seasons_prevTest.csv',low_memory=False)
rsv_predictors_df_v2 = make_it_categorical_v2(rsv_predictors_df_v2)

rsv_predictors_df_v2.shape

# summary_function_rsv(rsv_predictors_df_v2)

# Extract a reduced sample of the data for modelling
# sample_size = 80000
# sample_v2_df = rsv_predictors_df_v2.sample(n = sample_size, random_state=42)

sample_v2_df = rsv_predictors_df_v2

In [5]:
selected_features = ['sex', 'marital_status', 'race','patient_regional_location', 'age_group',
                     'Acute_upper_respiratory_infection','Influenza','Pneumonia','Bronchitis','Symptoms_and_signs__digestive_system_and_abdomen','General_symptoms_and_signs','any_symptom',
                     'COPD','AIDS','Asthma_chronic','CCI',
                     'sine','cosine','calendar_year', 
                     'healthcare_seeking', 'influenza_vaccine',
                     'n_symptoms','prev_positive_rsv','previous_test_daydiff','n_immunodeficiencies', 
                     'tumor_indicator','tumor_last_year',
                     'season',
                     'n_tests_that_day']
# selected_features = ['sex', 'marital_status', 'race', 'patient_regional_location', 'age_group',
#                      'Acute_upper_respiratory_infection','Influenza','Pneumonia','Bronchitis','Symptoms_and_signs__digestive_system_and_abdomen','General_symptoms_and_signs','any_symptom',
#                      'COPD','AIDS','Asthma_chronic','CCI',
#                      'sine','cosine','calendar_year', 
#                      'healthcare_seeking', 'influenza_vaccine',
#                      'n_symptoms','prev_positive_rsv','previous_test_daydiff','n_immunodeficiencies', 
#                      'tumor_indicator','tumor_last_year']
selected_features.append('RSV_test_result')

In [6]:
df1 = sample_v2_df[selected_features]

input_test_size = 0.2
random_seed = 42

X_train, y_train, X_test, y_test, sample_weights, preprocessor_rsv = preprocess_and_resample_rsv(
    df1, input_test_size = input_test_size, random_seed = random_seed, resampling_technique = "downsample_upweight", ratio_maj_min = 0.7)

Resampling method chosen:

Downsampling and Upweighting


# 0. Study resampling techniques (WIP)

# 1. Build-up of the models

### 1.1. XGboost
- Train it using the previous approach (GridSearchCV)
- Train it using Bayesian parameter optimization

##### Approach 1: GridSearch CV

In [None]:
# Approach 1: GridSearch CV
random_seed = 42
cost_sensitive = True

if cost_sensitive:
    weight_dict = {"Negative": 1,
                   "Positive": 50}
    scale_pos_weight = weight_dict["Positive"]/weight_dict["Negative"]  # Use scale_pos_weight parameter
    model_class = XGBClassifier(scale_pos_weight=scale_pos_weight,
                                random_state=random_seed)
else:
    model_class = XGBClassifier(random_state=random_seed)

param_grid = {
    'n_estimators': range(20,205,15),
    'max_depth': range(5,30,1),
    'learning_rate': np.arange(0.01, 0.51, 0.05),
    'min_child_weight': np.arange(1, 11, 1), 
    'gamma': np.arange(0.1, 0.5, 0.1) 
}

target_scorer = make_scorer(f1_score, average='macro')
n_cv_folds = 5

# XGBoost needs labels in numeric format
y_train_numeric = [1 if label == "Positive" else 0 for label in y_train]

model1_xgb = train_model_rsv(model = model_class, param_grid = param_grid, target_scorer = target_scorer, n_cv_folds = n_cv_folds,
                    X_train = X_train, y_train = y_train_numeric)

optimal_threshold = find_optimal_moving_threshold(model = model1_xgb, X_test = X_test, y_test = y_test)
__,__,__,__,__,__,f1, __ = calculate_performance_metrics_rsv(trained_model = model1_xgb, X_test = X_test, y_test = y_test,
                                                         threshold = optimal_threshold, 
                                                         print_roc = False)

##### Approach 2: Bayesian hyperparameter optimization

In [16]:
class XGBoostClassifier_custom:

    def __init__(self, scoring, max_evals, cost_sensitive_yn, sample_weights):
        self.scoring = scoring
        self.max_evals = max_evals
        self.cost_sensitive = cost_sensitive_yn
        self.sample_weights = sample_weights
        self.best = None
        self.model = None
        self.score_f1 = None
        self.score_auc = None
        self.n_estimators_choice = list(range(20, 205, 25))

        

    def objective(self, space):
        classifier = XGBClassifier(n_estimators = int(space['n_estimators']),
                                    max_depth = int(space['max_depth']),
                                    learning_rate = space['learning_rate'],
                                    gamma = space['gamma'],
                                    min_child_weight = space['min_child_weight'],
                                    subsample = space['subsample'],
                                    colsample_bytree = space['colsample_bytree'],
                                    )
        # classifier.fit(self.X_train, self.y_train, early_stopping_rounds = space['early_stopping_rounds'], eval_metric = 'logloss',  eval_set=eval_set, verbose=True)
        if self.cost_sensitive:
            classifier.fit(self.X_train, self.y_train, sample_weight = self.sample_weights)
        else: 
            classifier.fit(self.X_train, self.y_train)

        Scores = cross_val_score(estimator = classifier, X = self.X_train, y = self.y_train, cv = 10, scoring=self.scoring)
        score = Scores.mean()
        loss = 1-score
        return {'loss': loss, 'status': STATUS_OK}

    def train(self, X_train, y_train):
        print('--------------------------------------------------------------------')
        print(f'Training XGBoost classifier with objective metric: {self.scoring}')
        self.X_train = X_train
        self.y_train = y_train

        self.space = {
        'max_depth' : hp.quniform('max_depth',  5, 21, 5),
        'learning_rate' : hp.uniform('learning_rate', 0.010, 0.200),
        'n_estimators' : hp.choice('n_estimators', self.n_estimators_choice),
        'gamma' : hp.quniform('gamma', 0, 0.50, 0.1),
        'min_child_weight' : hp.quniform('min_child_weight', 1, 10, 2),
        'subsample' : hp.quniform('subsample', 0.6, 1, 0.1),
        'colsample_bytree' : hp.quniform('colsample_bytree', 0.6, 1.0, 0.1),
        'early_stopping_rounds': 100
        }

        trials = Trials()
        print("Tuning Hyperparameters ...")
        self.best = fmin(fn=self.objective,
                    space=self.space,
                    algo=tpe.suggest,
                    max_evals=self.max_evals,
                    trials=trials)
        print("Best Hyperparameters: ", self.best)
        self.fit_model()

    def fit_model(self):
        self.model = XGBClassifier(n_estimators = self.n_estimators_choice[self.best['n_estimators']],
                                max_depth = int(self.best['max_depth']),
                                learning_rate = self.best['learning_rate'],
                                gamma = self.best['gamma'],
                                min_child_weight = self.best['min_child_weight'],
                                subsample = self.best['subsample'],
                                colsample_bytree = self.best['colsample_bytree'], 
                                verbose = True
                                )
        # self.model.fit(self.X_train, self.y_train, early_stopping_rounds = self.space['early_stopping_rounds'], eval_metric = 'logloss')
        if self.cost_sensitive:
            self.model.fit(self.X_train, self.y_train, sample_weight = self.sample_weights)
        else: 
            self.model.fit(self.X_train, self.y_train)

        print('XGBoostClassifier Performance:')

        # Scores_f1 = cross_val_score(estimator = self.model, X = self.X_train, y = self.y_train, cv = 10, scoring='f1')
        # self.score_f1 = Scores_f1.mean()
        # print("Train Set 10-Fold F1-Score: ", self.score)

        # Scores_auc = cross_val_score(estimator = self.model, X = self.X_train, y = self.y_train, cv = 10, scoring='roc_auc')
        # self.score_auc = Scores_auc.mean()
        # print("Train Set 10-Fold F1-Score: ", self.score)

    def predict(self, X_test, y_test):

        optimal_threshold = find_optimal_moving_threshold(model = self.model, X_test = X_test, y_test = y_test)
        self.score_auc,__,__,__,__,__,self.score_f1 = calculate_performance_metrics_rsv(trained_model = self.model, X_test = X_test, y_test = y_test,
                                                         threshold = optimal_threshold, 
                                                         print_roc = False)

In [17]:
# Approach 2: train the model using bayesian hyperparameter optimization

# Scorings = ['accuracy', 'balanced_accuracy', 'f1', 'f1_micro', 'f1_macro', 'f1_weighted', 'precision', 'recall', 'roc_auc']
scorings = ['balanced_accuracy', 'f1', 'f1_micro', 'f1_macro', 'f1_weighted', 'precision', 'recall', 'roc_auc']
y_train_numeric = [1 if label == 'Positive' else 0 for label in y_train]

best_models_xgb = {}

for score in tqdm(scorings):
    classifier = XGBoostClassifier_custom(scoring=score, max_evals=12, sample_weights = sample_weights, cost_sensitive_yn = True)
    classifier.train(X_train, y_train_numeric)
    classifier.predict(X_test, y_test)
    best_models_xgb[score] = {'model': classifier.model, 'score_f1': classifier.score_f1, 'score_auc': classifier.score_auc}
    

  0%|          | 0/8 [00:00<?, ?it/s]

--------------------------------------------------------------------
Training XGBoost classifier with objective metric: balanced_accuracy
Tuning Hyperparameters ...
100%|██████████| 12/12 [02:24<00:00, 12.01s/trial, best loss: 0.28366882607512856]
Best Hyperparameters:  {'colsample_bytree': 0.9, 'gamma': 0.30000000000000004, 'learning_rate': 0.1265924625201866, 'max_depth': 5.0, 'min_child_weight': 2.0, 'n_estimators': 4, 'subsample': 0.8}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.6900000000000001
Optimal f1: 0.35348837209302325




 12%|█▎        | 1/8 [02:27<17:09, 147.14s/it]

AUC Score: 0.7824106296833311
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: f1
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:47<00:00,  3.98s/trial, best loss: 0.22202706374262815]
Best Hyperparameters:  {'colsample_bytree': 0.8, 'gamma': 0.4, 'learning_rate': 0.02916068966412473, 'max_depth': 5.0, 'min_child_weight': 6.0, 'n_estimators': 7, 'subsample': 0.6000000000000001}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.5700000000000001
Optimal f1: 0.35348837209302325




 25%|██▌       | 2/8 [03:19<09:09, 91.64s/it] 

AUC Score: 0.7900250981895682
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: f1_micro
Tuning Hyperparameters ...
100%|██████████| 12/12 [01:09<00:00,  5.80s/trial, best loss: 0.2687734687596184]
Best Hyperparameters:  {'colsample_bytree': 0.8, 'gamma': 0.5, 'learning_rate': 0.07713572310730138, 'max_depth': 5.0, 'min_child_weight': 6.0, 'n_estimators': 4, 'subsample': 0.6000000000000001}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.56
Optimal f1: 0.3553826199740596




 38%|███▊      | 3/8 [04:32<06:54, 82.89s/it]

AUC Score: 0.7826644227207538
Precision / Positive predictive value: 0.5708333333333333
Specificity: 0.9938253102331994
Recall / sensitivity: 0.2580037664783427
Negative predictive value: 0.9767852934244639
Accuracy: 0.9711247966534976
F-1: 0.3553826199740596
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: f1_macro
Tuning Hyperparameters ...
100%|██████████| 12/12 [01:07<00:00,  5.61s/trial, best loss: 0.28517776352626956]
Best Hyperparameters:  {'colsample_bytree': 1.0, 'gamma': 0.0, 'learning_rate': 0.08936245821508768, 'max_depth': 5.0, 'min_child_weight': 6.0, 'n_estimators': 6, 'subsample': 0.9}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.6900000000000001
Optimal f1: 0.35348837209302325




 50%|█████     | 4/8 [05:42<05:11, 77.91s/it]

AUC Score: 0.7872069568193951
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: f1_weighted
Tuning Hyperparameters ...
100%|██████████| 12/12 [01:44<00:00,  8.72s/trial, best loss: 0.27517739608677394]
Best Hyperparameters:  {'colsample_bytree': 1.0, 'gamma': 0.0, 'learning_rate': 0.16700661295438024, 'max_depth': 5.0, 'min_child_weight': 8.0, 'n_estimators': 2, 'subsample': 0.9}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.64
Optimal f1: 0.35348837209302325




 62%|██████▎   | 5/8 [07:35<04:31, 90.53s/it]

AUC Score: 0.7760808755317885
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: precision
Tuning Hyperparameters ...
100%|██████████| 12/12 [01:16<00:00,  6.40s/trial, best loss: 0.2515762676686215]
Best Hyperparameters:  {'colsample_bytree': 0.9, 'gamma': 0.30000000000000004, 'learning_rate': 0.16113752094091888, 'max_depth': 5.0, 'min_child_weight': 2.0, 'n_estimators': 1, 'subsample': 0.8}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.5700000000000001
Optimal f1: 0.34688346883468835




 75%|███████▌  | 6/8 [08:55<02:54, 87.01s/it]

AUC Score: 0.7684212481220952
Precision / Positive predictive value: 0.6183574879227053
Specificity: 0.9952640728973083
Recall / sensitivity: 0.24105461393596986
Negative predictive value: 0.976301087915319
Accuracy: 0.9719962816639554
F-1: 0.34688346883468835
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: recall
Tuning Hyperparameters ...
100%|██████████| 12/12 [01:22<00:00,  6.88s/trial, best loss: 0.17476747276109494]
Best Hyperparameters:  {'colsample_bytree': 0.6000000000000001, 'gamma': 0.0, 'learning_rate': 0.07950538207084898, 'max_depth': 10.0, 'min_child_weight': 4.0, 'n_estimators': 0, 'subsample': 0.7000000000000001}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.0
Optimal f1: 0.059854590542749254




  npv = tn / (tn + fn)
 88%|████████▊ | 7/8 [10:21<01:26, 86.46s/it]

AUC Score: 0.5
Precision / Positive predictive value: 0.030850569370206832
Specificity: 0.0
Recall / sensitivity: 1.0
Negative predictive value: nan
Accuracy: 0.030850569370206832
F-1: 0.059854590542749254
--------------------------------------------------------------------
Training XGBoost classifier with objective metric: roc_auc
Tuning Hyperparameters ...
100%|██████████| 12/12 [01:42<00:00,  8.52s/trial, best loss: 0.20218686602971636]
Best Hyperparameters:  {'colsample_bytree': 0.6000000000000001, 'gamma': 0.5, 'learning_rate': 0.08694305887031364, 'max_depth': 10.0, 'min_child_weight': 10.0, 'n_estimators': 2, 'subsample': 0.9}
Parameters: { "verbose" } are not used.

XGBoostClassifier Performance:
Optimal threshold: 0.58
Optimal f1: 0.2589073634204276




100%|██████████| 8/8 [12:11<00:00, 91.49s/it]

AUC Score: 0.7365478118196882
Precision / Positive predictive value: 0.3504823151125402
Specificity: 0.9878904142437503
Recall / sensitivity: 0.20527306967984935
Negative predictive value: 0.9750310632506952
Accuracy: 0.9637462235649547
F-1: 0.2589073634204276





## 1.2. LightGBM

As the Bayesian hyperparameter has proven more effective, this approach will be followed here too

In [22]:
class LightGBMClassifier_custom:

    def __init__(self, scoring, max_evals, cost_sensitive_yn, sample_weights):
        self.scoring = scoring
        self.max_evals = max_evals
        self.cost_sensitive = cost_sensitive_yn
        self.sample_weights = sample_weights
        self.best = None
        self.model = None
        self.score_f1 = None
        self.score_auc = None
        self.n_estimators_choice = list(range(20, 205, 25))

        
    def objective(self, space):
        classifier = lgbm.LGBMClassifier(n_estimators = space['n_estimators'],
                                    max_depth = int(space['max_depth']),
                                    learning_rate = space['learning_rate'],
                                    min_child_weight = space['min_child_weight'],
                                    subsample = space['subsample'],
                                    colsample_bytree = space['colsample_bytree'],
                                    )
        
        if self.cost_sensitive:
            classifier.fit(self.X_train, self.y_train, sample_weight = self.sample_weights)
        else: 
            classifier.fit(self.X_train, self.y_train)

        Scores = cross_val_score(estimator = classifier, X = self.X_train, y = self.y_train, cv = 10, scoring=self.scoring)
        score = Scores.mean()
        loss = 1-score
        return {'loss': loss, 'status': STATUS_OK}

    def train(self, X_train, y_train):
        print('--------------------------------------------------------------------')
        print(f'Training LightGBM classifier with objective metric: {self.scoring}')
        self.X_train = X_train
        self.y_train = y_train

        self.space = {
        'max_depth' : hp.quniform('max_depth', 5, 21, 5),
        'learning_rate' : hp.uniform('learning_rate', 0.010, 0.200),
        'n_estimators' : hp.choice('n_estimators', self.n_estimators_choice),
        'min_child_weight' : hp.quniform('min_child_weight', 1, 10, 2),
        'subsample' : hp.quniform('subsample', 0.6, 1, 0.1),
        'colsample_bytree' : hp.quniform('colsample_bytree', 0.6, 1.0, 0.1),
        }

        trials = Trials()
        print("Tuning Hyperparameters ...")
        self.best = fmin(fn=self.objective,
                    space=self.space,
                    algo=tpe.suggest,
                    max_evals=self.max_evals,
                    trials=trials)
        print("Best Hyperparameters: ", self.best)
        self.fit_model()

    def fit_model(self):
        self.model = lgbm.LGBMClassifier(n_estimators = self.n_estimators_choice[self.best['n_estimators']],
                                max_depth = int(self.best['max_depth']),
                                learning_rate = self.best['learning_rate'],
                                min_child_weight = self.best['min_child_weight'],
                                subsample = self.best['subsample'],
                                colsample_bytree = self.best['colsample_bytree']
                                )

        if self.cost_sensitive:
            self.model.fit(self.X_train, self.y_train, sample_weight = self.sample_weights)
        else: 
            self.model.fit(self.X_train, self.y_train)

        print('LightGBMClassifier Performance:')
        # These should be part of the evaluation set, not the training set

        # Scores_f1 = cross_val_score(estimator = self.model, X = self.X_train, y = self.y_train, cv = 10, scoring='f1')
        # self.score_f1 = Scores_f1.mean()
        # print("Train Set 10-Fold F1-Score: ", self.score_f1)

        # Scores_auc = cross_val_score(estimator = self.model, X = self.X_train, y = self.y_train, cv = 10, scoring='roc_auc')
        # self.score_auc = Scores_auc.mean()
        # print("Train Set 10-Fold AUC Score: ", self.score_auc)

    def predict(self, X_test, y_test):       
        optimal_threshold = find_optimal_moving_threshold(model = self.model, X_test = X_test, y_test = y_test)
        self.score_auc,__,__,__,__,__,self.score_f1 = calculate_performance_metrics_rsv(trained_model = self.model, X_test = X_test, y_test = y_test, threshold = optimal_threshold, print_roc = False)


In [23]:
# Scorings = ['accuracy', 'balanced_accuracy', 'f1', 'f1_micro', 'f1_macro', 'f1_weighted', 'precision', 'recall', 'roc_auc']
scorings = ['balanced_accuracy', 'f1', 'f1_micro', 'f1_macro', 'f1_weighted', 'precision', 'recall', 'roc_auc']
y_train_numeric = [1 if label == 'Positive' else 0 for label in y_train]

best_models_lightGBM = {}

for score in tqdm(scorings):
    classifier = LightGBMClassifier_custom(scoring=score, max_evals=12, sample_weights = sample_weights, cost_sensitive_yn = True)
    classifier.train(X_train, y_train_numeric)
    classifier.predict(X_test, y_test)
    best_models_lightGBM[score] = {'model': classifier.model, 'score_f1': classifier.score_f1, 'score_auc': classifier.score_auc}

  0%|          | 0/8 [00:00<?, ?it/s]

--------------------------------------------------------------------
Training LightGBM classifier with objective metric: balanced_accuracy
Tuning Hyperparameters ...


100%|██████████| 12/12 [00:38<00:00,  3.18s/trial, best loss: 0.28736599491354364]
Best Hyperparameters:  {'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.0887891203811311, 'max_depth': 15.0, 'min_child_weight': 8.0, 'n_estimators': 3, 'subsample': 0.9}
LightGBMClassifier Performance:
Optimal threshold: 0.91
Optimal f1: 0.36997319034852544




 12%|█▎        | 1/8 [00:42<04:55, 42.23s/it]

AUC Score: 0.8084914205421756
Precision / Positive predictive value: 0.641860465116279
Specificity: 0.9953839697859841
Recall / sensitivity: 0.2598870056497175
Negative predictive value: 0.9768782726363476
Accuracy: 0.9726934696723216
F-1: 0.36997319034852544
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: f1
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:36<00:00,  3.06s/trial, best loss: 0.21499936157417743]
Best Hyperparameters:  {'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.03730759750586875, 'max_depth': 5.0, 'min_child_weight': 2.0, 'n_estimators': 5, 'subsample': 1.0}
LightGBMClassifier Performance:
Optimal threshold: 0.89
Optimal f1: 0.36607142857142855




 25%|██▌       | 2/8 [01:23<04:08, 41.43s/it]

AUC Score: 0.8140520621192329
Precision / Positive predictive value: 0.8723404255319149
Specificity: 0.9989209280019183
Recall / sensitivity: 0.23163841807909605
Negative predictive value: 0.976099818405483
Accuracy: 0.9752498257029979
F-1: 0.36607142857142855
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: f1_micro
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:30<00:00,  2.53s/trial, best loss: 0.27098645737149885]
Best Hyperparameters:  {'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.038662975722522044, 'max_depth': 20.0, 'min_child_weight': 10.0, 'n_estimators': 3, 'subsample': 0.6000000000000001}
LightGBMClassifier Performance:
Optimal threshold: 0.86
Optimal f1: 0.375




 38%|███▊      | 3/8 [01:57<03:11, 38.29s/it]

AUC Score: 0.8090898324615972
Precision / Positive predictive value: 0.7630057803468208
Specificity: 0.9975421137821473
Recall / sensitivity: 0.24858757062146894
Negative predictive value: 0.9765831328129585
Accuracy: 0.9744364396932372
F-1: 0.375
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: f1_macro
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:26<00:00,  2.17s/trial, best loss: 0.2844078223447395]
Best Hyperparameters:  {'colsample_bytree': 0.8, 'learning_rate': 0.18183202094494266, 'max_depth': 5.0, 'min_child_weight': 8.0, 'n_estimators': 4, 'subsample': 0.9}
LightGBMClassifier Performance:
Optimal threshold: 0.9400000000000001
Optimal f1: 0.3678756476683938




 50%|█████     | 4/8 [02:26<02:18, 34.71s/it]

AUC Score: 0.804457883734113
Precision / Positive predictive value: 0.5892116182572614
Specificity: 0.9940651040105509
Recall / sensitivity: 0.2674199623352166
Negative predictive value: 0.9770785457545225
Accuracy: 0.9716476876597723
F-1: 0.3678756476683938
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: f1_weighted
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:25<00:00,  2.16s/trial, best loss: 0.2706013887579325]
Best Hyperparameters:  {'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.05653146087281452, 'max_depth': 5.0, 'min_child_weight': 6.0, 'n_estimators': 6, 'subsample': 0.7000000000000001}
LightGBMClassifier Performance:
Optimal threshold: 0.93
Optimal f1: 0.363103953147877




 62%|██████▎   | 5/8 [02:55<01:37, 32.65s/it]

AUC Score: 0.8150089792834659
Precision / Positive predictive value: 0.8157894736842105
Specificity: 0.9983214435585397
Recall / sensitivity: 0.2335216572504708
Negative predictive value: 0.9761430246189918
Accuracy: 0.9747269346967232
F-1: 0.363103953147877
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: precision
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:24<00:00,  2.02s/trial, best loss: 0.24761021208655754]
Best Hyperparameters:  {'colsample_bytree': 0.9, 'learning_rate': 0.05391254789268059, 'max_depth': 20.0, 'min_child_weight': 4.0, 'n_estimators': 3, 'subsample': 0.7000000000000001}
LightGBMClassifier Performance:
Optimal threshold: 0.9
Optimal f1: 0.36619718309859156




 75%|███████▌  | 6/8 [03:23<01:01, 30.81s/it]

AUC Score: 0.8124124552320033
Precision / Positive predictive value: 0.7262569832402235
Specificity: 0.9970625262274444
Recall / sensitivity: 0.2448210922787194
Negative predictive value: 0.9764574649210357
Accuracy: 0.9738554496862654
F-1: 0.36619718309859156
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: recall
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:21<00:00,  1.83s/trial, best loss: 0.14461422623793074]
Best Hyperparameters:  {'colsample_bytree': 0.7000000000000001, 'learning_rate': 0.10620377200005032, 'max_depth': 5.0, 'min_child_weight': 4.0, 'n_estimators': 0, 'subsample': 1.0}
LightGBMClassifier Performance:
Optimal threshold: 0.76
Optimal f1: 0.36535859269282817




 88%|████████▊ | 7/8 [03:47<00:28, 28.80s/it]

AUC Score: 0.800020005394231
Precision / Positive predictive value: 0.6490384615384616
Specificity: 0.9956237635633355
Recall / sensitivity: 0.2542372881355932
Negative predictive value: 0.976711362032463
Accuracy: 0.9727515686730188
F-1: 0.36535859269282817
--------------------------------------------------------------------
Training LightGBM classifier with objective metric: roc_auc
Tuning Hyperparameters ...
100%|██████████| 12/12 [00:25<00:00,  2.12s/trial, best loss: 0.194394052027422]
Best Hyperparameters:  {'colsample_bytree': 0.9, 'learning_rate': 0.03507671894512612, 'max_depth': 10.0, 'min_child_weight': 2.0, 'n_estimators': 5, 'subsample': 0.8}
LightGBMClassifier Performance:
Optimal threshold: 0.89
Optimal f1: 0.37228260869565216




100%|██████████| 8/8 [04:16<00:00, 32.04s/it]

AUC Score: 0.8076939143071422
Precision / Positive predictive value: 0.6682926829268293
Specificity: 0.9959235057850249
Recall / sensitivity: 0.2580037664783427
Negative predictive value: 0.9768330687364026
Accuracy: 0.9731582616778991
F-1: 0.37228260869565216





## 2. Ensemble of the models


In [36]:
def MetaClassifier(trained_models, X_train, y_train, X_test, y_test, target_metric = 'f1', top_iterations = 5):
    """
    Function to build a soft-voting ensemble meta-classifier
    Arguments:
    trained_models -- list of model iterations outputs of different algorithms
    train_dict -- a dictionary of train set features and labels
    test_dict (optional) -- a dictionary that contains test set features and labels
    Return:
    out -- a dictionary of meta-classifier model ('BestModel'), a dataframe of performance metrics of all models (Metrics), 
           and a dataframe of metaclassifier predicted labels and probabilities on trainset instances (train_df_pred)
    """

    estimators=[]
    weights = []
    results_auc = []
    results_f1 = []

    train_dict = {
        'X_train': X_train,
        'y_train': y_train 
    }
    test_dict = {
        'X_test ': X_test,
        'y_test': y_test
    }


    print('Generating a Soft-Voting Ensemble Classifier...')

    for alg in tqdm(trained_models):
        for metric in tqdm(list(alg.keys())):
            # for voting ensemble
            estimators.append((str(alg)+'_'+metric, alg[metric]['model']))
            weights.append(alg[metric]['score_f1'])

            # for results
            optimal_threshold = find_optimal_moving_threshold(model = alg[metric]['model'], X_test = X_test, y_test = y_test)
            score_auc,__,__,__,__,__,score_f1 = calculate_performance_metrics_rsv(trained_model = alg[metric]['model'], X_test = X_test, y_test = y_test, threshold = optimal_threshold, print_roc = False)
            results_auc.append(score_auc)
            results_f1.append(score_f1)

    if target_metric == 'f1':
        results_df = pd.DataFrame(results_f1, columns = ['f1'])
    elif target_metric == 'auc':
        results_df = pd.DataFrame(results_auc, columns = ['auc'])
    else:
        raise ValueError("Please, indicate a target metric. Accepted values are ['f1', 'auc']")
        
   
    results_df.sort_values(by= target_metric, ascending=False)

    # keeping top five iterations for the ensemble
    KeepIdx = results_df.nlargest(top_iterations, target_metric).index

    BestEstimators= [estimators[i] for i in KeepIdx.tolist()]
    BestWeights = [weights[i] for i in KeepIdx.tolist()]

    X_train, y_train = train_dict['X_train'], train_dict['y_train']

    MetaClassifier = VotingClassifier(estimators=BestEstimators, voting='soft', weights=BestWeights)
    MetaClassifier = MetaClassifier.fit(X_train, y_train)

    # results_df = results_df.append(eval_model(MetaClassifier, train_dict, test_dict), ignore_index=True).sort_values(by='train_CV_f1', ascending=False) # add metaclassification model metrics

    optimal_threshold = find_optimal_moving_threshold(model = MetaClassifier, X_test = X_test, y_test = y_test)
    __,__,__,__,__,__,__ = calculate_performance_metrics_rsv(trained_model = MetaClassifier, X_test = X_test, y_test = y_test, threshold = optimal_threshold, print_roc = False)
            

    # storing metaclassifier performance on trainset
    # y_pred = MetaClassifier.predict(X_train)
    # y_proba =  MetaClassifier.predict_proba(X_train)
    # train_df_pred = X_train.copy()
    # train_df_pred['y_true'] = y_train
    # train_df_pred['y_hat'] = y_pred
    # train_df_pred['y_hat_proba'] = y_proba[:,1] # probability of belonging to class 1

    out = {'MetaClassifier': MetaClassifier,
           'BestModel': BestEstimators}
    return out

In [37]:
metaclassifier_dict = MetaClassifier(trained_models = [best_models_xgb, best_models_lightGBM], 
                                     X_train = X_train, y_train = y_train, X_test = X_test, y_test = y_test, target_metric = 'f1', top_iterations = 5
                                     )

Generating a Soft-Voting Ensemble Classifier...


  0%|          | 0/2 [00:00<?, ?it/s]

Optimal threshold: 0.6900000000000001
Optimal f1: 0.35348837209302325






AUC Score: 0.7824106296833311
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
Optimal threshold: 0.5700000000000001
Optimal f1: 0.35348837209302325






AUC Score: 0.7900250981895682
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
Optimal threshold: 0.56
Optimal f1: 0.3553826199740596






AUC Score: 0.7826644227207538
Precision / Positive predictive value: 0.5708333333333333
Specificity: 0.9938253102331994
Recall / sensitivity: 0.2580037664783427
Negative predictive value: 0.9767852934244639
Accuracy: 0.9711247966534976
F-1: 0.3553826199740596
Optimal threshold: 0.6900000000000001
Optimal f1: 0.35348837209302325






AUC Score: 0.7872069568193951
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
Optimal threshold: 0.64
Optimal f1: 0.35348837209302325






AUC Score: 0.7760808755317885
Precision / Positive predictive value: 1.0
Specificity: 1.0
Recall / sensitivity: 0.21468926553672316
Negative predictive value: 0.9756111825944555
Accuracy: 0.9757727167092726
F-1: 0.35348837209302325
Optimal threshold: 0.5700000000000001
Optimal f1: 0.34688346883468835






AUC Score: 0.7684212481220952
Precision / Positive predictive value: 0.6183574879227053
Specificity: 0.9952640728973083
Recall / sensitivity: 0.24105461393596986
Negative predictive value: 0.976301087915319
Accuracy: 0.9719962816639554
F-1: 0.34688346883468835
Optimal threshold: 0.0
Optimal f1: 0.059854590542749254




  npv = tn / (tn + fn)


AUC Score: 0.5
Precision / Positive predictive value: 0.030850569370206832
Specificity: 0.0
Recall / sensitivity: 1.0
Negative predictive value: nan
Accuracy: 0.030850569370206832
F-1: 0.059854590542749254
Optimal threshold: 0.58
Optimal f1: 0.2589073634204276




100%|██████████| 8/8 [00:26<00:00,  3.31s/it]
 50%|█████     | 1/2 [00:26<00:26, 26.50s/it]

AUC Score: 0.7365478118196882
Precision / Positive predictive value: 0.3504823151125402
Specificity: 0.9878904142437503
Recall / sensitivity: 0.20527306967984935
Negative predictive value: 0.9750310632506952
Accuracy: 0.9637462235649547
F-1: 0.2589073634204276




Optimal threshold: 0.91
Optimal f1: 0.36997319034852544






AUC Score: 0.8084914205421756
Precision / Positive predictive value: 0.641860465116279
Specificity: 0.9953839697859841
Recall / sensitivity: 0.2598870056497175
Negative predictive value: 0.9768782726363476
Accuracy: 0.9726934696723216
F-1: 0.36997319034852544
Optimal threshold: 0.89
Optimal f1: 0.36607142857142855






AUC Score: 0.8140520621192329
Precision / Positive predictive value: 0.8723404255319149
Specificity: 0.9989209280019183
Recall / sensitivity: 0.23163841807909605
Negative predictive value: 0.976099818405483
Accuracy: 0.9752498257029979
F-1: 0.36607142857142855
Optimal threshold: 0.86
Optimal f1: 0.375






AUC Score: 0.8090898324615972
Precision / Positive predictive value: 0.7630057803468208
Specificity: 0.9975421137821473
Recall / sensitivity: 0.24858757062146894
Negative predictive value: 0.9765831328129585
Accuracy: 0.9744364396932372
F-1: 0.375
Optimal threshold: 0.9400000000000001
Optimal f1: 0.3678756476683938






AUC Score: 0.804457883734113
Precision / Positive predictive value: 0.5892116182572614
Specificity: 0.9940651040105509
Recall / sensitivity: 0.2674199623352166
Negative predictive value: 0.9770785457545225
Accuracy: 0.9716476876597723
F-1: 0.3678756476683938
Optimal threshold: 0.93
Optimal f1: 0.363103953147877






AUC Score: 0.8150089792834659
Precision / Positive predictive value: 0.8157894736842105
Specificity: 0.9983214435585397
Recall / sensitivity: 0.2335216572504708
Negative predictive value: 0.9761430246189918
Accuracy: 0.9747269346967232
F-1: 0.363103953147877
Optimal threshold: 0.9
Optimal f1: 0.36619718309859156






AUC Score: 0.8124124552320033
Precision / Positive predictive value: 0.7262569832402235
Specificity: 0.9970625262274444
Recall / sensitivity: 0.2448210922787194
Negative predictive value: 0.9764574649210357
Accuracy: 0.9738554496862654
F-1: 0.36619718309859156
Optimal threshold: 0.76
Optimal f1: 0.36535859269282817






AUC Score: 0.800020005394231
Precision / Positive predictive value: 0.6490384615384616
Specificity: 0.9956237635633355
Recall / sensitivity: 0.2542372881355932
Negative predictive value: 0.976711362032463
Accuracy: 0.9727515686730188
F-1: 0.36535859269282817
Optimal threshold: 0.89
Optimal f1: 0.37228260869565216




100%|██████████| 8/8 [00:23<00:00,  2.90s/it]
100%|██████████| 2/2 [00:49<00:00, 24.86s/it]

AUC Score: 0.8076939143071422
Precision / Positive predictive value: 0.6682926829268293
Specificity: 0.9959235057850249
Recall / sensitivity: 0.2580037664783427
Negative predictive value: 0.9768330687364026
Accuracy: 0.9731582616778991
F-1: 0.37228260869565216





Optimal threshold: 0.92
Optimal f1: 0.36549707602339176


AUC Score: 0.8123084204081665
Precision / Positive predictive value: 0.8169934640522876
Specificity: 0.9983214435585397
Recall / sensitivity: 0.23540489642184556
Negative predictive value: 0.9762002462043496
Accuracy: 0.9747850336974204
F-1: 0.36549707602339176
