In [1]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
#https://machinelearningmastery.com/calibrated-classification-model-in-scikit-learn/

In [2]:
#create imblanced data sets for 2 classes
features_2, output_2 = make_classification(n_samples = 3000,
                                       n_features = 33,
                                       n_informative = 10,
                                       n_redundant = 3,
                                       n_classes = 2,
                                       weights = [.1, .9],
                                       random_state=42
                                      )


#create imblanced data sets for 3 classes
features_3, output_3 = make_classification(n_samples = 3000,
                                       n_features = 33,
                                       n_informative = 10,
                                       n_redundant = 3,
                                       n_classes = 3,
                                       weights = [.05, .05, .9],
                                       random_state=42
                                      )

#create imblanced data sets for 6 classes
features_6, output_6 = make_classification(n_samples = 3000,
                                       n_features = 33,
                                       n_informative = 10,
                                       n_redundant = 3,
                                       n_classes = 6,
                                       weights = [.01, .01, .01, .02, .05, .9 ],
                                       random_state=42
                                      )

In [3]:
##########TRAINING, TEST AND VALIDATION DATA FOR 2 CLASS TASK############

#set the 20% of the data as the test set. do not forget to fix the random_state. otherwise you will evalaute different data sets for each run.
x_trainc20, x_testc2, y_trainc20, y_testc2 = train_test_split(features_2, output_2, test_size=0.2, random_state = 42)

#set the 20% of the training data as the validation set. do not forget to fix the random_state. otherwise you will evalaute different data sets for each run.
x_trainc2, x_validc2, y_trainc2, y_validc2 = train_test_split(x_trainc20, y_trainc20, test_size=0.2, random_state = 42)




##########TRAINING, TEST AND VALIDATION DATA FOR 3 CLASS TASK############

#set the 20% of the data as the test set. do not forget to fix the random_state. otherwise you will evalaute different data sets for each run.
x_trainc30, x_testc3, y_trainc30, y_testc3 = train_test_split(features_3, output_3, test_size=0.2, random_state = 42)

#set the 20% of the training data as the validation set. do not forget to fix the random_state. otherwise you will evalaute different data sets for each run.
x_trainc3, x_validc3, y_trainc3, y_validc3 = train_test_split(x_trainc30, y_trainc30, test_size=0.2, random_state = 42)




##########TRAINING, TEST AND VALIDATION DATA FOR 6 CLASS TASK############




#set the 20% of the data as the test set. do not forget to fix the random_state. otherwise you will evalaute different data sets for each run.
x_trainc60, x_testc6, y_trainc60, y_testc6 = train_test_split(features_6, output_6, test_size=0.2, random_state = 5)

#set the 20% of the training data as the validation set. do not forget to fix the random_state. otherwise you will evalaute different data sets for each run.
x_trainc6, x_validc6, y_trainc6, y_validc6 = train_test_split(x_trainc60, y_trainc60, test_size=0.2, random_state = 5)




.

In [6]:
#define classifier function
def classifier( train_data, train_target, test_data, test_target,estim,parameters,number_cv,verbose=1 ,score ='roc_auc_ovo'):

  """
  This function aims to initially perform parameter search on the training data using grid search.
   Afterward, it evaluates the classifier's results on the test data in terms of ROC AUC score and mean square error.

  """
  #define grid search function
  grid_search = GridSearchCV(estimator = estim, param_grid = parameters,cv = number_cv, n_jobs=-1, verbose=1, scoring=score)
  #fit the grid search function to your training data.
  grid_search.fit(train_data, train_target)
  #it gives you the classifier which gives the best combination
  last_estim = grid_search.best_estimator_
  #I need probability estimates; then, I need to convert the target array, which I can calculate probability estimates after predictions
  enc = OneHotEncoder()
  #before fit the data reshape it
  enc.fit(test_target.reshape(-1, 1))
  test_target = enc.transform(test_target.reshape(-1, 1)).toarray()

  roc_auc= roc_auc_score(test_target, last_estim.predict_proba(test_data), multi_class='ovr')
  mse = mean_squared_error(test_target, last_estim.predict_proba(test_data))
  return roc_auc, mse


In [7]:
ada_version1 = AdaBoostClassifier(random_state=42)
ada_version2 = AdaBoostClassifier(random_state=42)
ada_version3 = AdaBoostClassifier(random_state=42)


paramsada = {

    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 1.0],
    'n_estimators': [10,25,30,50,100,200]
}




roc_adav1, mse_adav1 =  classifier(x_trainc2, y_trainc2, x_testc2, y_testc2, ada_version1, paramsada, 4,1 ,'roc_auc_ovo')
print(roc_adav1, mse_adav1)

roc_adav2, mse_adav2 =  classifier(x_trainc3, y_trainc3, x_testc3, y_testc3, ada_version2, paramsada, 4,1 ,'roc_auc_ovo')
print(roc_adav2, mse_adav2)

roc_adav3, mse_adav3 =  classifier(x_trainc6, y_trainc6, x_testc6, y_testc6, ada_version3, paramsada, 4,1 ,'roc_auc_ovo')
print(roc_adav3, mse_adav3)

Fitting 4 folds for each of 30 candidates, totalling 120 fits
0.8920685608231665 0.2040553534747273
Fitting 4 folds for each of 30 candidates, totalling 120 fits
0.7821617845668648 0.12939986120242106
Fitting 4 folds for each of 30 candidates, totalling 120 fits
0.6007588876686442 0.05340734102470318


In [8]:
random_version1 = RandomForestClassifier(random_state=42, n_jobs=-1)
random_version2 = RandomForestClassifier(random_state=42, n_jobs=-1)
random_version3 = RandomForestClassifier(random_state=42, n_jobs=-1)

params = {
    'max_depth': [2,3,5,10,20],
    'min_samples_leaf': [5,10,20,50,100,200],
    'n_estimators': [10,25,30,50,100,200]
}

roc_random1, mse_random1 =  classifier(x_trainc2, y_trainc2, x_testc2, y_testc2, random_version1, params, 4,1 ,'roc_auc_ovo')
print(roc_random1, mse_random1)

roc_random2, mse_random2 =  classifier(x_trainc3, y_trainc3, x_testc3, y_testc3, random_version2, params, 4,1 ,'roc_auc_ovo')
print(roc_random2, mse_random2)

roc_random3, mse_random3 =  classifier(x_trainc6, y_trainc6, x_testc6, y_testc6, random_version3, params, 4,1 ,'roc_auc_ovo')
print(roc_random3, mse_random3)

Fitting 4 folds for each of 180 candidates, totalling 720 fits
0.9235514069707143 0.06408894821182361
Fitting 4 folds for each of 180 candidates, totalling 720 fits
0.8699478361290115 0.045924708461993535
Fitting 4 folds for each of 180 candidates, totalling 720 fits
0.749256409081641 0.02834870455272587


In [11]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import calibration_curve
from matplotlib import pyplot

#define classifier function
def classifier_calibrated( train_data, train_target,val_data, val_target, test_data, test_target,estim,parameters,number_cv,verbose=1 ,score ='roc_auc_ovo'):

  """
  This function aims to initially perform parameter search on the training data using grid search.
   Afterward, it evaluates the classifier's results on the test data in terms of ROC AUC score and mean square error.

  """
  #define grid search function
  grid_search = GridSearchCV(estimator = estim, param_grid = parameters,cv = number_cv, n_jobs=-1, verbose=1, scoring=score)
  #fit the grid search function to your training data.
  grid_search.fit(train_data, train_target)
  #it gives you the classifier which gives the best combination
  last_estim = grid_search.best_estimator_
  #I need probability estimates; then, I need to convert the target array, which I can calculate probability estimates after predictions
  enc = OneHotEncoder()
  enc2 = OneHotEncoder()
  #before fit the data reshape it
  enc.fit(test_target.reshape(-1, 1))
  enc2.fit(val_target.reshape(-1, 1))
  test_target = enc.transform(test_target.reshape(-1, 1)).toarray()
  #val_target = enc2.transform(val_target.reshape(-1, 1)).toarray()
  # calibrate model on validation data
  calibrator = CalibratedClassifierCV(last_estim,method='sigmoid')
  calibrator.fit(val_data, val_target)
  # evaluate the model
  last_estim_cal = calibrator.predict_proba(test_data)

  roc_auc= roc_auc_score(test_target, last_estim_cal , multi_class='ovr')
  mse = mean_squared_error(test_target, last_estim_cal )
  return roc_auc, mse


In [12]:
roc_adav1_cal, mse_adav1_cal =  classifier_calibrated(x_trainc2, y_trainc2, x_validc2, y_validc2,  x_testc2, y_testc2, ada_version1, paramsada, 4,1 ,'roc_auc_ovo')
print(roc_adav1_cal, mse_adav1_cal)

roc_adav2_cal, mse_adav2_cal =  classifier_calibrated(x_trainc3, y_trainc3, x_validc3, y_validc3,  x_testc3, y_testc3, ada_version2, paramsada, 4,1 ,'roc_auc_ovo')
print(roc_adav2_cal, mse_adav2_cal)


roc_adav3_cal, mse_adav3_cal =  classifier_calibrated(x_trainc6, y_trainc6, x_validc6, y_validc6,  x_testc6, y_testc6, ada_version3, paramsada, 4,1 ,'roc_auc_ovo')
print(roc_adav3_cal, mse_adav3_cal)


Fitting 4 folds for each of 30 candidates, totalling 120 fits
0.8582111957204072 0.08380148069486756
Fitting 4 folds for each of 30 candidates, totalling 120 fits
0.6321991792336137 0.06262205369899433
Fitting 4 folds for each of 30 candidates, totalling 120 fits
0.6248164548642703 0.03192652826553554


In [13]:
roc_randomv1_cal, mse_randomv1_cal =  classifier_calibrated(x_trainc2, y_trainc2, x_validc2, y_validc2,  x_testc2, y_testc2, random_version1, params, 4,1 ,'roc_auc_ovo')
print(roc_adav1_cal, mse_adav1_cal)

roc_randomv2_cal, mse_randomv2_cal =  classifier_calibrated(x_trainc3, y_trainc3, x_validc3, y_validc3,  x_testc3, y_testc3, random_version2, params, 4,1 ,'roc_auc_ovo')
print(roc_adav2_cal, mse_adav2_cal)


roc_randomv3_cal, mse_randomv3_cal =  classifier_calibrated(x_trainc6, y_trainc6, x_validc6, y_validc6,  x_testc6, y_testc6, random_version3, params, 4,1 ,'roc_auc_ovo')
print(roc_adav3_cal, mse_adav3_cal)


Fitting 4 folds for each of 180 candidates, totalling 720 fits
0.8582111957204072 0.08380148069486756
Fitting 4 folds for each of 180 candidates, totalling 720 fits
0.6321991792336137 0.06262205369899433
Fitting 4 folds for each of 180 candidates, totalling 720 fits
0.6248164548642703 0.03192652826553554


In [14]:
print(roc_randomv1_cal, mse_randomv1_cal)

print(roc_randomv2_cal, mse_randomv2_cal)

print(roc_randomv3_cal, mse_randomv3_cal)


0.8208739321488032 0.07977300673700467
0.7719904241723518 0.051478847065914095
0.6602049347617239 0.029941022330675884


In [18]:
print(f'After calibration MSE score for the Adaboost algorithm was improved by {(mse_adav1- mse_adav1_cal)/mse_adav1:.2%} for the two class classification task.')

print(f'After calibration MSE score for the Adaboost algorithm was improved by {(mse_adav2 - mse_adav2_cal)/mse_adav2:.2%} for the three class classification task.')

print(f'After calibration MSE score for the Adaboost algorithm was improved by {(mse_adav3 - mse_adav3_cal)/mse_adav3:.2%} for the six class classification task.')

After calibration MSE score for the Adaboost algorithm was improved by 58.93% for the two class classification task.
After calibration MSE score for the Adaboost algorithm was improved by 51.61% for the three class classification task.
After calibration MSE score for the Adaboost algorithm was improved by 40.22% for the six class classification task.


In [21]:
print(f'After calibration MSE score for the Random Forests algorithm was degraded by {(mse_random1 - mse_randomv1_cal)/mse_random1:.2%} for the two class classification task.')

print(f'After calibration MSE score for the Random Forests algorithm was degraded by {(mse_random2 - mse_randomv2_cal)/mse_random2:.2%} for the three class classification task.')

print(f'After calibration MSE score for the Random Forests algorithm was degraded by {(mse_random3 - mse_randomv3_cal)/mse_random3:.2%} for the six class classification task.')

After calibration MSE score for the Random Forests algorithm was degraded by -24.47% for the two class classification task.
After calibration MSE score for the Random Forests algorithm was degraded by -12.09% for the three class classification task.
After calibration MSE score for the Random Forests algorithm was degraded by -5.62% for the six class classification task.


We observed that probability calibration **improved** the performance of the *Adaboost* classifier in terms of mean square error. However, for the *Random Forest* algorithm, **performance degraded** after calibration. Moreover, as the **number of classes** increased, **the effect of calibration decreased** for both Adaboost and Random Forest algorithms.