In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

import os

# Root folder that holds the `train` and `val` splits of the multiclass dataset.
# Set the CADX_DATASET_DIR environment variable to point the notebook at another
# checkout; when it is unset the original author's local path is used, so the
# default behaviour is unchanged.
DATASET_DIR = os.environ.get(
    "CADX_DATASET_DIR",
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass",
)

# Kept as plain strings because they are passed to MLPipeline as-is.
VAL_PATH = os.path.join(DATASET_DIR, "val")
TRAIN_PATH = os.path.join(DATASET_DIR, "train")

INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [2]:
# Run-level knobs, forwarded to MLPipeline below as `percentage`, `shuffle`
# and `batch_size` respectively.
percent = 100
random = False
batch_size = 24

# Preprocessing chain: pad to a square (NaN fill), resize to 240x240, convert to float.
factory = PreprocessingFactory()
factory.pad2square(fill=np.nan)
factory.resize((240, 240))
factory.normalize2float()

# Feature extractors, listed in the exact order they are registered on the strategy.
extractors = [GradientExtractor()]

# Colour moments in three colour spaces.
extractors += [ColorMomentsExtractor(space) for space in ("rgb", "lab", "hsv")]

# Multi-scale LBP: radius 1..5 with 8 sampling points per unit of radius
# (8, 16, 24, 32, 40).
extractors += [LBPExtractor(radius=r, n_points=8 * r) for r in range(1, 6)]

# Frequency-domain descriptors.
extractors += [FourierTransformExtractor(), FFTExtractor()]

# Grey-level co-occurrence matrix statistics.
extractors.append(
    GLCMExtractor(
        properties=[
            'contrast',
            'dissimilarity',
            'homogeneity',
            'energy',
            'correlation',
            'ASM',
        ]
    )
)

strategy = FeatureExtractionStrategy()
for extractor in extractors:
    strategy.add_extractor(extractor)

# Classifiers are attached in a later cell, hence the empty list here.
pipeline = MLPipeline(
    dataset_path=TRAIN_PATH,
    preprocessing_factory=factory,
    feature_strategy=strategy,
    classifiers=[],
    percentage=percent,
    verbose=True,
    shuffle=random,
    batch_size=batch_size,
)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
# Clear any feature matrix left on the pipeline before extracting again.
# NOTE(review): presumably this forces run_feature_extraction() to recompute
# rather than reuse a cached matrix - confirm against MLPipeline.
pipeline.feature_matrix = None
# Extract features for the training set in batches (progress is logged below).
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/212 batches.
Processed 10/212 batches.
Processed 15/212 batches.
Processed 20/212 batches.
Processed 25/212 batches.
Processed 30/212 batches.
Processed 35/212 batches.
Processed 40/212 batches.
Processed 45/212 batches.
Processed 50/212 batches.
Processed 55/212 batches.
Processed 60/212 batches.
Processed 65/212 batches.
Processed 70/212 batches.
Processed 75/212 batches.
Processed 80/212 batches.
Processed 85/212 batches.
Processed 90/212 batches.
Processed 95/212 batches.
Processed 100/212 batches.
Processed 105/212 batches.
Processed 110/212 batches.
Processed 115/212 batches.
Processed 120/212 batches.
Processed 125/212 batches.
Processed 130/212 batches.
Processed 135/212 batches.
Processed 140/212 batches.
Processed 145/212 batches.
Processed 150/212 batches.
Processed 155/212 batches.
Processed 160/212 batches.
Processed 165/212 batches.
Processed 170/212 batches.
Processed 175/212 batches.
Processed 180/212 batches.
Processed 185/212 batches.
Processed 190/212 bat

INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 212/212 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# ---------------------------------------------------------------------------
# Random forests. rf1 keeps the default bootstrap sampling so an out-of-bag
# score can be computed; rf0/rf2/rf3 train every tree on the full sample
# (bootstrap disabled) and differ only in the number of trees.
# ---------------------------------------------------------------------------
rf0 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
rf1 = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=42)
rf2 = RandomForestClassifier(n_estimators=150, bootstrap=False, random_state=42)
rf3 = RandomForestClassifier(n_estimators=200, bootstrap=False, random_state=42)

# ---------------------------------------------------------------------------
# XGBoost. Four hyper-parameter profiles are reused at several tree counts;
# every model uses learning_rate=0.1.
# ---------------------------------------------------------------------------
_XGB_PROFILES = {
    # deep trees, stronger row/column subsampling
    "deep_mcw3": dict(max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7),
    # shallow trees with a minimum split gain (the author's notes flagged this profile as "THIS")
    "shallow_gamma": dict(max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8),
    # shallow trees with light L1/L2 regularisation
    "shallow_reg": dict(max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1),
    # deep trees, otherwise default regularisation
    "deep_mcw1": dict(max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8),
}


def _make_xgb(profile, n_estimators):
    """Build an XGBClassifier from one of the named profiles at the given tree count."""
    return XGBClassifier(learning_rate=0.1, n_estimators=n_estimators, **_XGB_PROFILES[profile])


# 400 trees (xgb4 was marked "2ND" in the author's notes). xgb4-xgb7 are
# defined but not part of the candidate list assembled below.
xgb4 = _make_xgb("deep_mcw3", 400)
xgb5 = _make_xgb("shallow_gamma", 400)
xgb6 = _make_xgb("shallow_reg", 400)
xgb7 = _make_xgb("deep_mcw1", 400)

# 500 trees
xgb8 = _make_xgb("deep_mcw1", 500)

# 750 trees
xgb9 = _make_xgb("deep_mcw3", 750)
xgb10 = _make_xgb("shallow_gamma", 750)
xgb11 = _make_xgb("shallow_reg", 750)
xgb12 = _make_xgb("deep_mcw1", 750)

# 1000 trees
xgb13 = _make_xgb("deep_mcw3", 1000)
xgb14 = _make_xgb("shallow_gamma", 1000)
xgb15 = _make_xgb("shallow_reg", 1000)
xgb16 = _make_xgb("deep_mcw1", 1000)

# Candidates actually trained: the four forests plus xgb8..xgb16 (no SVMs).
# NOTE(review): judging by the log output, the pipeline labels models by class
# name + position in this list, so "XGBClassifier4" in the logs is xgb8 and
# "XGBClassifier12" is xgb16 - confirm against MLPipeline.
pipeline.classifiers = [
    rf0, rf1, rf2, rf3,
    xgb8, xgb9, xgb10, xgb11, xgb12, xgb13, xgb14, xgb15, xgb16,
]
# Forget any previously fitted models so the next fit starts from scratch.
pipeline.fitted_classifiers = {}

In [5]:
# Baseline run: fit every classifier in pipeline.classifiers on the original
# (imbalanced) training features. Per-model timing and the top-10 feature
# importances are written to the log below.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_lab_B_var', 0.040078301369389877), ('color_moments_lab_B_std', 0.032312646875050505), ('color_moments_rgb_B_iqr', 0.02100261973888299), ('color_moments_rgb_B_std', 0.01907227104341949), ('color_moments_rgb_B_var', 0.01742389117842051), ('color_moments_rgb_R_iqr', 0.017218257480761456), ('color_moments_lab_L_iqr', 0.01398176197075886), ('color_moments_lab_B_iqr', 0.01285717963389727), ('color_moments_rgb_B_kurtosis', 0.012764181567512228), ('color_moments_rgb_R_skew', 0.012649381094118036)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 5.857973337173462 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_lab_B_std', 0.03745268003455307), ('color_moments_lab_B_var', 0.0361345414796509), ('color_moments_rgb_B_iqr

In [6]:
# Predict on the validation split with every fitted classifier. As the last
# expression of the cell, the returned dict is echoed: it maps 'GT' (ground
# truth) and each classifier name to an array of class labels.
# NOTE(review): the batch progress in the log suggests validation features are
# re-extracted on every call - confirm against MLPipeline.
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 0, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 0, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 0, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 0, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 0, ..., 1, 0, 0], dtype=int64)}

In [7]:
import pandas as pd

# Baseline validation metrics. The frame is transposed so that each row is one
# classifier (plus the 'GT' reference row) and each column is one metric.
baseline_metrics = pipeline.calculate_metrics()
df = pd.DataFrame(baseline_metrics).transpose()
df

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.84251968503937, 'precision': 0.8720269913244364, 'recall': 0.6552831548373876, 'f1': 0.6765557987064076, 'kappa': 0.7049914517208058}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8330708661417323, 'precision': 0.8841015466535761, 'recall': 0.6274691435042877, 'f1': 0.634591292792572, 'kappa': 0.6859808724049452}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8456692913385827, 'precision': 0.8731582243656547, 'recall': 0.6547283718442464, 'f1': 0.6740220744316682, 'kappa': 0.7108163872262909}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8464566929133859, 'precision': 0.8746639908820567, 'recall': 0.658096769176751, 'f1': 0.6792776739687029, 'kappa': 0.7123666654277856}
INFO:utils.ml:Metrics for classifie

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.84252,0.872027,0.655283,0.676556,0.704991
RandomForestClassifier1,0.833071,0.884102,0.627469,0.634591,0.685981
RandomForestClassifier2,0.845669,0.873158,0.654728,0.674022,0.710816
RandomForestClassifier3,0.846457,0.874664,0.658097,0.679278,0.712367
XGBClassifier4,0.871654,0.857894,0.704696,0.733143,0.762162
XGBClassifier5,0.869291,0.840341,0.701444,0.729463,0.757106
XGBClassifier6,0.866142,0.858378,0.692091,0.71815,0.751487
XGBClassifier7,0.874803,0.857068,0.715826,0.746032,0.768333
XGBClassifier8,0.870079,0.850303,0.706412,0.734953,0.759517


In [8]:
# Pick the classifier with the best Cohen's kappa on the validation set.
# The 'GT' row scores ground truth against itself (kappa == 1.0), so it is
# removed by label before ranking. The previous float-equality mask was applied
# to the whole frame and assumed 'GT' sat in row 0, so it could return 'GT'
# itself whenever a model tied with it.
candidates = df.drop(index="GT", errors="ignore")
idx = candidates["kappa"].idxmax()  # first row label holding the maximum
highest = candidates.loc[[idx]]     # kept as a one-row frame for inspection

# Rows are ground-truth classes, columns are predicted classes.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[456,  38,   4],
       [ 50, 626,   2],
       [ 46,  19,  29]], dtype=int64)

___
# Balance the data

In [9]:
# Backup
# Keep copies of the original (pre-resampling) training features and labels:
# the balancing cells below overwrite pipeline.feature_matrix / pipeline.labels.
# NOTE(review): re-running this cell after a resampling cell would replace the
# backup with resampled data - run it only right after feature extraction.
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [10]:
# Sanity check: each backup should have the same shape as its live counterpart.
# Printed order: backup features, live features, backup labels, live labels.
for _arr in (
    pipeline.backup_feature_matrix,
    pipeline.feature_matrix,
    pipeline.backup_labels,
    pipeline.labels,
):
    print(_arr.shape)

(5082, 190)
(5082, 190)
(5082,)
(5082,)


In [11]:
# Show the names of the extracted features (echoed as the cell output).
# NOTE(review): presumably one name per column of pipeline.feature_matrix, in
# column order - confirm against MLPipeline.
pipeline.get_feature_names()

['gradient_magnitude_mean',
 'gradient_magnitude_std',
 'gradient_direction_mean',
 'gradient_direction_std',
 'color_moments_rgb_R_mean',
 'color_moments_rgb_R_std',
 'color_moments_rgb_R_skew',
 'color_moments_rgb_R_kurtosis',
 'color_moments_rgb_R_median',
 'color_moments_rgb_R_var',
 'color_moments_rgb_R_min',
 'color_moments_rgb_R_max',
 'color_moments_rgb_R_iqr',
 'color_moments_rgb_R_entropy',
 'color_moments_rgb_G_mean',
 'color_moments_rgb_G_std',
 'color_moments_rgb_G_skew',
 'color_moments_rgb_G_kurtosis',
 'color_moments_rgb_G_median',
 'color_moments_rgb_G_var',
 'color_moments_rgb_G_min',
 'color_moments_rgb_G_max',
 'color_moments_rgb_G_iqr',
 'color_moments_rgb_G_entropy',
 'color_moments_rgb_B_mean',
 'color_moments_rgb_B_std',
 'color_moments_rgb_B_skew',
 'color_moments_rgb_B_kurtosis',
 'color_moments_rgb_B_median',
 'color_moments_rgb_B_var',
 'color_moments_rgb_B_min',
 'color_moments_rgb_B_max',
 'color_moments_rgb_B_iqr',
 'color_moments_rgb_B_entropy',
 'color_

In [12]:
from imblearn.over_sampling import SMOTE

# Balance the training set with SMOTE (synthetic minority over-sampling).
smote = SMOTE(random_state=42)

# Always resample from the untouched backup copies rather than from whatever is
# currently in pipeline.feature_matrix. This keeps the cell idempotent and
# independent of execution order (e.g. when run after the Borderline SMOTE
# cell), and matches how that cell reads its input.
# np.nan_to_num replaces NaN with 0 because SMOTE cannot handle missing values.
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    np.nan_to_num(pipeline.backup_feature_matrix), pipeline.backup_labels)

In [13]:
# Discard the models fitted on the imbalanced data, then refit every classifier.
# NOTE(review): presumably fit_classifiers() trains on the current
# pipeline.feature_matrix / pipeline.labels, i.e. the SMOTE-resampled set - confirm.
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_rgb_B_iqr', 0.02141248972034945), ('color_moments_lab_B_std', 0.02123731819864567), ('color_moments_lab_B_var', 0.02053418529773359), ('color_moments_rgb_B_var', 0.01757289101859818), ('color_moments_lab_L_iqr', 0.01389085167379286), ('color_moments_rgb_G_iqr', 0.013708811176309057), ('color_moments_rgb_R_iqr', 0.013685514709031763), ('color_moments_rgb_B_std', 0.012375685473543868), ('glcm_ASM_dist_1_angle_0', 0.010647785351581151), ('gradient_direction_std', 0.010569430919342622)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.050365686416626 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_lab_B_var', 0.020037756705878002), ('color_moments_rgb_B_iqr', 0.01991791778312504), ('color_moments_lab_B_std', 0

In [14]:
# Predict on the validation split with the classifiers refitted after SMOTE.
# The returned dict ('GT' plus one label array per classifier) is echoed as the
# cell output.
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 0, 2, 0], dtype=int64)}

In [15]:
import pandas as pd

# Validation metrics after SMOTE, transposed so that each row is one classifier
# (plus the 'GT' reference row) and each column is one metric.
smote_metrics = pipeline.calculate_metrics()
df_smote = pd.DataFrame(smote_metrics).transpose()
df_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8330708661417323, 'precision': 0.7315370168423604, 'recall': 0.707273958698032, 'f1': 0.717112657340968, 'kappa': 0.697270895549239}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8330708661417323, 'precision': 0.7285846706180128, 'recall': 0.7099730119876985, 'f1': 0.717873063344903, 'kappa': 0.6977588986228295}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8362204724409449, 'precision': 0.7391375363564207, 'recall': 0.7040201224152242, 'f1': 0.7170975682658617, 'kappa': 0.7020353099888557}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8346456692913385, 'precision': 0.7403414195867026, 'recall': 0.7087903487419837, 'f1': 0.7209996294908048, 'kappa': 0.6997027428725848}
INFO:utils.ml:Metrics for classifie

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.833071,0.731537,0.707274,0.717113,0.697271
RandomForestClassifier1,0.833071,0.728585,0.709973,0.717873,0.697759
RandomForestClassifier2,0.83622,0.739138,0.70402,0.717098,0.702035
RandomForestClassifier3,0.834646,0.740341,0.70879,0.721,0.699703
XGBClassifier4,0.873228,0.794348,0.774543,0.783545,0.770374
XGBClassifier5,0.869291,0.789883,0.768675,0.778223,0.763135
XGBClassifier6,0.85748,0.761913,0.754126,0.757814,0.742964
XGBClassifier7,0.870079,0.788325,0.7697,0.778105,0.764954
XGBClassifier8,0.874016,0.792953,0.775035,0.783251,0.771904


In [16]:
# Pick the classifier with the best Cohen's kappa after SMOTE.
# The 'GT' row scores ground truth against itself (kappa == 1.0), so it is
# removed by label before ranking. The previous float-equality mask was applied
# to the whole frame and assumed 'GT' sat in row 0, so it could return 'GT'
# itself whenever a model tied with it.
candidates = df_smote.drop(index="GT", errors="ignore")
idx = candidates["kappa"].idxmax()  # first row label holding the maximum
highest = candidates.loc[[idx]]     # kept as a one-row frame for inspection

# Rows are ground-truth classes, columns are predicted classes.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[431,  40,  27],
       [ 43, 629,   6],
       [ 29,  15,  50]], dtype=int64)

___
# Borderline SMOTE

In [17]:
from imblearn.over_sampling import BorderlineSMOTE

# Borderline SMOTE variant of the balancing step, using the 'auto' sampling strategy.
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

# Start from the untouched backup and zero out NaNs before resampling.
features_without_nan = np.nan_to_num(pipeline.backup_feature_matrix)
resampled = smote.fit_resample(features_without_nan, pipeline.backup_labels)
pipeline.feature_matrix, pipeline.labels = resampled

# Shapes before/after resampling: backup features, resampled features,
# backup labels, resampled labels.
for _arr in (
    pipeline.backup_feature_matrix,
    pipeline.feature_matrix,
    pipeline.backup_labels,
    pipeline.labels,
):
    print(_arr.shape)

(5082, 190)
(8139, 190)
(5082,)
(8139,)


In [18]:
# Discard the models from the previous experiment before refitting.
pipeline.fitted_classifiers = {}

# Refit every classifier.
# NOTE(review): presumably this trains on the Borderline-SMOTE-resampled
# pipeline.feature_matrix / pipeline.labels set in the previous cell - confirm.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_lab_B_var', 0.02195088523407914), ('color_moments_rgb_B_iqr', 0.020569778786657538), ('color_moments_lab_B_std', 0.018532280309344187), ('color_moments_rgb_R_iqr', 0.01746095239755185), ('color_moments_rgb_B_std', 0.014076965942762169), ('color_moments_rgb_B_var', 0.01327280536226767), ('color_moments_rgb_G_iqr', 0.012660526857984442), ('color_moments_rgb_B_kurtosis', 0.011614555223653044), ('color_moments_hsv_H_median', 0.01142598618674037), ('color_moments_lab_L_iqr', 0.010561869143768318)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.84119701385498 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_lab_B_var', 0.020776825438865665), ('color_moments_lab_B_std', 0.02071574380164637), ('color_moments_rgb_

In [19]:
# Predict on the validation split with the classifiers refitted after
# Borderline SMOTE. The returned dict ('GT' plus one label array per
# classifier) is echoed as the cell output.
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 2, 0], dtype=int64)}

In [20]:
# Validation metrics after Borderline SMOTE. The metric list is passed
# explicitly here; the frame is transposed so that each row is one classifier
# (plus the 'GT' reference row).
metric_names = ["accuracy", "precision", "recall", "f1", "kappa"]
borderline_metrics = pipeline.calculate_metrics(metric_names)
df_borderline_smote = pd.DataFrame(borderline_metrics).transpose()
df_borderline_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8377952755905512, 'precision': 0.7618519024017827, 'recall': 0.71129002295506, 'f1': 0.7293431079634861, 'kappa': 0.7039251811848425}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8291338582677166, 'precision': 0.7276391736918053, 'recall': 0.7080479074397131, 'f1': 0.7161045383929473, 'kappa': 0.6909899444748433}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8440944881889764, 'precision': 0.7631711100322622, 'recall': 0.7188107227126039, 'f1': 0.7352712507595118, 'kappa': 0.7158130283441073}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8433070866141732, 'precision': 0.7660191552454817, 'recall': 0.7269493463718679, 'f1': 0.7422656979262096, 'kappa': 0.7148984269946845}
INFO:utils.ml:Metrics for classif

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.837795,0.761852,0.71129,0.729343,0.703925
RandomForestClassifier1,0.829134,0.727639,0.708048,0.716105,0.69099
RandomForestClassifier2,0.844094,0.763171,0.718811,0.735271,0.715813
RandomForestClassifier3,0.843307,0.766019,0.726949,0.742266,0.714898
XGBClassifier4,0.868504,0.776939,0.759198,0.767239,0.761817
XGBClassifier5,0.874016,0.794612,0.77809,0.785736,0.772007
XGBClassifier6,0.871654,0.785835,0.767629,0.775849,0.767756
XGBClassifier7,0.872441,0.781599,0.767765,0.774218,0.769169
XGBClassifier8,0.870079,0.781493,0.763591,0.771686,0.764812


In [21]:
# Pick the classifier with the best Cohen's kappa after Borderline SMOTE.
# The 'GT' row scores ground truth against itself (kappa == 1.0), so it is
# removed by label before ranking. The previous float-equality mask was applied
# to the whole frame and assumed 'GT' sat in row 0, so it could return 'GT'
# itself whenever a model tied with it.
candidates = df_borderline_smote.drop(index="GT", errors="ignore")
idx = candidates["kappa"].idxmax()  # first row label holding the maximum
highest = candidates.loc[[idx]]     # kept as a one-row frame for inspection

# Rows are ground-truth classes, columns are predicted classes.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[431,  43,  24],
       [ 40, 629,   9],
       [ 30,  13,  51]], dtype=int64)