In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

import os

# Dataset locations.
# The defaults are the original author's local checkout. Set the environment
# variables CADX_VAL_PATH / CADX_TRAIN_PATH to run the notebook on another
# machine without editing this cell.
VAL_PATH = os.environ.get(
    "CADX_VAL_PATH",
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val",
)
TRAIN_PATH = os.environ.get(
    "CADX_TRAIN_PATH",
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train",
)

In [2]:
# --- Run configuration ------------------------------------------------------
percent = 100     # forwarded as `percentage` to the loader and the pipeline
random = False    # forwarded as `shuffle` to the loader and the pipeline
batch_size = 48

# --- Preprocessing chain (steps are added in this order) ---------------------
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=np.nan)
factory.resize((150,150))
factory.hair_removal()
factory.normalize2float()

# Loader over the validation images, using the preprocessing chain above.
factory_loader = FactoryLoader(
    path=VAL_PATH,
    batch_size=batch_size,
    factory=factory,
    percentage=percent,
    shuffle=random,
)

# --- Feature extraction strategy ---------------------------------------------
strategy = FeatureExtractionStrategy()

# Gradient features
strategy.add_extractor(GradientExtractor())

# Variance and color moments: first with the extractors' default argument,
# then once for "lab" and once for "hsv".
# (MeanExtractor / StdExtractor used to be listed alongside these and are
# currently disabled.)
strategy.add_extractor(VarExtractor())
strategy.add_extractor(ColorMomentsExtractor())
for _color_space in ("lab", "hsv"):
    strategy.add_extractor(VarExtractor(_color_space))
    strategy.add_extractor(ColorMomentsExtractor(_color_space))

# Local binary patterns: radius 1, 2, 3 with 32 points, then again with 64 points.
for _n_points in (32, 64):
    for _radius in (1, 2, 3):
        strategy.add_extractor(LBPExtractor(radius=_radius, n_points=_n_points))

# Gray-level co-occurrence matrix properties
strategy.add_extractor(
    GLCMExtractor(
        properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    )
)

# --- Training pipeline ---------------------------------------------------------
# The classifier list starts empty; it is assigned to `pipeline.classifiers`
# in a later cell.
pipeline = MLPipeline(
    dataset_path=TRAIN_PATH,
    preprocessing_factory=factory,
    feature_strategy=strategy,
    classifiers=[],
    percentage=percent,
    verbose=True,
    shuffle=random,
    batch_size=batch_size,
)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
# Clear any feature matrix already stored on the pipeline, then extract features
# from the training set.
# NOTE(review): presumably the reset forces run_feature_extraction() to recompute
# instead of reusing an earlier result — confirm against utils.ml.
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/106 batches.
Processed 10/106 batches.
Processed 15/106 batches.
Processed 20/106 batches.
Processed 25/106 batches.
Processed 30/106 batches.
Processed 35/106 batches.
Processed 40/106 batches.
Processed 45/106 batches.
Processed 50/106 batches.
Processed 55/106 batches.
Processed 60/106 batches.
Processed 65/106 batches.
Processed 70/106 batches.
Processed 75/106 batches.
Processed 80/106 batches.
Processed 85/106 batches.
Processed 90/106 batches.
Processed 95/106 batches.
Processed 100/106 batches.
Processed 105/106 batches.


INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 106/106 batches.


In [21]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Candidate models compared in this notebook. Their position in
# `pipeline.classifiers` matters: the pipeline logs models as
# "<ClassName><position>" (e.g. RandomForestClassifier0, XGBClassifier8).

# --- Random forests ------------------------------------------------------------
# rf1/rf2: default settings, differing only in the number of trees.
# They were the only unseeded models here, so their metrics changed from run to
# run. They are now seeded for reproducibility. The seed deliberately differs
# from the 42 used below: rf3/rf5 have the same tree settings (oob_score only
# adds an out-of-bag estimate), so sharing the seed would make rf1/rf2 exact
# duplicates of them.
rf1 = RandomForestClassifier(n_estimators=100, random_state=0)
rf2 = RandomForestClassifier(n_estimators=150, random_state=0)
# 100 trees: with out-of-bag scoring / with bootstrap disabled
rf3 = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)
rf4 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# 150 trees: with out-of-bag scoring / with bootstrap disabled
rf5 = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=42)
rf6 = RandomForestClassifier(n_estimators=150, bootstrap=False, random_state=42)
# 200 trees: with out-of-bag scoring / with bootstrap disabled
rf7 = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rf8 = RandomForestClassifier(n_estimators=200, bootstrap=False, random_state=42)

# --- XGBoost ---------------------------------------------------------------------
# xgb1-xgb3: library defaults, varying only the number of boosting rounds.
xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
# xgb4-xgb11: hand-picked combinations of learning rate, depth, child weight,
# row/column subsampling and regularisation.
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=3, min_child_weight=4, subsample=0.8, colsample_bytree=0.8)
# Author's marker "2ND" — presumably the second-best setting in an earlier run; TODO confirm.
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7) # 2ND
# Author's marker "THIS" — presumably the preferred setting in an earlier run; TODO confirm.
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8) # THIS
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb8 = XGBClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb9 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)

# Register the candidates: positions 0-7 are the forests, 8-18 the XGBoost models.
# SVM candidates (svm1, svm2, svm3, svm6, svm7, svm8) were listed here once but
# are not defined in this notebook, so they stay excluded.
pipeline.classifiers = [rf1, rf2, rf3, rf4, rf5, rf6, rf7, rf8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9, xgb10, xgb11]

# Replace the pipeline's record of fitted models with an empty dict before the
# next fit_classifiers() call.
pipeline.fitted_classifiers = {}

In [22]:
# Fit the classifiers registered on the pipeline.
# NOTE(review): the saved output prints "list index out of range" between fits
# while fitting still reports success, so an exception appears to be caught and
# printed somewhere inside utils.ml — investigate rather than ignore.
# NOTE(review): this cell's saved execution count (22) is higher than those of
# the resampling cells further down (9-18), and its logged top-10 features for
# XGBClassifier11 are identical to the Borderline-SMOTE run. The stored outputs
# were therefore most likely produced on already-resampled features; restart the
# kernel and run top to bottom before treating them as the un-resampled baseline.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 10.390198945999146 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 27.617349863052368 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 30.24943518638611 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 43.9967999458313 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 25.629243850708008 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 23.045916318893433 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 21.709500312805176 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 29.68074083328247 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 29.95575761795044 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 15.264234781265259 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 17.42990732192993 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier11: [('color_moments_hsv_channel_0_mean', 0.017054819), ('lbp_rad2_bins64_29', 0.013586495), ('lbp_rad2_bins32_16', 0.011955246), ('lbp_rad2_bins32_15', 0.011581601), ('lbp_rad1_bins32_22', 0.010779704), ('lbp_rad1_bins64_63', 0.010063744), ('color_moments_lab_channel_0_skew', 0.0099976985), ('color_moments_lab_channel_1_std', 0.009294928), ('lbp_rad2_bins64_41', 0.009251839), ('lbp_rad1_bins64_26', 0.008946392)]
INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 6.199901819229126 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12
INFO:utils.ml:Top 10 features for XGBClassifier12: [('color_moments_hsv_channel_0_mean', 0.016213028), ('lbp_rad1_bins64_63', 0.010902622), ('var_lab_channel_0', 0.00989256), ('lbp_rad1_bins64_26', 0.009672324), ('lbp_rad2_bins32_15', 0.009448707), ('lbp_rad2_bins32_0', 0.008388236), ('lbp_rad1_bins64_31', 0.007700602), ('color_moments_lab_channel_0_skew', 0.0074120266), ('color_moments_lab_channel_

In [23]:
# Predict on the validation set with every fitted classifier. The returned dict
# (ground truth under 'GT' plus one label array per classifier) is shown as the
# cell output.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier15': array([2

In [24]:
import pandas as pd

# Metrics table: the calculate_metrics() result loaded into a DataFrame and
# transposed, giving one row per classifier and one column per metric in the
# displayed output.
# NOTE(review): the saved output of this cell also prints "cannot access local
# variable 'report' where it is not associated with a value" many times; that
# message appears to come from inside calculate_metrics() — check utils.ml.
df = pd.DataFrame(data=pipeline.calculate_metrics()).T
df

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7968503937007874, 'precision': 0.6753896293217174, 'recall': 0.6753173492312311, 'f1': 0.6742957811080837, 'kappa': 0.6372312393436816}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7874015748031497, 'precision': 0.6690320976131559, 'recall': 0.6688845383416551, 'f1': 0.6674821144913423, 'kappa': 0.6211133455614242}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.794488188976378, 'precision': 0.6743255744321446, 'recall': 0.6729539131861659, 'f1': 0.6728574055441944, 'kappa': 0.6321887802431436}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7976377952755905, 'precision': 0.6774553571428572, 'recall': 0.6578376519950384, 'f1': 0.6640093613789434, 'kappa': 0.6347143880383204}
INFO:utils.ml:Metrics for classi

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.79685,0.67539,0.675317,0.674296,0.637231
RandomForestClassifier1,0.787402,0.669032,0.668885,0.667482,0.621113
RandomForestClassifier2,0.794488,0.674326,0.672954,0.672857,0.632189
RandomForestClassifier3,0.797638,0.677455,0.657838,0.664009,0.634714
RandomForestClassifier4,0.790551,0.669752,0.676249,0.672199,0.627168
RandomForestClassifier5,0.797638,0.676203,0.654961,0.661401,0.634457
RandomForestClassifier6,0.785039,0.663855,0.665988,0.664182,0.616273
RandomForestClassifier7,0.802362,0.684159,0.658266,0.666185,0.642145
XGBClassifier8,0.829921,0.705985,0.697566,0.701043,0.693433


In [26]:
# Display the metrics table `df` built above once more.
df

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.79685,0.67539,0.675317,0.674296,0.637231
RandomForestClassifier1,0.787402,0.669032,0.668885,0.667482,0.621113
RandomForestClassifier2,0.794488,0.674326,0.672954,0.672857,0.632189
RandomForestClassifier3,0.797638,0.677455,0.657838,0.664009,0.634714
RandomForestClassifier4,0.790551,0.669752,0.676249,0.672199,0.627168
RandomForestClassifier5,0.797638,0.676203,0.654961,0.661401,0.634457
RandomForestClassifier6,0.785039,0.663855,0.665988,0.664182,0.616273
RandomForestClassifier7,0.802362,0.684159,0.658266,0.666185,0.642145
XGBClassifier8,0.829921,0.705985,0.697566,0.701043,0.693433


___
Balance the training data with SMOTE oversampling

In [9]:
# Backup: keep copies of the current feature matrix and labels on the pipeline.
# The resampling cells below overwrite pipeline.feature_matrix / pipeline.labels,
# and the Borderline-SMOTE cell resamples from these copies.
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [10]:
# Sanity check: print the shapes of the backup and live arrays, one per line
# (feature matrices first, then labels).
print(
    pipeline.backup_feature_matrix.shape,
    pipeline.feature_matrix.shape,
    pipeline.backup_labels.shape,
    pipeline.labels.shape,
    sep="\n",
)

(5082, 354)
(5082, 354)
(5082,)
(5082,)


In [11]:
from imblearn.over_sampling import SMOTE

# Oversample the training features with SMOTE (seeded for reproducibility).
# The input is the backup taken in the backup cell above, not the live
# pipeline.feature_matrix / pipeline.labels. That makes the result independent of
# which resampling cell ran last (re-running is idempotent) and matches how the
# Borderline-SMOTE cell below resamples.
smote = SMOTE(random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

In [12]:
# Replace the record of fitted models with an empty dict, then fit the registered
# classifiers again (pipeline.feature_matrix / pipeline.labels now hold the
# SMOTE-resampled data assigned in the previous cell).
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 9.74233078956604 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 16.17583155632019 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 14.872129917144775 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 27.7579026222229 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 27.495516538619995 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 41.82298231124878 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 37.18102526664734 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 47.74561905860901 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 12.20540714263916 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier9: [('color_moments_hsv_channel_0_mean', 0.017733097), ('var_lab_channel_0', 0.015833523), ('lbp_rad2_bins32_0', 0.014874804), ('lbp_rad2_bins64_29', 0.013070298), ('lbp_rad1_bins64_58', 0.012508532), ('lbp_rad1_bins64_31', 0.010982232), ('color_moments_lab_channel_0_skew', 0.010319066), ('lbp_rad3_bins64_20', 0.009157334), ('lbp_rad2_bins64_33', 0.009129643), ('lbp_rad1_bins64_63', 0.00910205)]
INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 14.587161540985107 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10
INFO:utils.ml:Top 10 features for XGBClassifier10: [('color_moments_hsv_channel_0_mean', 0.017430328), ('var_lab_channel_0', 0.015962644), ('lbp_rad2_bins32_0', 0.015815852), ('lbp_rad2_bins64_29', 0.01350032), ('lbp_rad1_bins64_58', 0.0123061575), ('lbp_rad1_bins64_31', 0.010751022), ('color_moments_lab_channel_0_skew', 0.010404027), ('lbp_rad2_bins64_33', 0.0095666675), ('lbp_rad1_bins64_63', 0.009238388), ('lbp_ra

In [13]:
# Predict on the validation set with the classifiers fitted after SMOTE.
# NOTE(review): no calculate_metrics() cell follows in this section, so the plain
# SMOTE run is never scored before the Borderline-SMOTE section predicts again.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([2, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([2

___
Borderline SMOTE

In [14]:
from imblearn.over_sampling import BorderlineSMOTE

# Borderline-SMOTE, always starting from the backed-up feature matrix and labels
# rather than from whatever a previous resampling cell left on the pipeline.
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix,
    pipeline.backup_labels,
)

# Shapes before and after resampling, one per line: feature matrices first, then labels.
print(
    pipeline.backup_feature_matrix.shape,
    pipeline.feature_matrix.shape,
    pipeline.backup_labels.shape,
    pipeline.labels.shape,
    sep="\n",
)

# Start the next fit_classifiers() call from an empty record of fitted models.
pipeline.fitted_classifiers = {}

(5082, 354)
(8139, 354)
(5082,)
(8139,)


In [15]:
# Fit the registered classifiers again; pipeline.feature_matrix / pipeline.labels
# now hold the Borderline-SMOTE-resampled data assigned in the previous cell.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 18.79904079437256 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 26.141379594802856 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 19.60924983024597 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 28.389144897460938 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 29.00088882446289 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 34.15178990364075 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 21.0226411819458 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 30.358280658721924 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 12.190419912338257 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 15.612688541412354 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 18.50666093826294 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier11: [('color_moments_hsv_channel_0_mean', 0.017054819), ('lbp_rad2_bins64_29', 0.013586495), ('lbp_rad2_bins32_16', 0.011955246), ('lbp_rad2_bins32_15', 0.011581601), ('lbp_rad1_bins32_22', 0.010779704), ('lbp_rad1_bins64_63', 0.010063744), ('color_moments_lab_channel_0_skew', 0.0099976985), ('color_moments_lab_channel_1_std', 0.009294928), ('lbp_rad2_bins64_41', 0.009251839), ('lbp_rad1_bins64_26', 0.008946392)]
INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 6.893215894699097 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12
INFO:utils.ml:Top 10 features for XGBClassifier12: [('color_moments_hsv_channel_0_mean', 0.016213028), ('lbp_rad1_bins64_63', 0.010902622), ('var_lab_channel_0', 0.00989256), ('lbp_rad1_bins64_26', 0.009672324), ('lbp_rad2_bins32_15', 0.009448707), ('lbp_rad2_bins32_0', 0.008388236), ('lbp_rad1_bins64_31', 0.007700602), ('color_moments_lab_channel_0_skew', 0.0074120266), ('color_moments_lab_channel_

In [16]:
# Predict on the validation set with the classifiers fitted after Borderline-SMOTE.
# The returned dict ('GT' plus one label array per classifier) is the cell output.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassif

{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier15': array([2

In [17]:
# Metrics table for the Borderline-SMOTE run: the requested metrics loaded into a
# DataFrame and transposed, giving one row per classifier and one column per
# metric in the displayed output. `pd` is imported in the earlier metrics cell.
df_borderline_smote = pd.DataFrame(
    data=pipeline.calculate_metrics(["accuracy", "precision", "recall", "f1", "kappa"])
).T
df_borderline_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7937007874015748, 'precision': 0.6791109826867973, 'recall': 0.6783934835557632, 'f1': 0.6778509520595518, 'kappa': 0.6313130193905817}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7929133858267716, 'precision': 0.6727668510113314, 'recall': 0.6674610322290858, 'f1': 0.6680346939461946, 'kappa': 0.6300927176961004}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.794488188976378, 'precision': 0.6743255744321446, 'recall': 0.6729539131861659, 'f1': 0.6728574055441944, 'kappa': 0.6321887802431436}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7976377952755905, 'precision': 0.6774553571428572, 'recall': 0.6578376519950384, 'f1': 0.6640093613789434, 'kappa': 0.6347143880383204}
INFO:utils.ml:Metrics for classi

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value


INFO:utils.ml:Metrics for classifier XGBClassifier11: {'accuracy': 0.7771653543307087, 'precision': 0.6521047914459501, 'recall': 0.6801426505620801, 'f1': 0.6611362019354973, 'kappa': 0.6109169719486953}
INFO:utils.ml:Metrics for classifier XGBClassifier12: {'accuracy': 0.8354330708661417, 'precision': 0.7437161094326767, 'recall': 0.7161017130720837, 'f1': 0.7271585414677966, 'kappa': 0.701600428099902}
INFO:utils.ml:Metrics for classifier XGBClassifier13: {'accuracy': 0.8228346456692913, 'precision': 0.7071829652939098, 'recall': 0.6920746438966184, 'f1': 0.6982685234223954, 'kappa': 0.6796136751983424}


cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value


INFO:utils.ml:Metrics for classifier XGBClassifier14: {'accuracy': 0.8212598425196851, 'precision': 0.7086295119983866, 'recall': 0.6902028499361156, 'f1': 0.6977487291212782, 'kappa': 0.6761456058522732}
INFO:utils.ml:Metrics for classifier XGBClassifier15: {'accuracy': 0.8322834645669291, 'precision': 0.7322278168008207, 'recall': 0.719533251313167, 'f1': 0.7252085874313358, 'kappa': 0.6970202680907494}
INFO:utils.ml:Metrics for classifier XGBClassifier16: {'accuracy': 0.8299212598425196, 'precision': 0.7213470390286237, 'recall': 0.7150038678532621, 'f1': 0.7178345762004555, 'kappa': 0.6937394775976099}
INFO:utils.ml:Metrics for classifier XGBClassifier17: {'accuracy': 0.8338582677165355, 'precision': 0.7307948004688676, 'recall': 0.7057773553260301, 'f1': 0.7158224831151574, 'kappa': 0.6983787166603634}
INFO:utils.ml:Metrics for classifier XGBClassifier18: {'accuracy': 0.8322834645669291, 'precision': 0.7250721075546586, 'recall': 0.6988628587027454, 'f1': 0.7090501310104288, 'kapp

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value


Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.793701,0.679111,0.678393,0.677851,0.631313
RandomForestClassifier1,0.792913,0.672767,0.667461,0.668035,0.630093
RandomForestClassifier2,0.794488,0.674326,0.672954,0.672857,0.632189
RandomForestClassifier3,0.797638,0.677455,0.657838,0.664009,0.634714
RandomForestClassifier4,0.790551,0.669752,0.676249,0.672199,0.627168
RandomForestClassifier5,0.797638,0.676203,0.654961,0.661401,0.634457
RandomForestClassifier6,0.785039,0.663855,0.665988,0.664182,0.616273
RandomForestClassifier7,0.802362,0.684159,0.658266,0.666185,0.642145
XGBClassifier8,0.829921,0.705985,0.697566,0.701043,0.693433


In [18]:
# Confusion matrix of XGBClassifier14 against the ground truth. GT is passed
# first, so by scikit-learn's convention rows are true classes and columns are
# predicted classes.
# NOTE(review): "XGBClassifier14" is presumably position 14 in
# pipeline.classifiers (xgb7) — confirm the naming scheme in utils.ml.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions["XGBClassifier14"])

array([[414,  55,  29],
       [ 70, 595,  13],
       [ 38,  22,  34]], dtype=int64)