In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train"

In [2]:
percent = 100
random = False
batch_size = 48

# Initialize the FactoryLoader
factory = PreprocessingFactory()
factory.gaussian_smoothing(3)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=np.nan)
factory.resize((150,150))
factory.hair_removal()
factory.normalize2float()

factory_loader = FactoryLoader(path=VAL_PATH, batch_size=batch_size, factory=factory, percentage=percent, shuffle=random)

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()

strategy.add_extractor(GradientExtractor())   # Add variance feature

# strategy.add_extractor(MeanExtractor())  # Add mean feature
# strategy.add_extractor(StdExtractor())   # Add standard deviation feature
strategy.add_extractor(VarExtractor())   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor())   # Add color moments feature


# strategy.add_extractor(MeanExtractor("lab"))  # Add mean feature
# strategy.add_extractor(StdExtractor("lab"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("lab"))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("lab"))   # Add color moments feature

# strategy.add_extractor(MeanExtractor("hsv"))  # Add mean feature
# strategy.add_extractor(StdExtractor("hsv"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("hsv"))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("hsv"))   # Add color moments feature

strategy.add_extractor(LBPExtractor(radius=1, n_points=16))
strategy.add_extractor(LBPExtractor(radius=2, n_points=16))
strategy.add_extractor(LBPExtractor(radius=3, n_points=16))
strategy.add_extractor(LBPExtractor(radius=1, n_points=32))
strategy.add_extractor(LBPExtractor(radius=2, n_points=32))
strategy.add_extractor(LBPExtractor(radius=3, n_points=32))
strategy.add_extractor(LBPExtractor(radius=1, n_points=64))
strategy.add_extractor(LBPExtractor(radius=2, n_points=64))
strategy.add_extractor(LBPExtractor(radius=3, n_points=64))

strategy.add_extractor(GLCMExtractor(properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation',]))

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[], percentage=percent, verbose=True, shuffle=random, batch_size=batch_size)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/106 batches.
Processed 10/106 batches.
Processed 15/106 batches.
Processed 20/106 batches.
Processed 25/106 batches.
Processed 30/106 batches.
Processed 35/106 batches.
Processed 40/106 batches.
Processed 45/106 batches.
Processed 50/106 batches.
Processed 55/106 batches.
Processed 60/106 batches.
Processed 65/106 batches.
Processed 70/106 batches.
Processed 75/106 batches.
Processed 80/106 batches.
Processed 85/106 batches.
Processed 90/106 batches.
Processed 95/106 batches.
Processed 100/106 batches.
Processed 105/106 batches.


INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 106/106 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

rf1 = RandomForestClassifier(n_estimators=100)
rf2 = RandomForestClassifier(n_estimators=150)
rf3 = RandomForestClassifier(n_estimators=250)

# Random Forest with Class Weight Balancing
rf4 = RandomForestClassifier(n_estimators=100, class_weight="balanced", random_state=42)
# Random Forest with Feature Selection via max_features
rf5 = RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=42)
# Random Forest with Min Samples per Leaf
rf6 = RandomForestClassifier(n_estimators=100, min_samples_leaf=5, random_state=42)
# Random Forest with Out-of-Bag Error (OOB)
rf7 = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)
# Random Forest with Bootstrap Disabled
rf8 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# Random Forest with High Number of Trees and Limited Depth
rf9 = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7)
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8)
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb8 = XGBClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb9 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)


pipeline.classifiers = [rf1, rf2, rf3, rf4, rf5, rf6, rf7, rf8, rf9,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9, xgb10, xgb11]
pipeline.fitted_classifiers = {}

In [5]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_hsv_channel_0_mean', 0.029587400504046887), ('color_moments_lab_channel_0_skew', 0.02591669611055424), ('var_rgb_channel_2', 0.017263259389507683), ('var_lab_channel_0', 0.016797216260174656), ('color_moments_rgb_channel_1_mean', 0.011146676213786112), ('gradient_magnitude_std', 0.010253598383538523), ('var_rgb_channel_0', 0.00945869122532415), ('color_moments_lab_channel_1_std', 0.009171744156913707), ('color_moments_rgb_channel_0_std', 0.009099427626501172), ('color_moments_rgb_channel_1_skew', 0.008610169997555343)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 6.634038925170898 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_hsv_channel_0_mean', 0.02936956914735586), ('color_moments_lab_channel_0_skew'

In [6]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [7]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics())
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7850393700787401, 'precision': 0.7418266345462997, 'recall': 0.568067663864521, 'f1': 0.553180435923755, 'kappa': 0.5933459380336668}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7960629921259843, 'precision': 0.8602862841129989, 'recall': 0.5736732154249332, 'f1': 0.5546106606294477, 'kappa': 0.6139971647984377}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7968503937007874, 'precision': 0.8607524845791993, 'recall': 0.5741648575094958, 'f1': 0.5550892440226041, 'kappa': 0.6154875232355095}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7913385826771654, 'precision': 0.6502032686553941, 'recall': 0.5754106616637894, 'f1': 0.563811999822456, 'kappa': 0.6064442344752745}
INFO:utils.ml:Metrics for classifi

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.785039,0.741827,0.568068,0.55318,0.593346
RandomForestClassifier1,0.796063,0.860286,0.573673,0.554611,0.613997
RandomForestClassifier2,0.79685,0.860752,0.574165,0.555089,0.615488
RandomForestClassifier3,0.791339,0.650203,0.575411,0.563812,0.606444
RandomForestClassifier4,0.791339,0.690767,0.569124,0.550985,0.604323
RandomForestClassifier5,0.784252,0.518975,0.561112,0.539203,0.590583
RandomForestClassifier6,0.791339,0.690767,0.569124,0.550985,0.604323
RandomForestClassifier7,0.809449,0.813621,0.594604,0.588901,0.639883
RandomForestClassifier8,0.751181,0.497115,0.532644,0.5137,0.522976


___
Balance the data

In [9]:
# Backup
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [10]:
print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

(5082, 408)
(5082, 408)
(5082,)
(5082,)


In [11]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(pipeline.feature_matrix, pipeline.labels)

In [12]:
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_hsv_channel_0_mean', 0.026008558530876254), ('color_moments_lab_channel_0_skew', 0.017623271470553665), ('var_lab_channel_0', 0.011123282387172664), ('var_rgb_channel_2', 0.010996243209384441), ('gradient_magnitude_std', 0.010303261024935373), ('var_rgb_channel_1', 0.009393865760132892), ('color_moments_lab_channel_1_std', 0.008390497843031395), ('color_moments_rgb_channel_1_skew', 0.008025859659890075), ('color_moments_rgb_channel_2_std', 0.0077374636018902295), ('var_lab_channel_2', 0.007260742943628485)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.069393634796143 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 19.57318377494812 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 31.78447914123535 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier3: [('color_moments_hsv_channel_0_mean', 0.024155399169778127), ('color_moments_lab_channel_0_skew', 0.02025170583166333), ('var_rgb_channel_2', 0.013040587890718859), ('var_lab_channel_0', 0.011464254001536677), ('gradient_magnitude_std', 0.009401918247800605), ('color_moments_rgb_channel_1_skew', 0.008086593058318756), ('color_moments_lab_channel_2_mean', 0.007834743733502418), ('color_moments_rgb_channel_1_mean', 0.007746993745761491), ('var_lab_channel_2', 0.007509421207016721), ('color_moments_lab_channel_1_std', 0.00750908722864548)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 11.094903945922852 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4
INFO:utils.ml:Top 10 features for RandomForestClassifier4: [('color_moments_hsv_channel_0_mean', 0.024155399169778127), ('color_moments_lab_channel_0_skew', 0.02025170583166333), ('var_rgb_channel_2', 0.013040587890718859), ('var_lab_channel_0', 0

list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier8: [('color_moments_hsv_channel_0_mean', 0.06029093704211354), ('color_moments_lab_channel_0_skew', 0.04834247433876437), ('var_rgb_channel_2', 0.02990822773048195), ('var_lab_channel_0', 0.02383348012541294), ('lbp_rad1_bins64_58', 0.018810851859491218), ('var_lab_channel_2', 0.018680436134708142), ('gradient_magnitude_std', 0.01849518426646217), ('var_rgb_channel_1', 0.015099386821374565), ('color_moments_rgb_channel_1_skew', 0.014410524878269916), ('color_moments_lab_channel_1_std', 0.013566729316456232)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 4.2423272132873535 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9
INFO:utils.ml:Top 10 features for XGBClassifier9: [('var_lab_channel_0', 0.015200589), ('color_moments_hsv_channel_0_mean', 0.014973395), ('lbp_rad1_bins16_15', 0.014307985), ('lbp_rad2_bins16_4', 0.0135518275), ('lbp_rad1_bins16_16', 0.011419139), ('lbp_rad3_bins32_14', 0.010721969), ('

In [13]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [14]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7889763779527559, 'precision': 0.6722362722594258, 'recall': 0.6793865980459843, 'f1': 0.6743538792149902, 'kappa': 0.6255550225969557}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7921259842519685, 'precision': 0.6777102532653937, 'recall': 0.6838745177156249, 'f1': 0.6797277472567019, 'kappa': 0.6303205916987523}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7929133858267716, 'precision': 0.6806240272172474, 'recall': 0.6825556163018988, 'f1': 0.6801041156742126, 'kappa': 0.6314120248778399}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7858267716535433, 'precision': 0.6701126693036078, 'recall': 0.6853394876184585, 'f1': 0.6759714803498548, 'kappa': 0.6215733015494638}
INFO:utils.ml:Metrics for class

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.788976,0.672236,0.679387,0.674354,0.625555
RandomForestClassifier1,0.792126,0.67771,0.683875,0.679728,0.630321
RandomForestClassifier2,0.792913,0.680624,0.682556,0.680104,0.631412
RandomForestClassifier3,0.785827,0.670113,0.685339,0.675971,0.621573
RandomForestClassifier4,0.785827,0.670113,0.685339,0.675971,0.621573
RandomForestClassifier5,0.775591,0.655982,0.674827,0.663281,0.604478
RandomForestClassifier6,0.785827,0.670113,0.685339,0.675971,0.621573
RandomForestClassifier7,0.798425,0.681927,0.660851,0.668063,0.635328
RandomForestClassifier8,0.662205,0.568322,0.582107,0.554503,0.441287


___
Borderline SMOTE

In [15]:
from imblearn.over_sampling import BorderlineSMOTE
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

pipeline.fitted_classifiers = {}

(5082, 408)
(8139, 408)
(5082,)
(8139,)


In [16]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.071761846542358 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 16.740445852279663 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier2: [('color_moments_lab_channel_0_skew', 0.019820747437701507), ('color_moments_hsv_channel_0_mean', 0.017707205919324016), ('var_lab_channel_0', 0.012777465256529943), ('var_rgb_channel_2', 0.01134323524320351), ('color_moments_hsv_channel_2_mean', 0.008869025967237399), ('var_rgb_channel_1', 0.00879990342702696), ('color_moments_lab_channel_1_std', 0.008578474439866984), ('color_moments_rgb_channel_1_skew', 0.008531379424008972), ('gradient_magnitude_std', 0.008290083907492335), ('var_lab_channel_2', 0.007892913773635008)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 28.788798332214355 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3
INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 11.504178524017334 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 11.624989748001099 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 9.622249126434326 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 11.733146667480469 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 17.101027488708496 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 4.09400486946106 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier9: [('lbp_rad2_bins16_12', 0.027490644), ('color_moments_hsv_channel_0_mean', 0.014926342), ('lbp_rad2_bins16_4', 0.0141115915), ('lbp_rad1_bins32_2', 0.013780878), ('var_lab_channel_0', 0.012862133), ('lbp_rad1_bins16_15', 0.011498478), ('lbp_rad1_bins64_9', 0.009550598), ('lbp_rad1_bins64_58', 0.00904974), ('color_moments_lab_channel_1_std', 0.009013804), ('color_moments_lab_channel_0_skew', 0.008985067)]
INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 13.546777248382568 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10
INFO:utils.ml:Top 10 features for XGBClassifier10: [('lbp_rad2_bins16_12', 0.026554331), ('lbp_rad1_bins32_2', 0.014974523), ('color_moments_hsv_channel_0_mean', 0.014326155), ('lbp_rad2_bins16_4', 0.014136703), ('var_lab_channel_0', 0.0133110285), ('lbp_rad1_bins16_15', 0.0121044805), ('lbp_rad1_bins64_9', 0.009767645), ('color_moments_lab_channel_1_std', 0.009142162), ('color_moments_lab_channel_0_skew'

In [17]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([2, 0, 2, ..., 1, 0, 2]),
 'XGBClassifier9': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [18]:
df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.778740157480315, 'precision': 0.6666178939249581, 'recall': 0.6564790512076549, 'f1': 0.6595704345375312, 'kappa': 0.6030367074527252}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7858267716535433, 'precision': 0.674801338255859, 'recall': 0.6670127443811523, 'f1': 0.6693817306264792, 'kappa': 0.6161146499646608}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7905511811023622, 'precision': 0.6848449875266764, 'recall': 0.6708511066799049, 'f1': 0.6755753052859143, 'kappa': 0.6236626220406126}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7732283464566929, 'precision': 0.6525413496713812, 'recall': 0.6440518869553403, 'f1': 0.6457977870145115, 'kappa': 0.5942041848802894}
INFO:utils.ml:Metrics for classif

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.77874,0.666618,0.656479,0.65957,0.603037
RandomForestClassifier1,0.785827,0.674801,0.667013,0.669382,0.616115
RandomForestClassifier2,0.790551,0.684845,0.670851,0.675575,0.623663
RandomForestClassifier3,0.773228,0.652541,0.644052,0.645798,0.594204
RandomForestClassifier4,0.773228,0.652541,0.644052,0.645798,0.594204
RandomForestClassifier5,0.769291,0.655276,0.66443,0.658571,0.590847
RandomForestClassifier6,0.773228,0.652541,0.644052,0.645798,0.594204
RandomForestClassifier7,0.808661,0.7058,0.678182,0.687332,0.654231
RandomForestClassifier8,0.655906,0.568903,0.590958,0.555055,0.432277


In [22]:
pipeline.fitted_classifiers["XGBClassifier19"]

confusion_matrix(pipeline.predictions["GT"], pipeline.predictions["XGBClassifier9"])

array([[424,  52,  22],
       [ 67, 600,  11],
       [ 38,  21,  35]], dtype=int64)

___
SMOTETomek

In [21]:
from imblearn.combine import SMOTETomek

smote_tomek = SMOTETomek(sampling_strategy='auto', random_state=42)


pipeline.feature_matrix, pipeline.labels = smote_tomek.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

pipeline.fitted_classifiers = {}

(5082, 408)
(7313, 408)
(5082,)
(7313,)


In [22]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_lab_channel_0_skew', 0.02348399667068677), ('color_moments_hsv_channel_0_mean', 0.021685100888737506), ('var_rgb_channel_2', 0.013466685511191803), ('var_lab_channel_0', 0.011273139645395643), ('gradient_magnitude_std', 0.009455156180856468), ('var_lab_channel_2', 0.009016029113925229), ('color_moments_lab_channel_1_std', 0.009012604432114048), ('color_moments_rgb_channel_1_skew', 0.00830917831133495), ('var_rgb_channel_1', 0.007748303523353629), ('color_moments_lab_channel_0_mean', 0.007688567101377091)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 10.850361585617065 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_hsv_channel_0_mean', 0.023212922020289208), ('color_moments_lab_channel_0_skew', 0.01936895

list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier3: [('lbp_rad2_bins16_4', 0.019037437), ('color_moments_hsv_channel_0_mean', 0.016402645), ('lbp_rad2_bins32_0', 0.014515458), ('lbp_rad1_bins64_26', 0.013283062), ('var_lab_channel_0', 0.01299207), ('lbp_rad3_bins64_10', 0.0103029655), ('lbp_rad1_bins32_5', 0.0092928), ('lbp_rad2_bins16_13', 0.009221017), ('lbp_rad2_bins32_23', 0.0089618955), ('lbp_rad1_bins64_31', 0.008737473)]
INFO:utils.ml:Fitted classifier: XGBClassifier3; Done in 15.687693357467651 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4
INFO:utils.ml:Top 10 features for XGBClassifier4: [('lbp_rad2_bins16_4', 0.019077742), ('color_moments_hsv_channel_0_mean', 0.0157324), ('lbp_rad2_bins32_0', 0.014432087), ('lbp_rad1_bins64_26', 0.013589403), ('var_lab_channel_0', 0.012795185), ('lbp_rad3_bins64_10', 0.010317609), ('lbp_rad1_bins32_5', 0.010042402), ('lbp_rad2_bins16_13', 0.009111489), ('color_moments_lab_channel_0_skew', 0.009054037), ('lbp_rad1_bins64_31', 0.008760

In [23]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: XGBClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier11


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier3': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier4': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier6': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([2, 0, 2, ..., 1, 0, 0], dtype=int64)}

In [25]:
df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7858267716535433, 'precision': 0.6635313225942864, 'recall': 0.6759984140415298, 'f1': 0.6686181637152407, 'kappa': 0.6203388638301988}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7803149606299212, 'precision': 0.6574319263895576, 'recall': 0.6749005688227081, 'f1': 0.6641131137164314, 'kappa': 0.6127100785225862}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.784251968503937, 'precision': 0.6587898919380095, 'recall': 0.6683731095851427, 'f1': 0.6627448862263009, 'kappa': 0.6166574130379223}
INFO:utils.ml:Metrics for classifier XGBClassifier3: {'accuracy': 0.8173228346456692, 'precision': 0.6899025243801352, 'recall': 0.68451248034881, 'f1': 0.686822576634054, 'kappa': 0.6709712289666816}
INFO:utils.ml:Metrics for classifier XGBClas

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.785827,0.663531,0.675998,0.668618,0.620339
RandomForestClassifier1,0.780315,0.657432,0.674901,0.664113,0.61271
RandomForestClassifier2,0.784252,0.65879,0.668373,0.662745,0.616657
XGBClassifier3,0.817323,0.689903,0.684512,0.686823,0.670971
XGBClassifier4,0.812598,0.6837,0.681207,0.682223,0.663053
XGBClassifier5,0.814173,0.687121,0.685423,0.686067,0.666102
XGBClassifier6,0.77874,0.637057,0.653636,0.642081,0.612138
XGBClassifier7,0.822047,0.69223,0.685119,0.688173,0.678997
XGBClassifier8,0.807874,0.678574,0.677902,0.678056,0.655008
