In [7]:
import os

import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

# Root folder of the multiclass dataset. Set the CADX_DATASET_ROOT environment
# variable to run the notebook on another machine; the default keeps the
# original author's local checkout so existing runs are unaffected.
DATASET_ROOT = os.environ.get(
    "CADX_DATASET_ROOT",
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass",
)
# Validation and training splits live in sibling sub-folders of the root.
VAL_PATH = os.path.join(DATASET_ROOT, "val")
TRAIN_PATH = os.path.join(DATASET_ROOT, "train")

In [14]:
# --- Run configuration -------------------------------------------------------
percent = 100      # forwarded as `percentage` to the loader and the pipeline
random = False     # forwarded as `shuffle` to the loader and the pipeline
batch_size = 48
th = 0.01          # `threshold` handed to the gradient / variance / colour-moment extractors

# --- Preprocessing chain (steps are registered in this order) -----------------
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=np.nan)
factory.resize((150,150))
factory.hair_removal()
factory.normalize2float()

# Loader over the validation split, sharing the preprocessing chain above.
factory_loader = FactoryLoader(
    path=VAL_PATH,
    batch_size=batch_size,
    factory=factory,
    percentage=percent,
    shuffle=random,
)

# --- Feature extraction strategy ----------------------------------------------
strategy = FeatureExtractionStrategy()

# Gradient features.
strategy.add_extractor(GradientExtractor(threshold=th))

# Per colour space: variance first, then colour moments.
# The standalone Mean/Std extractors are intentionally left out.
for color_space in ("rgb", "lab", "hsv"):
    strategy.add_extractor(VarExtractor(color_space, threshold=th))
    strategy.add_extractor(ColorMomentsExtractor(color_space, threshold=th))

# Local binary patterns at three radii, 64 points each.
for lbp_radius in (1, 2, 3):
    strategy.add_extractor(LBPExtractor(radius=lbp_radius, n_points=64))

# Grey-level co-occurrence matrix texture properties.
glcm_properties = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
strategy.add_extractor(GLCMExtractor(properties=glcm_properties))

# --- Pipeline over the training split; classifiers are attached in a later cell
pipeline = MLPipeline(
    dataset_path=TRAIN_PATH,
    preprocessing_factory=factory,
    feature_strategy=strategy,
    classifiers=[],
    percentage=percent,
    verbose=True,
    shuffle=random,
    batch_size=batch_size,
)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [15]:
# Drop any previously computed feature matrix before extracting again.
# NOTE(review): presumably run_feature_extraction() would otherwise reuse or
# extend the old matrix -- confirm against utils.ml.
pipeline.feature_matrix = None
# Extract features for the training split; progress is reported per batch.
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/106 batches.
Processed 10/106 batches.
Processed 15/106 batches.
Processed 20/106 batches.
Processed 25/106 batches.
Processed 30/106 batches.
Processed 35/106 batches.
Processed 40/106 batches.
Processed 45/106 batches.
Processed 50/106 batches.
Processed 55/106 batches.
Processed 60/106 batches.
Processed 65/106 batches.
Processed 70/106 batches.
Processed 75/106 batches.
Processed 80/106 batches.
Processed 85/106 batches.
Processed 90/106 batches.
Processed 95/106 batches.
Processed 100/106 batches.
Processed 105/106 batches.


INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 106/106 batches.


In [17]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# --- Random forests -----------------------------------------------------------
# rf1/rf2 are seeded so repeated fits produce the same forest and the metric
# tables are comparable between runs. They use a different seed from rf3-rf8
# so they do not collapse into copies of rf3/rf5 (oob_score presumably only
# adds an out-of-bag estimate and does not change the fitted trees).
rf1 = RandomForestClassifier(n_estimators=100, random_state=0)
rf2 = RandomForestClassifier(n_estimators=150, random_state=0)
# 100 trees: bootstrap + out-of-bag score / bootstrap disabled
rf3 = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)
rf4 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# 150 trees: bootstrap + out-of-bag score / bootstrap disabled
rf5 = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=42)
rf6 = RandomForestClassifier(n_estimators=150, bootstrap=False, random_state=42)
# 200 trees: bootstrap + out-of-bag score / bootstrap disabled
rf7 = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rf8 = RandomForestClassifier(n_estimators=200, bootstrap=False, random_state=42)


# --- Gradient boosting --------------------------------------------------------
# xgb1-xgb3 vary only the number of trees; xgb4-xgb11 are hand-picked settings.
xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=3, min_child_weight=4, subsample=0.8, colsample_bytree=0.8)
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7) # 2ND
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8) # THIS
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb8 = XGBClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb9 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)


# The pipeline's logs and prediction keys name each estimator
# "<ClassName><position in this list>": rf1..rf8 become
# RandomForestClassifier0..7 and xgb1..xgb11 become XGBClassifier8..18.
# Keep the order stable, because later cells look predictions up by that key.
pipeline.classifiers = [rf1, rf2, rf3, rf4, rf5, rf6, rf7, rf8,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9, xgb10, xgb11]
# Forget estimators fitted in an earlier run so every classifier is refitted.
pipeline.fitted_classifiers = {}

In [18]:
# Fit every estimator in pipeline.classifiers. The log reports, per estimator,
# the fit time and (when available) its ten most important features.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_lab_channel_0_skew', 0.0342975155693196), ('color_moments_hsv_channel_0_mean', 0.033545375593476374), ('var_lab_channel_0', 0.015602060472905275), ('var_rgb_channel_2', 0.013664066673240832), ('var_rgb_channel_0', 0.012676896424172114), ('gradient_magnitude_std', 0.01261425410469912), ('var_lab_channel_2', 0.01163617256257109), ('color_moments_rgb_channel_1_mean', 0.010941601988387128), ('color_moments_rgb_channel_0_skew', 0.010772026640488099), ('color_moments_lab_channel_2_mean', 0.00972442609331286)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 3.9151217937469482 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_hsv_channel_0_mean', 0.03548735228358175), ('color_moments_lab_channel_0_skew', 0.03335323267

In [19]:
# Predict on the validation split with every fitted classifier. The displayed
# result is a dict holding the ground truth under "GT" plus one label array
# per classifier name.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier8': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([0

In [20]:
import pandas as pd

# Baseline (no resampling) scores. calculate_metrics() is turned into a frame
# whose transpose has one row per classifier and one column per metric.
metrics_by_classifier = pipeline.calculate_metrics()
df = pd.DataFrame(metrics_by_classifier)
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8031496062992126, 'precision': 0.8646826601882781, 'recall': 0.5847400144732574, 'f1': 0.5723959382146234, 'kappa': 0.6279988564626264}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7960629921259843, 'precision': 0.6943406155047445, 'recall': 0.5726070036752798, 'f1': 0.5545305857805858, 'kappa': 0.6129543165366043}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7992125984251969, 'precision': 0.7296168875297315, 'recall': 0.5812155923007917, 'f1': 0.5695056640414822, 'kappa': 0.6202776540112094}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8110236220472441, 'precision': 0.8283856571252693, 'recall': 0.6013411582689259, 'f1': 0.6016475877441535, 'kappa': 0.6429709316490807}
INFO:utils.ml:Metrics for class

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.80315,0.864683,0.58474,0.572396,0.627999
RandomForestClassifier1,0.796063,0.694341,0.572607,0.554531,0.612954
RandomForestClassifier2,0.799213,0.729617,0.581216,0.569506,0.620278
RandomForestClassifier3,0.811024,0.828386,0.601341,0.601648,0.642971
RandomForestClassifier4,0.796063,0.860185,0.576195,0.561008,0.613772
RandomForestClassifier5,0.815748,0.873033,0.608412,0.611405,0.652117
RandomForestClassifier6,0.8,0.863084,0.576487,0.557452,0.621209
RandomForestClassifier7,0.812598,0.87086,0.606445,0.609295,0.646468
XGBClassifier8,0.838583,0.810838,0.649593,0.666988,0.697852


___
Balance the training data (oversample the minority classes with SMOTE)

In [32]:
# Backup of the original (un-resampled) training features and labels.
# The snapshot is taken only once: re-running this cell after a resampling
# cell must not overwrite the pristine data with oversampled data.
# To refresh it after a new feature extraction, set
# pipeline.backup_feature_matrix = None and run this cell again.
if getattr(pipeline, "backup_feature_matrix", None) is None:
    pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
    pipeline.backup_labels = pipeline.labels.copy()

In [33]:
# Sanity check: print the backup shape followed by the live shape, first for
# the feature matrices and then for the label vectors.
for snapshot, current in (
    (pipeline.backup_feature_matrix, pipeline.feature_matrix),
    (pipeline.backup_labels, pipeline.labels),
):
    print(snapshot.shape)
    print(current.shape)

(8139, 252)
(8139, 252)
(8139,)
(8139,)


In [34]:
from imblearn.over_sampling import SMOTE

# Oversample with plain SMOTE, always starting from the backed-up original
# data rather than from whatever pipeline.feature_matrix currently holds.
# This keeps the cell idempotent and stops it from stacking on top of an
# earlier resampling (e.g. the Borderline-SMOTE experiment).
smote = SMOTE(random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

In [35]:
# Discard the estimators fitted on the previous training data ...
pipeline.fitted_classifiers = {}
# ... and refit every classifier on the resampled feature matrix and labels.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_hsv_channel_0_mean', 0.02543376392934141), ('color_moments_lab_channel_0_skew', 0.01942977947940268), ('var_lab_channel_0', 0.012549320403435633), ('var_rgb_channel_2', 0.012251819087321918), ('gradient_magnitude_std', 0.011660451009505602), ('color_moments_hsv_channel_2_mean', 0.009261874273097919), ('var_rgb_channel_1', 0.009235080151220666), ('color_moments_lab_channel_0_mean', 0.00917947815955531), ('color_moments_rgb_channel_1_skew', 0.009086394315590095), ('var_lab_channel_1', 0.008908321611231002)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 8.520516872406006 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 12.410950422286987 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 8.394643306732178 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 12.935394763946533 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 12.600427865982056 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 18.30498695373535 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 23.632499933242798 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 72.63015031814575 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier8: [('lbp_rad2_bins64_29', 0.03065549), ('var_lab_channel_0', 0.023091719), ('color_moments_hsv_channel_0_mean', 0.02250098), ('lbp_rad1_bins64_63', 0.01604515), ('lbp_rad1_bins64_58', 0.014106697), ('color_moments_lab_channel_1_skew', 0.013196864), ('lbp_rad1_bins64_31', 0.013147571), ('color_moments_hsv_channel_2_mean', 0.011781767), ('color_moments_lab_channel_0_skew', 0.011108495), ('color_moments_lab_channel_1_std', 0.010039027)]
INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 29.020006895065308 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9
INFO:utils.ml:Top 10 features for XGBClassifier9: [('lbp_rad2_bins64_29', 0.029764876), ('var_lab_channel_0', 0.02245215), ('color_moments_hsv_channel_0_mean', 0.021784041), ('lbp_rad1_bins64_63', 0.016487056), ('lbp_rad1_bins64_58', 0.014526003), ('color_moments_lab_channel_1_skew', 0.013663395), ('lbp_rad1_bins64_31', 0.012834529), ('color_moments_hsv_channel_2_mean', 0.011563

In [36]:
# Predict on the validation split with the classifiers refitted after
# resampling. The displayed dict holds "GT" plus one label array per classifier.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([2

In [37]:
import pandas as pd

# Re-run validation predictions, then tabulate the scores of the SMOTE-trained
# classifiers with one row per classifier and one column per metric.
pipeline.predict_with_classifiers(VAL_PATH)
df_smote = pd.DataFrame(pipeline.calculate_metrics()).T
df_smote

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'reca

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.786614,0.662426,0.664983,0.662622,0.619766
RandomForestClassifier1,0.79685,0.682785,0.678372,0.679187,0.636654
RandomForestClassifier2,0.799213,0.6936,0.686133,0.688365,0.640341
RandomForestClassifier3,0.810236,0.691184,0.667659,0.674971,0.656714
RandomForestClassifier4,0.799213,0.688686,0.685956,0.686167,0.641057
RandomForestClassifier5,0.811811,0.703783,0.677805,0.686315,0.659785
RandomForestClassifier6,0.800787,0.69257,0.68424,0.686748,0.643073
RandomForestClassifier7,0.819685,0.719698,0.686842,0.697324,0.673775
XGBClassifier8,0.829921,0.723607,0.699732,0.709228,0.691419


In [41]:
# Confusion matrix on the validation split for "XGBClassifier15", i.e. the
# estimator at position 15 of pipeline.classifiers (xgb8).
# Rows are the true classes, columns the predicted classes.
confusion_matrix(
    y_true=pipeline.predictions["GT"],
    y_pred=pipeline.predictions["XGBClassifier15"],
)

array([[430,  47,  21],
       [ 60, 602,  16],
       [ 42,  19,  33]], dtype=int64)

___
Borderline SMOTE

In [26]:
from imblearn.over_sampling import BorderlineSMOTE

# Oversample the backed-up original training data with Borderline-SMOTE and
# install the result as the pipeline's working feature matrix and labels.
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)
resampled_features, resampled_labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)
pipeline.feature_matrix = resampled_features
pipeline.labels = resampled_labels

# Shapes before and after resampling: features first, then labels.
for before, after in (
    (pipeline.backup_feature_matrix, pipeline.feature_matrix),
    (pipeline.backup_labels, pipeline.labels),
):
    print(before.shape)
    print(after.shape)

# Force a refit on the resampled data the next time fit_classifiers() runs.
pipeline.fitted_classifiers = {}

(5082, 252)
(8139, 252)
(5082,)
(8139,)


In [27]:
# Refit every classifier on the Borderline-SMOTE resampled training data
# (pipeline.fitted_classifiers was emptied in the previous cell).
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 8.159039974212646 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 12.358706712722778 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 8.432939291000366 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 12.227961778640747 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 12.878711700439453 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 19.10480523109436 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 17.398176431655884 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 29.90160584449768 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier8: [('lbp_rad2_bins64_29', 0.03065549), ('var_lab_channel_0', 0.023091719), ('color_moments_hsv_channel_0_mean', 0.02250098), ('lbp_rad1_bins64_63', 0.01604515), ('lbp_rad1_bins64_58', 0.014106697), ('color_moments_lab_channel_1_skew', 0.013196864), ('lbp_rad1_bins64_31', 0.013147571), ('color_moments_hsv_channel_2_mean', 0.011781767), ('color_moments_lab_channel_0_skew', 0.011108495), ('color_moments_lab_channel_1_std', 0.010039027)]
INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 13.529041767120361 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9
INFO:utils.ml:Top 10 features for XGBClassifier9: [('lbp_rad2_bins64_29', 0.029764876), ('var_lab_channel_0', 0.02245215), ('color_moments_hsv_channel_0_mean', 0.021784041), ('lbp_rad1_bins64_63', 0.016487056), ('lbp_rad1_bins64_58', 0.014526003), ('color_moments_lab_channel_1_skew', 0.013663395), ('lbp_rad1_bins64_31', 0.012834529), ('color_moments_hsv_channel_2_mean', 0.011563

In [28]:
# Predict on the validation split with the Borderline-SMOTE-trained
# classifiers. The displayed dict holds "GT" plus one label array per classifier.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([2

In [29]:
# Scores of the Borderline-SMOTE-trained classifiers, restricted to the listed
# metrics, with one row per classifier and one column per metric.
requested_metrics = ["accuracy", "precision", "recall", "f1", "kappa"]
df_borderline_smote = pd.DataFrame(pipeline.calculate_metrics(requested_metrics)).T
df_borderline_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7866141732283465, 'precision': 0.6611891561436692, 'recall': 0.6588741207218883, 'f1': 0.6588421825644163, 'kappa': 0.6183640339399531}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7968503937007874, 'precision': 0.681759451032252, 'recall': 0.6868243702229996, 'f1': 0.6833378203214346, 'kappa': 0.6383459675674721}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7992125984251969, 'precision': 0.693600276913613, 'recall': 0.6861333490618472, 'f1': 0.6883651829363359, 'kappa': 0.6403409015188197}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8102362204724409, 'precision': 0.6911836886634285, 'recall': 0.6676586468890688, 'f1': 0.6749709746451851, 'kappa': 0.6567136390350701}
INFO:utils.ml:Metrics for classif

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

INFO:utils.ml:Metrics for classifier XGBClassifier17: {'accuracy': 0.8338582677165355, 'precision': 0.7249014271694986, 'recall': 0.694981142167364, 'f1': 0.7056158589207094, 'kappa': 0.6982890624296304}
INFO:utils.ml:Metrics for classifier XGBClassifier18: {'accuracy': 0.8330708661417323, 'precision': 0.720582394349481, 'recall': 0.6941340961662504, 'f1': 0.7037801281975685, 'kappa': 0.6970733441644652}


cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value


Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.786614,0.661189,0.658874,0.658842,0.618364
RandomForestClassifier1,0.79685,0.681759,0.686824,0.683338,0.638346
RandomForestClassifier2,0.799213,0.6936,0.686133,0.688365,0.640341
RandomForestClassifier3,0.810236,0.691184,0.667659,0.674971,0.656714
RandomForestClassifier4,0.799213,0.688686,0.685956,0.686167,0.641057
RandomForestClassifier5,0.811811,0.703783,0.677805,0.686315,0.659785
RandomForestClassifier6,0.800787,0.69257,0.68424,0.686748,0.643073
RandomForestClassifier7,0.819685,0.719698,0.686842,0.697324,0.673775
XGBClassifier8,0.829921,0.723607,0.699732,0.709228,0.691419


In [31]:
# Confusion matrix on the validation split for "XGBClassifier15", i.e. the
# estimator at position 15 of pipeline.classifiers (xgb8).
# Rows are the true classes, columns the predicted classes.
confusion_matrix(
    y_true=pipeline.predictions["GT"],
    y_pred=pipeline.predictions["XGBClassifier15"],
)

array([[430,  47,  21],
       [ 60, 602,  16],
       [ 42,  19,  33]], dtype=int64)