In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train"

In [2]:
percent = 100
random = False
batch_size = 48
th = 0.01

# Initialize the FactoryLoader
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=np.nan)
factory.resize((175,175))
factory.hair_removal()
factory.normalize2float()

factory_loader = FactoryLoader(path=VAL_PATH, batch_size=batch_size, factory=factory, percentage=percent, shuffle=random)

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()

strategy.add_extractor(GradientExtractor(threshold=th))   # Add variance feature

# strategy.add_extractor(MeanExtractor())  # Add mean feature
# strategy.add_extractor(StdExtractor())   # Add standard deviation feature
strategy.add_extractor(VarExtractor(threshold=th))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor(threshold=th))   # Add color moments feature


# strategy.add_extractor(MeanExtractor("lab"))  # Add mean feature
# strategy.add_extractor(StdExtractor("lab"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("lab", threshold=th))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("lab", threshold=th))   # Add color moments feature

# strategy.add_extractor(MeanExtractor("hsv"))  # Add mean feature
# strategy.add_extractor(StdExtractor("hsv"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("hsv", threshold=th))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("hsv", threshold=th))   # Add color moments feature

strategy.add_extractor(LBPExtractor(radius=1, n_points=16))
strategy.add_extractor(LBPExtractor(radius=2, n_points=16))
strategy.add_extractor(LBPExtractor(radius=3, n_points=16))
strategy.add_extractor(LBPExtractor(radius=1, n_points=32))
strategy.add_extractor(LBPExtractor(radius=2, n_points=32))
strategy.add_extractor(LBPExtractor(radius=3, n_points=32))
strategy.add_extractor(LBPExtractor(radius=1, n_points=64))
strategy.add_extractor(LBPExtractor(radius=2, n_points=64))
strategy.add_extractor(LBPExtractor(radius=3, n_points=64))

strategy.add_extractor(GLCMExtractor(properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation',]))

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[], percentage=percent, verbose=True, shuffle=random, batch_size=batch_size)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/106 batches.
Processed 10/106 batches.
Processed 15/106 batches.
Processed 20/106 batches.
Processed 25/106 batches.
Processed 30/106 batches.
Processed 35/106 batches.
Processed 40/106 batches.
Processed 45/106 batches.
Processed 50/106 batches.
Processed 55/106 batches.
Processed 60/106 batches.
Processed 65/106 batches.
Processed 70/106 batches.
Processed 75/106 batches.
Processed 80/106 batches.
Processed 85/106 batches.
Processed 90/106 batches.
Processed 95/106 batches.
Processed 100/106 batches.
Processed 105/106 batches.


INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 106/106 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

rf1 = RandomForestClassifier(n_estimators=100)
rf2 = RandomForestClassifier(n_estimators=150)
rf3 = RandomForestClassifier(n_estimators=250)

# Random Forest with Class Weight Balancing
rf4 = RandomForestClassifier(n_estimators=100, class_weight="balanced", random_state=42)
# Random Forest with Feature Selection via max_features
rf5 = RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=42)
# Random Forest with Min Samples per Leaf
rf6 = RandomForestClassifier(n_estimators=100, min_samples_leaf=5, random_state=42)
# Random Forest with Out-of-Bag Error (OOB)
rf7 = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)
# Random Forest with Bootstrap Disabled
rf8 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# Random Forest with High Number of Trees and Limited Depth
rf9 = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7)
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8)
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb8 = XGBClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb9 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)


pipeline.classifiers = [rf1, rf2, rf3, rf4, rf5, rf6, rf7, rf8, rf9,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9, xgb10, xgb11]
pipeline.fitted_classifiers = {}

In [5]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_lab_channel_0_skew', 0.037478817790159116), ('color_moments_hsv_channel_0_mean', 0.030887996724097196), ('var_rgb_channel_2', 0.014398198878147472), ('color_moments_rgb_channel_1_mean', 0.012574996170911497), ('var_lab_channel_0', 0.012036055001947168), ('color_moments_rgb_channel_0_std', 0.011218855393035609), ('var_rgb_channel_0', 0.011210819327735844), ('color_moments_lab_channel_1_std', 0.010488910369226714), ('var_rgb_channel_1', 0.009912286915170508), ('color_moments_lab_channel_0_mean', 0.009568225403615934)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 6.528913974761963 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_hsv_channel_0_mean', 0.03546070517459316), ('color_moments_lab_channel_0_skew', 0

In [6]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier9': array([0, 0, 1, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 1, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 1, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([0, 0, 1, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 1, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [7]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics())
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8007874015748031, 'precision': 0.8631166627800478, 'recall': 0.5827319823447433, 'f1': 0.5706703242517351, 'kappa': 0.6232169911770087}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7952755905511811, 'precision': 0.859222349013144, 'recall': 0.5784019779614283, 'f1': 0.5665792304996756, 'kappa': 0.6127104171749207}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8047244094488188, 'precision': 0.8661529593786973, 'recall': 0.5832019473109913, 'f1': 0.5674049065454599, 'kappa': 0.6306442382565133}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7874015748031497, 'precision': 0.7074605189786295, 'recall': 0.5792390676116882, 'f1': 0.5734422491823589, 'kappa': 0.5995450065165904}
INFO:utils.ml:Metrics for classi

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.800787,0.863117,0.582732,0.57067,0.623217
RandomForestClassifier1,0.795276,0.859222,0.578402,0.566579,0.61271
RandomForestClassifier2,0.804724,0.866153,0.583202,0.567405,0.630644
RandomForestClassifier3,0.787402,0.707461,0.579239,0.573442,0.599545
RandomForestClassifier4,0.792126,0.774522,0.576969,0.564908,0.607435
RandomForestClassifier5,0.792126,0.52403,0.567627,0.544801,0.606691
RandomForestClassifier6,0.792126,0.774522,0.576969,0.564908,0.607435
RandomForestClassifier7,0.812598,0.842912,0.615253,0.625219,0.647505
RandomForestClassifier8,0.759055,0.502641,0.538804,0.519604,0.538368


___
Balance the data

In [9]:
# Backup
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [10]:
print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

(5082, 408)
(5082, 408)
(5082,)
(5082,)


In [11]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(pipeline.feature_matrix, pipeline.labels)

In [12]:
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 18.263119220733643 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 27.036556482315063 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 46.11542773246765 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 18.347679615020752 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 20.426493883132935 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 17.104398488998413 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 20.67967438697815 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 29.94664430618286 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 6.608959913253784 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier9: [('lbp_rad3_bins32_20', 0.036021594), ('lbp_rad2_bins16_4', 0.015654193), ('color_moments_hsv_channel_0_mean', 0.015541043), ('lbp_rad1_bins64_63', 0.012634068), ('var_lab_channel_0', 0.0118464), ('lbp_rad2_bins32_0', 0.011057287), ('lbp_rad1_bins64_58', 0.010788531), ('color_moments_lab_channel_0_skew', 0.008728428), ('lbp_rad1_bins32_0', 0.008696064), ('lbp_rad1_bins16_11', 0.008596509)]
INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 22.958089113235474 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10
INFO:utils.ml:Top 10 features for XGBClassifier10: [('lbp_rad3_bins32_20', 0.035467196), ('lbp_rad2_bins16_4', 0.015675148), ('color_moments_hsv_channel_0_mean', 0.015149022), ('lbp_rad1_bins64_63', 0.013032092), ('var_lab_channel_0', 0.0114054885), ('lbp_rad2_bins32_0', 0.011271506), ('lbp_rad1_bins64_58', 0.010983816), ('lbp_rad1_bins32_0', 0.009170669), ('color_moments_lab_channel_0_skew', 0.008914389), ('lbp_rad1_bi

In [13]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassi

{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier9': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [14]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7952755905511811, 'precision': 0.6816273600288517, 'recall': 0.6818645951407462, 'f1': 0.6798966726278612, 'kappa': 0.6356534404378338}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.789763779527559, 'precision': 0.6649117000714871, 'recall': 0.6727031139684577, 'f1': 0.667982346922638, 'kappa': 0.6260999007608337}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7968503937007874, 'precision': 0.6744010995554145, 'recall': 0.6773055946877954, 'f1': 0.6752572377310052, 'kappa': 0.6372296327788702}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.789763779527559, 'precision': 0.6691301363488296, 'recall': 0.6734139218015601, 'f1': 0.670107173269427, 'kappa': 0.6259349145063431}
INFO:utils.ml:Metrics for classifie

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.795276,0.681627,0.681865,0.679897,0.635653
RandomForestClassifier1,0.789764,0.664912,0.672703,0.667982,0.6261
RandomForestClassifier2,0.79685,0.674401,0.677306,0.675257,0.63723
RandomForestClassifier3,0.789764,0.66913,0.673414,0.670107,0.625935
RandomForestClassifier4,0.789764,0.66913,0.673414,0.670107,0.625935
RandomForestClassifier5,0.777165,0.648019,0.660894,0.653026,0.6059
RandomForestClassifier6,0.789764,0.66913,0.673414,0.670107,0.625935
RandomForestClassifier7,0.800787,0.670366,0.657283,0.661275,0.640977
RandomForestClassifier8,0.665354,0.579853,0.600623,0.563604,0.44956


___
Borderline SMOTE

In [15]:
from imblearn.over_sampling import BorderlineSMOTE
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

pipeline.fitted_classifiers = {}

(5082, 408)
(8139, 408)
(5082,)
(8139,)


In [16]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_hsv_channel_0_mean', 0.022479863507657172), ('color_moments_lab_channel_0_skew', 0.01914817076035279), ('var_rgb_channel_2', 0.011903805996527428), ('var_lab_channel_0', 0.011571798702222883), ('color_moments_lab_channel_1_std', 0.009615274825500598), ('var_lab_channel_2', 0.009100913144114643), ('var_rgb_channel_1', 0.008655537101999585), ('color_moments_rgb_channel_1_skew', 0.008292855000602506), ('color_moments_lab_channel_0_mean', 0.008247429425519122), ('color_moments_rgb_channel_1_mean', 0.008140613091264702)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 19.095600366592407 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 30.11712074279785 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier2: [('color_moments_hsv_channel_0_mean', 0.020181560772535283), ('color_moments_lab_channel_0_skew', 0.01980511962289161), ('var_rgb_channel_2', 0.013376170249881296), ('var_lab_channel_0', 0.012876845800746606), ('var_rgb_channel_1', 0.010006570256993903), ('var_lab_channel_2', 0.008681272311879892), ('gradient_magnitude_std', 0.008580129876937058), ('color_moments_lab_channel_1_std', 0.008244604920297894), ('color_moments_rgb_channel_1_skew', 0.008133199375594171), ('color_moments_lab_channel_0_mean', 0.008078184515446127)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 50.68040084838867 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3
INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 19.31105947494507 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 18.360855102539062 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 15.865187644958496 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 19.369632482528687 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 28.48804473876953 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 6.702448606491089 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier9: [('lbp_rad3_bins32_20', 0.023834197), ('color_moments_hsv_channel_2_std', 0.019076854), ('lbp_rad2_bins16_4', 0.015954211), ('color_moments_hsv_channel_0_mean', 0.015064743), ('lbp_rad2_bins32_19', 0.014769614), ('var_lab_channel_0', 0.014143953), ('lbp_rad1_bins64_63', 0.014087767), ('color_moments_lab_channel_1_std', 0.010153811), ('lbp_rad1_bins64_58', 0.009402944), ('lbp_rad2_bins32_0', 0.009031722)]
INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 24.142266273498535 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10
INFO:utils.ml:Top 10 features for XGBClassifier10: [('lbp_rad3_bins32_20', 0.023440376), ('color_moments_hsv_channel_2_std', 0.019347608), ('lbp_rad2_bins16_4', 0.017258983), ('lbp_rad2_bins32_19', 0.014646686), ('color_moments_hsv_channel_0_mean', 0.014599767), ('lbp_rad1_bins64_63', 0.014055089), ('var_lab_channel_0', 0.013549805), ('color_moments_lab_channel_1_std', 0.010140001), ('lbp_rad1_bins64_58',

In [17]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 2, ..., 1, 0, 2]),
 'XGBClassifier9': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [18]:
df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7929133858267716, 'precision': 0.6884935006654184, 'recall': 0.6825556163018988, 'f1': 0.6836099715708404, 'kappa': 0.6301107419712071}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7937007874015748, 'precision': 0.6785804424822621, 'recall': 0.6737061848095326, 'f1': 0.6746201716999911, 'kappa': 0.6309286126270022}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.794488188976378, 'precision': 0.6767994768811763, 'recall': 0.6682666144399351, 'f1': 0.670546195820052, 'kappa': 0.6316770821295548}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7952755905511811, 'precision': 0.6789563213884153, 'recall': 0.6721681176472664, 'f1': 0.6733981013283948, 'kappa': 0.6340755253930751}
INFO:utils.ml:Metrics for classif

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.792913,0.688494,0.682556,0.68361,0.630111
RandomForestClassifier1,0.793701,0.67858,0.673706,0.67462,0.630929
RandomForestClassifier2,0.794488,0.676799,0.668267,0.670546,0.631677
RandomForestClassifier3,0.795276,0.678956,0.672168,0.673398,0.634076
RandomForestClassifier4,0.795276,0.678956,0.672168,0.673398,0.634076
RandomForestClassifier5,0.779528,0.657932,0.665956,0.66067,0.608827
RandomForestClassifier6,0.795276,0.678956,0.672168,0.673398,0.634076
RandomForestClassifier7,0.790551,0.675961,0.656823,0.662379,0.62308
RandomForestClassifier8,0.666142,0.582032,0.611378,0.567466,0.450073
