In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train"

In [2]:
percent = 100
random = False
batch_size = 48

# Initialize the FactoryLoader
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=np.nan)
factory.resize((150,150))
factory.hair_removal()
factory.normalize2float()

factory_loader = FactoryLoader(path=VAL_PATH, batch_size=batch_size, factory=factory, percentage=percent, shuffle=random)

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()

strategy.add_extractor(GradientExtractor())   # Add variance feature

# strategy.add_extractor(MeanExtractor())  # Add mean feature
# strategy.add_extractor(StdExtractor())   # Add standard deviation feature
strategy.add_extractor(VarExtractor())   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor())   # Add color moments feature


# strategy.add_extractor(MeanExtractor("lab"))  # Add mean feature
# strategy.add_extractor(StdExtractor("lab"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("lab"))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("lab"))   # Add color moments feature

# strategy.add_extractor(MeanExtractor("hsv"))  # Add mean feature
# strategy.add_extractor(StdExtractor("hsv"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("hsv"))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("hsv"))   # Add color moments feature

strategy.add_extractor(LBPExtractor(radius=1, n_points=16))
strategy.add_extractor(LBPExtractor(radius=2, n_points=16))
strategy.add_extractor(LBPExtractor(radius=3, n_points=16))
strategy.add_extractor(LBPExtractor(radius=1, n_points=32))
strategy.add_extractor(LBPExtractor(radius=2, n_points=32))
strategy.add_extractor(LBPExtractor(radius=3, n_points=32))
strategy.add_extractor(LBPExtractor(radius=1, n_points=64))
strategy.add_extractor(LBPExtractor(radius=2, n_points=64))
strategy.add_extractor(LBPExtractor(radius=3, n_points=64))

strategy.add_extractor(GLCMExtractor(properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation',]))

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[], percentage=percent, verbose=True, shuffle=random, batch_size=batch_size)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/106 batches.
Processed 10/106 batches.
Processed 15/106 batches.
Processed 20/106 batches.
Processed 25/106 batches.
Processed 30/106 batches.
Processed 35/106 batches.
Processed 40/106 batches.
Processed 45/106 batches.
Processed 50/106 batches.
Processed 55/106 batches.
Processed 60/106 batches.
Processed 65/106 batches.
Processed 70/106 batches.
Processed 75/106 batches.
Processed 80/106 batches.
Processed 85/106 batches.
Processed 90/106 batches.
Processed 95/106 batches.
Processed 100/106 batches.
Processed 105/106 batches.


INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 106/106 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

rf1 = RandomForestClassifier(n_estimators=100)
rf2 = RandomForestClassifier(n_estimators=150)
rf3 = RandomForestClassifier(n_estimators=250)

# Random Forest with Class Weight Balancing
rf4 = RandomForestClassifier(n_estimators=100, class_weight="balanced", random_state=42)
# Random Forest with Feature Selection via max_features
rf5 = RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=42)
# Random Forest with Min Samples per Leaf
rf6 = RandomForestClassifier(n_estimators=100, min_samples_leaf=5, random_state=42)
# Random Forest with Out-of-Bag Error (OOB)
rf7 = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)
# Random Forest with Bootstrap Disabled
rf8 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# Random Forest with High Number of Trees and Limited Depth
rf9 = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7) # 2ND
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8) # THIS
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb8 = XGBClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb9 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)


pipeline.classifiers = [rf1, rf2, rf3, rf4, rf5, rf6, rf7, rf8, rf9,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9, xgb10, xgb11]
pipeline.fitted_classifiers = {}

In [5]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Top 10 features for RandomForestClassifier0: [('color_moments_lab_channel_0_skew', 0.03486104916749157), ('color_moments_hsv_channel_0_mean', 0.02739401950974569), ('var_rgb_channel_2', 0.015606259364939177), ('var_rgb_channel_0', 0.012082118198547489), ('gradient_magnitude_std', 0.010701986522257748), ('var_lab_channel_0', 0.01021169910311387), ('color_moments_lab_channel_2_mean', 0.009567257603904769), ('color_moments_rgb_channel_1_mean', 0.009507792787687087), ('color_moments_rgb_channel_0_std', 0.009208685985516447), ('color_moments_lab_channel_1_std', 0.009144975717276765)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 6.885035037994385 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_hsv_channel_0_mean', 0.03652376350265258), ('color_moments_lab_channel_0_skew',

In [6]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 0, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier9': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([0, 0,

In [9]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics())
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8039370078740158, 'precision': 0.8654687200481678, 'recall': 0.5819994973933265, 'f1': 0.5665321684685221, 'kappa': 0.6287630836272111}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7992125984251969, 'precision': 0.8622628648843166, 'recall': 0.5779834331362984, 'f1': 0.5629857991509434, 'kappa': 0.6193349852130126}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.794488188976378, 'precision': 0.6926974741448396, 'recall': 0.5718014214644305, 'f1': 0.5532216924792449, 'kappa': 0.610872147898402}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.794488188976378, 'precision': 0.6700972203064253, 'recall': 0.5779103358768659, 'f1': 0.5662816077677475, 'kappa': 0.6119563385030343}
INFO:utils.ml:Metrics for classifi

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.803937,0.865469,0.581999,0.566532,0.628763
RandomForestClassifier1,0.799213,0.862263,0.577983,0.562986,0.619335
RandomForestClassifier2,0.794488,0.692697,0.571801,0.553222,0.610872
RandomForestClassifier3,0.794488,0.670097,0.57791,0.566282,0.611956
RandomForestClassifier4,0.803937,0.865721,0.580367,0.56048,0.629683
RandomForestClassifier5,0.794488,0.859272,0.572335,0.553477,0.61077
RandomForestClassifier6,0.803937,0.865721,0.580367,0.56048,0.629683
RandomForestClassifier7,0.804724,0.828774,0.600818,0.603428,0.632097
RandomForestClassifier8,0.751969,0.497723,0.533491,0.514474,0.524689


___
Balance the data

In [11]:
# Backup
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [12]:
print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

(5082, 408)
(5082, 408)
(5082,)
(5082,)


In [13]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(pipeline.feature_matrix, pipeline.labels)

In [14]:
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.52803111076355 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 21.87845754623413 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 43.43574810028076 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 18.802971124649048 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 17.831711053848267 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 15.289743185043335 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 18.760176181793213 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 27.446318864822388 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier8


list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier8: [('color_moments_hsv_channel_0_mean', 0.06740527888263978), ('color_moments_lab_channel_0_skew', 0.04332614985342504), ('var_rgb_channel_2', 0.027517740393500398), ('var_lab_channel_0', 0.02741535922425282), ('color_moments_rgb_channel_1_skew', 0.01681013385308823), ('lbp_rad1_bins64_63', 0.01677368064347738), ('var_rgb_channel_1', 0.01634675443325031), ('gradient_magnitude_std', 0.016291281572177598), ('lbp_rad1_bins64_58', 0.015381181426195387), ('var_lab_channel_2', 0.014954494764560502)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 6.712302207946777 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9
INFO:utils.ml:Top 10 features for XGBClassifier9: [('color_moments_hsv_channel_0_mean', 0.016014671), ('lbp_rad1_bins16_15', 0.014396667), ('var_lab_channel_0', 0.014289732), ('lbp_rad2_bins16_4', 0.013287392), ('lbp_rad1_bins64_58', 0.012361867), ('lbp_rad2_bins32_0', 0.01118405), ('lbp_rad1_bins32_2'

list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier13: [('lbp_rad1_bins16_15', 0.014105499), ('color_moments_hsv_channel_0_mean', 0.010853318), ('var_lab_channel_0', 0.009296798), ('lbp_rad2_bins16_4', 0.009287159), ('lbp_rad3_bins32_27', 0.008590454), ('color_moments_lab_channel_0_skew', 0.007942043), ('lbp_rad3_bins32_10', 0.0074796444), ('lbp_rad2_bins32_0', 0.007161235), ('lbp_rad1_bins64_58', 0.0070566195), ('lbp_rad3_bins64_20', 0.0068651675)]
INFO:utils.ml:Fitted classifier: XGBClassifier13; Done in 28.450448989868164 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier14
INFO:utils.ml:Top 10 features for XGBClassifier14: [('color_moments_hsv_channel_0_mean', 0.012291694), ('lbp_rad1_bins16_15', 0.012187568), ('color_moments_lab_channel_0_skew', 0.010407659), ('lbp_rad2_bins16_4', 0.010083145), ('var_lab_channel_0', 0.009851972), ('lbp_rad2_bins32_0', 0.009768347), ('lbp_rad1_bins64_58', 0.008704471), ('lbp_rad2_bins64_29', 0.0076849177), ('lbp_rad1_bins64_63', 0.007157581), ('lb

In [15]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([2, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([2, 0,

In [16]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.789763779527559, 'precision': 0.6687725269834749, 'recall': 0.6748690613832976, 'f1': 0.671153855591832, 'kappa': 0.6254688680454}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7960629921259843, 'precision': 0.682996301103254, 'recall': 0.6832782709322195, 'f1': 0.6823166461238389, 'kappa': 0.635625063696102}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7960629921259843, 'precision': 0.6853267958306805, 'recall': 0.6836336748487706, 'f1': 0.6833212875894237, 'kappa': 0.6357348205307665}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.784251968503937, 'precision': 0.6749937537188156, 'recall': 0.686699852822449, 'f1': 0.6797728972741304, 'kappa': 0.6171129537692335}
INFO:utils.ml:Metrics for classifier Ran

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.789764,0.668773,0.674869,0.671154,0.625469
RandomForestClassifier1,0.796063,0.682996,0.683278,0.682317,0.635625
RandomForestClassifier2,0.796063,0.685327,0.683634,0.683321,0.635735
RandomForestClassifier3,0.784252,0.674994,0.6867,0.679773,0.617113
RandomForestClassifier4,0.784252,0.674994,0.6867,0.679773,0.617113
RandomForestClassifier5,0.781102,0.665415,0.685977,0.672855,0.615328
RandomForestClassifier6,0.784252,0.674994,0.6867,0.679773,0.617113
RandomForestClassifier7,0.809449,0.701146,0.678852,0.686258,0.656259
RandomForestClassifier8,0.66063,0.575571,0.590853,0.557212,0.441737


___
Borderline SMOTE

In [17]:
from imblearn.over_sampling import BorderlineSMOTE
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

pipeline.fitted_classifiers = {}

(5082, 408)
(8139, 408)
(5082,)
(8139,)


In [18]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.122066259384155 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier1: [('color_moments_hsv_channel_0_mean', 0.02297385290492498), ('color_moments_lab_channel_0_skew', 0.020832386628807396), ('var_lab_channel_0', 0.012023290024017152), ('var_rgb_channel_2', 0.010968977307073762), ('color_moments_lab_channel_1_std', 0.00835500342303785), ('color_moments_rgb_channel_1_skew', 0.007909987310903125), ('color_moments_hsv_channel_2_mean', 0.007892295064412776), ('var_lab_channel_1', 0.0077837672535453), ('var_lab_channel_2', 0.007595388305281355), ('color_moments_rgb_channel_2_mean', 0.007577951040463)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 24.35762643814087 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 44.92874193191528 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 18.15615177154541 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 18.762396812438965 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 15.809272527694702 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 19.57761263847351 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 27.38223385810852 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier8


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 6.778321743011475 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 22.936209440231323 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 27.008683443069458 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 20.599786520004272 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


list index out of range


INFO:utils.ml:Top 10 features for XGBClassifier12: [('color_moments_hsv_channel_0_mean', 0.01428493), ('lbp_rad1_bins16_15', 0.013063311), ('lbp_rad2_bins32_15', 0.0107440585), ('lbp_rad2_bins64_29', 0.010591843), ('color_moments_lab_channel_0_skew', 0.009656879), ('lbp_rad1_bins32_22', 0.009267056), ('lbp_rad2_bins16_12', 0.008790834), ('color_moments_lab_channel_1_std', 0.007897703), ('lbp_rad1_bins64_63', 0.007850355), ('lbp_rad1_bins64_58', 0.0077710124)]
INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 7.641372442245483 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier13
INFO:utils.ml:Top 10 features for XGBClassifier13: [('lbp_rad1_bins16_15', 0.01235671), ('lbp_rad2_bins16_4', 0.010915965), ('color_moments_hsv_channel_0_mean', 0.010569359), ('var_lab_channel_0', 0.0091790175), ('color_moments_lab_channel_0_skew', 0.008099053), ('lbp_rad1_bins32_0', 0.007987072), ('lbp_rad1_bins64_33', 0.0075689596), ('lbp_rad1_bins64_26', 0.0075427997), ('lbp_rad2_bins32_15', 0.00

In [19]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/27 batches.
Processed 10/27 batches.
Processed 15/27 batches.
Processed 20/27 batches.
Processed 25/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6


Processed 27/27 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: XGBClassifier16
INFO:utils.ml:Predictions made with classifier: XGBClassifier17
INFO:utils.ml:Predictions made with classifier: XGBClassifier18
INFO:utils.ml:Predictions made with classifier: XGBClassifier19


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier4': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier5': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier6': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier7': array([2, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier8': array([0, 0, 2, ..., 1, 0, 2]),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier12': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier13': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier14': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier15': array([2, 0,

In [20]:
df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df = df.T
df

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7866141732283465, 'precision': 0.6697819824406168, 'recall': 0.6648053331760483, 'f1': 0.6659583133203424, 'kappa': 0.6179454731139825}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7913385826771654, 'precision': 0.6765822784810127, 'recall': 0.6713427487644673, 'f1': 0.6725716127571335, 'kappa': 0.6264788794921312}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.794488188976378, 'precision': 0.6788453198289264, 'recall': 0.6792405295568767, 'f1': 0.6779992868450059, 'kappa': 0.6332501294528459}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.7881889763779527, 'precision': 0.6695986800303347, 'recall': 0.6697315843427688, 'f1': 0.6683225885024416, 'kappa': 0.6222912612771979}
INFO:utils.ml:Metrics for classi

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.786614,0.669782,0.664805,0.665958,0.617945
RandomForestClassifier1,0.791339,0.676582,0.671343,0.672572,0.626479
RandomForestClassifier2,0.794488,0.678845,0.679241,0.677999,0.63325
RandomForestClassifier3,0.788189,0.669599,0.669732,0.668323,0.622291
RandomForestClassifier4,0.788189,0.669599,0.669732,0.668323,0.622291
RandomForestClassifier5,0.770079,0.653914,0.668332,0.659203,0.594148
RandomForestClassifier6,0.788189,0.669599,0.669732,0.668323,0.622291
RandomForestClassifier7,0.796063,0.684776,0.671416,0.676045,0.633198
RandomForestClassifier8,0.675591,0.576957,0.604815,0.571614,0.45829


In [22]:
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions["XGBClassifier14"])

array([[426,  47,  25],
       [ 64, 597,  17],
       [ 34,  22,  38]], dtype=int64)

In [42]:
pipeline.fitted_classifiers["XGBClassifier7"].__dict__

{'n_estimators': 550,
 'objective': 'multi:softprob',
 'max_depth': 7,
 'max_leaves': None,
 'max_bin': None,
 'grow_policy': None,
 'learning_rate': 0.1,
 'verbosity': None,
 'booster': None,
 'tree_method': None,
 'gamma': None,
 'min_child_weight': 3,
 'max_delta_step': None,
 'subsample': 0.7,
 'sampling_method': None,
 'colsample_bytree': 0.7,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'reg_alpha': None,
 'reg_lambda': None,
 'scale_pos_weight': None,
 'base_score': None,
 'missing': nan,
 'num_parallel_tree': None,
 'random_state': None,
 'n_jobs': None,
 'monotone_constraints': None,
 'interaction_constraints': None,
 'importance_type': None,
 'device': None,
 'validate_parameters': None,
 'enable_categorical': False,
 'feature_types': None,
 'max_cat_to_onehot': None,
 'max_cat_threshold': None,
 'multi_strategy': None,
 'eval_metric': None,
 'early_stopping_rounds': None,
 'callbacks': None,
 'n_classes_': 3,
 '_Booster': <xgboost.core.Booster at 0x25f3c0be290>}

AttributeError: 'XGBClassifier' object has no attribute 'estimator'

___
SMOTETomek

In [20]:
print("DONE")

DONE
