In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train"

In [2]:
percent = 100
random = False

# Initialize the FactoryLoader
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=0)
factory.resize((150,150))
factory.hair_removal()
factory.normalize2float()

factory_loader = FactoryLoader(path=VAL_PATH, batch_size=32, factory=factory, percentage=percent, shuffle=random)

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()

strategy.add_extractor(MeanExtractor())  # Add mean feature
strategy.add_extractor(StdExtractor())   # Add standard deviation feature
strategy.add_extractor(VarExtractor())   # Add variance feature

strategy.add_extractor(MeanExtractor("lab"))  # Add mean feature
strategy.add_extractor(StdExtractor("lab"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("lab"))   # Add variance feature

strategy.add_extractor(MeanExtractor("hsv"))  # Add mean feature
strategy.add_extractor(StdExtractor("hsv"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("hsv"))   # Add variance feature

strategy.add_extractor(LBPExtractor(radius=1, n_points=8))
strategy.add_extractor(LBPExtractor(radius=2, n_points=8))
strategy.add_extractor(LBPExtractor(radius=1, n_points=16))
strategy.add_extractor(LBPExtractor(radius=2, n_points=16))

strategy.add_extractor(GLCMExtractor(properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']))

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[], percentage=percent, verbose=True, shuffle=random)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/212 batches.
Processed 10/212 batches.
Processed 15/212 batches.
Processed 20/212 batches.
Processed 25/212 batches.
Processed 30/212 batches.
Processed 35/212 batches.
Processed 40/212 batches.
Processed 45/212 batches.
Processed 50/212 batches.
Processed 55/212 batches.
Processed 60/212 batches.
Processed 65/212 batches.
Processed 70/212 batches.
Processed 75/212 batches.
Processed 80/212 batches.
Processed 85/212 batches.
Processed 90/212 batches.
Processed 95/212 batches.
Processed 100/212 batches.
Processed 105/212 batches.
Processed 110/212 batches.
Processed 115/212 batches.
Processed 120/212 batches.
Processed 125/212 batches.
Processed 130/212 batches.
Processed 135/212 batches.
Processed 140/212 batches.
Processed 145/212 batches.
Processed 150/212 batches.
Processed 155/212 batches.
Processed 160/212 batches.
Processed 165/212 batches.
Processed 170/212 batches.
Processed 175/212 batches.
Processed 180/212 batches.
Processed 185/212 batches.
Processed 190/212 bat

INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 212/212 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

rf1 = RandomForestClassifier(n_estimators=100)
rf2 = RandomForestClassifier(n_estimators=150)
rf3 = RandomForestClassifier(n_estimators=250)

xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=550, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=550, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7) # BEST
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=550, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8)
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=550, max_depth=5, min_child_weight=3, subsample=0.8, colsample_bytree=0.8, scale_pos_weight=10)
xgb8 = XGBClassifier(learning_rate=0.1, n_estimators=550, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb9 = XGBClassifier(learning_rate=0.01, n_estimators=550, max_depth=3, min_child_weight=1, subsample=1.0, colsample_bytree=1.0)

pipeline.classifiers = [rf1, rf2, rf3,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9]

In [5]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 2.819310426712036 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 4.368316173553467 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 7.1671142578125 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier3
INFO:utils.ml:Fitted classifier: XGBClassifier3; Done in 2.3362555503845215 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4
INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 2.6645538806915283 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5
INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 3.1329047679901123 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6
INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 1.60744142

In [6]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: XGBClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11


Processed 53/53 batches.


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier3': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier4': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier7': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 0, 0], dtype=int64)}

In [7]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics())
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.7984251968503937, 'precision': 0.8196284596563687, 'recall': 0.5925863751245489, 'f1': 0.5928256909673607}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7968503937007874, 'precision': 0.7997445674997451, 'recall': 0.5970011975347571, 'f1': 0.6024964171371788}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7984251968503937, 'precision': 0.7372200911215568, 'recall': 0.5868328646286647, 'f1': 0.5808147359358747}
INFO:utils.ml:Metrics for classifier XGBClassifier3: {'accuracy': 0.8149606299212598, 'precision': 0.7367808379293822, 'recall': 0.6372206960018572, 'f1': 0.6538559720177728}
INFO:utils.ml:Metrics for classifier XGBClassifier4: {'accuracy': 0.8149606299212598, 'precision': 0.7110662755024588, 'recall': 0.6284127282997553, 'f1': 0.6399684597440

Unnamed: 0,accuracy,precision,recall,f1
GT,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.798425,0.819628,0.592586,0.592826
RandomForestClassifier1,0.79685,0.799745,0.597001,0.602496
RandomForestClassifier2,0.798425,0.73722,0.586833,0.580815
XGBClassifier3,0.814961,0.736781,0.637221,0.653856
XGBClassifier4,0.814961,0.711066,0.628413,0.639968
XGBClassifier5,0.815748,0.72797,0.631959,0.645879
XGBClassifier6,0.803937,0.687712,0.605724,0.610109
XGBClassifier7,0.818898,0.728245,0.642911,0.659225
XGBClassifier8,0.815748,0.716935,0.634658,0.648623


In [8]:
from sklearn.metrics import confusion_matrix,recall_score, accuracy_score, f1_score, precision_score, cohen_kappa_score

classifier_key = "XGBClassifier7"
avg = "macro"

confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[classifier_key])
print("Accuracy:")
print(accuracy_score(pipeline.predictions["GT"], pipeline.predictions[classifier_key], ))
print("Cohen's Kappa:")
print(cohen_kappa_score(pipeline.predictions["GT"], pipeline.predictions[classifier_key]))#average="weighted"))
print("Precision:")
print(precision_score(pipeline.predictions["GT"], pipeline.predictions[classifier_key], average=avg ))#average="weighted"))
print("Recall:")
print(recall_score(pipeline.predictions["GT"], pipeline.predictions[classifier_key], average=avg ))#average="weighted"))
print("F1:")
print(f1_score(pipeline.predictions["GT"], pipeline.predictions[classifier_key], average=avg ))#average="weighted"))

Accuracy:
0.8188976377952756
Cohen's Kappa:
0.6634249763786786
Precision:
0.7282446896324015
Recall:
0.6429110655891627
F1:
0.6592251380665989


In [9]:
pipeline.get_feature_names().__len__()

88

In [10]:
pipeline.save_feature_matrix_to_excel(f"./features_smoothing5_clahe1point5_pad2square0_resize150x150_hair_removal.xlsx")

INFO:utils.ml:Saving feature matrix to Excel...
INFO:utils.ml:Feature matrix saved to ./features_smoothing5_clahe1point5_pad2square0_resize150x150_hair_removal.xlsxfeatures_smoothing_clahe_pad2square_resize_hair_removal_01_norm.xlsx


'./features_smoothing5_clahe1point5_pad2square0_resize150x150_hair_removal.xlsxfeatures_smoothing_clahe_pad2square_resize_hair_removal_01_norm.xlsx'

___
Balance the data

In [11]:
# Backup
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [17]:
print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

(5082, 88)
(8139, 88)
(5082,)
(8139,)


In [13]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(pipeline.feature_matrix, pipeline.labels)

In [14]:
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 5.28521203994751 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 7.99783730506897 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 13.3698148727417 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier3
INFO:utils.ml:Fitted classifier: XGBClassifier3; Done in 2.834153413772583 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4
INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 3.4891340732574463 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5
INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 4.219215631484985 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6
INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 2.10445499420

In [15]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: XGBClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11


Processed 53/53 batches.


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([2, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier1': array([2, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier2': array([2, 0, 2, ..., 1, 2, 0]),
 'XGBClassifier3': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier4': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier7': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64)}

In [16]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.794488188976378, 'precision': 0.6871083116138946, 'recall': 0.7033207832900673, 'f1': 0.6936605104697963, 'kappa': 0.636357851870705}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.7968503937007874, 'precision': 0.6922698637937822, 'recall': 0.7110823259144657, 'f1': 0.6999427059460456, 'kappa': 0.6408560004208975}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.7881889763779527, 'precision': 0.6793052499421813, 'recall': 0.7008427861953054, 'f1': 0.6879900190842756, 'kappa': 0.6262237471498782}
INFO:utils.ml:Metrics for classifier XGBClassifier3: {'accuracy': 0.8118110236220473, 'precision': 0.704219352103769, 'recall': 0.6855805825448281, 'f1': 0.6934786090143357, 'kappa': 0.6586282598622502}
INFO:utils.ml:Metrics for classifier XGBCla

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.794488,0.687108,0.703321,0.693661,0.636358
RandomForestClassifier1,0.79685,0.69227,0.711082,0.699943,0.640856
RandomForestClassifier2,0.788189,0.679305,0.700843,0.68799,0.626224
XGBClassifier3,0.811811,0.704219,0.685581,0.693479,0.658628
XGBClassifier4,0.807874,0.695917,0.679179,0.686358,0.651504
XGBClassifier5,0.807874,0.698234,0.682234,0.689173,0.651733
XGBClassifier6,0.75748,0.624336,0.644338,0.630325,0.57634
XGBClassifier7,0.816535,0.700467,0.691763,0.695754,0.668537
XGBClassifier8,0.813386,0.692662,0.687275,0.689695,0.663761


___
Borderline SMOTE

In [18]:
from imblearn.over_sampling import BorderlineSMOTE
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

pipeline.fitted_classifiers = {}

(5082, 88)
(8139, 88)
(5082,)
(8139,)


In [19]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 5.341760873794556 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 8.146342515945435 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 13.905472755432129 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier3
INFO:utils.ml:Fitted classifier: XGBClassifier3; Done in 9.516165971755981 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4
INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 6.324585199356079 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5
INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 4.564074277877808 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6
INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 2.31278991

In [20]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: XGBClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11


Processed 53/53 batches.


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([2, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier1': array([2, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier2': array([2, 0, 2, ..., 1, 2, 0]),
 'XGBClassifier3': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier4': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier7': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64)}

In [21]:
df = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8, 'precision': 0.6959079180777294, 'recall': 0.709461331171671, 'f1': 0.7017483973936188, 'kappa': 0.6447262923249073}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.789763779527559, 'precision': 0.678971153047576, 'recall': 0.6930181026623282, 'f1': 0.6850035549785156, 'kappa': 0.6268011146770209}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8023622047244094, 'precision': 0.6957682104086468, 'recall': 0.7046496410546473, 'f1': 0.6996792626987901, 'kappa': 0.647736816514167}
INFO:utils.ml:Metrics for classifier XGBClassifier3: {'accuracy': 0.831496062992126, 'precision': 0.7251331063596395, 'recall': 0.7222737683930979, 'f1': 0.7234976684449933, 'kappa': 0.6971675617854836}
INFO:utils.ml:Metrics for classifier XGBClassifier4: {'accur

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.8,0.695908,0.709461,0.701748,0.644726
RandomForestClassifier1,0.789764,0.678971,0.693018,0.685004,0.626801
RandomForestClassifier2,0.802362,0.695768,0.70465,0.699679,0.647737
XGBClassifier3,0.831496,0.725133,0.722274,0.723498,0.697168
XGBClassifier4,0.830709,0.721285,0.718728,0.719771,0.695874
XGBClassifier5,0.831496,0.724991,0.722096,0.723338,0.697168
XGBClassifier6,0.764567,0.635587,0.659525,0.642893,0.58954
XGBClassifier7,0.819685,0.715936,0.716177,0.715972,0.676248
XGBClassifier8,0.813386,0.707687,0.711355,0.709411,0.66559


___
SMOTETomek

In [27]:
from imblearn.combine import SMOTETomek

smote_tomek = SMOTETomek(sampling_strategy='auto', random_state=42)


pipeline.feature_matrix, pipeline.labels = smote_tomek.fit_resample(
    pipeline.backup_feature_matrix, pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

pipeline.fitted_classifiers = {}

(5082, 88)
(7219, 88)
(5082,)
(7219,)


In [28]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 4.934349298477173 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1
INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 7.23221492767334 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2
INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 11.76577639579773 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier3
INFO:utils.ml:Fitted classifier: XGBClassifier3; Done in 2.8840653896331787 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4
INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 3.5059194564819336 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5
INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 4.0474889278411865 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6
INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 1.9094085

In [29]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: XGBClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11


Processed 53/53 batches.


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([2, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier1': array([2, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier2': array([2, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier3': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier4': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier6': array([2, 0, 2, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier7': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier8': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier10': array([2, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 1, 2, 0], dtype=int64)}