In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

import os

# Root folder of the multiclass dataset. Set the CADX_DATASET_DIR environment
# variable to run the notebook on another machine; the fallback is the
# location used when this notebook was originally executed.
DATASET_DIR = os.environ.get(
    "CADX_DATASET_DIR",
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass",
)
VAL_PATH = os.path.join(DATASET_DIR, "val")      # validation split
TRAIN_PATH = os.path.join(DATASET_DIR, "train")  # training split

INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [2]:
# Run configuration shared by the validation loader and the training pipeline.
percent = 100     # forwarded as `percentage`
random = False    # forwarded as `shuffle`
batch_size = 24
th = 0.1          # `threshold` for the gradient and colour-moment extractors

# Preprocessing factory: pad to square (NaN fill), resize, normalise to float.
factory = PreprocessingFactory()
factory.pad2square(fill=np.nan)
factory.resize((150, 150))
factory.normalize2float()

# Loader over the validation split, using the same preprocessing factory.
factory_loader = FactoryLoader(
    path=VAL_PATH,
    batch_size=batch_size,
    factory=factory,
    percentage=percent,
    shuffle=random,
)

# Feature-extraction strategy; extractors are registered in this exact order.
strategy = FeatureExtractionStrategy()

# Gradient feature
strategy.add_extractor(GradientExtractor(threshold=th))

# Colour moments, one extractor per colour space
for colour_space in ("rgb", "lab", "hsv"):
    strategy.add_extractor(ColorMomentsExtractor(colour_space, threshold=th))

# Local binary patterns at radii 1..5, always with 8 sampling points per unit of radius
for lbp_radius in range(1, 6):
    strategy.add_extractor(LBPExtractor(radius=lbp_radius, n_points=8 * lbp_radius))

# Frequency-domain features
strategy.add_extractor(FourierTransformExtractor())
strategy.add_extractor(FFTExtractor())

# Grey-level co-occurrence matrix properties
glcm_properties = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', "ASM"]
strategy.add_extractor(GLCMExtractor(properties=glcm_properties))

# Training pipeline; classifiers are attached in a later cell.
pipeline = MLPipeline(
    dataset_path=TRAIN_PATH,
    preprocessing_factory=factory,
    feature_strategy=strategy,
    classifiers=[],
    percentage=percent,
    verbose=True,
    shuffle=random,
    batch_size=batch_size,
)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
# Reset the stored feature matrix, then run feature extraction on the pipeline's
# dataset (presumably the reset forces a fresh extraction — confirm in MLPipeline).
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/212 batches.
Processed 10/212 batches.
Processed 15/212 batches.
Processed 20/212 batches.
Processed 25/212 batches.
Processed 30/212 batches.
Processed 35/212 batches.
Processed 40/212 batches.
Processed 45/212 batches.
Processed 50/212 batches.
Processed 55/212 batches.
Processed 60/212 batches.
Processed 65/212 batches.
Processed 70/212 batches.
Processed 75/212 batches.
Processed 80/212 batches.
Processed 85/212 batches.
Processed 90/212 batches.
Processed 95/212 batches.
Processed 100/212 batches.
Processed 105/212 batches.
Processed 110/212 batches.
Processed 115/212 batches.
Processed 120/212 batches.
Processed 125/212 batches.
Processed 130/212 batches.
Processed 135/212 batches.
Processed 140/212 batches.
Processed 145/212 batches.
Processed 150/212 batches.
Processed 155/212 batches.
Processed 160/212 batches.
Processed 165/212 batches.
Processed 170/212 batches.
Processed 175/212 batches.
Processed 180/212 batches.
Processed 185/212 batches.
Processed 190/212 bat

INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 212/212 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Random-forest candidates, all seeded with random_state=42.
# rf1 keeps bootstrap sampling and requests an out-of-bag score;
# rf0, rf2 and rf3 disable bootstrap and differ only in the number of trees.
rf0 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
rf1 = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=42)
rf2 = RandomForestClassifier(n_estimators=150, bootstrap=False, random_state=42)
rf3 = RandomForestClassifier(n_estimators=200, bootstrap=False, random_state=42)

# XGBoost candidates: four hyper-parameter profiles, each tried at several
# ensemble sizes. Every model uses learning_rate=0.1.
_XGB_PROFILES = {
    "depth7_mcw3": dict(max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7),
    "depth5_gamma": dict(max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8),
    "depth5_reg": dict(max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8,
                       reg_alpha=0.1, reg_lambda=0.1),
    "depth7_mcw1": dict(max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8),
}


def _make_xgb(profile, n_estimators):
    """Build an XGBClassifier from a named profile and a number of boosting rounds."""
    return XGBClassifier(learning_rate=0.1, n_estimators=n_estimators, **_XGB_PROFILES[profile])


# 400 rounds (defined but not part of the evaluated list below)
xgb4 = _make_xgb("depth7_mcw3", 400)   # 2ND
xgb5 = _make_xgb("depth5_gamma", 400)  # THIS
xgb6 = _make_xgb("depth5_reg", 400)
xgb7 = _make_xgb("depth7_mcw1", 400)

# 500 rounds
xgb8 = _make_xgb("depth7_mcw1", 500)

# 750 rounds
xgb9 = _make_xgb("depth7_mcw3", 750)
xgb10 = _make_xgb("depth5_gamma", 750)  # THIS
xgb11 = _make_xgb("depth5_reg", 750)
xgb12 = _make_xgb("depth7_mcw1", 750)

# 1000 rounds
xgb13 = _make_xgb("depth7_mcw3", 1000)
xgb14 = _make_xgb("depth5_gamma", 1000)  # THIS
xgb15 = _make_xgb("depth5_reg", 1000)
xgb16 = _make_xgb("depth7_mcw1", 1000)

# Classifiers actually evaluated. SVM candidates (svm1, svm2, svm3, svm6, svm7, svm8)
# were listed here once but are left out.
# NOTE(review): the recorded logs label models "XGBClassifier4" .. "XGBClassifier12";
# those suffixes look like positions in this list rather than the variable names
# above (so "XGBClassifier4" would be xgb8) — confirm in utils.ml.
random_forests = [rf0, rf1, rf2, rf3]
boosted_models = [xgb8, xgb9, xgb10, xgb11, xgb12, xgb13, xgb14, xgb15, xgb16]
pipeline.classifiers = random_forests + boosted_models
pipeline.fitted_classifiers = dict()

In [5]:
# Fit every classifier registered on the pipeline (baseline run, no resampling).
# NOTE(review): the recorded output prints "'list' object has no attribute 'shape'"
# around each fit — looks like a swallowed exception inside the pipeline; investigate.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 9.789878368377686 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 9.93915867805481 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 14.563047170639038 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 18.99941921234131 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 23.052658319473267 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 24.005728006362915 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 14.06064248085022 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 22.45315909385681 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 28.987399339675903 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 28.29932451248169 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 15.661589860916138 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 26.646110773086548 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 35.10671377182007 seconds
INFO:utils.ml:Fitting completed in 271.58 seconds.


'list' object has no attribute 'shape'


In [6]:
# Predict on the validation split with every fitted classifier. The returned mapping
# (displayed below) has a "GT" entry plus one entry per classifier; later cells read
# the same data from pipeline.predictions.
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 0, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 0, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 0, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 0, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}

In [7]:
import pandas as pd

# Metrics table for the baseline run, transposed so that each row is one entry
# of the metrics mapping and each column is one metric.
baseline_metrics = pipeline.calculate_metrics()
df = pd.DataFrame(baseline_metrics).T
df

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8346456692913385, 'precision': 0.8501348298587298, 'recall': 0.6491228202838342, 'f1': 0.6699211081969052, 'kappa': 0.6905156447996881}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8236220472440945, 'precision': 0.8775834183780041, 'recall': 0.6203255247816093, 'f1': 0.6277905765507396, 'kappa': 0.6681365343785726}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8401574803149606, 'precision': 0.8557009538299897, 'recall': 0.6565072818733668, 'f1': 0.6786787449865921, 'kappa': 0.7011591515011012}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8440944881889764, 'precision': 0.8568884870061143, 'recall': 0.6562664390065125, 'f1': 0.6768904009047257, 'kappa': 0.7080785144114902}
INFO:utils.ml:Metrics for class

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.834646,0.850135,0.649123,0.669921,0.690516
RandomForestClassifier1,0.823622,0.877583,0.620326,0.627791,0.668137
RandomForestClassifier2,0.840157,0.855701,0.656507,0.678679,0.701159
RandomForestClassifier3,0.844094,0.856888,0.656266,0.67689,0.708079
XGBClassifier4,0.870866,0.879886,0.684989,0.709701,0.758647
XGBClassifier5,0.869291,0.841918,0.693524,0.718701,0.757007
XGBClassifier6,0.867717,0.82026,0.693285,0.718599,0.753618
XGBClassifier7,0.875591,0.846993,0.70932,0.738072,0.76892
XGBClassifier8,0.872441,0.868302,0.68615,0.710117,0.761702


In [8]:
# Select the classifier with the highest kappa. The "GT" row (ground truth scored
# against itself) is dropped by label, so it can never be picked even when a
# classifier also reaches kappa == 1.0, and idxmax replaces the float-equality lookup.
idx = df.drop(index="GT", errors="ignore")["kappa"].astype(float).idxmax()
highest = df.loc[[idx]]  # single-row frame holding the winner's metrics

# Confusion matrix of the winner: rows are true classes, columns are predicted classes.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[450,  43,   5],
       [ 44, 632,   2],
       [ 43,  21,  30]], dtype=int64)

___
# Balance the data

In [9]:
# Backup: keep copies of the current feature matrix and labels on the pipeline
# object so they stay available after pipeline.feature_matrix / pipeline.labels
# are overwritten by resampling.
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [10]:
# Print the shape of each backup next to its live counterpart
# (feature matrices first, then labels).
for arr in (
    pipeline.backup_feature_matrix,
    pipeline.feature_matrix,
    pipeline.backup_labels,
    pipeline.labels,
):
    print(arr.shape)

(5082, 449)
(5082, 449)
(5082,)
(5082,)


In [11]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)

# Oversample from the backup copies rather than from the live pipeline arrays.
# The live arrays are overwritten by every resampling cell, so reading from them
# made this cell's result depend on which cells had already been executed.
# NaNs are replaced via np.nan_to_num before resampling.
pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    np.nan_to_num(pipeline.backup_feature_matrix), pipeline.backup_labels)

In [12]:
# Drop the previously fitted models and refit every classifier on the
# SMOTE-resampled feature matrix and labels now stored on the pipeline.
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 18.917206048965454 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 19.130538940429688 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 28.45483160018921 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 38.126582860946655 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 29.060487747192383 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 30.756731271743774 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 17.686945915222168 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 28.462621927261353 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 49.25582575798035 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 51.571815967559814 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 27.18480658531189 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 47.282684087753296 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 63.33380889892578 seconds
INFO:utils.ml:Fitting completed in 449.23 seconds.


'list' object has no attribute 'shape'


In [13]:
# Predict on the validation split again, now with the models refitted after SMOTE.
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 0, 0, 0], dtype=int64)}

In [14]:
import pandas as pd

# Metrics table for the SMOTE run, transposed so that each row is one entry
# of the metrics mapping and each column is one metric.
smote_metrics = pipeline.calculate_metrics()
df_smote = pd.DataFrame(smote_metrics).T
df_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8330708661417323, 'precision': 0.7383018935759956, 'recall': 0.7146267868183963, 'f1': 0.7242203559450395, 'kappa': 0.697775183530521}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8220472440944881, 'precision': 0.7280701754385964, 'recall': 0.728414190244944, 'f1': 0.7277564729912814, 'kappa': 0.6815587666199208}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8362204724409449, 'precision': 0.744320880117659, 'recall': 0.7171264610314729, 'f1': 0.7278714710976802, 'kappa': 0.7033283468776252}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8322834645669291, 'precision': 0.7436964879039062, 'recall': 0.7170118999817762, 'f1': 0.7278387469207176, 'kappa': 0.6960274902350334}
INFO:utils.ml:Metrics for classifi

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.833071,0.738302,0.714627,0.72422,0.697775
RandomForestClassifier1,0.822047,0.72807,0.728414,0.727756,0.681559
RandomForestClassifier2,0.83622,0.744321,0.717126,0.727871,0.703328
RandomForestClassifier3,0.832283,0.743696,0.717012,0.727839,0.696027
XGBClassifier4,0.874803,0.803956,0.767252,0.78252,0.772297
XGBClassifier5,0.877165,0.802298,0.783821,0.792281,0.777741
XGBClassifier6,0.864567,0.766438,0.738591,0.750177,0.753742
XGBClassifier7,0.874016,0.790848,0.761007,0.773573,0.771259
XGBClassifier8,0.876378,0.807521,0.768413,0.784514,0.775058


In [15]:
# Select the classifier with the highest kappa in the SMOTE run. The "GT" row
# (ground truth scored against itself) is dropped by label, so it can never be
# picked even when a classifier also reaches kappa == 1.0, and idxmax replaces
# the float-equality lookup.
idx = df_smote.drop(index="GT", errors="ignore")["kappa"].astype(float).idxmax()
highest = df_smote.loc[[idx]]  # single-row frame holding the winner's metrics

# Confusion matrix of the winner: rows are true classes, columns are predicted classes.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[436,  38,  24],
       [ 45, 626,   7],
       [ 27,  15,  52]], dtype=int64)

___
# Borderline SMOTE

In [16]:
from imblearn.over_sampling import BorderlineSMOTE
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

# Resample from the backup copies (NaNs replaced via np.nan_to_num) and store
# the result as the pipeline's training data.
train_features_filled = np.nan_to_num(pipeline.backup_feature_matrix)
resampled_features, resampled_labels = smote.fit_resample(
    train_features_filled, pipeline.backup_labels)
pipeline.feature_matrix = resampled_features
pipeline.labels = resampled_labels

# Shapes before/after resampling: feature matrices first, then labels.
for arr in (
    pipeline.backup_feature_matrix,
    pipeline.feature_matrix,
    pipeline.backup_labels,
    pipeline.labels,
):
    print(arr.shape)

(5082, 449)
(8139, 449)
(5082,)
(8139,)


In [17]:
# Drop the previously fitted models and refit every classifier on the
# BorderlineSMOTE-resampled feature matrix and labels now stored on the pipeline.
pipeline.fitted_classifiers = {}

pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 19.118394136428833 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 18.97667098045349 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 28.525821208953857 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 36.41775441169739 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 27.839827299118042 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 30.577001333236694 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 17.53214716911316 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 26.743179321289062 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 36.7254912853241 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 35.06117129325867 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 17.984735012054443 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 33.14340424537659 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 41.676307916641235 seconds
INFO:utils.ml:Fitting completed in 370.32 seconds.


'list' object has no attribute 'shape'


In [18]:
# Predict on the validation split again, now with the models refitted after BorderlineSMOTE.
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 2, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 0, 2, 0], dtype=int64)}

In [19]:
# Metrics table for the BorderlineSMOTE run; the metric names are passed explicitly.
# Transposed so that each row is one entry of the metrics mapping.
metric_names = ["accuracy", "precision", "recall", "f1", "kappa"]
borderline_metrics = pipeline.calculate_metrics(metric_names)
df_borderline_smote = pd.DataFrame(borderline_metrics).T
df_borderline_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8456692913385827, 'precision': 0.758533183110682, 'recall': 0.709708601556165, 'f1': 0.7259812537089193, 'kappa': 0.7187281068498723}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8291338582677166, 'precision': 0.725879732475471, 'recall': 0.7211542460559617, 'f1': 0.723041623445261, 'kappa': 0.6931383421742985}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8448818897637795, 'precision': 0.7656733073790615, 'recall': 0.7117383108029935, 'f1': 0.7297464410378253, 'kappa': 0.716991163255795}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8488188976377953, 'precision': 0.7766275922885119, 'recall': 0.7174286803902991, 'f1': 0.7371796865316793, 'kappa': 0.7238229857562248}
INFO:utils.ml:Metrics for classifier

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.845669,0.758533,0.709709,0.725981,0.718728
RandomForestClassifier1,0.829134,0.72588,0.721154,0.723042,0.693138
RandomForestClassifier2,0.844882,0.765673,0.711738,0.729746,0.716991
RandomForestClassifier3,0.848819,0.776628,0.717429,0.73718,0.723823
XGBClassifier4,0.873228,0.798155,0.758171,0.774101,0.769351
XGBClassifier5,0.871654,0.793672,0.760065,0.773807,0.767041
XGBClassifier6,0.864567,0.758818,0.748465,0.753167,0.755611
XGBClassifier7,0.873228,0.79951,0.770389,0.782583,0.770492
XGBClassifier8,0.874016,0.806437,0.758485,0.777028,0.770377


In [20]:
# Select the classifier with the highest kappa in the BorderlineSMOTE run. The "GT"
# row (ground truth scored against itself) is dropped by label, so it can never be
# picked even when a classifier also reaches kappa == 1.0, and idxmax replaces
# the float-equality lookup.
idx = df_borderline_smote.drop(index="GT", errors="ignore")["kappa"].astype(float).idxmax()
highest = df_borderline_smote.loc[[idx]]  # single-row frame holding the winner's metrics

# Confusion matrix of the winner: rows are true classes, columns are predicted classes.
confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[442,  39,  17],
       [ 45, 625,   8],
       [ 33,  17,  44]], dtype=int64)