In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, cohen_kappa_score, classification_report

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train"

INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [2]:
percent = 100
random = False
batch_size = 24

# Initialize the FactoryLoader
factory = PreprocessingFactory()
factory.pad2square(fill=np.nan)
factory.resize((240,240))
factory.normalize2float()

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()

strategy.add_extractor(GradientExtractor()) # Add gradient feature

strategy.add_extractor(ColorMomentsExtractor("rgb",))   # Add color moments feature
strategy.add_extractor(ColorMomentsExtractor("lab",))   # Add color moments feature
strategy.add_extractor(ColorMomentsExtractor("hsv",))   # Add color moments feature

strategy.add_extractor(LBPExtractor(radius=1, n_points=8))
strategy.add_extractor(LBPExtractor(radius=2, n_points=16))
strategy.add_extractor(LBPExtractor(radius=3, n_points=24))
strategy.add_extractor(LBPExtractor(radius=4, n_points=32))
strategy.add_extractor(LBPExtractor(radius=5, n_points=40))

strategy.add_extractor(FourierTransformExtractor())
strategy.add_extractor(FFTExtractor())

strategy.add_extractor(GLCMExtractor(properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']))

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[], percentage=percent, verbose=True, shuffle=random, batch_size=batch_size)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\train
INFO:utils.ml:Preprocessing steps


In [3]:
pipeline.feature_matrix = None
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/212 batches.
Processed 10/212 batches.
Processed 15/212 batches.
Processed 20/212 batches.
Processed 25/212 batches.
Processed 30/212 batches.
Processed 35/212 batches.
Processed 40/212 batches.
Processed 45/212 batches.
Processed 50/212 batches.
Processed 55/212 batches.
Processed 60/212 batches.
Processed 65/212 batches.
Processed 70/212 batches.
Processed 75/212 batches.
Processed 80/212 batches.
Processed 85/212 batches.
Processed 90/212 batches.
Processed 95/212 batches.
Processed 100/212 batches.
Processed 105/212 batches.
Processed 110/212 batches.
Processed 115/212 batches.
Processed 120/212 batches.
Processed 125/212 batches.
Processed 130/212 batches.
Processed 135/212 batches.
Processed 140/212 batches.
Processed 145/212 batches.
Processed 150/212 batches.
Processed 155/212 batches.
Processed 160/212 batches.
Processed 165/212 batches.
Processed 170/212 batches.
Processed 175/212 batches.
Processed 180/212 batches.
Processed 185/212 batches.
Processed 190/212 bat

INFO:utils.ml:Feature extraction completed. Extracted 5082 features.


Processed 212/212 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Random Forest with Bootstrap Disabled
rf0 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# Random Forest with Out-of-Bag Error (OOB)
rf1 = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=42)
# Random Forest with Bootstrap Disabled
rf2 = RandomForestClassifier(n_estimators=150, bootstrap=False, random_state=42)
# Random Forest with Bootstrap Disabled
rf3 = RandomForestClassifier(n_estimators=200, bootstrap=False, random_state=42)

xgb4 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7) # 2ND
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8) # THIS
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb8 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)

xgb9 = XGBClassifier(learning_rate=0.1, n_estimators=750, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=750, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8) # THIS
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=750, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb12 = XGBClassifier(learning_rate=0.1, n_estimators=750, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)

xgb13 = XGBClassifier(learning_rate=0.1, n_estimators=1000, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7)
xgb14 = XGBClassifier(learning_rate=0.1, n_estimators=1000, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8) # THIS
xgb15 = XGBClassifier(learning_rate=0.1, n_estimators=1000, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb16 = XGBClassifier(learning_rate=0.1, n_estimators=1000, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)



pipeline.classifiers = [rf0, rf1, rf2, rf3,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb8, xgb9, xgb10, xgb11, xgb12, xgb13, xgb14, xgb15, xgb16]
pipeline.fitted_classifiers = {}

In [5]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 11.62134861946106 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 12.594945192337036 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 17.977266311645508 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 26.157633781433105 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 35.68646478652954 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 36.88324475288391 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 21.292981147766113 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 34.51046657562256 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 81.51508021354675 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 91.8739275932312 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 50.23602366447449 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 147.32272934913635 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 184.80187320709229 seconds
INFO:utils.ml:Fitting completed in 752.47 seconds.


'list' object has no attribute 'shape'


In [6]:
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 1, ..., 1, 0, 0]),
 'RandomForestClassifier3': array([0, 0, 1, ..., 1, 0, 0]),
 'XGBClassifier4': array([0, 0, 0, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 0, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 0, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 0, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 0, ..., 1, 0, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 0, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 0, ..., 1, 2, 0], dtype=int64)}

In [7]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics())
df = df.T
df

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8330708661417323, 'precision': 0.8371003321460903, 'recall': 0.6332226540001719, 'f1': 0.6442892315275682, 'kappa': 0.6866372748476482}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8236220472440945, 'precision': 0.8778598539230957, 'recall': 0.6151051201605517, 'f1': 0.6168326606191982, 'kappa': 0.6683810260977419}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8362204724409449, 'precision': 0.8450085468318136, 'recall': 0.6412981367508571, 'f1': 0.656596256891353, 'kappa': 0.6927743352127878}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8377952755905512, 'precision': 0.8650558746179421, 'recall': 0.6424591228782576, 'f1': 0.6582313615011299, 'kappa': 0.695585394801145}
INFO:utils.ml:Metrics for classif

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.833071,0.8371,0.633223,0.644289,0.686637
RandomForestClassifier1,0.823622,0.87786,0.615105,0.616833,0.668381
RandomForestClassifier2,0.83622,0.845009,0.641298,0.656596,0.692774
RandomForestClassifier3,0.837795,0.865056,0.642459,0.658231,0.695585
XGBClassifier4,0.874016,0.861224,0.708337,0.738553,0.765971
XGBClassifier5,0.868504,0.85907,0.707416,0.738604,0.755846
XGBClassifier6,0.870079,0.850614,0.70499,0.734824,0.758469
XGBClassifier7,0.866929,0.863552,0.700147,0.73048,0.752525
XGBClassifier8,0.873228,0.862376,0.710722,0.741938,0.76452


In [8]:
highest = df[df["kappa"] == df.iloc[1:]["kappa"].max()]
idx = highest.index[0]

confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[451,  43,   4],
       [ 47, 630,   1],
       [ 43,  22,  29]], dtype=int64)

___
# Balance the data

In [9]:
# Backup
pipeline.backup_feature_matrix = pipeline.feature_matrix.copy()
pipeline.backup_labels = pipeline.labels.copy()

In [10]:
print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

(5082, 575)
(5082, 575)
(5082,)
(5082,)


In [12]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)


pipeline.feature_matrix, pipeline.labels = smote.fit_resample(np.nan_to_num(pipeline.feature_matrix), pipeline.labels)

In [13]:
pipeline.fitted_classifiers = {}
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 34.538575887680054 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 35.69594883918762 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 53.216355323791504 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 67.01090383529663 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 78.19164228439331 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 101.76847815513611 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 60.98328185081482 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 79.27406692504883 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 66.67689085006714 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 82.40269589424133 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 64.83668684959412 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 125.18765187263489 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 122.34761166572571 seconds
INFO:utils.ml:Fitting completed in 972.16 seconds.


'list' object has no attribute 'shape'


In [14]:
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 0, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 2, 0]),
 'XGBClassifier4': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 2, 0], dtype=int64)}

In [15]:
import pandas as pd

df_smote = pd.DataFrame(data=pipeline.calculate_metrics())
df_smote = df_smote.T
df_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8346456692913385, 'precision': 0.7385549184963662, 'recall': 0.7251288465227255, 'f1': 0.7307646821673245, 'kappa': 0.7020537844866879}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8236220472440945, 'precision': 0.7208206472426656, 'recall': 0.7195232949623134, 'f1': 0.7198468262984393, 'kappa': 0.6837297828532802}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8354330708661417, 'precision': 0.7496725255882598, 'recall': 0.7349615621842167, 'f1': 0.7410891335309939, 'kappa': 0.7037108973843778}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.841732283464567, 'precision': 0.7632529521902027, 'recall': 0.7423045599834851, 'f1': 0.7510249656003661, 'kappa': 0.7144937456380522}
INFO:utils.ml:Metrics for classi

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.834646,0.738555,0.725129,0.730765,0.702054
RandomForestClassifier1,0.823622,0.720821,0.719523,0.719847,0.68373
RandomForestClassifier2,0.835433,0.749673,0.734962,0.741089,0.703711
RandomForestClassifier3,0.841732,0.763253,0.742305,0.751025,0.714494
XGBClassifier4,0.873228,0.788992,0.772378,0.779928,0.770802
XGBClassifier5,0.874016,0.800282,0.770526,0.783145,0.771594
XGBClassifier6,0.87874,0.801004,0.785338,0.792486,0.781043
XGBClassifier7,0.877953,0.797974,0.775683,0.785542,0.779044
XGBClassifier8,0.877953,0.796326,0.775683,0.784933,0.77901


In [16]:
highest = df_smote[df_smote["kappa"] == df_smote.iloc[1:]["kappa"].max()]
idx = highest.index[0]

confusion_matrix(pipeline.predictions["GT"], pipeline.predictions[idx])

array([[440,  33,  25],
       [ 45, 627,   6],
       [ 30,  12,  52]], dtype=int64)

___
# Borderline SMOTE

In [17]:
from imblearn.over_sampling import BorderlineSMOTE
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)

pipeline.feature_matrix, pipeline.labels = smote.fit_resample(
    np.nan_to_num(pipeline.backup_feature_matrix), pipeline.backup_labels)

print(pipeline.backup_feature_matrix.shape)
print(pipeline.feature_matrix.shape)

print(pipeline.backup_labels.shape)
print(pipeline.labels.shape)

(5082, 575)
(8139, 575)
(5082,)
(8139,)


In [18]:
pipeline.fitted_classifiers = {}

pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 24.1924045085907 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 23.540268182754517 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 36.463462829589844 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 54.31857109069824 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier4; Done in 50.940120697021484 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier5; Done in 55.51070690155029 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 27.62213659286499 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 38.590735912323 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 51.78501224517822 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 51.77098798751831 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 28.229331970214844 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 50.00513434410095 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 62.14029312133789 seconds
INFO:utils.ml:Fitting completed in 555.11 seconds.


'list' object has no attribute 'shape'


In [19]:
pipeline.predict_with_classifiers(VAL_PATH, percent)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\multiclass\val


Processed 5/53 batches.
Processed 10/53 batches.
Processed 15/53 batches.
Processed 20/53 batches.
Processed 25/53 batches.
Processed 30/53 batches.
Processed 35/53 batches.
Processed 40/53 batches.
Processed 45/53 batches.
Processed 50/53 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: XGBClassifier4
INFO:utils.ml:Predictions made with classifier: XGBClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8


Processed 53/53 batches.


INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12


{'GT': array([0, 0, 0, ..., 2, 2, 2]),
 'RandomForestClassifier0': array([0, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier1': array([0, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier2': array([0, 0, 2, ..., 1, 2, 0]),
 'RandomForestClassifier3': array([0, 0, 2, ..., 1, 2, 0]),
 'XGBClassifier4': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier5': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier6': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier7': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier8': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier9': array([0, 0, 0, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier10': array([0, 0, 2, ..., 1, 2, 0], dtype=int64),
 'XGBClassifier11': array([0, 0, 2, ..., 0, 2, 0], dtype=int64),
 'XGBClassifier12': array([0, 0, 2, ..., 1, 2, 0], dtype=int64)}

In [20]:
df_borderline_smote = pd.DataFrame(data=pipeline.calculate_metrics(
    ["accuracy", "precision", "recall", "f1", "kappa"],))
df_borderline_smote = df_borderline_smote.T
df_borderline_smote

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8354330708661417, 'precision': 0.7545066977840088, 'recall': 0.7168125209051861, 'f1': 0.7309485246077422, 'kappa': 0.7012847580162327}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8307086614173228, 'precision': 0.7395689197777174, 'recall': 0.7314786038020152, 'f1': 0.7347017160165189, 'kappa': 0.6960586132124618}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8401574803149606, 'precision': 0.7550621203976208, 'recall': 0.7256935858667205, 'f1': 0.7374396614933398, 'kappa': 0.7103427664900477}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.8393700787401575, 'precision': 0.7462055294090083, 'recall': 0.7189153274114471, 'f1': 0.7297087046373831, 'kappa': 0.7090335709761324}
INFO:utils.ml:Metrics for class

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.835433,0.754507,0.716813,0.730949,0.701285
RandomForestClassifier1,0.830709,0.739569,0.731479,0.734702,0.696059
RandomForestClassifier2,0.840157,0.755062,0.725694,0.73744,0.710343
RandomForestClassifier3,0.83937,0.746206,0.718915,0.729709,0.709034
XGBClassifier4,0.879528,0.798377,0.782953,0.79001,0.782377
XGBClassifier5,0.88189,0.817062,0.793946,0.804306,0.786268
XGBClassifier6,0.876378,0.795367,0.786917,0.790864,0.777315
XGBClassifier7,0.880315,0.811866,0.783977,0.796123,0.78308
XGBClassifier8,0.88189,0.80371,0.787304,0.794872,0.786429


In [34]:
from sklearn.metrics import classification_report
highest = df_borderline_smote[df_borderline_smote["kappa"] == df_borderline_smote.iloc[1:]["kappa"].max()]
idx = highest.index[0]

print(confusion_matrix(pipeline.predictions["GT"], pipeline.predictions["XGBClassifier9"]))
idx

[[441  38  19]
 [ 46 624   8]
 [ 26  14  54]]


'XGBClassifier8'