In [1]:
from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

import os

# Root folder of the binary dataset. The default is the original author's local
# checkout; set the CADX_DATASET_DIR environment variable to run this notebook
# on another machine without editing the cell.
DATASET_DIR = os.environ.get(
    "CADX_DATASET_DIR",
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\binary",
)

# Split directories: TRAIN_PATH is given to MLPipeline at construction,
# VAL_PATH is given to predict_with_classifiers().
VAL_PATH = os.path.join(DATASET_DIR, "val")
TRAIN_PATH = os.path.join(DATASET_DIR, "train")

INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [2]:
# --- Run configuration (forwarded to MLPipeline below) -----------------------
percent = 100     # forwarded as `percentage`
random = False    # forwarded as `shuffle`
batch_size = 24   # forwarded as `batch_size`

# --- Preprocessing -----------------------------------------------------------
# Steps are registered on the PreprocessingFactory in this order:
# pad2square (NaN fill) -> resize to (200, 200) -> normalize2float.
factory = PreprocessingFactory()
factory.pad2square(fill=np.nan)
factory.resize((200, 200))
factory.normalize2float()

# --- Feature extraction ------------------------------------------------------
# Extractors are added in a fixed order: gradient, colour moments, LBP,
# Fourier/FFT, GLCM.
strategy = FeatureExtractionStrategy()

strategy.add_extractor(GradientExtractor())

# Colour moments, one extractor per colour space.
for color_space in ("rgb", "lab", "hsv"):
    strategy.add_extractor(ColorMomentsExtractor(color_space))

# Local binary patterns at five (radius, n_points) settings.
lbp_settings = [(1, 8), (2, 16), (3, 24), (4, 32), (5, 40)]
for lbp_radius, lbp_points in lbp_settings:
    strategy.add_extractor(LBPExtractor(radius=lbp_radius, n_points=lbp_points))

# Frequency-domain extractors.
strategy.add_extractor(FourierTransformExtractor())
strategy.add_extractor(FFTExtractor())

# Grey-level co-occurrence matrix properties.
glcm_properties = [
    "contrast",
    "dissimilarity",
    "homogeneity",
    "energy",
    "correlation",
    "ASM",
]
strategy.add_extractor(GLCMExtractor(properties=glcm_properties))

# --- Pipeline ----------------------------------------------------------------
# Classifiers are left empty here and assigned in a later cell.
pipeline = MLPipeline(
    dataset_path=TRAIN_PATH,
    preprocessing_factory=factory,
    feature_strategy=strategy,
    classifiers=[],
    percentage=percent,
    verbose=True,
    shuffle=random,
    batch_size=batch_size,
)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\train
INFO:utils.ml:Preprocessing steps


In [3]:
# Run feature extraction on the pipeline (constructed with dataset_path=TRAIN_PATH).
# The recorded run logged progress for 634 batches and finished with
# "Extracted 15195 features".
# NOTE(review): the recorded output also contains warning fragments pointing at
# skew()/kurtosis() calls with nan_policy='omit' -- presumably triggered by the
# NaN padding configured in the preprocessing cell; confirm the colour-moment
# features contain no NaN/inf before training.
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/634 batches.
Processed 10/634 batches.
Processed 15/634 batches.
Processed 20/634 batches.
Processed 25/634 batches.
Processed 30/634 batches.
Processed 35/634 batches.
Processed 40/634 batches.
Processed 45/634 batches.
Processed 50/634 batches.
Processed 55/634 batches.
Processed 60/634 batches.
Processed 65/634 batches.
Processed 70/634 batches.
Processed 75/634 batches.


  skew_val = skew(channel.flatten(), nan_policy='omit')
  kurtosis_val = kurtosis(channel.flatten(), nan_policy='omit')


Processed 80/634 batches.
Processed 85/634 batches.
Processed 90/634 batches.
Processed 95/634 batches.
Processed 100/634 batches.
Processed 105/634 batches.
Processed 110/634 batches.
Processed 115/634 batches.
Processed 120/634 batches.
Processed 125/634 batches.
Processed 130/634 batches.
Processed 135/634 batches.
Processed 140/634 batches.
Processed 145/634 batches.
Processed 150/634 batches.
Processed 155/634 batches.
Processed 160/634 batches.
Processed 165/634 batches.
Processed 170/634 batches.
Processed 175/634 batches.
Processed 180/634 batches.
Processed 185/634 batches.
Processed 190/634 batches.
Processed 195/634 batches.
Processed 200/634 batches.
Processed 205/634 batches.
Processed 210/634 batches.
Processed 215/634 batches.
Processed 220/634 batches.
Processed 225/634 batches.
Processed 230/634 batches.
Processed 235/634 batches.
Processed 240/634 batches.
Processed 245/634 batches.
Processed 250/634 batches.
Processed 255/634 batches.
Processed 260/634 batches.
Proce

INFO:utils.ml:Feature extraction completed. Extracted 15195 features.


Processed 634/634 batches.


In [4]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# ---------------------------------------------------------------------------
# Random forests
# ---------------------------------------------------------------------------
# All forests share the same seed, so variants differ only in the listed
# hyper-parameters. n_jobs=-1 builds the trees on every available core; with a
# fixed random_state this only affects wall-clock time, not the fitted model.
RF_COMMON = dict(random_state=42, n_jobs=-1)

# NOTE(review): max_features='sqrt' is already the scikit-learn default for
# RandomForestClassifier, and oob_score only adds an out-of-bag estimate after
# fitting, so rf0/rf1 (and rf3/rf4) appear to train the same forest -- the
# recorded metrics for those pairs are identical. Consider dropping one of
# each pair or varying max_features for real.
rf0 = RandomForestClassifier(n_estimators=300, max_features='sqrt', **RF_COMMON)  # explicit sqrt feature sub-sampling
rf1 = RandomForestClassifier(n_estimators=300, oob_score=True, **RF_COMMON)       # with out-of-bag score
rf2 = RandomForestClassifier(n_estimators=300, bootstrap=False, **RF_COMMON)      # bootstrap disabled
rf3 = RandomForestClassifier(n_estimators=100, max_features='sqrt', **RF_COMMON)  # explicit sqrt feature sub-sampling
rf4 = RandomForestClassifier(n_estimators=100, oob_score=True, **RF_COMMON)       # with out-of-bag score
rf5 = RandomForestClassifier(n_estimators=100, bootstrap=False, **RF_COMMON)      # bootstrap disabled

# ---------------------------------------------------------------------------
# XGBoost
# ---------------------------------------------------------------------------
# Three hyper-parameter presets are reused at several n_estimators values.
XGB_DEPTH7_MCW3 = dict(learning_rate=0.1, max_depth=7, min_child_weight=3,
                       subsample=0.7, colsample_bytree=0.7)
XGB_DEPTH7_MCW1 = dict(learning_rate=0.1, max_depth=7, min_child_weight=1,
                       subsample=0.8, colsample_bytree=0.8)
XGB_DEFAULT_DEPTH = dict(learning_rate=0.1, min_child_weight=1,
                         subsample=0.8, colsample_bytree=0.8)

# Library defaults, varying only the number of boosting rounds.
xgb6 = XGBClassifier(n_estimators=350)
xgb7 = XGBClassifier(n_estimators=450)
xgb8 = XGBClassifier(n_estimators=550)

# 400-500 rounds.
xgb9 = XGBClassifier(n_estimators=400, **XGB_DEPTH7_MCW3)
xgb10 = XGBClassifier(n_estimators=400, **XGB_DEPTH7_MCW1)
xgb11 = XGBClassifier(n_estimators=500, **XGB_DEPTH7_MCW1)
xgb12 = XGBClassifier(n_estimators=500, **XGB_DEFAULT_DEPTH)

# 1250 rounds.
# NOTE(review): xgb14 and xgb15 have identical settings (and identical recorded
# metrics); one of them was probably meant to vary a parameter.
xgb13 = XGBClassifier(n_estimators=1250, **XGB_DEPTH7_MCW3)
xgb14 = XGBClassifier(n_estimators=1250, **XGB_DEPTH7_MCW1)
xgb15 = XGBClassifier(n_estimators=1250, **XGB_DEPTH7_MCW1)
xgb16 = XGBClassifier(n_estimators=1250, **XGB_DEFAULT_DEPTH)

# 1000 rounds.
# NOTE(review): xgb18 and xgb19 are likewise identical.
xgb17 = XGBClassifier(n_estimators=1000, **XGB_DEPTH7_MCW3)
xgb18 = XGBClassifier(n_estimators=1000, **XGB_DEPTH7_MCW1)
xgb19 = XGBClassifier(n_estimators=1000, **XGB_DEPTH7_MCW1)
xgb20 = XGBClassifier(n_estimators=1000, **XGB_DEFAULT_DEPTH)

# Order matters: the recorded logs label each model with its class name plus
# its position in this list (RandomForestClassifier0 ... XGBClassifier20).
pipeline.classifiers = [
    rf0, rf1, rf2, rf3, rf4, rf5,
    xgb6, xgb7, xgb8, xgb9, xgb10, xgb11, xgb12, xgb13,
    xgb14, xgb15, xgb16, xgb17, xgb18, xgb19, xgb20,
]
# Reset previously fitted models so re-running this cell starts from scratch.
pipeline.fitted_classifiers = {}

In [5]:
# Fit every estimator in pipeline.classifiers; the recorded run logged a
# per-classifier duration and a total of 1123.15 seconds.
# NOTE(review): the recorded output prints "'list' object has no attribute
# 'shape'" after the fits -- this looks like an exception being caught and
# printed inside the pipeline rather than raised; confirm what fails and
# whether it affects the fitted models.
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 119.9068911075592 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 121.99450850486755 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 179.1523401737213 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 39.61761927604675 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 38.024558544158936 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 55.522435665130615 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier6


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier6; Done in 18.77456021308899 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier7


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier7; Done in 24.09827995300293 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier8


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier8; Done in 28.15670371055603 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 25.07862138748169 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 29.228170156478882 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 35.02079463005066 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 27.82181143760681 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier13


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier13; Done in 73.63275504112244 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier14


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier14; Done in 64.63442420959473 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier15


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier15; Done in 50.57254600524902 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier16


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier16; Done in 41.89902949333191 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier17


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier17; Done in 34.68984007835388 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier18


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier18; Done in 40.65733242034912 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier19


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier19; Done in 40.80505108833313 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier20


'list' object has no attribute 'shape'


INFO:utils.ml:Fitted classifier: XGBClassifier20; Done in 33.83465766906738 seconds
INFO:utils.ml:Fitting completed in 1123.15 seconds.


'list' object has no attribute 'shape'


In [6]:
# Predict on the validation split with every fitted classifier (the recorded
# run processed 159 batches from VAL_PATH first). As the last expression of the
# cell, the returned dict is displayed: a "GT" array plus one prediction array
# per classifier label.
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\val


Processed 5/159 batches.
Processed 10/159 batches.
Processed 15/159 batches.
Processed 20/159 batches.
Processed 25/159 batches.
Processed 30/159 batches.
Processed 35/159 batches.
Processed 40/159 batches.
Processed 45/159 batches.
Processed 50/159 batches.
Processed 55/159 batches.
Processed 60/159 batches.
Processed 65/159 batches.
Processed 70/159 batches.
Processed 75/159 batches.
Processed 80/159 batches.
Processed 85/159 batches.
Processed 90/159 batches.
Processed 95/159 batches.
Processed 100/159 batches.
Processed 105/159 batches.
Processed 110/159 batches.
Processed 115/159 batches.
Processed 120/159 batches.
Processed 125/159 batches.
Processed 130/159 batches.
Processed 135/159 batches.
Processed 140/159 batches.
Processed 145/159 batches.
Processed 150/159 batches.
Processed 155/159 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0


Processed 159/159 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: XGBClassifier6
INFO:utils.ml:Predictions made with classifier: XGBClassifier7
INFO:utils.ml:Predictions made with classifier: XGBClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO

{'GT': array([0, 0, 0, ..., 1, 1, 1]),
 'RandomForestClassifier0': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier1': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier2': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier3': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier4': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier5': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier6': array([0, 0, 0, ..., 0, 1, 1]),
 'XGBClassifier7': array([0, 0, 0, ..., 0, 1, 1]),
 'XGBClassifier8': array([0, 0, 0, ..., 0, 1, 1]),
 'XGBClassifier9': array([0, 0, 0, ..., 1, 0, 1]),
 'XGBClassifier10': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier11': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier12': array([0, 0, 1, ..., 0, 0, 1]),
 'XGBClassifier13': array([0, 0, 0, ..., 1, 0, 1]),
 'XGBClassifier14': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier15': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier16': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier17': array([0, 0, 

In [12]:
import pandas as pd

# Metrics table: calculate_metrics() is loaded column-wise and transposed so
# that each row is one prediction set ("GT" and every classifier) and each
# column is one metric (accuracy, precision, recall, f1, kappa).
df = pd.DataFrame(data=pipeline.calculate_metrics()).transpose()
df

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8242887249736565, 'precision': 0.8242495287163645, 'recall': 0.8242035201030735, 'f1': 0.824223718952559, 'kappa': 0.6484486338002668}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8242887249736565, 'precision': 0.8242495287163645, 'recall': 0.8242035201030735, 'f1': 0.824223718952559, 'kappa': 0.6484486338002668}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8361433087460485, 'precision': 0.8361429143253492, 'recall': 0.836020453639851, 'f1': 0.8360668121374513, 'kappa': 0.6721394517365231}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.821390937829294, 'precision': 0.8213893926973226, 'recall': 0.8212544584408751, 'f1': 0.8213034349448987, 'kappa': 0.6426149096250445}
INFO:utils.ml:Metrics for classifie

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.824289,0.82425,0.824204,0.824224,0.648449
RandomForestClassifier1,0.824289,0.82425,0.824204,0.824224,0.648449
RandomForestClassifier2,0.836143,0.836143,0.83602,0.836067,0.672139
RandomForestClassifier3,0.821391,0.821389,0.821254,0.821303,0.642615
RandomForestClassifier4,0.821391,0.821389,0.821254,0.821303,0.642615
RandomForestClassifier5,0.830348,0.830373,0.830196,0.830257,0.660524
XGBClassifier6,0.83746,0.837558,0.837636,0.837457,0.674969
XGBClassifier7,0.840358,0.840571,0.840594,0.840358,0.680801
XGBClassifier8,0.841675,0.841878,0.841907,0.841675,0.683432


In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Rank only the real classifiers. "GT" is the ground truth scored against
# itself (accuracy 1.0), so it is removed by label before ranking. Excluding it
# by row position only when computing the max would still let the equality
# filter match "GT" if a classifier ever reached accuracy 1.0.
candidates = df.drop(index="GT", errors="ignore")
highest = candidates[candidates["accuracy"] == candidates["accuracy"].max()]
idx = highest.index[0]  # on ties, the first classifier in table order wins

y_true = pipeline.predictions["GT"]
y_pred = pipeline.predictions[idx]

# Name the winner so the report below is self-explanatory.
print(f"Best classifier by accuracy: {idx}")
print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))


              precision    recall  f1-score   support

           0       0.85      0.85      0.85      1931
           1       0.85      0.84      0.84      1865

    accuracy                           0.85      3796
   macro avg       0.85      0.85      0.85      3796
weighted avg       0.85      0.85      0.85      3796

[[1645  286]
 [ 298 1567]]


In [10]:
# Leaderboard: the metrics table ordered by accuracy, best first. The "GT"
# self-comparison row has accuracy 1.0 and therefore sorts to the top.
# (A former `df_sorted.head(8).index` statement was dropped: its value was
# neither displayed nor stored, so it had no effect.)
df_sorted = df.sort_values("accuracy", ascending=False)
df_sorted

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
XGBClassifier15,0.846154,0.846142,0.846052,0.846089,0.692181
XGBClassifier14,0.846154,0.846142,0.846052,0.846089,0.692181
XGBClassifier19,0.8451,0.84508,0.845007,0.845038,0.690078
XGBClassifier18,0.8451,0.84508,0.845007,0.845038,0.690078
XGBClassifier16,0.8451,0.845096,0.844989,0.845031,0.690067
XGBClassifier17,0.842466,0.842422,0.842409,0.842415,0.68483
XGBClassifier13,0.842466,0.842418,0.842418,0.842418,0.684836
XGBClassifier9,0.842202,0.84215,0.842168,0.842159,0.684318
XGBClassifier20,0.841939,0.841911,0.841854,0.841879,0.683759
