In [8]:
from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\train"

In [9]:
percent = 100
random = False
batch_size = 48

# Initialize the FactoryLoader
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe(clip_limit=1.5)
factory.pad2square(fill=np.nan)
factory.resize((150,150))
factory.hair_removal()
factory.normalize2float()

factory_loader = FactoryLoader(path=VAL_PATH, batch_size=batch_size, factory=factory, percentage=percent, shuffle=random)

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()

strategy.add_extractor(GradientExtractor())   # Add variance feature

# strategy.add_extractor(MeanExtractor())  # Add mean feature
# strategy.add_extractor(StdExtractor())   # Add standard deviation feature
strategy.add_extractor(VarExtractor())   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor())   # Add color moments feature


# strategy.add_extractor(MeanExtractor("lab"))  # Add mean feature
# strategy.add_extractor(StdExtractor("lab"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("lab"))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("lab"))   # Add color moments feature

# strategy.add_extractor(MeanExtractor("hsv"))  # Add mean feature
# strategy.add_extractor(StdExtractor("hsv"))   # Add standard deviation feature
strategy.add_extractor(VarExtractor("hsv"))   # Add variance feature
strategy.add_extractor(ColorMomentsExtractor("hsv"))   # Add color moments feature

# strategy.add_extractor(LBPExtractor(radius=1, n_points=16))
# strategy.add_extractor(LBPExtractor(radius=2, n_points=16))
# strategy.add_extractor(LBPExtractor(radius=3, n_points=16))
# strategy.add_extractor(LBPExtractor(radius=1, n_points=32))
# strategy.add_extractor(LBPExtractor(radius=2, n_points=32))
# strategy.add_extractor(LBPExtractor(radius=3, n_points=32))
strategy.add_extractor(LBPExtractor(radius=1, n_points=8))
strategy.add_extractor(LBPExtractor(radius=2, n_points=16))
strategy.add_extractor(LBPExtractor(radius=3, n_points=24))
strategy.add_extractor(LBPExtractor(radius=4, n_points=32))

strategy.add_extractor(GLCMExtractor(properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation',]))

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[], percentage=percent, verbose=True, shuffle=random, batch_size=batch_size)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\train
INFO:utils.ml:Preprocessing steps


In [10]:
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/317 batches.
Processed 10/317 batches.
Processed 15/317 batches.
Processed 20/317 batches.
Processed 25/317 batches.
Processed 30/317 batches.
Processed 35/317 batches.
Processed 40/317 batches.
Processed 45/317 batches.
Processed 50/317 batches.
Processed 55/317 batches.
Processed 60/317 batches.
Processed 65/317 batches.
Processed 70/317 batches.
Processed 75/317 batches.
Processed 80/317 batches.
Processed 85/317 batches.
Processed 90/317 batches.
Processed 95/317 batches.
Processed 100/317 batches.
Processed 105/317 batches.
Processed 110/317 batches.
Processed 115/317 batches.
Processed 120/317 batches.
Processed 125/317 batches.
Processed 130/317 batches.
Processed 135/317 batches.
Processed 140/317 batches.
Processed 145/317 batches.
Processed 150/317 batches.
Processed 155/317 batches.
Processed 160/317 batches.
Processed 165/317 batches.
Processed 170/317 batches.
Processed 175/317 batches.
Processed 180/317 batches.
Processed 185/317 batches.
Processed 190/317 bat

INFO:utils.ml:Feature extraction completed. Extracted 15195 features.


Processed 317/317 batches.


In [11]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

rf1 = RandomForestClassifier(n_estimators=100)
rf2 = RandomForestClassifier(n_estimators=150)
rf3 = RandomForestClassifier(n_estimators=250)

# Random Forest with Class Weight Balancing
rf4 = RandomForestClassifier(n_estimators=100, class_weight="balanced", random_state=42)
# Random Forest with Feature Selection via max_features
rf5 = RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=42)
# Random Forest with Min Samples per Leaf
rf6 = RandomForestClassifier(n_estimators=100, min_samples_leaf=5, random_state=42)
# Random Forest with Out-of-Bag Error (OOB)
rf7 = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=42)
# Random Forest with Bootstrap Disabled
rf8 = RandomForestClassifier(n_estimators=100, bootstrap=False, random_state=42)
# Random Forest with High Number of Trees and Limited Depth
rf9 = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

xgb1 = XGBClassifier(n_estimators=350)
xgb2 = XGBClassifier(n_estimators=450)
xgb3 = XGBClassifier(n_estimators=550)
xgb4 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb5 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=3, subsample=0.7, colsample_bytree=0.7)
xgb6 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=5, gamma=0.2, subsample=0.8, colsample_bytree=0.8)
xgb7 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=5, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1)
xgb8 = XGBClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb9 = XGBClassifier(learning_rate=0.05, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb10 = XGBClassifier(learning_rate=0.1, n_estimators=400, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)
xgb11 = XGBClassifier(learning_rate=0.1, n_estimators=500, max_depth=7, min_child_weight=1, subsample=0.8, colsample_bytree=0.8)

pipeline.classifiers = [rf1, rf2, rf3, rf4, rf5, rf6, rf7, rf8, rf9,
                        # svm1, svm2, svm3, svm6, svm7, svm8,
                        xgb1, xgb2, xgb3, xgb4, xgb5, xgb6, xgb7, xgb8, xgb9, xgb10, xgb11]
pipeline.fitted_classifiers = {}

In [12]:
pipeline.fit_classifiers()

INFO:utils.ml:Fitting classifiers...
INFO:utils.ml:Fitting classifier: RandomForestClassifier0
INFO:utils.ml:Fitted classifier: RandomForestClassifier0; Done in 12.84220838546753 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier1


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier1; Done in 19.7077956199646 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier2


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier2; Done in 32.533379316329956 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier3


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier3; Done in 13.058355808258057 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier4


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier4; Done in 12.8689706325531 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier5


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier5; Done in 10.565808773040771 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier6


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier6; Done in 13.142911434173584 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier7


list index out of range


INFO:utils.ml:Fitted classifier: RandomForestClassifier7; Done in 18.813143253326416 seconds
INFO:utils.ml:Fitting classifier: RandomForestClassifier8


list index out of range


INFO:utils.ml:Top 10 features for RandomForestClassifier8: [('lbp_rad2_bins64_10', 0.04959506148998771), ('color_moments_lab_channel_2_std', 0.03601970841756416), ('color_moments_lab_channel_0_std', 0.035481840307283234), ('lbp_rad3_bins64_10', 0.034724624807077345), ('lbp_rad1_bins64_10', 0.032731239640340005), ('lbp_rad1_bins64_40', 0.031967272872539164), ('lbp_rad2_bins64_40', 0.03184603822964966), ('lbp_rad1_bins64_43', 0.026985478245191596), ('lbp_rad1_bins64_41', 0.02421066538319526), ('lbp_rad1_bins64_37', 0.023272663639091896)]
INFO:utils.ml:Fitted classifier: RandomForestClassifier8; Done in 4.036341428756714 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier9
INFO:utils.ml:Fitted classifier: XGBClassifier9; Done in 3.2836296558380127 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier10


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier10; Done in 4.083376407623291 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier11


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier11; Done in 5.101630687713623 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier12


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier12; Done in 1.8052551746368408 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier13


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier13; Done in 4.206547498703003 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier14


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier14; Done in 2.5418179035186768 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier15


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier15; Done in 2.6903717517852783 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier16


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier16; Done in 6.258235216140747 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier17


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier17; Done in 5.093478441238403 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier18


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier18; Done in 4.815093517303467 seconds
INFO:utils.ml:Fitting classifier: XGBClassifier19


list index out of range


INFO:utils.ml:Fitted classifier: XGBClassifier19; Done in 5.891888380050659 seconds
INFO:utils.ml:Fitting completed in 183.37 seconds.


list index out of range


In [13]:
pipeline.predict_with_classifiers(VAL_PATH)

INFO:utils.ml:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\val


Processed 5/80 batches.
Processed 10/80 batches.
Processed 15/80 batches.
Processed 20/80 batches.
Processed 25/80 batches.
Processed 30/80 batches.
Processed 35/80 batches.
Processed 40/80 batches.
Processed 45/80 batches.
Processed 50/80 batches.
Processed 55/80 batches.
Processed 60/80 batches.
Processed 65/80 batches.
Processed 70/80 batches.
Processed 75/80 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier0
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier1


Processed 80/80 batches.


INFO:utils.ml:Predictions made with classifier: RandomForestClassifier2
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier3
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier4
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier5
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier6
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier7
INFO:utils.ml:Predictions made with classifier: RandomForestClassifier8
INFO:utils.ml:Predictions made with classifier: XGBClassifier9
INFO:utils.ml:Predictions made with classifier: XGBClassifier10
INFO:utils.ml:Predictions made with classifier: XGBClassifier11
INFO:utils.ml:Predictions made with classifier: XGBClassifier12
INFO:utils.ml:Predictions made with classifier: XGBClassifier13
INFO:utils.ml:Predictions made with classifier: XGBClassifier14
INFO:utils.ml:Predictions made with classifier: XGBClassifier15
INFO:utils.ml:Predictions made with classifier: X

{'GT': array([0, 0, 0, ..., 1, 1, 1]),
 'RandomForestClassifier0': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier1': array([0, 0, 1, ..., 0, 0, 1]),
 'RandomForestClassifier2': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier3': array([0, 0, 1, ..., 0, 0, 1]),
 'RandomForestClassifier4': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier5': array([0, 0, 1, ..., 0, 0, 1]),
 'RandomForestClassifier6': array([0, 0, 0, ..., 0, 0, 1]),
 'RandomForestClassifier7': array([0, 0, 1, ..., 0, 0, 1]),
 'RandomForestClassifier8': array([0, 0, 1, ..., 0, 0, 1]),
 'XGBClassifier9': array([0, 0, 1, ..., 0, 0, 1]),
 'XGBClassifier10': array([0, 0, 1, ..., 0, 0, 1]),
 'XGBClassifier11': array([0, 0, 1, ..., 0, 0, 1]),
 'XGBClassifier12': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBClassifier13': array([0, 0, 1, ..., 0, 0, 1]),
 'XGBClassifier14': array([0, 0, 0, ..., 1, 0, 1]),
 'XGBClassifier15': array([0, 0, 0, ..., 1, 0, 1]),
 'XGBClassifier16': array([0, 0, 0, ..., 0, 0, 1]),
 'XGBC

In [14]:
import pandas as pd

df = pd.DataFrame(data=pipeline.calculate_metrics())
df.T

INFO:utils.ml:Metrics for classifier GT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'kappa': 1.0}
INFO:utils.ml:Metrics for classifier RandomForestClassifier0: {'accuracy': 0.8013698630136986, 'precision': 0.8014435482415778, 'recall': 0.8015205556859091, 'f1': 0.8013643490388374, 'kappa': 0.6027870289852457}
INFO:utils.ml:Metrics for classifier RandomForestClassifier1: {'accuracy': 0.8113804004214963, 'precision': 0.8114412443442116, 'recall': 0.8115249568560373, 'f1': 0.8113740647250647, 'kappa': 0.6227988099147128}
INFO:utils.ml:Metrics for classifier RandomForestClassifier2: {'accuracy': 0.8024236037934668, 'precision': 0.8024597797829476, 'recall': 0.8025471251473419, 'f1': 0.8024143344340389, 'kappa': 0.604872544679103}
INFO:utils.ml:Metrics for classifier RandomForestClassifier3: {'accuracy': 0.797945205479452, 'precision': 0.797893090456028, 'recall': 0.7979619944381426, 'f1': 0.7979115325203379, 'kappa': 0.595831172422371}
INFO:utils.ml:Metrics for classifie

cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated with a value
cannot access local variable 'report' where it is not associated

Unnamed: 0,accuracy,precision,recall,f1,kappa
GT,1.0,1.0,1.0,1.0,1.0
RandomForestClassifier0,0.80137,0.801444,0.801521,0.801364,0.602787
RandomForestClassifier1,0.81138,0.811441,0.811525,0.811374,0.622799
RandomForestClassifier2,0.802424,0.80246,0.802547,0.802414,0.604873
RandomForestClassifier3,0.797945,0.797893,0.797962,0.797912,0.595831
RandomForestClassifier4,0.800053,0.800053,0.800143,0.800038,0.600105
RandomForestClassifier5,0.799262,0.799447,0.799477,0.799262,0.598624
RandomForestClassifier6,0.800053,0.800053,0.800143,0.800038,0.600105
RandomForestClassifier7,0.815068,0.815083,0.815178,0.815057,0.630147
RandomForestClassifier8,0.758957,0.759465,0.759301,0.758946,0.51817
