In [1]:
from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

import os

# Root folder of the binary-classification dataset.
# Defaults to the original author's local checkout so existing runs behave the
# same; set the CADX_DATASET_DIR environment variable to run the notebook on
# another machine without editing this cell. An empty value counts as unset.
DATASET_DIR = os.environ.get("CADX_DATASET_DIR") or (
    r"C:\Users\gimes\Src\repos\CADx-Project\dataset\binary"
)

VAL_PATH = os.path.join(DATASET_DIR, "val")      # validation split
TRAIN_PATH = os.path.join(DATASET_DIR, "train")  # training split

In [2]:
# --- Run configuration ---
percent = 100  # forwarded as `percentage` to both the loader and the pipeline
# Named `shuffle` (not `random`) so the flag cannot shadow the stdlib `random`
# module or a `random` name pulled in by the star imports at the top.
shuffle = False

# --- Preprocessing chain ---
# The same factory instance is shared by the validation loader and the
# training pipeline below, so both splits get identical preprocessing.
factory = PreprocessingFactory()
factory.gaussian_smoothing(5)
factory.clahe()
factory.hair_removal()
factory.normalize2float()
# NOTE(review): `np` is not imported explicitly in this notebook; presumably it
# is provided by one of the star imports -- confirm.
factory.pad2square(fill=np.nan)
factory.resize((200, 200))

# --- Validation data loader ---
factory_loader = FactoryLoader(
    path=VAL_PATH,
    batch_size=24,
    factory=factory,
    percentage=percent,
    shuffle=shuffle,
)

# --- Feature extraction strategy ---
strategy = FeatureExtractionStrategy()

# Per-channel statistics with the extractors' default colour space.
strategy.add_extractor(MeanExtractor())  # mean
strategy.add_extractor(StdExtractor())   # standard deviation
strategy.add_extractor(VarExtractor())   # variance

# Per-channel spread statistics in the "lab" colour space.
strategy.add_extractor(StdExtractor("lab"))  # standard deviation
strategy.add_extractor(VarExtractor("lab"))  # variance

# Per-channel spread statistics in the "hsv" colour space.
strategy.add_extractor(StdExtractor("hsv"))  # standard deviation
strategy.add_extractor(VarExtractor("hsv"))  # variance

# Local binary pattern texture descriptors at two sampling densities.
strategy.add_extractor(LBPExtractor(radius=1, n_points=8))
strategy.add_extractor(LBPExtractor(radius=1, n_points=16))

# Grey-level co-occurrence matrix texture properties.
strategy.add_extractor(
    GLCMExtractor(
        properties=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    )
)

# --- Training pipeline ---
# No classifiers are registered here (`classifiers=[]`).
pipeline = MLPipeline(
    dataset_path=TRAIN_PATH,
    preprocessing_factory=factory,
    feature_strategy=strategy,
    classifiers=[],
    percentage=percent,
    verbose=True,
    shuffle=shuffle,
)

INFO:utils.ml:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\train
INFO:utils.ml:Preprocessing steps


In [3]:
# Display the preprocessing steps configured on the pipeline's loader, with their parameters.
pipeline.loader.get_transformation_steps()

{'smoothing': {'kernel_size': 5},
 'clahe': {'clip_limit': 2.0, 'tile_grid_size': (8, 8)},
 'hair_removal': {},
 '01_norm': {},
 'pad2square': {'fill': nan},
 'resize': {'size': (200, 200)}}

In [4]:
# Display the names of the features reported by the pipeline for the configured extractors.
pipeline.get_feature_names()

['mean_rgb_channel_0',
 'mean_rgb_channel_1',
 'mean_rgb_channel_2',
 'std_rgb_channel_0',
 'std_rgb_channel_1',
 'std_rgb_channel_2',
 'var_rgb_channel_0',
 'var_rgb_channel_1',
 'var_rgb_channel_2',
 'std_lab_channel_0',
 'std_lab_channel_1',
 'std_lab_channel_2',
 'var_lab_channel_0',
 'var_lab_channel_1',
 'var_lab_channel_2',
 'std_hsv_channel_0',
 'std_hsv_channel_1',
 'std_hsv_channel_2',
 'var_hsv_channel_0',
 'var_hsv_channel_1',
 'var_hsv_channel_2',
 'lbp_rad1_bins8_0',
 'lbp_rad1_bins8_1',
 'lbp_rad1_bins8_2',
 'lbp_rad1_bins8_3',
 'lbp_rad1_bins8_4',
 'lbp_rad1_bins8_5',
 'lbp_rad1_bins8_6',
 'lbp_rad1_bins8_7',
 'lbp_rad1_bins8_8',
 'lbp_rad1_bins8_9',
 'lbp_rad1_bins16_0',
 'lbp_rad1_bins16_1',
 'lbp_rad1_bins16_2',
 'lbp_rad1_bins16_3',
 'lbp_rad1_bins16_4',
 'lbp_rad1_bins16_5',
 'lbp_rad1_bins16_6',
 'lbp_rad1_bins16_7',
 'lbp_rad1_bins16_8',
 'lbp_rad1_bins16_9',
 'lbp_rad1_bins16_10',
 'lbp_rad1_bins16_11',
 'lbp_rad1_bins16_12',
 'lbp_rad1_bins16_13',
 'lbp_rad1_bi

In [5]:
# Run feature extraction on the pipeline; progress is reported per batch in the cell output.
pipeline.run_feature_extraction()

INFO:utils.ml:Running feature extraction...


Processed 5/475 batches.
Processed 10/475 batches.
Processed 15/475 batches.
Processed 20/475 batches.
Processed 25/475 batches.
Processed 30/475 batches.
Processed 35/475 batches.
Processed 40/475 batches.
Processed 45/475 batches.
Processed 50/475 batches.
Processed 55/475 batches.
Processed 60/475 batches.
Processed 65/475 batches.
Processed 70/475 batches.
Processed 75/475 batches.
Processed 80/475 batches.
Processed 85/475 batches.
Processed 90/475 batches.
Processed 95/475 batches.
Processed 100/475 batches.
Processed 105/475 batches.
Processed 110/475 batches.
Processed 115/475 batches.
Processed 120/475 batches.
Processed 125/475 batches.
Processed 130/475 batches.
Processed 135/475 batches.
Processed 140/475 batches.
Processed 145/475 batches.
Processed 150/475 batches.
Processed 155/475 batches.
Processed 160/475 batches.
Processed 165/475 batches.
Processed 170/475 batches.
Processed 175/475 batches.
Processed 180/475 batches.
Processed 185/475 batches.
Processed 190/475 bat

INFO:utils.ml:Feature extraction completed. Extracted 15195 features.


Processed 475/475 batches.
