In [1]:
from sklearn.ensemble import RandomForestClassifier

from utils.loader import FactoryLoader
from utils.ml import MLPipeline
from utils.preprocessing import PreprocessingFactory
from utils.feature_extraction import *
from utils.utils import *

In [2]:
VAL_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\val"
TRAIN_PATH = r"C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\train"

In [3]:
# # Initialize the FactoryLoader
# factory = PreprocessingFactory()
# # factory.hair_removal()
# factory.normalize2float()
# factory.pad2square(fill=np.nan)
# factory.resize((200,200))
# factory_loader = FactoryLoader(path=VAL_PATH, batch_size=24, factory=factory, percentage=20)
# 
# # Create the feature extraction pipeline strategy and add desired features
# strategy = FeatureExtractionStrategy()
# strategy.add_extractor(MeanExtractor())  # Add mean feature
# strategy.add_extractor(StdExtractor())   # Add standard deviation feature
# strategy.add_extractor(VarExtractor())   # Add variance feature
# strategy.add_extractor(MaxExtractor())   # Add maximum pixel value feature
# 
# # Extract features using the pipeline
# # feature_matrix = extract_features(factory_loader.get_loader(), strategy)
# 
# print("Feature matrix shape:", feature_matrix.shape)  # Shape: (num_images, num_features)
# print("Example feature vector for one image:", feature_matrix[0])
# print("Example feature dict for one image:", strategy.get_feature_names())

___

# With logger

In [20]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier 


percent = 2

# Initialize the FactoryLoader
factory = PreprocessingFactory()
# factory.hair_removal()
factory.normalize2float()
factory.pad2square(fill=0)
factory.resize((200,200))
factory_loader = FactoryLoader(path=VAL_PATH, batch_size=24, factory=factory, percentage=percent, shuffle=True)

# Create the feature extraction pipeline strategy and add desired features
strategy = FeatureExtractionStrategy()
strategy.add_extractor(MeanExtractor())  # Add mean feature
strategy.add_extractor(StdExtractor())   # Add standard deviation feature
strategy.add_extractor(VarExtractor())   # Add variance feature
strategy.add_extractor(MaxExtractor())   # Add maximum pixel value feature

# Create classifiers
svm = SVC(C=1., kernel='linear', gamma="scale", probability=False, random_state=42, tol=0.0001)
rf = RandomForestClassifier(n_estimators=12)

pipeline = MLPipeline(dataset_path=TRAIN_PATH, preprocessing_factory=factory, feature_strategy=strategy, classifiers=[svm, rf], percentage=10, verbose=True, shuffle=True)

INFO:__main__:MLPipeline initialized with dataset path: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\train
INFO:__main__:Preprocessing steps


In [21]:
pipeline.run_feature_extraction()

INFO:__main__:Running feature extraction...
INFO:__main__:Feature extraction completed. Extracted 1519 features.


In [25]:
print("Preprocessing steps")
print(pipeline.loader.get_transformation_steps())
print("\nFeature names")
print(pipeline.get_feature_names())

Preprocessing steps
{'01_norm': {}, 'pad2square': {'fill': 0}, 'resize': {'size': (200, 200)}}

Feature names
['mean_channel_0', 'mean_channel_1', 'mean_channel_2', 'std_channel_0', 'std_channel_1', 'std_channel_2', 'var_channel_0', 'var_channel_1', 'var_channel_2', 'max_channel_0', 'max_channel_1', 'max_channel_2']


In [26]:
pipeline.feature_matrix

array([[0.5252485 , 0.35609394, 0.2570804 , ..., 1.        , 0.9104314 ,
        0.8444204 ],
       [0.6843221 , 0.45394096, 0.49159136, ..., 0.9843137 , 0.77254903,
        0.827451  ],
       [0.63147455, 0.5096835 , 0.45047495, ..., 0.75151056, 0.6609412 ,
        0.63774747],
       ...,
       [0.4831145 , 0.43564323, 0.45877856, ..., 0.79607844, 0.74509805,
        0.80784315],
       [0.36791363, 0.30910826, 0.34987226, ..., 0.78039217, 0.77254903,
        0.79607844],
       [0.4339162 , 0.57684594, 0.6866498 , ..., 0.55209255, 0.78304315,
        0.96735686]], dtype=float32)

In [27]:
pipeline.fit_classifiers()

INFO:__main__:Fitting classifiers...
INFO:__main__:Fitted classifier: SVC


[1 0 1 ... 0 1 0]


INFO:__main__:Fitted classifier: RandomForestClassifier
INFO:__main__:All classifiers have been fitted.


[1 0 1 ... 0 1 0]


In [30]:
pipeline.fitted_classifiers

{'SVC': SVC(kernel='linear', random_state=42, tol=0.0001),
 'RandomForestClassifier': RandomForestClassifier(n_estimators=12)}

In [28]:
predictions = pipeline.predict_with_classifiers(new_dataset_path=VAL_PATH, percentage=percent)


INFO:__main__:Predicting with classifiers on dataset: C:\Users\gimes\Src\repos\CADx-Project\dataset\binary\val
INFO:__main__:Predictions made with classifier: SVC
INFO:__main__:Predictions made with classifier: RandomForestClassifier


In [29]:
predictions

{'SVC': array([1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 0, 0, 0, 0], dtype=int64),
 'RandomForestClassifier': array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
        1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
        1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 1], dtype=int64)}