In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib.cm as mpl_cm
import pandas as pd
import cv2  
from os import listdir 
from os.path import isfile, join 
import os 
from sklearn.model_selection import train_test_split,cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler,PolynomialFeatures 
from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.pipeline import Pipeline 
import random 
from defisheye import Defisheye
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.svm import LinearSVC 
import joblib 



def display_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    ConfusionMatrixDisplay(cm).plot()


class  ImagePreprocessing():
    """
    A Custom class to preform preprocessings. 

    This transformer takes an iterable of images (e.g., a list or a NumPy array)
    and resizes each image to a target size using OpenCV's cv2.resize function.

    The input `X` is expected to be a collection where each element
    is a valid image represented as a NumPy array.

    Attributes:
        height (int): The target height for the resized images.
        width (int): The target width for the resized images.
        
    """
    def __init__(self, height=128, width=128):
        self.height = height
        self.width = width


    def resize(self, X):
        """
        Resizes each image in the input iterable.
        and flatens them to a 1d representation: 
        an unflattened version maybe obtained by using the method unflaten 

        Args:
            X (iterable): The input data, expected to be an iterable of images.
                          Each element should be a NumPy array representing an image.

        Returns:
            np.ndarray: A new NumPy array containing a flattend representation of the resized images.
        """
        if not hasattr(X, '__iter__'):
            raise TypeError("Input must be an iterable (e.g., a list or NumPy array).")
        
        resized_images = []
        for i, image in enumerate(X):
            # Check if the individual item is a valid image (a NumPy array)
            if not isinstance(image, np.ndarray) or image.ndim not in [2, 3]:
                raise ValueError(f"Element at index {i} is not a valid image (expected 2D or 3D NumPy array), got {type(image)}.")

            try:
                # Resize the image using OpenCV
                resized_image = cv2.resize(image, (self.width, self.height), interpolation=cv2.INTER_CUBIC)
                resized_images.append(resized_image)
            except Exception as e:
                # This handles potential errors from cv2.resize for malformed arrays
                raise RuntimeError(f"Could not resize image at index {i}. Original shape: {image.shape}. Error: {e}")

        # It's a good practice for transformers to return a NumPy array
        # as a standard output type for subsequent steps in a pipeline.
        return np.array(resized_images)

    def flatten(self, X ): 
        flattend_images = []
        for i, image in enumerate(X):
            flattend_images.append(image.flatten())
        return np.array(flattend_images)

    def add_grayscale(self,X): 
        if not hasattr(X, '__iter__'):
            raise TypeError("Input must be an iterable (e.g., a list or NumPy array).")
        
        rgb_gray_images = []
        for i, image in enumerate(X):
            # Check if the individual item is a valid image (a NumPy array)
            if not isinstance(image, np.ndarray) or image.ndim not in [ 3]:
                raise ValueError(f"Element at index {i} is not a valid image (expected 2D or 3D NumPy array), got {type(image)}.")

            try:
                # Resize the image using OpenCV
                grey_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) #(mxn)
                grey_image=grey_image.reshape(grey_image.shape+(1,))#(mxnx1)
                im_rgb_Gray= np.concat((image,grey_image),axis=2)#(mxnx3)+(mxnx1)=(mxnx4) (rgb gray)
                rgb_gray_images.append(im_rgb_Gray)
            except Exception as e:
                # This handles potential errors from cv2.resize for malformed arrays
                raise RuntimeError(f"Could not resize image at index {i}. Original shape: {image.shape}. Error: {e}")
        return rgb_gray_images
    
    def add_rotation_copies(self,X,y):
        augmented_images=[] 
        augmented_labels=[]
        for i, image in enumerate(X):
            height, width = image.shape[:2]
 
            # Define the rotation center
            center = (width // 2, height // 2)
 
            # Define the rotation angle
            angles = [90,180,270]  # Rotate by 45 degrees
 
            # Define the scaling factor 
            scale = 1.0  # No scaling
            augmented_images.append(image)
            augmented_labels.append(y[i])
            for angle in angles:
                # Get the rotation matrix
                rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
 
                # Perform the rotation
                rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))
                augmented_images.append(rotated_image)
                augmented_labels.append(y[i])
        return augmented_images,np.array(augmented_labels)


## Load in images

In [2]:
dirpath="./CCSN_v2"
class_dirs=listdir(dirpath)
lists_image_names=[]
for dir in class_dirs: 
    lists_image_names.append(listdir(dirpath+"/"+dir))


images= []
y=[] 
for  i in range(len(class_dirs)): 
    for image_name in lists_image_names[i]: 
        im=cv2.imread( dirpath+"/"+class_dirs[i]+"/"+image_name )
        im= cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        images.append(im)
        y.append(class_dirs[i])

y=np.array(y)


# Preprocess images & data division 

In [3]:
n=32 #determined in from trails in slask 

imResizer=ImagePreprocessing(height=n, width=n)   
images_rgb_gray= imResizer.add_grayscale(images)
resized_images= imResizer.resize(images_rgb_gray)
augmented_resized_images,augmented_y =imResizer.add_rotation_copies(resized_images,y)
images_training_data= imResizer.flatten(augmented_resized_images)

X_train_val, X_test, y_train_val , y_test =train_test_split(images_training_data, augmented_y ,test_size=0.1 ,random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_train_val,y_train_val, test_size=0.2, random_state=42)

#clear memory of unnecessary data
del images, images_rgb_gray, augmented_resized_images, images_training_data,y,augmented_y


# Train model + cross validation 

In [None]:
n_comp=10 #determined from trails in slask 

pipeline_steps = {
    "logreg": [('scaler', StandardScaler()),
    ('pca', PCA(n_components=n_comp, random_state=21)),
    ('poly', PolynomialFeatures(2)),
    ('logreg',LogisticRegression(random_state=42))],
    "RF": [('scaler', StandardScaler()),
    ('pca', PCA(n_components=n_comp, random_state=21)),
    ('poly', PolynomialFeatures(2)),
    ('RF-CLF',RandomForestClassifier(random_state=42))],
    "ET": [('scaler', StandardScaler()),
    ('pca', PCA(n_components=n_comp, random_state=21)),
    ('poly', PolynomialFeatures(2)),
    ('ET-CLF',RandomForestClassifier(random_state=42))],
}



models = {
    'LogisticRegression': {
        'model': Pipeline(steps=pipeline_steps['logreg'] ) ,
        'params': {
            'logreg__C': [0.01, 0.1, 1, 10, 100],
            'logreg__penalty': ['l1', 'l2'],
            'logreg__solver': ['saga'], 
            'logreg__max_iter': [500,1000] 
        }
    },
    'RandomForestClassifier': {
        'model':Pipeline(steps=pipeline_steps['RF'] ),
        'params': {
            'RF-CLF__n_estimators': [100, 200, 300],
            'RF-CLF__max_depth': [ 10, 20, 30]
        }
    },
    'ExtraTreesClassifier': {
        'model': Pipeline(steps=pipeline_steps['ET'] ),
        'params': {
            'ET-CLF__n_estimators': [100, 200, 300],
            'ET-CLF__max_depth': [ 10, 20, 30]
        }
    },
}

best_models = {}
best_parameters={}

for name, config in models.items():
    print(f"Training {name} with GridSearchCV...")
    grid_search = GridSearchCV(
        estimator=config['model'],
        param_grid=config['params'],
        cv=5,  # Use 5-fold cross-validation
        scoring='accuracy',
        n_jobs=-1  # Use all available cores
    )
    
    # Train on the training set
    grid_search.fit(X_train, y_train)
    
    print(f"Best parameters for {name}: {grid_search.best_params_}")
    print(f"Best cross-validation score for {name}: {grid_search.best_score_:.4f}")
    
    # Store the best model
    best_models[name] = grid_search.best_estimator_
    best_parameters[name] = grid_search.best_params_
    print("-" * 50)




Training LogisticRegression with GridSearchCV...




Best parameters for LogisticRegression: {'logreg__C': 0.01, 'logreg__max_iter': 1000, 'logreg__penalty': 'l1', 'logreg__solver': 'saga'}
Best cross-validation score for LogisticRegression: 0.2711
--------------------------------------------------
Training RandomForestClassifier with GridSearchCV...
Best parameters for RandomForestClassifier: {'RF-CLF__max_depth': 20, 'RF-CLF__n_estimators': 300}
Best cross-validation score for RandomForestClassifier: 0.3686
--------------------------------------------------
Training ExtraTreesClassifier with GridSearchCV...
Best parameters for ExtraTreesClassifier: {'ET-CLF__max_depth': 20, 'ET-CLF__n_estimators': 300}
Best cross-validation score for ExtraTreesClassifier: 0.3686
--------------------------------------------------
