In [None]:
import cv2 as cv
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier, plot_tree
from skimage.feature import hog
from skimage import exposure
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Integer id for each freshness class; the class name is the file-name prefix
# (the text before the first '-') of every image in the dataset.
class_label_encoding = dict(SPOILED=0, HALF=1, FRESH=2)

# Loading Images

In [None]:
def load_images(file_path, output_x, output_y):
    """
    Load every labelled image of a directory into the given output lists.

    Each image is read with OpenCV, converted from BGR to RGB, resized to
    128x128 and scaled to float32 values in [0, 1]. The class is the part of
    the file name before the first '-' and is mapped through
    class_label_encoding. The '_classes.csv' index file is skipped.

    Parameters:
    - file_path: Directory containing the image files
    - output_x: List the processed images are appended to (modified in place)
    - output_y: List the encoded labels are appended to (modified in place)

    Raises:
    - IOError: If a file cannot be decoded as an image
    - KeyError: If a file-name prefix is not a key of class_label_encoding
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and everything derived from it) reproducible across machines.
    for file_name in sorted(os.listdir(file_path)):
        class_name = file_name.split('-')[0]
        if class_name == '_classes.csv':
            continue

        image_path = os.path.join(file_path, file_name)
        img = cv.imread(image_path)
        # cv.imread signals failure by returning None instead of raising
        if img is None:
            raise IOError(f"Could not read image file: {image_path}")

        img = cv.cvtColor(img.astype('float32'), cv.COLOR_BGR2RGB)
        img = cv.resize(img, (128, 128), interpolation=cv.INTER_AREA)
        img /= 255
        output_x.append(img)
        output_y.append(class_label_encoding[class_name])

In [None]:
# Fresh lists on every run, so re-executing this cell does not duplicate samples
train_x = []
train_y = []
test_x = []
test_y = []

# load_images appends to the lists in place; the 'valid' folder is used as the test split
load_images('data/train/', train_x, train_y)
load_images('data/valid/', test_x, test_y)

In [None]:
# Sanity check: show the first loaded training image as a raw array
train_x[0]

# Feature Extraction

## Color Histogram
Jason

In [None]:
def extract_color_histogram(image, bins=32):
    """
    Extract color histogram features from an image.

    Parameters:
    - image: The input image (RGB, float32, values scaled to [0, 1])
    - bins: Number of bins for the histogram of each channel

    Returns:
    - histogram_features: 1-D array of length 3 * bins holding the min-max
      normalized histograms of the three channels, concatenated in channel order
    """
    # cv.calcHist treats the upper bound as exclusive, so a range of [0, 1]
    # would silently drop every pixel equal to 1.0. Nudging the bound up keeps
    # those pixels in the last bin without moving any other value across a bin edge.
    value_range = [0, 1 + 1e-6]

    channel_histograms = []
    for channel in range(3):
        hist = cv.calcHist([image], [channel], None, [bins], value_range)
        # Rescale in place so the emptiest bin maps to 0 and the fullest to 1
        cv.normalize(hist, hist, 0, 1, cv.NORM_MINMAX)
        channel_histograms.append(hist.flatten())

    histogram_features = np.concatenate(channel_histograms)

    return histogram_features

In [None]:
def plot_color_histogram(image, bins=32, title="Color Histogram"):
    """
    Plot an image next to the color histogram of each of its channels.

    Parameters:
    - image: The input image (should be in RGB format)
    - bins: Number of bins for the histogram
    - title: Title for the plot

    Returns:
    - features: The flattened histogram features that were plotted, as returned
      by extract_color_histogram
    """
    # One panel for the image plus one per color channel
    fig, ax = plt.subplots(1, 4, figsize=(16, 4))

    # Display the original image
    ax[0].imshow(image)
    ax[0].set_title('Original Image')
    ax[0].axis('off')

    # Reuse the feature extractor so the plot shows exactly what the models see
    features = extract_color_histogram(image, bins)

    # The feature vector is the three channel histograms laid end to end
    channel_hists = np.split(features, 3)
    colors = ['r', 'g', 'b']
    channels = ['Red', 'Green', 'Blue']

    # Plot histograms for each channel
    for axis, hist, col, chan in zip(ax[1:], channel_hists, colors, channels):
        axis.plot(hist, color=col)
        axis.set_xlim([0, bins])
        axis.set_title(f'{chan} Histogram')
        axis.set_xlabel('Bins')
        # The histograms are min-max normalized, so the values are not pixel counts
        axis.set_ylabel('Normalized Frequency')
        axis.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()

    return features

In [None]:
image = train_x[0]  # Get the first image
plot_color_histogram(image, bins=32, title="Meat Sample Color Histogram")
# plot_color_histogram returns the feature vector; ending the cell with print()
# keeps that array from being echoed as the cell output
print()

In [None]:
# Color-histogram feature matrices: one row per image, one column per histogram bin
train_features = np.array([extract_color_histogram(sample) for sample in train_x])
test_features = np.array([extract_color_histogram(sample) for sample in test_x])

print(train_features)

## Local Binary Pattern
Aiden

In [None]:
def get_pixel(img, center, x, y):
    """
    Gets the Local Binary Patterns value for one pixel adjacent to the selected one.

    Parameters:
    - img: The image containing the pixels (2-D, indexed as img[x][y])
    - center: The value of the pixel Local Binary Patterns is being applied to
    - x: The first (row) index of the adjacent pixel
    - y: The second (column) index of the adjacent pixel

    Returns:
    - new_value: 1 if the adjacent pixel exists and is greater than or equal to
      center, otherwise 0 (including when the coordinates fall outside the image)
    """
    # Neighbours outside the image contribute 0. The bounds are checked explicitly
    # because a negative index would not raise: it would wrap around and read a
    # pixel from the opposite border.
    if x < 0 or y < 0 or x >= len(img) or y >= len(img[x]):
        return 0

    try:
        return 1 if img[x][y] >= center else 0
    except TypeError:
        # A missing (None) pixel value cannot be compared; treat it as 0
        return 0
   
# Function for calculating LBP 
def lbp_calculated_pixel(img, x, y):
    """
    Compute the Local Binary Patterns code of a single pixel.

    Parameters:
    - img: The image containing the pixel
    - x: The first index of the selected pixel (img[x][y])
    - y: The second index of the selected pixel

    Returns:
    - val: The LBP code of the pixel, an integer built from the eight
      neighbour comparisons made by get_pixel
    """
    center = img[x][y]

    # Neighbour offsets walked clockwise starting at the top-left corner; the
    # neighbour at position k supplies bit k (weight 2**k) of the code.
    neighbour_offsets = (
        (-1, -1),  # top_left
        (-1, 0),   # top
        (-1, 1),   # top_right
        (0, 1),    # right
        (1, 1),    # bottom_right
        (1, 0),    # bottom
        (1, -1),   # bottom_left
        (0, -1),   # left
    )

    return sum(
        get_pixel(img, center, x + dx, y + dy) << bit
        for bit, (dx, dy) in enumerate(neighbour_offsets)
    )


def lbp_output(img_bgr):
    """
    Apply the Local Binary Patterns filter to an image.

    Parameters:
    - img_bgr: The 3-channel image to be transformed. Despite the historical
      parameter name, the images in this notebook are in RGB channel order
      (load_images converts them), and the image is treated as RGB here.

    Returns:
    - img_lbp: float32 array of shape (height, width) holding the LBP code of
      every pixel
    """
    height, width, _ = img_bgr.shape

    # The loaded images are RGB, so the RGB conversion must be used: the BGR
    # variant would apply the red and blue luminance weights to the wrong channels.
    img_gray = cv.cvtColor(img_bgr, cv.COLOR_RGB2GRAY)

    # Output has the same height and width as the input image
    img_lbp = np.zeros((height, width), np.float32)

    for i in range(height):
        for j in range(width):
            img_lbp[i, j] = lbp_calculated_pixel(img_gray, i, j)

    return img_lbp

In [None]:
# Note: images in train_x were converted to RGB by load_images, so despite its
# name this variable holds an RGB image.
img_bgr = train_x[0]
img_lbp = lbp_output(img_bgr)
  
# Show the source image, then its LBP texture map
plt.imshow(img_bgr) 
plt.show()
   
plt.imshow(img_lbp, cmap ="gray")
plt.show()

In [None]:
def save_images_lbp(imgs, labels, train_test='train'):
    """
    Save images with the Local Binary Patterns filter applied and unique file names based on label.

    Files are written to data/lbp/<train_test>/ as <LABEL>-<index>-lbp.jpg; the
    directory is created if it does not exist.

    Parameters:
    - imgs: List of images to be transformed
    - labels: List of integer labels for each image (index into SPOILED/HALF/FRESH)
    - train_test: The string value either 'train' or 'test' which determines the output directory

    Raises:
    - IOError: If an image could not be written
    """
    label_text = ['SPOILED', 'HALF', 'FRESH']
    output_dir = f'data/lbp/{train_test}'
    # cv.imwrite does not create missing directories; it just reports failure
    os.makedirs(output_dir, exist_ok=True)

    for index, img in enumerate(imgs):
        lbp_image = lbp_output(img)
        filename = f'{output_dir}/{label_text[labels[index]]}-{index}-lbp.jpg'
        # LBP codes are whole numbers in 0..255, so the cast to the 8-bit depth
        # that JPEG requires is lossless
        if not cv.imwrite(filename, lbp_image.astype(np.uint8)):
            raise IOError(f"Could not write LBP image: {filename}")

In [None]:
# save_images_lbp(train_x, train_y, train_test='train')

In [None]:
# save_images_lbp(test_x, test_y, train_test='test')

## Histograms of Oriented Gradients
Fiona

In [None]:
# Adapted from here: https://scikit-image.org/docs/stable/auto_examples/features_detection/plot_hog.html
def make_hog(image, visualize=False):
    """
    Extract Histogram of Oriented Gradients features from a color image.

    Parameters:
    - image: The input image with the color channels on the last axis
    - visualize: If True, also display the input image next to a rendering of its HOG

    Returns:
    - features: 1-D array of HOG features (8 orientations, 16x16-pixel cells, 1x1-cell blocks)
    """
    # Only ask skimage for the rendered HOG image when it will be shown;
    # building it for every sample during bulk feature extraction is wasted work.
    hog_result = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualize=bool(visualize),
            channel_axis=-1
        )

    if not visualize:
        # Without visualization hog() returns just the feature vector
        return hog_result

    features, hog_image = hog_result

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)

    ax1.axis('off')
    ax1.imshow(image, cmap=plt.cm.gray)
    ax1.set_title('Input image')

    # Stretch the intensities so the gradient strokes are visible
    hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

    ax2.axis('off')
    ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
    ax2.set_title('Histogram of Oriented Gradients')
    plt.show()
    return features

In [None]:
# Visual check of the HOG extractor on one training image (also echoes its feature vector)
make_hog(train_x[1], True)

In [None]:
# HOG feature vectors for every image of each split (kept as plain Python lists)
train_features_hog = [make_hog(sample) for sample in train_x]
test_features_hog = [make_hog(sample) for sample in test_x]

# Classification

## Decision Tree
Jason

In [None]:
def train_decision_tree(x_train_tree, y_train_tree, x_test_tree, y_test_tree, max_depth=5, show_tree=True, feature_names=None):
    """
    Train a decision tree classifier on any type of features, print its test
    metrics, and plot its confusion matrix (and optionally the tree itself).

    Parameters:
    - x_train_tree: Training features
    - y_train_tree: Training labels (0=SPOILED, 1=HALF, 2=FRESH)
    - x_test_tree: Test features
    - y_test_tree: Test labels
    - max_depth: Maximum depth of the decision tree
    - show_tree: Whether to visualize the decision tree
    - feature_names: Names of features (will be auto-generated if None)

    Returns:
    - dt_classifier: Trained decision tree classifier
    - accuracy: Classification accuracy on test set
    - report: Classification report
    - conf_matrix: Confusion matrix (always one row/column per class)
    """

    # Class names, listed in the order of their integer labels
    class_names = ['SPOILED', 'HALF', 'FRESH']
    class_ids = list(range(len(class_names)))

    # Create and train a Decision Tree classifier
    dt_classifier = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    dt_classifier.fit(x_train_tree, y_train_tree)

    # Make predictions
    predictions = dt_classifier.predict(x_test_tree)

    # Evaluate the model. The label set is passed explicitly so the report and
    # the matrix always line up with class_names, even when a class is missing
    # from the test labels and the predictions.
    accuracy = accuracy_score(y_test_tree, predictions)
    report = classification_report(y_test_tree, predictions, labels=class_ids, target_names=class_names)
    conf_matrix = confusion_matrix(y_test_tree, predictions, labels=class_ids)

    # Print results
    print(f"Decision Tree Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(report)

    # Display confusion matrix
    plt.figure(figsize=(8, 6))
    plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    # Annotate each cell with its count; the text color flips on dark cells
    # so the number stays readable
    thresh = conf_matrix.max() / 2
    for i in range(conf_matrix.shape[0]):
        for j in range(conf_matrix.shape[1]):
            plt.text(j, i, format(conf_matrix[i, j], 'd'),
                    horizontalalignment="center",
                    color="white" if conf_matrix[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are included in the layout
    plt.tight_layout()
    plt.show()

    # Show decision tree if requested
    if show_tree:
        # Create feature names if not provided; asarray lets plain lists of
        # feature vectors work as well as arrays
        if feature_names is None:
            feature_names = [f"Feature_{i}" for i in range(np.asarray(x_train_tree).shape[1])]

        plt.figure(figsize=(15, 10))
        plot_tree(dt_classifier,
                  feature_names=feature_names,
                  class_names=class_names,
                  filled=True,
                  rounded=True,
                  fontsize=8)
        plt.title("Decision Tree for Classification")
        plt.tight_layout()
        plt.show()

    return dt_classifier, accuracy, report, conf_matrix

In [None]:
# # Create feature names for the histogram features
# bins_per_channel = train_features.shape[1] // 3
# channels = ['Red', 'Green', 'Blue']
# feature_names = []
# for channel in channels:
#     for index in range(bins_per_channel):
#         feature_names.append(f"{channel} Bin {index}")

# #x_train_tree, y_train_tree, x_test_tree, y_test_tree, max_depth=5, show_tree=True, feature_names=None

# # Train the decision tree with histogram visualization
# model, acc, report, conf_matrix = train_decision_tree(x_train_tree=train_features, y_train_tree=train_y, x_test_tree=test_features,
#                                                       y_test_tree=test_y, max_depth=3, feature_names=feature_names)

## Random Forest
Aiden

In [None]:
def train_random_forest(x_train_forest, y_train_forest, x_test_forest, y_test_forest, n_estimators=100, criterion='gini', max_depth=None,
                        min_samples_split=2, min_samples_leaf=1, max_features='sqrt'):
    """
    Train and generate evaluation metrics for a Random Forest classifier given training and testing data.

    Parameters:
    - x_train_forest: Training data
    - y_train_forest: Training labels
    - x_test_forest: Test data
    - y_test_forest: Test labels
    - n_estimators: The number of trees in the forest
    - criterion: The function to measure the quality of a split
    - max_depth: Maximum depth of the tree
    - min_samples_split: The minimum number of samples required to split an internal node
    - min_samples_leaf: The minimum number of samples required to be at a leaf node
    - max_features: The number of features to consider when looking for the best split

    Returns:
    - accuracy: The accuracy score for the model's predictions
    - precision: The macro-averaged precision score for the model's predictions
    - recall: The macro-averaged recall score for the model's predictions
    - f1: The macro-averaged f1 score for the model's predictions
    - confusion: The confusion matrix for the model's predictions
    """
    # Create Random Forest classifier object (fixed seed for reproducible runs)
    clf = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split,
                                min_samples_leaf=min_samples_leaf, max_features=max_features, random_state=42)

    # Train Random Forest classifier
    clf.fit(x_train_forest, y_train_forest)

    # Predict the response for test dataset
    y_pred = clf.predict(x_test_forest)

    # The labels have three classes, so an averaging mode must be given: the
    # scikit-learn default ('binary') raises on multiclass targets. Macro
    # averaging matches the F1 score used when tuning KNN in this notebook.
    accuracy = accuracy_score(y_test_forest, y_pred)
    precision = precision_score(y_test_forest, y_pred, average='macro')
    recall = recall_score(y_test_forest, y_pred, average='macro')
    f1 = f1_score(y_test_forest, y_pred, average='macro')
    confusion = confusion_matrix(y_test_forest, y_pred)

    return accuracy, precision, recall, f1, confusion

## Random Forest Tests

In [None]:
def test_rf(rf_train_x, rf_train_y, rf_test_x, rf_test_y):
    """
    Run a series of tests with different combinations of hyperparameters for the Random Forest classifier.

    Parameters:
    - rf_train_x: Training data
    - rf_train_y: Training labels
    - rf_test_x: Test data
    - rf_test_y: Test labels

    Returns:
    - pd.DataFrame(all_tests): The dataframe containing all hyperparameter values and results for each test (one row per test)
    - confusion_matrices: List of confusion matrices, one per test, in the same order as the dataframe rows
    """
    from itertools import product

    # Hyperparameter tuning values
    all_n_estimators = [10,50,100,500,1000]
    all_criterion = ['gini', 'entropy', 'log_loss']
    all_max_depth = [None]
    all_min_samples_split = [2]
    all_min_samples_leaf = [1]
    all_max_features = ['sqrt', 'log2', None]

    columns = ['n_estimators', 'criterion', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features',
               'accuracy', 'precision', 'recall', 'f1']
    all_tests = {column: [] for column in columns}
    confusion_matrices = []

    # product() yields the combinations in the same order as nested loops
    # over the lists in the order given (the first list varies slowest)
    for hyperparameters in product(all_n_estimators, all_criterion, all_max_depth, all_min_samples_split,
                                   all_min_samples_leaf, all_max_features):
        accuracy, precision, recall, f1, confusion = train_random_forest(rf_train_x, rf_train_y, rf_test_x, rf_test_y,
                                                                         *hyperparameters)

        # Record every value exactly once so all columns keep the same length
        row = (*hyperparameters, accuracy, precision, recall, f1)
        for column, value in zip(columns, row):
            all_tests[column].append(value)
        confusion_matrices.append(confusion)

    return pd.DataFrame(all_tests), confusion_matrices

## KNN
Fiona

In [None]:
def train_knn(x_train, y_train, n_neighbors=5):
    """
    Fit a k-nearest-neighbours classifier.

    Parameters:
    - x_train: Training features
    - y_train: Training labels
    - n_neighbors: Number of neighbours that vote on each prediction

    Returns:
    - The fitted KNeighborsClassifier
    """
    # fit() returns the estimator itself, so build and train in one expression
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(x_train, y_train)

In [None]:
def knn_report(knn, x_test, y_test):
    """
    Print the test accuracy and per-class classification report of a fitted KNN model.

    Parameters:
    - knn: A fitted classifier exposing predict()
    - x_test: Test features
    - y_test: Test labels (0=SPOILED, 1=HALF, 2=FRESH)
    """
    class_names = ['SPOILED', 'HALF', 'FRESH']
    predictions = knn.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)
    # Passing the label ids explicitly keeps the report aligned with class_names
    # even if a class is absent from both the test labels and the predictions
    report = classification_report(y_test, predictions,
                                   labels=list(range(len(class_names))),
                                   target_names=class_names)
    print(f"Knn Accuracy: {accuracy:.4f}")
    print(report)

In [None]:
# Adapted from here: https://medium.com/@agrawalsam1997/hyperparameter-tuning-of-knn-classifier-a32f31af25c7
def tune_knn(x_train, y_train, x_test, y_test, title,
             n_start=1, n_stop=10, n_step=1):
    """
    Fit a KNN classifier for a range of neighbour counts and plot how the
    train accuracy, test accuracy and macro F1 score change.

    Parameters:
    - x_train: Training features
    - y_train: Training labels
    - x_test: Test features
    - y_test: Test labels
    - title: Name of the feature set, shown in the plot title
    - n_start: First neighbour count to try
    - n_stop: End of the neighbour range (exclusive)
    - n_step: Step between neighbour counts
    """
    n_neighbors = np.arange(n_start, n_stop, n_step)
    train_scores = []
    test_scores = []
    f1_scores = []

    # Only the scores are kept; the fitted models are not needed afterwards
    for n in n_neighbors:
        knn = train_knn(x_train, y_train, n_neighbors=n)
        train_scores.append(knn.score(x_train, y_train))
        test_scores.append(knn.score(x_test, y_test))
        f1_scores.append(f1_score(y_test, knn.predict(x_test), average='macro'))

    plt.plot(n_neighbors, train_scores, label="Train Accuracy")
    plt.plot(n_neighbors, test_scores, label="Test Accuracy")
    plt.plot(n_neighbors, f1_scores, label="F1 Score", linestyle='--')
    plt.xlabel("Number of Neighbors")
    # The axis carries both accuracy and F1 curves
    plt.ylabel("Score")
    plt.title(f"KNN ({title}): Varying Number of Neighbors")
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
# Baseline KNN (default n_neighbors) on the HOG features
print("HOG")
knn = train_knn(train_features_hog, train_y)
knn_report(knn, test_features_hog, test_y)

In [None]:
#tune_knn(train_features, train_y, test_features, test_y, "Color Histograms", n_stop=50)
#tune_knn(train_features_hog, train_y, test_features_hog, test_y, "HOG", n_stop=50)

# Neural Network
Aiden

In [None]:
def train_neural_network(x_train_mlp, y_train_mlp, x_test_mlp, y_test_mlp, hidden_layer_sizes, max_iter=100, activation='relu'):
    """
    Train and generate evaluation metrics for an MLP classifier given training and testing data.

    Parameters:
    - x_train_mlp: Training data
    - y_train_mlp: Training labels
    - x_test_mlp: Test data
    - y_test_mlp: Test labels
    - hidden_layer_sizes: The size of each hidden layer in tuple form (each entry is a hidden layer)
    - max_iter: The maximum number of training iterations
    - activation: The activation function of the hidden layers

    Returns:
    - accuracy: The accuracy score for the model's predictions
    - precision: The macro-averaged precision score for the model's predictions
    - recall: The macro-averaged recall score for the model's predictions
    - f1: The macro-averaged f1 score for the model's predictions
    - confusion: The confusion matrix for the model's predictions
    """
    # Create MLP classifier object. The seed is fixed, as for the other
    # classifiers in this notebook, so repeated runs give the same scores.
    mlp = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, max_iter=max_iter, activation=activation,
                        random_state=42)

    # Train MLP classifier
    mlp.fit(x_train_mlp, y_train_mlp)

    # Predict the response for test dataset
    y_pred = mlp.predict(x_test_mlp)

    # The labels have three classes, so an averaging mode must be given: the
    # scikit-learn default ('binary') raises on multiclass targets.
    accuracy = accuracy_score(y_test_mlp, y_pred)
    precision = precision_score(y_test_mlp, y_pred, average='macro')
    recall = recall_score(y_test_mlp, y_pred, average='macro')
    f1 = f1_score(y_test_mlp, y_pred, average='macro')
    confusion = confusion_matrix(y_test_mlp, y_pred)

    return accuracy, precision, recall, f1, confusion

## Neural Network Tests

In [None]:
def test_nn(nn_train_x, nn_train_y, nn_test_x, nn_test_y):
    """
    Run a series of tests with different combinations of hyperparameters for the MLP classifier.

    Parameters:
    - nn_train_x: Training data
    - nn_train_y: Training labels
    - nn_test_x: Test data
    - nn_test_y: Test labels

    Returns:
    - pd.DataFrame(all_tests): The dataframe containing all hyperparameter values and results for each test (one row per test)
    - confusion_matrices: List of confusion matrices, one per test, in the same order as the dataframe rows
    """
    # Hyperparameter tuning values. Note the trailing comma in (4,): without
    # it the entry would be the integer 4 rather than a one-layer tuple.
    all_hidden_layer_sizes = [(128,64,32,16,8,4), (64,32,16,8,4), (32,16,8,4), (16,8,4), (8,4), (4,)]
    all_max_iter = [1000, 100, 10]
    all_activation = ['relu', 'logistic']

    columns = ['hidden_layer_sizes', 'max_iter', 'activation',
               'accuracy', 'precision', 'recall', 'f1']
    all_tests = {column: [] for column in columns}
    confusion_matrices = []

    # Run tests with all combinations and collect the results
    for hidden_layer_sizes in all_hidden_layer_sizes:
        for max_iter in all_max_iter:
            for activation in all_activation:
                accuracy, precision, recall, f1, confusion = train_neural_network(nn_train_x, nn_train_y, nn_test_x, nn_test_y, hidden_layer_sizes,
                                                                                max_iter, activation)

                # Record every value exactly once so all columns keep the same length
                row = (hidden_layer_sizes, max_iter, activation, accuracy, precision, recall, f1)
                for column, value in zip(columns, row):
                    all_tests[column].append(value)
                confusion_matrices.append(confusion)

    return pd.DataFrame(all_tests), confusion_matrices

# PCA Function

In [None]:
def do_pca(xColumns, components=2):
    """
    Project the given feature matrix onto its principal components.

    A new PCA model is fitted on xColumns each time this is called.

    Parameters:
    - xColumns: Feature matrix (one row per sample)
    - components: Number of principal components to keep

    Returns:
    - The data transformed into the PCA space
    """
    return PCA(n_components=components).fit_transform(xColumns)

# Create Dataframes

In [None]:
def transform_images_to_dataframe(images, filter_type, pca_components=2, labels=None):
    """
    Transform a set of images into a pandas DataFrame with PCA of extracted features.

    Parameters:
    - images: List of images (numpy arrays)
    - filter_type: Type of feature extraction ('HIS', 'LBP', or 'HOG'; case-insensitive)
    - pca_components: Number of PCA components to keep (default=2)
    - labels: Optional list of labels for the images

    Returns:
    - DataFrame with columns pca_1..pca_N and, if labels were provided, a 'label' column

    Raises:
    - ValueError: If filter_type is not one of the supported types
    """
    # Resolve the extractor once, before any work is done, so that an
    # unsupported type fails immediately with a clear message
    kind = filter_type.upper()
    if kind == "HIS":
        # Color histogram features
        extract = extract_color_histogram
    elif kind == "LBP":
        # Every pixel of the LBP-filtered image becomes one feature
        extract = lambda img: lbp_output(img).flatten()
    elif kind == "HOG":
        # HOG features
        extract = make_hog
    else:
        raise ValueError(f"Unsupported filter_type {filter_type!r}; expected 'HIS', 'LBP' or 'HOG'")

    # Extract features from all images and stack them for PCA
    feature_array = np.array([extract(img) for img in images])

    # Apply PCA (do_pca fits a new projection on exactly these images)
    pca_result = do_pca(feature_array, components=pca_components)

    # Create DataFrame with PCA results
    columns = [f'pca_{i+1}' for i in range(pca_components)]
    df = pd.DataFrame(pca_result, columns=columns)

    # Add label if provided
    if labels is not None:
        df['label'] = labels

    return df

In [None]:
# NOTE(review): every call below fits its own PCA (do_pca fits a new model each
# time), so the train and test frames of a feature type are projected onto
# different axes — confirm this is intended.

# Color-histogram features, 16 PCA components (train)
train_df_hist = transform_images_to_dataframe(train_x, "HIS", pca_components=16)

# Color-histogram features, 16 PCA components (test)
test_df_hist = transform_images_to_dataframe(test_x, "HIS", pca_components=16)

# LBP features with the default of 2 PCA components (train)
train_df_lbp = transform_images_to_dataframe(train_x, "lbp")

# LBP features with the default of 2 PCA components (test)
test_df_lbp = transform_images_to_dataframe(test_x, "lbp")

# HOG features with the default of 2 PCA components (train)
train_df_hog = transform_images_to_dataframe(train_x, "hog")

# HOG features with the default of 2 PCA components (test)
test_df_hog = transform_images_to_dataframe(test_x, "hog")

In [None]:
# Preview the first rows of the histogram PCA frame
train_df_hist.head(2)

In [None]:
# Preview the first rows of the LBP PCA frame
train_df_lbp.head(2)

In [None]:
# Preview the first rows of the HOG PCA frame
train_df_hog.head(2)

In [None]:
def run_decision_tree_on_pca_dataframes(train_dfs, test_dfs, train_labels, test_labels, feature_names=None, max_depth=3):
    """
    Train and evaluate one decision tree per PCA-transformed feature set.

    Parameters:
    - train_dfs: Dictionary of training dataframes {'name': dataframe}
    - test_dfs: Dictionary of testing dataframes, keyed by the same names as train_dfs
    - train_labels: Training labels
    - test_labels: Testing labels
    - feature_names: Optional dictionary {'name': list of feature names}; the
      dataframe's own column names are used for any name it does not contain
    - max_depth: Maximum depth for decision tree

    Returns:
    - results: Dictionary {'name': {'model', 'accuracy', 'report', 'confusion_matrix'}}
    """
    results = {}

    for name, train_df in train_dfs.items():
        print(f"\n--- Decision Tree Classification for {name} features ---")

        test_df = test_dfs[name]

        # Prefer caller-supplied names, otherwise fall back to the column names
        has_custom_names = bool(feature_names) and name in feature_names
        names = feature_names[name] if has_custom_names else train_df.columns.tolist()

        # Train and evaluate; every column of the frame is used as a feature
        model, accuracy, report, conf_matrix = train_decision_tree(
            train_df.values,
            train_labels,
            test_df.values,
            test_labels,
            max_depth=max_depth,
            show_tree=True,
            feature_names=names,
        )

        results[name] = dict(
            model=model,
            accuracy=accuracy,
            report=report,
            confusion_matrix=conf_matrix,
        )

    # Side-by-side accuracy summary
    print("\n--- Comparison of Decision Tree Results ---")
    for name, result in results.items():
        print(f"{name}: Accuracy = {result['accuracy']:.4f}")

    return results

In [None]:
# Group the PCA frames by feature family; both dictionaries use the same keys
train_dfs = {
    'Histogram': train_df_hist,
    'LBP': train_df_lbp,
    'HOG': train_df_hog
}

test_dfs = {
    'Histogram': test_df_hist,
    'LBP': test_df_lbp,
    'HOG': test_df_hog
}

# Component count that was requested for the histogram frames; kept in case
# later cells reference it
pca_components = 16

# Define feature names for better visualization. The names are derived from the
# actual number of columns of each frame because the frames were not all built
# with the same number of PCA components.
feature_names = {
    name: [f'{name} PCA {i+1}' for i in range(frame.shape[1])]
    for name, frame in train_dfs.items()
}

# Run the analysis
results = run_decision_tree_on_pca_dataframes(
    train_dfs=train_dfs,
    test_dfs=test_dfs,
    train_labels=train_y,
    test_labels=test_y,
    feature_names=feature_names,
    max_depth=3
)