### Step 1: Setup imports & libraries

In [None]:
import numpy as np
import pathlib
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

### Step 2: Configure Variables

In [None]:
# Base name for the model; also used as the prefix of saved output files.
model_name = "svm"

# Target dimensions (in pixels) that every input image is resized to
# before it is fed into the model.
img_height = 32
img_width = 32

# Location of the training dataset.
training_dir = "images/controlled_background/"

# Fixed random seed so that runs are reproducible.
seed = 123

### Step 3: Set Directory for Training Data

In [None]:
# Wrap the dataset root in a Path object. Deliberately no `.with_suffix("")`
# here: on a directory path it would strip everything after the last dot in
# the final component (e.g. "dataset_v1.5" -> "dataset_v1") and point at the
# wrong folder.
data_dir = pathlib.Path(training_dir)

### Step 4: Load & Resize Image Data

In [None]:
def load_images_from_folder(folder, image_size=(32, 32)):
    """Load every readable image below *folder* as a flattened pixel vector.

    Each immediate sub-directory of *folder* is treated as one class: its
    name becomes the label of every image found directly inside it. Entries
    of *folder* that are not directories are ignored, as are files that
    ``cv2.imread`` cannot decode.

    Args:
        folder: Path to the dataset root (string or path-like).
        image_size: Target size handed straight to ``cv2.resize`` as its
            ``dsize`` argument, i.e. ``(width, height)``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds one
        flattened image per row and ``labels`` the matching folder names.
        Both arrays are empty when no image could be loaded.
    """
    images = []
    labels = []

    # os.listdir() returns entries in arbitrary order. Sorting keeps the
    # sample order, and therefore any seeded split made from it, the same
    # across machines and file systems.
    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for filename in sorted(os.listdir(subfolder_path)):
            img = cv2.imread(os.path.join(subfolder_path, filename))
            if img is None:
                # Not a decodable image; skip it.
                continue

            # Resize, then flatten to a 1D feature vector.
            images.append(cv2.resize(img, image_size).flatten())
            labels.append(subfolder)  # The folder name is the label.

    return np.array(images), np.array(labels)

# The size tuple is forwarded to cv2.resize, which expects (width, height),
# so the width must come first.
image_data, labels = load_images_from_folder(data_dir, (img_width, img_height))

### Step 5: Partition Data

In [None]:
# Hold out 20% of the samples for testing; the split is seeded so it can
# be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    image_data,
    labels,
    test_size=0.2,
    random_state=seed,
)

# Report how many samples ended up in each partition.
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

### Step 6: Normalise Pixel Values of Dataset Images

In [None]:
# Standardise each pixel feature to zero mean and unit variance for the SVM.
# The scaler learns its statistics from the training set only and is then
# applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Step 7: Train the Model

In [None]:
# RBF-kernel support vector classifier with fixed hyperparameters.
svm = SVC(
    kernel='rbf',
    C=10,
    gamma=0.001,
    random_state=seed,
)
# Fit on the standardised training data.
svm.fit(X_train_scaled, y_train)

### Step 8: Test the Model

In [None]:
# Predict a class label for every sample in the standardised test set.
y_pred = svm.predict(X_test_scaled)

### Step 9: Evaluate the Model
Calculate and visualise the model's accuracy, confusion matrix, and classification report.

In [None]:
# Overall accuracy on the held-out test set.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (SVM): {accuracy * 100:.2f}%")

# All class names present in the full dataset, in sorted order.
class_names = np.unique(labels)

# Pass the class list explicitly. Otherwise confusion_matrix only includes
# labels that occur in y_test or y_pred, and a class missing from the test
# split would leave the matrix smaller than (and misaligned with) the
# tick labels below.
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_names)

# Plot the confusion matrix as an annotated heatmap.
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.ylabel('Actual Label')
plt.xlabel('Predicted Label')
plt.title('SVM Confusion Matrix')
# Save before show(): the figure is written to disk while it is still open.
plt.savefig(f"{model_name}_confusion_matrix.png")
plt.show()

# Per-class precision, recall and F1.
print("SVM Classification Report:\n", classification_report(y_test, y_pred))

### Optional: Hyperparameter Tuning
Runs a cross-validated grid search to find the parameter configuration that gives the highest accuracy.

In [None]:
# Candidate hyperparameter values; every combination is evaluated.
param_grid = dict(
    C=[0.1, 1, 10, 100],          # Regularization strength
    gamma=[1, 0.1, 0.01, 0.001],  # Kernel coefficient
    kernel=['rbf'],               # Only the RBF kernel is searched
)

# Exhaustive search over the grid; with refit=True the best configuration
# is retrained on the whole training set afterwards.
grid = GridSearchCV(
    estimator=SVC(random_state=seed),
    param_grid=param_grid,
    refit=True,
    verbose=2,
)
grid.fit(X_train_scaled, y_train)

print(f"Best Parameters: {grid.best_params_}")