### Sign Language Classification with SVM

In [None]:
# 1. Imports
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from string import ascii_lowercase
import seaborn as sns
from sklearn.model_selection import GridSearchCV

#### Data Loading

In [None]:
# Directory holding the training images and, inside it, the annotation CSV.
DATASET_PATH = "./data/sign_lang_train/images_manipulated/"
CSV_FILE = "labels_manipulated.csv"

# Target size handed to the resize transform.
IMG_SIZE = (128, 128)

# Class vocabulary in label order: digits '0'-'9' followed by letters 'a'-'z'.
CLASS_NAMES = [str(digit) for digit in range(10)] + [*ascii_lowercase]
CLASSES = len(CLASS_NAMES)

In [None]:
import csv
# Utility function

def read_csv(csv_file):
    """Load every row of a CSV file into memory.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list with one entry per row; each entry is the list of string
        fields produced by ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(csv_file, newline='') as handle:
        return list(csv.reader(handle))

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils, io
from torchvision.utils import make_grid
from PIL import Image
import matplotlib.pyplot as plt
import os

from string import ascii_lowercase

class SignLangDataset(Dataset):
    """Sign language dataset backed by a CSV annotation file.

    Every CSV row is indexed as ``row[0]`` = class name and ``row[1]`` =
    image file name relative to ``root_dir``.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Name of the csv file with annotations; it is
                looked up inside ``root_dir``.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                to each image.
        """
        self.data = read_csv(os.path.join(root_dir, csv_file))
        self.root_dir = root_dir
        self.transform = transform
        # List of class names in order; a label is an index into this list
        self.class_names = CLASS_NAMES

    def __len__(self):
        """
        Returns the number of annotation rows, i.e. the dataset length.
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Returns one sample (dict consisting of an image and its label).

        Args:
            idx (int or tensor): Row index of the sample.

        Returns:
            dict: ``{'image': <grayscale image, transformed if a transform
            was given>, 'label': <int index into class_names>}``.

        Raises:
            ValueError: If the class name stored in the row is not in
                ``class_names``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.data[idx]
        image_path = os.path.join(self.root_dir, row[1])

        # Read the image as grayscale ('L' mode). convert() yields a new,
        # fully loaded image, so the underlying file is closed right away
        # instead of staying open until garbage collection.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("L")

        if self.transform:
            image = self.transform(image)

        # The label is the index of the class name in the list
        # ['0','1',...,'9','a','b',...'z'], giving integer labels 0-35.
        label = self.class_names.index(row[0])

        return {'image': image, 'label': label}

    def show_sample(self, idx: int) -> None:
        """
        Displays the grayscale image of one sample with its class name as title.

        Args:
            idx (int): Index of the sample to display.
        """
        sample = self[idx]
        image = sample["image"]
        label = self.class_names[sample["label"]]

        # Drop a leading singleton channel axis: (1, H, W) -> (H, W).
        # Untransformed PIL images have no ``ndim`` attribute; they are
        # handed to imshow unchanged instead of raising AttributeError.
        if getattr(image, "ndim", None) == 3 and image.shape[0] == 1:
            image = image[0]

        plt.imshow(image, cmap="gray")
        plt.title(f"Label: {label}")
        plt.axis("off")
        plt.show()

# Class names in label order: '0'-'9' followed by 'a'-'z'
class_names = [str(digit) for digit in range(10)] + [*ascii_lowercase]

# Preprocessing applied to each image: tensor conversion first,
# then resizing to IMG_SIZE (128x128)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(IMG_SIZE),
    ]
)

In [None]:
def dataset_to_numpy(dataset):
    """
    Converts an indexable dataset into NumPy arrays for use with sklearn.

    Each sample must be a dict with an 'image' and a 'label' entry. The image
    may be anything ``np.asarray`` understands (CPU torch tensor, NumPy array,
    PIL image); it is flattened to one feature vector, so e.g. a
    [1, 128, 128] image becomes a row of length 128*128.

    Args:
        dataset: Object supporting ``len()`` and integer indexing.

    Returns:
        X: np.ndarray of shape [num_samples, num_pixels]
        y: np.ndarray of shape [num_samples]
    """
    features = []
    labels = []
    # Index explicitly rather than iterating: only __len__ and __getitem__
    # are required from the dataset.
    for index in range(len(dataset)):
        sample = dataset[index]
        # reshape(-1) flattens every axis, including a singleton channel axis
        features.append(np.asarray(sample['image']).reshape(-1))
        labels.append(sample['label'])

    return np.array(features), np.array(labels)

#### Training

In [None]:

# Load and split data
sign_dataset = SignLangDataset(CSV_FILE, DATASET_PATH, transform)
X, y = dataset_to_numpy(sign_dataset)
# Stratified 80/20 split with a fixed seed so the split is reproducible
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train samples:", len(X_train))
print("Validation samples:", len(X_val))

# 4. Train SVM

# One sub-grid per kernel, so only hyper-parameters that the kernel actually
# uses are searched. A single dict would take the full Cartesian product and
# refit identical 'linear'/'rbf' models for every unused gamma/coef0/degree.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [1, 10, 100],
    },
    {
        'kernel': ['rbf'],
        'C': [1, 10, 100],
        'gamma': [0.01, 0.001],
    },
    {
        'kernel': ['poly'],
        'C': [1, 10, 100],
        'gamma': [0.01, 0.001],
        'coef0': [0, 1, 5],
        'degree': [2, 3, 4],
    },
]

# 3-fold cross-validated search on the training split, using all CPU cores
grid_search = GridSearchCV(SVC(), param_grid, scoring='accuracy', cv=3, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best params:", grid_search.best_params_)
print("Best accuracy:", grid_search.best_score_)

# Best parameter combination, as refit by GridSearchCV
svm = grid_search.best_estimator_

# 5. Evaluate on the held-out validation split
y_pred = svm.predict(X_val)
acc = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {acc:.2%}")
print(classification_report(y_val, y_pred, target_names=CLASS_NAMES))

# 6. Confusion Matrix (optional)
cm = confusion_matrix(y_val, y_pred)
np.fill_diagonal(cm, 0)  # Zero out diagonal (in place) to highlight errors

plt.figure(figsize=(12, 10))
sns.heatmap(cm, xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES,
            annot=False, cmap="Blues", linewidths=0.5)
plt.title("Confusion Matrix (without diagonal)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
