# American Sign Language Classification using Convolutional Neural Networks

This is a Convolutional Neural Network implementation for the classification of American Sign Language letters.
The implementation is based on an example from the "Getting Started with Deep Learning" course offered by the NVIDIA Deep Learning Institute.

**Author: Ujval Madhu**\
**C-Log: 01-22-2024**

### Importing Packages

In [76]:
import torch.nn as nn
import pandas as pd
import torch
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
import intel_extension_for_pytorch as ipex

device = torch.device("cuda" if torch.cuda.is_available() else "xpu" if torch.xpu.is_available() else "cpu")

### Loading data

In [22]:
# Read the training and validation CSV files into DataFrames (training first, then validation).
train_df, valid_df = (
    pd.read_csv(csv_path)
    for csv_path in ("data/sign_mnist_train.csv", "data/sign_mnist_valid.csv")
)

### Preprocessing the dataset as images

In [37]:
IMG_CH = 1             # Input image channels = 1 (B/W)
IMG_HT = 28            # Input image height   = 28
IMG_WT = 28            # Input image width    = 28


class MyDataset(Dataset):
    """In-memory dataset of flattened single-channel images and their labels.

    Each row of ``base_df`` must provide a ``'label'`` column plus
    ``IMG_CH * IMG_HT * IMG_WT`` pixel columns. All samples are converted to
    tensors once, up front, and stored on ``device``.

    Args:
        base_df: pandas DataFrame with the layout described above. It is
            copied first, so the caller's frame is left unmodified.
    """

    def __init__(self, base_df):
        x_df = base_df.copy()                    # work on a copy so popping 'label' does not alter base_df
        y_df = x_df.pop('label')
        x_df = x_df.values / 255                 # scale pixels by 1/255 (assumes 8-bit intensities -- TODO confirm)
        # Reshape each flat row into a channels x height x width image using
        # the declared constants instead of repeating the literal 1, 28, 28.
        x_df = x_df.reshape(-1, IMG_CH, IMG_HT, IMG_WT)
        self.xs = torch.tensor(x_df).float().to(device)
        # Hand torch the underlying NumPy array rather than the pandas Series,
        # so the conversion does not go through the Series' index.
        self.ys = torch.tensor(y_df.values).to(device)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.xs[idx], self.ys[idx]

    def __len__(self):
        """Return the total number of samples."""
        return len(self.xs)

### Creating DataLoaders for Training and Validation

In [51]:
BATCH_SIZE = 32    # number of samples per mini-batch

# Training split: wrapped in a DataLoader with shuffling enabled.
train_data = MyDataset(train_df)
train_N = len(train_data)                 # total number of training samples
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Validation split: same batch size, shuffling left at the DataLoader default.
valid_data = MyDataset(valid_df)
valid_N = len(valid_data)                 # total number of validation samples
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE)

### Creating The Convolutional Neural Network

In [46]:
n_classes = 24                       # 26 letters minus j and z, which need motion to be signed
kernel_size = 3                      # every convolution uses 3 x 3 kernels
flattened_img_size = 75 * 3 * 3      # 75 channels of 3 x 3 feature maps = 675 features into the dense layers

# Shape comments below assume IMG_CH x 28 x 28 inputs (IMG_HT = IMG_WT = 28).
model = nn.Sequential(
    # Block 1: padded 3 x 3 convolution keeps 28 x 28, pooling halves it.
    nn.Conv2d(IMG_CH, 25, kernel_size, stride=1, padding=1),    # -> 25 x 28 x 28
    nn.BatchNorm2d(25),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),                      # -> 25 x 14 x 14

    # Block 2: as block 1, with dropout applied before pooling.
    nn.Conv2d(25, 50, kernel_size, stride=1, padding=1),        # -> 50 x 14 x 14
    nn.BatchNorm2d(50),
    nn.ReLU(),
    nn.Dropout(p=.2),
    nn.MaxPool2d(kernel_size=2, stride=2),                      # -> 50 x 7 x 7

    # Block 3: pooling a 7 x 7 map with a 2 x 2 window rounds down to 3 x 3.
    nn.Conv2d(50, 75, kernel_size, stride=1, padding=1),        # -> 75 x 7 x 7
    nn.BatchNorm2d(75),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),                      # -> 75 x 3 x 3

    # Classifier head: flatten each sample to 675 features, then two dense layers.
    nn.Flatten(),
    nn.Linear(flattened_img_size, 512),
    nn.Dropout(p=0.3),
    nn.ReLU(),
    # No activation after the last layer: nn.CrossEntropyLoss takes raw logits,
    # which is numerically more stable than feeding it softmax probabilities.
    nn.Linear(512, n_classes),
)

### Compiling the Model

In [47]:
# Move the network to the selected device, then wrap it with torch.compile.
model = model.to(device)
model = torch.compile(model)

### Training the Model

In [57]:
# Categorical cross-entropy computed on the model's raw output logits.
loss_function = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters, left at its default hyperparameters.
optimizer = Adam(params=model.parameters())

In [58]:
def get_batch_accuracy(output, y, N):
    """Return this batch's share of the accuracy over ``N`` samples.

    Args:
        output: Per-class scores with one row per sample; the predicted class
            is the index of the largest score along dimension 1.
        y: Ground-truth class indices, one per row of ``output``.
        N: Denominator for the fraction. The callers pass the size of the
            whole dataset so that summing the results of every batch in an
            epoch yields the epoch accuracy.

    Returns:
        Number of correct predictions in the batch divided by ``N`` (a float).
    """
    predicted = output.argmax(dim=1)              # predicted class per sample
    matches = predicted == y.view_as(predicted)   # align y's shape, compare element-wise
    n_correct = matches.sum().item()              # count of correct predictions as a Python int
    return n_correct / N

In [59]:
def train():
    """Run one training epoch over ``train_loader`` and print its metrics.

    Uses the file-level ``model``, ``optimizer``, ``loss_function`` and
    ``train_N``. The printed loss is the sum of the per-batch losses (not
    their mean); the printed accuracy is the sum of the per-batch fractions
    of ``train_N``.
    """
    model.train()                                   # switch the model to training mode
    epoch_loss = 0
    epoch_accuracy = 0

    for inputs, targets in train_loader:
        optimizer.zero_grad()                       # drop gradients left over from the previous step
        logits = model(inputs)                      # forward pass
        step_loss = loss_function(logits, targets)
        step_loss.backward()                        # backpropagate
        optimizer.step()                            # update the weights

        epoch_loss += step_loss.item()
        epoch_accuracy += get_batch_accuracy(logits, targets, train_N)

    print('Training Metrics: Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch_loss, epoch_accuracy))

In [60]:
def validate():
    """Evaluate the model on ``valid_loader`` and print the metrics.

    Uses the file-level ``model``, ``loss_function`` and ``valid_N``. No
    weights are updated. The printed loss is the sum of the per-batch losses
    (not their mean); the printed accuracy is the sum of the per-batch
    fractions of ``valid_N``.
    """
    model.eval()                                    # switch the model to evaluation mode
    total_loss = 0
    total_accuracy = 0

    with torch.no_grad():                           # gradients are not needed for evaluation
        for inputs, targets in valid_loader:
            logits = model(inputs)                  # forward pass only
            total_loss += loss_function(logits, targets).item()
            total_accuracy += get_batch_accuracy(logits, targets, valid_N)

    print('Validation Metrics: Loss: {:.4f}, Accuracy: {:.4f}'.format(total_loss, total_accuracy))

In [61]:
epochs = 20    # number of full passes over the training data

# Each epoch: print its zero-based index, train once, then validate once.
for epoch in range(epochs):
    print('Epoch: {}'.format(epoch))
    train()
    validate()

Epoch: 0
Training Metrics: Loss: 285.1434, Accuracy: 0.8994
Validation Metrics: Loss: 37.6811, Accuracy: 0.9414
Epoch: 1
Training Metrics: Loss: 15.6213, Accuracy: 0.9955
Validation Metrics: Loss: 33.3949, Accuracy: 0.9537
Epoch: 2
Training Metrics: Loss: 15.8443, Accuracy: 0.9949
Validation Metrics: Loss: 31.6670, Accuracy: 0.9582
Epoch: 3
Training Metrics: Loss: 9.9800, Accuracy: 0.9968
Validation Metrics: Loss: 41.4296, Accuracy: 0.9385
Epoch: 4
Training Metrics: Loss: 10.9114, Accuracy: 0.9962
Validation Metrics: Loss: 49.7890, Accuracy: 0.9313
Epoch: 5
Training Metrics: Loss: 7.2893, Accuracy: 0.9973
Validation Metrics: Loss: 21.4413, Accuracy: 0.9738
Epoch: 6
Training Metrics: Loss: 9.9572, Accuracy: 0.9966
Validation Metrics: Loss: 17.6262, Accuracy: 0.9788
Epoch: 7
Training Metrics: Loss: 2.3523, Accuracy: 0.9992
Validation Metrics: Loss: 20.3253, Accuracy: 0.9739
Epoch: 8
Training Metrics: Loss: 7.4049, Accuracy: 0.9976
Validation Metrics: Loss: 22.6906, Accuracy: 0.9773
Epoch

### Shutting down IPython Kernel (Python Interpreter in the Jupyter Notebook)

In [78]:
import IPython

# Get the running IPython application and ask its kernel to shut down,
# requesting a restart.
app = IPython.Application.instance()
app.kernel.do_shutdown(restart=True)

{'status': 'ok', 'restart': True}