The dataset for Sign Language MNIST comes from https://www.kaggle.com/datasets/datamunge/sign-language-mnist

In [1]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import pandas as pd
from PIL import Image
from torchvision.io import read_image
import numpy as np

In [2]:
class ModifiedLeNet(nn.Module):
    """LeNet-style CNN for single-channel 32x32 images.

    Shape walk-through for an input of shape ``(N, 1, 32, 32)``:
    conv 5x5 -> 28x28, pool -> 14x14, conv 5x5 -> 10x10, pool -> 5x5,
    conv 5x5 -> 1x1 with 120 channels, which ``Flatten`` turns into the 120
    features expected by the first ``Linear`` layer. Other spatial sizes will
    fail at that ``Linear`` layer.

    Args:
        num_classes: Number of output logits. Defaults to 25, the value that
            was previously hard-coded (presumably label indices 0-24 of the
            Sign Language MNIST CSVs -- confirm against the data).

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes: int = 25):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")

        self.neural_network = nn.Sequential(

            # Layer 1
            nn.Conv2d(1, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            # This ReLU is a no-op (max-pooling non-negative values stays
            # non-negative). It is kept so the Sequential indices, and hence
            # the state_dict keys of existing checkpoints, do not shift.
            nn.ReLU(),

            # Layer 2
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            # No-op ReLU kept for the same index-stability reason as above.
            nn.ReLU(),

            # Layer 3
            nn.Conv2d(16, 120, 5),
            nn.ReLU(),

            # FC
            nn.Flatten(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)`` for batch ``x``."""
        return self.neural_network(x)

In [3]:
class CustomImageDataset(Dataset):
    """Dataset over a CSV whose first column is a label and the rest are pixels.

    Every row must contain one label followed by exactly 784 pixel values,
    which are served as a 28x28 8-bit PIL image.

    The pixel block is decoded into a NumPy array once at construction time,
    so ``__getitem__`` is a cheap array index rather than a per-sample pandas
    row extraction. ``self.data`` still holds the raw DataFrame, but edits
    made to it after construction are not reflected in the served samples.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
        transform: Optional callable applied to the PIL image.
        target_transform: Optional callable applied to the label.

    Raises:
        ValueError: If the CSV does not have exactly 784 pixel columns.
    """

    def __init__(self, csv_path, transform=None, target_transform=None):
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.target_transform = target_transform

        pixels = self.data.iloc[:, 1:].to_numpy()
        if pixels.shape[1] != 28 * 28:
            # Fail loudly here: a blind reshape could otherwise fold several
            # rows into one image or fail later with an opaque message.
            raise ValueError(
                f"expected {28 * 28} pixel columns after the label column, "
                f"got {pixels.shape[1]}"
            )

        # Same uint8 cast the per-item code used to do, applied once up front.
        self.images = np.ascontiguousarray(
            pixels.astype(np.uint8).reshape(len(pixels), 28, 28)
        )
        self.labels = self.data.iloc[:, 0].to_numpy()

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` after optional transforms."""
        image = Image.fromarray(self.images[idx])
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [4]:
# Preprocessing shared by both splits: pad every side of the 28x28 image by
# 2 pixels (giving the 32x32 input the network expects), convert to a tensor,
# then normalize the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Pad(2),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Build the train split first, then the test split, with the same pipeline.
training_set, test_set = (
    CustomImageDataset(csv_file, transform=transform)
    for csv_file in (
        "data/signs/sign_mnist_train.csv",
        "data/signs/sign_mnist_test.csv",
    )
)

# Only the training loader shuffles; evaluation order stays fixed.
training_loader = DataLoader(dataset=training_set, batch_size=4, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=4, shuffle=False)

In [5]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training pass of ``model`` over every batch in ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches; must
            expose ``.dataset`` with a length (used only for progress output).
        model: Module to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        None. The loss of the current batch is printed every 1000 batches
        (skipping batch 0).
    """
    size = len(dataloader.dataset)
    model.train()

    # Discard gradients left on the parameters by an earlier or interrupted
    # backward pass, so the first step depends only on its own batch.
    optimizer.zero_grad()

    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 1000 == 0 and batch > 0:
            # Separate name for the float so the loss tensor is not shadowed.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [6]:
# Seed torch's global RNG before the model is built so that weight
# initialisation and the shuffle order drawn by the training loader repeat
# on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

model = ModifiedLeNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
loss_fn = torch.nn.CrossEntropyLoss()

# One epoch = one full training pass followed by an evaluation on the test split.
epochs = 3
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(training_loader, model, loss_fn, optimizer)
    test_loop(test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 3.154349  [ 4000/27455]
loss: 3.070931  [ 8000/27455]
loss: 1.754682  [12000/27455]
loss: 1.248746  [16000/27455]
loss: 0.459929  [20000/27455]
loss: 0.371831  [24000/27455]
Test Error: 
 Accuracy: 83.6%, Avg loss: 0.511846 

Epoch 2
-------------------------------
loss: 0.316371  [ 4000/27455]
loss: 0.008893  [ 8000/27455]
loss: 0.000030  [12000/27455]
loss: 0.034138  [16000/27455]
loss: 0.000096  [20000/27455]
loss: 0.011527  [24000/27455]
Test Error: 
 Accuracy: 91.0%, Avg loss: 0.377763 

Epoch 3
-------------------------------
loss: 0.001889  [ 4000/27455]
loss: 0.000772  [ 8000/27455]
loss: 0.001391  [12000/27455]
loss: 0.001600  [16000/27455]
loss: 0.000037  [20000/27455]
loss: 0.000912  [24000/27455]
Test Error: 
 Accuracy: 91.3%, Avg loss: 0.374232 

Done!
