The dataset for Sign Language MNIST comes from https://www.kaggle.com/datasets/datamunge/sign-language-mnist

In [1]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import pandas as pd
from PIL import Image
from torchvision.io import read_image
import numpy as np

In [2]:
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.neural_network = nn.Sequential(
            
            # Layer 1
            nn.Conv2d(1,6,5),
            nn.Tanh(),
            nn.AvgPool2d(2, 2),
            nn.Tanh(),
            
            # Layer 2
            nn.Conv2d(6, 16, 5),
            nn.Tanh(),
            nn.AvgPool2d(2, 2),
            nn.Tanh(),
            
            # Layer 3
            nn.Conv2d(16, 120, 5),
            nn.Tanh(),
            
            # FC
            nn.Flatten(),
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, 25),
        )

    def forward(self, x):
        return self.neural_network(x)

In [3]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_path, transform=None, target_transform=None):
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = read_image(Image.fromarray(np.reshape(self.data.iloc[idx, 1:], (28, 28)).astype(np.uint8)))
#         image = np.reshape(self.data.iloc[idx, 1:], (28, 28)).astype(np.uint8)
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [4]:
transform = transforms.Compose(
    [
        transforms.Pad(2),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ]
)


training_set = CustomImageDataset("data/signs/sign_mnist_train.csv", transform=transform)
test_set = CustomImageDataset("data/signs/sign_mnist_test.csv")

training_loader = DataLoader(training_set, batch_size=4, shuffle=True)
test_loader = DataLoader(test_set, batch_size=4, shuffle=False)

In [5]:
model = LeNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
loss_fn = torch.nn.CrossEntropyLoss()

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, data in enumerate(dataloader): 
        (X, y) = data # len(X) = 10
        
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 1000 == 0 and batch > 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [7]:
model = LeNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
loss_fn = torch.nn.CrossEntropyLoss()

epochs = 3
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(training_loader, model, loss_fn, optimizer)
    test_loop(test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


RuntimeError: image::read_file() Expected a value of type 'str' for argument '_0' but instead found type 'Image'.
Position: 0
Value: <PIL.Image.Image image mode=L size=28x28 at 0x2A2586DA010>
Declaration: image::read_file(str _0) -> Tensor _0
Cast error details: Unable to cast Python instance of type <class 'PIL.Image.Image'> to C++ type '?' (#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)