In [1]:
import torch
from torchvision import datasets, transforms

# Preprocessing applied to every sample: force a single channel, convert the
# image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Arguments shared by both EMNIST splits; only the `train` flag differs.
_emnist_kwargs = dict(root='./data', split='letters', download=True, transform=transform)
train_dataset = datasets.EMNIST(train=True, **_emnist_kwargs)
test_dataset = datasets.EMNIST(train=False, **_emnist_kwargs)

# Batched iterators: the training data is reshuffled every epoch, the test
# data keeps its original order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

Downloading https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip to ./data/EMNIST/raw/gzip.zip


100%|██████████| 561753746/561753746 [00:27<00:00, 20762719.80it/s]


Extracting ./data/EMNIST/raw/gzip.zip to ./data/EMNIST/raw


# PyTorch Docs

Convolution -> https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

Max Pool -> https://pytorch.org/docs/stable/generated/torch.nn.functional.max_pool2d.html

ReLU (Rectified Linear Unit) -> https://pytorch.org/docs/stable/generated/torch.nn.functional.relu.html

Fully Connected (FC Linear Layer) -> https://pytorch.org/docs/stable/generated/torch.nn.Linear.html

Flattening is already implemented for you in `forward`. Keep it after your last convolution/pooling step and before your first FC layer.

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small three-stage CNN for 28x28 single-channel images.

    Each stage is Conv2d(3x3, padding=1) -> MaxPool(2) -> ReLU. The 3x3
    convolutions with padding 1 keep the spatial size, and each pooling step
    halves it (28 -> 14 -> 7 -> 3) while the channels widen 1 -> 16 -> 32 -> 64.
    The flattened 64*3*3 features go through one hidden fully connected layer
    and a final linear layer that returns raw class scores (logits); no
    softmax is applied here.

    Args:
        num_classes: Number of output scores. The default of 27 yields
            100,635 trainable parameters, which stays under the 150k budget.
            NOTE(review): 27 assumes the EMNIST "letters" labels run 1..26
            with index 0 unused -- confirm against the dataset.
    """

    def __init__(self, num_classes=27):
        super().__init__()

        # Convolutional layers (kernel 3, stride 1, padding 1 keeps H and W).
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # Fully connected layers.
        self.fc1 = nn.Linear(64 * 3 * 3, 128)  # Flattened size from conv3
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch_size, num_classes).

        Expects ``x`` shaped (batch_size, 1, 28, 28); any other spatial size
        will not match the 64*3*3 input width of ``fc1``.
        """
        # Convolutional layers with MaxPooling and ReLU
        x = F.relu(F.max_pool2d(self.conv1(x), 2))  # Output: (16, 14, 14)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))  # Output: (32, 7, 7)
        x = F.relu(F.max_pool2d(self.conv3(x), 2))  # Output: (64, 3, 3)

        # Flatten
        x = x.view(x.size(0), -1)  # Shape: (batch_size, 64*3*3)

        # Fully connected layers; the last one has no activation so the
        # output stays as logits.
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x

In [5]:
# Build the network and report how many of its weights will be trained.
model = SimpleCNN()
total_params = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
# Workshop budget: keep this below 150k params.
print(f"Total Parameters: {total_params}")

Total Parameters: 100635


In [6]:
import torch.optim as optim

# Loss function and optimizer. The details are outside the scope of this
# workshop -- if you want to know more, ask one of us.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [7]:
import torch
from torch.utils.data import random_split

# Hold out part of the training data as a validation set, so overfitting
# shows up as a gap between the training and validation metrics.
train_size = int(0.9 * len(train_dataset))  # 90% for training
val_size = len(train_dataset) - train_size  # Remaining 10% for validation

# Seed the split so the same samples land in the validation set on every run;
# with an unseeded split the validation numbers are not comparable across
# kernel restarts.
SPLIT_SEED = 42
train_subset, val_subset = random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# DataLoaders for train and validation subsets. Note that `train_loader` is
# rebound here to iterate over the 90% subset only.
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=64, shuffle=False)

# Use the GPU when one is available and move the model's weights there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# The syntax is outside the scope of the workshop, but the process is the
# gradient-descent loop that was explained to you.
num_epochs = 10
for epoch in range(num_epochs):
    # ---- Training pass: weights are updated after every batch ----
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # `criterion` returns the batch mean, so weight it by the batch size;
        # the epoch average then stays exact even if the last batch is smaller.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        predicted = outputs.max(dim=1).indices  # class with the highest score
        correct_train += (predicted == labels).sum().item()
        total_train += batch_count

    train_loss = running_loss / total_train
    train_accuracy = (correct_train / total_train) * 100

    # ---- Validation pass: no weight updates, no gradient tracking ----
    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_count = labels.size(0)
            val_loss += loss.item() * batch_count
            predicted = outputs.max(dim=1).indices
            correct_val += (predicted == labels).sum().item()
            total_val += batch_count

    val_loss /= total_val
    val_accuracy = (correct_val / total_val) * 100

    # Print results for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")


Epoch [1/10], Train Loss: 0.5621, Train Accuracy: 82.38%, Val Loss: 0.2844, Val Accuracy: 90.54%
Epoch [2/10], Train Loss: 0.2525, Train Accuracy: 91.67%, Val Loss: 0.2406, Val Accuracy: 92.05%
Epoch [3/10], Train Loss: 0.2083, Train Accuracy: 92.99%, Val Loss: 0.2004, Val Accuracy: 93.29%
Epoch [4/10], Train Loss: 0.1821, Train Accuracy: 93.74%, Val Loss: 0.1903, Val Accuracy: 93.46%
Epoch [5/10], Train Loss: 0.1646, Train Accuracy: 94.26%, Val Loss: 0.1832, Val Accuracy: 93.77%
Epoch [6/10], Train Loss: 0.1504, Train Accuracy: 94.62%, Val Loss: 0.1853, Val Accuracy: 93.93%
Epoch [7/10], Train Loss: 0.1387, Train Accuracy: 94.96%, Val Loss: 0.1841, Val Accuracy: 93.77%
Epoch [8/10], Train Loss: 0.1287, Train Accuracy: 95.24%, Val Loss: 0.1890, Val Accuracy: 93.89%
Epoch [9/10], Train Loss: 0.1191, Train Accuracy: 95.56%, Val Loss: 0.1899, Val Accuracy: 93.87%
Epoch [10/10], Train Loss: 0.1142, Train Accuracy: 95.64%, Val Loss: 0.1973, Val Accuracy: 93.80%


In [8]:
# Final check on the test split: put the model in evaluation mode, disable
# gradient tracking, and count predictions that match the labels.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = outputs.max(dim=1).indices  # class with the highest score
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 93.46%


In [8]:
# Write the model's state_dict (its learned weights) to disk.
torch.save(obj=model.state_dict(), f="simple_cnn_ocr.pth")