Import dependencies

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

Preprocess and load EMNIST dataset

In [3]:


# Preprocessing pipeline applied to every image as it is read from the dataset.
transform = transforms.Compose(
    [
        # Force a single (grayscale) channel
        transforms.Grayscale(num_output_channels=1),
        # Resize to 28x28 pixels
        transforms.Resize((28, 28)),
        # Convert to a tensor
        transforms.ToTensor(),
        # Shift/scale with mean 0.5 and std 0.5 (normalize between -1 and 1)
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# EMNIST "balanced" split (letters and digits). Both subsets share the same
# storage root, preprocessing and download behaviour; only `train` differs.
_emnist_kwargs = dict(root='./data', split='balanced', transform=transform, download=True)
train_dataset = datasets.EMNIST(train=True, **_emnist_kwargs)
test_dataset = datasets.EMNIST(train=False, **_emnist_kwargs)

# Mini-batch iterators: training batches are reshuffled, test batches keep
# their dataset order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


Define the CNN architecture

In [4]:


# Define a CNN for OCR
class OCRCNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 character images.

    Layout: two 3x3 convolutions (1 -> 32 -> 64 channels, padding 1), each
    followed by ReLU and 2x2 max pooling, then three fully connected layers
    (64*7*7 -> 128 -> 64 -> num_classes). Two poolings reduce a 28x28 input to
    7x7 feature maps, which is where the 64 * 7 * 7 feature count comes from.

    Args:
        num_classes: Number of output logits. Defaults to 47, the class count
            of the EMNIST Balanced split (letters + digits).
    """

    def __init__(self, num_classes=47):
        super().__init__()
        # Convolutional layers (padding=1 keeps the spatial size unchanged)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # Max pooling layer (halves height and width); reused after each conv
        self.pool = nn.MaxPool2d(2, 2)

        # Fully connected layers; fc1 expects the 64 x 7 x 7 maps produced
        # from a 28x28 input after two conv + pool stages
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, num_classes) with raw logits (no softmax).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                64 * 7 * 7 features per sample (raised by ``fc1``).
        """
        x = self.pool(F.relu(self.conv1(x)))  # Convolution -> ReLU -> MaxPool
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 7 * 7), this never folds surplus spatial elements into
        # the batch dimension; a wrongly sized input fails loudly in fc1.
        x = x.flatten(start_dim=1)

        x = F.relu(self.fc1(x))     # Fully connected layer 1 with ReLU
        x = F.relu(self.fc2(x))     # Fully connected layer 2 with ReLU
        x = self.fc3(x)             # Output layer (logits, no activation here)
        return x


Initialize the model and train it on the GPU via CUDA. If CUDA is not available, the number of epochs is set to 0 and training is skipped.

In [8]:


# Resolve the compute device once; it is reused for the model and every batch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch's RNG so weight initialisation and batch shuffling are
# repeatable across re-runs of this cell (GPU kernels may still introduce
# small nondeterminism).
torch.manual_seed(42)

# Initialize model, loss function, and optimizer.
# The model is moved to the target device before the optimizer is built so
# the optimizer is constructed from the on-device parameters.
model = OCRCNN().to(device)
criterion = nn.CrossEntropyLoss()  # For classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

num_epochs = 100
if device.type == 'cuda':
    print(f"CUDA Available. Continuing with {num_epochs} epochs...")
else:
    # Training is intentionally skipped without a GPU. Say so explicitly,
    # otherwise later cells would evaluate an untrained model without warning.
    num_epochs = 0
    print("CUDA not available. Skipping training (0 epochs); the model keeps its initial random weights.")

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the parameter gradients

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss as a Python float
        running_loss += loss.item()

    # Report the mean batch loss for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')


CUDA Available. Continuing with 100 epochs
Epoch [1/100], Loss: 0.7869
Epoch [2/100], Loss: 0.4295
Epoch [3/100], Loss: 0.3733
Epoch [4/100], Loss: 0.3381
Epoch [5/100], Loss: 0.3131
Epoch [6/100], Loss: 0.2893
Epoch [7/100], Loss: 0.2709
Epoch [8/100], Loss: 0.2551
Epoch [9/100], Loss: 0.2407
Epoch [10/100], Loss: 0.2276
Epoch [11/100], Loss: 0.2173
Epoch [12/100], Loss: 0.2067
Epoch [13/100], Loss: 0.1974
Epoch [14/100], Loss: 0.1895
Epoch [15/100], Loss: 0.1814
Epoch [16/100], Loss: 0.1729
Epoch [17/100], Loss: 0.1666
Epoch [18/100], Loss: 0.1606
Epoch [19/100], Loss: 0.1556
Epoch [20/100], Loss: 0.1489
Epoch [21/100], Loss: 0.1430
Epoch [22/100], Loss: 0.1402
Epoch [23/100], Loss: 0.1360
Epoch [24/100], Loss: 0.1305
Epoch [25/100], Loss: 0.1274
Epoch [26/100], Loss: 0.1225
Epoch [27/100], Loss: 0.1199
Epoch [28/100], Loss: 0.1175
Epoch [29/100], Loss: 0.1137
Epoch [30/100], Loss: 0.1114
Epoch [31/100], Loss: 0.1041
Epoch [32/100], Loss: 0.1058
Epoch [33/100], Loss: 0.1030
Epoch [34

KeyboardInterrupt: 

Evaluate the model on the held-out EMNIST Balanced test split.

In [10]:
# Put the model in evaluation mode before scoring the test set.
model.eval()
correct, total = 0, 0

# No gradients are needed for scoring, so autograd tracking is disabled.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit for each sample
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

# Share of correctly classified test samples, in percent
accuracy = 100 * correct / total
print(f'Accuracy of the model on the test images: {accuracy:.2f}%')


Accuracy of the model on the test images: 85.24%
