# HW 6: Character Classification using CNNs with PyTorch

## Step 1: Data Acquisition + Cleanup

In [77]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import re


def preprocess_dataframe(df):
    # Regular expression pattern to match the pixel columns
    rxcy_pattern = re.compile(r'r\d+c\d+')
    pixel_columns_times = [
        col for col in df.columns if rxcy_pattern.match(col)]

    # Select the 'm_label' column and the pixel value columns
    df = df[['m_label'] + pixel_columns_times]

    # Get unique characters and create mapping dictionaries
    unique_chars_times = sorted(df['m_label'].unique())
    num_classes_times = len(unique_chars_times)
    char_to_index_times = {char: index for index,
                           char in enumerate(unique_chars_times)}
    index_to_char_times = {index: char for index,
                           char in enumerate(unique_chars_times)}

    # Normalize and reshape the pixel values
    Xs_times = df.drop('m_label', axis=1).to_numpy(dtype=np.float32) / 255.0
    Xs_times = Xs_times.reshape(Xs_times.shape[0], 20, 20)

    # Encode the labels
    Ys_times = np.array([char_to_index_times[char] for char in df['m_label']])

    return Xs_times, Ys_times, char_to_index_times, index_to_char_times, num_classes_times


# Load the dataframe from the CSV file
df_times = pd.read_csv('fonts/TIMES.csv')

# Preprocess the dataframe
Xs_times, Ys_times, char_to_index_times, index_to_char_times, num_classes_times = preprocess_dataframe(
    df_times)

# Reshape Xs to match PyTorch's input format
# This indicates that the height and width of the input images are both 20 pixels
Xs_times = np.reshape(Xs_times, (-1, 1, 20, 20))

# Create a TensorDataset from Xs and Ys
dataset_times = TensorDataset(torch.from_numpy(
    Xs_times), torch.from_numpy(Ys_times))

print("num_classes_times:", num_classes_times)

# Split the dataset into training and testing sets
X_train_times, X_test_times, Y_train_times, Y_test_times = train_test_split(
    Xs_times, Ys_times, test_size=0.2, random_state=42)

# Create TensorDatasets for training and testing
train_dataset_times = TensorDataset(
    torch.from_numpy(X_train_times), torch.from_numpy(Y_train_times))
test_dataset_times = TensorDataset(
    torch.from_numpy(X_test_times), torch.from_numpy(Y_test_times))


num_classes_times: 3087


In [78]:
print("Training dataset times shape:",
      train_dataset_times[0][0].shape, train_dataset_times[0][1].shape)
print("Testing dataset times shape:",
      test_dataset_times[0][0].shape, test_dataset_times[0][1].shape)

print("Number of samples in the training dataset times:", len(train_dataset_times))
print("Number of samples in the testing dataset times:", len(test_dataset_times))


Training dataset times shape: torch.Size([1, 20, 20]) torch.Size([])
Testing dataset times shape: torch.Size([1, 20, 20]) torch.Size([])
Number of samples in the training dataset times: 10184
Number of samples in the testing dataset times: 2546


## Step 2: Build a PyTorch Network

In [79]:
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        c1Out = 64  
        c2Out = 128  
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        # after pooling: 9 x 9 x c1Out
        self.pool = nn.MaxPool2d(2, 2)  
        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        # 7 x 7 x c2Out = 7 x 7 x 128 = 6272

        self.pooledOutputSize = c2Out * 7 * 7 
        self.fc1 = nn.Linear(self.pooledOutputSize, 5000)
        self.fc2 = nn.Linear(5000, 4000)
        self.fc3 = nn.Linear(4000, num_classes)

    def forward(self, x):  
        x = self.pool(F.relu(self.conv1(x)))  
        x = F.relu(self.conv2(x)) 
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x)) 
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    #compute the output size after our convolution layers
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net(num_classes_times)

In [80]:
def train_model(model, epochs, dataset):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    trainloader = torch.utils.data.DataLoader(
        dataset, batch_size=32, shuffle=True, num_workers=0)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            if i % 50 == 49:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0

    print('Finished Training')


In [81]:
print( "Training... times old model" )
train_model( net, 100, train_dataset_times )

Training... times old model
[1,    50] loss: 1.980
[1,   100] loss: 1.929
[1,   150] loss: 1.873
[1,   200] loss: 1.827
[1,   250] loss: 1.828
[1,   300] loss: 1.756
[2,    50] loss: 1.539
[2,   100] loss: 1.450
[2,   150] loss: 1.350
[2,   200] loss: 1.307
[2,   250] loss: 1.232
[2,   300] loss: 1.195
[3,    50] loss: 0.798
[3,   100] loss: 0.814
[3,   150] loss: 0.762
[3,   200] loss: 0.771
[3,   250] loss: 0.752
[3,   300] loss: 0.703
[4,    50] loss: 0.512
[4,   100] loss: 0.533
[4,   150] loss: 0.544
[4,   200] loss: 0.529
[4,   250] loss: 0.548
[4,   300] loss: 0.522
[5,    50] loss: 0.408
[5,   100] loss: 0.410
[5,   150] loss: 0.412
[5,   200] loss: 0.427
[5,   250] loss: 0.440
[5,   300] loss: 0.440
[6,    50] loss: 0.333
[6,   100] loss: 0.367
[6,   150] loss: 0.362
[6,   200] loss: 0.373
[6,   250] loss: 0.378
[6,   300] loss: 0.365
[7,    50] loss: 0.290
[7,   100] loss: 0.315
[7,   150] loss: 0.321
[7,   200] loss: 0.335
[7,   250] loss: 0.332
[7,   300] loss: 0.350
[8,   

## Step 3: Exploration and Evaluation

### Evaluate the network using cross validation (splitting data into training/testing). What is its accuracy?

In [82]:
def evaluate(model, dataset):
    #load some test data
    testloader = torch.utils.data.DataLoader(dataset, batch_size=32,
                                             shuffle=True, num_workers=0)
    correct = 0
    total = 0

    with torch.no_grad():  # <- Since we are not training, the model does not need to calculate gradients
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Just do a coarse evaluation... how many did we predict correcly?
    print('Accuracy of the network on the test images: %d %%' %
          (100 * correct / total))
    
evaluate(net, test_dataset_times)


Accuracy of the network on the test images: 39 %


### Create and train a different network topology (add more convolution layers, experiment with normalization (batch normalization or dropout), explore other types/sizes of layer). Try to find a topology that works better than the one described above.

In [83]:
class NewNet(nn.Module):
    def __init__(self, num_classes):
        super(NewNet, self).__init__()
        c1Out = 25
        c2Out = 50
        c3Out = 100

        # input size: 20x20x1
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        self.pool = nn.MaxPool2d(2, 2)  
        # output size: 18x18xc1Out

        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        #  output size: 16x16xc2Out

        self.conv3 = nn.Conv2d(c2Out, c3Out, 3)
        # output size: 7x7xc3Out = 7x7x100 = 4900

        self.pooledOutputSize = c3Out*7*7

        self.fc1 = nn.Linear(self.pooledOutputSize, 4000)
        self.fc2 = nn.Linear(4000, 3500)
        self.fc3 = nn.Linear(3500, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

new_net = NewNet(num_classes_times)

In [84]:
print( "Training... times new model" )
train_model( new_net, 100, train_dataset_times)

Training... times new model
[1,    50] loss: 1.992
[1,   100] loss: 1.962
[1,   150] loss: 1.908
[1,   200] loss: 1.879
[1,   250] loss: 1.841
[1,   300] loss: 1.807
[2,    50] loss: 1.651
[2,   100] loss: 1.591
[2,   150] loss: 1.590
[2,   200] loss: 1.495
[2,   250] loss: 1.430
[2,   300] loss: 1.402
[3,    50] loss: 1.060
[3,   100] loss: 1.008
[3,   150] loss: 0.988
[3,   200] loss: 0.936
[3,   250] loss: 0.898
[3,   300] loss: 0.836
[4,    50] loss: 0.625
[4,   100] loss: 0.648
[4,   150] loss: 0.650
[4,   200] loss: 0.617
[4,   250] loss: 0.623
[4,   300] loss: 0.604
[5,    50] loss: 0.458
[5,   100] loss: 0.484
[5,   150] loss: 0.493
[5,   200] loss: 0.493
[5,   250] loss: 0.497
[5,   300] loss: 0.495
[6,    50] loss: 0.374
[6,   100] loss: 0.402
[6,   150] loss: 0.405
[6,   200] loss: 0.423
[6,   250] loss: 0.419
[6,   300] loss: 0.420
[7,    50] loss: 0.326
[7,   100] loss: 0.339
[7,   150] loss: 0.354
[7,   200] loss: 0.375
[7,   250] loss: 0.370
[7,   300] loss: 0.382
[8,   

In [85]:
evaluate(new_net, test_dataset_times)


Accuracy of the network on the test images: 37 %


### Test the accuracy of your network with character inputs from a DIFFERENT font set. How does it perform?

In [86]:
# Load the dataframe from the CSV file
df_calibri = pd.read_csv('fonts/CALIBRI.csv')

# Preprocess the dataframe
Xs_calibri, Ys_calibri, char_to_index_calibri, index_to_char_calibri, num_classes_calibri = preprocess_dataframe(
    df_calibri)

# Reshape Xs to match PyTorch's input format
# This indicates that the height and width of the input images are both 20 pixels
Xs_calibri = np.reshape(Xs_calibri, (-1, 1, 20, 20))

# Create a TensorDataset from Xs and Ys
dataset_calibri = TensorDataset(torch.from_numpy(
    Xs_calibri), torch.from_numpy(Ys_calibri))

print("num_classes_calibri:", num_classes_calibri)

# Split the dataset into training and testing sets
X_train_calibri, X_test_calibri, Y_train_calibri, Y_test_calibri = train_test_split(
    Xs_calibri, Ys_calibri, test_size=0.2, random_state=42)

num_classes_calibri: 2384


In [87]:
print("Dataset calibri shape:",
      dataset_calibri[0][0].shape, dataset_calibri[0][1].shape)

print("Number of samples in the dataset calibri:", len(dataset_calibri))


Dataset calibri shape: torch.Size([1, 20, 20]) torch.Size([])
Number of samples in the dataset calibri: 19068


In [88]:
evaluate(net, dataset_calibri)


Accuracy of the network on the test images: 6 %


In [89]:
evaluate(new_net, dataset_calibri)


Accuracy of the network on the test images: 5 %
