# HW 6: Character Classification using CNNs with PyTorch

## Step 1: Data Acquisition + Cleanup

In [19]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import re

def preprocess_dataframe(df):
    # Regular expression pattern to match the pixel columns
    rxcy_pattern = re.compile(r'r\d+c\d+')
    pixel_columns_times = [
        col for col in df.columns if rxcy_pattern.match(col)]

    # Select the 'm_label' column and the pixel value columns
    df = df[['m_label'] + pixel_columns_times]

    # Calculate label counts
    label_counts = df['m_label'].value_counts()

    # Filter labels in the range of [0, 255]
    filtered_labels = label_counts.index[
        np.logical_and(label_counts.index >= 0, label_counts.index <= 255)]

    # Filter the dataframe to keep only the records with labels in filtered_labels
    df = df[df['m_label'].isin(filtered_labels)]

    # Get unique characters and create mapping dictionaries
    unique_chars_times = sorted(filtered_labels)
    num_classes_times = len(unique_chars_times)
    char_to_index_times = {char: index for index,
                           char in enumerate(unique_chars_times)}
    index_to_char_times = {index: char for index,
                           char in enumerate(unique_chars_times)}

    # Normalize and reshape the pixel values
    Xs_times = df.drop('m_label', axis=1).to_numpy(dtype=np.float32) / 255.0
    Xs_times = Xs_times.reshape(Xs_times.shape[0], 20, 20)

    # Encode the labels
    Ys_times = np.array([char_to_index_times[char] for char in df['m_label']])

    return Xs_times, Ys_times, char_to_index_times, index_to_char_times, num_classes_times


# Load the dataframe from the CSV file
df_times = pd.read_csv('fonts/TIMES.csv')

# Preprocess the dataframe
Xs_times, Ys_times, char_to_index_times, index_to_char_times, num_classes_times = preprocess_dataframe(
    df_times)

# Reshape Xs to match PyTorch's input format
# This indicates that the height and width of the input images are both 20 pixels
Xs_times = np.reshape(Xs_times, (-1, 1, 20, 20))

# Create a TensorDataset from Xs and Ys
dataset_times = TensorDataset(torch.from_numpy(
    Xs_times), torch.from_numpy(Ys_times))

print("num_classes_times:", num_classes_times)

# Split the dataset into training and testing sets
X_train_times, X_test_times, Y_train_times, Y_test_times = train_test_split(
    Xs_times, Ys_times, test_size=0.2, random_state=42)

# Create TensorDatasets for training and testing
train_dataset_times = TensorDataset(
    torch.from_numpy(X_train_times), torch.from_numpy(Y_train_times))
test_dataset_times = TensorDataset(
    torch.from_numpy(X_test_times), torch.from_numpy(Y_test_times))


num_classes_times: 188


In [20]:
print("Training dataset times shape:",
      train_dataset_times[0][0].shape, train_dataset_times[0][1].shape)
print("Testing dataset times shape:",
      test_dataset_times[0][0].shape, test_dataset_times[0][1].shape)

print("Number of samples in the training dataset times:", len(train_dataset_times))
print("Number of samples in the testing dataset times:", len(test_dataset_times))


Training dataset times shape: torch.Size([1, 20, 20]) torch.Size([])
Testing dataset times shape: torch.Size([1, 20, 20]) torch.Size([])
Number of samples in the training dataset times: 1976
Number of samples in the testing dataset times: 494


## Step 2: Build a PyTorch Network

In [21]:
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        c1Out = 32  
        c2Out = 64  
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        # after pooling: 9 x 9 x c1Out = 9 x 9 x 32 = 2592
        self.pool = nn.MaxPool2d(2, 2)  
        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        # after pooling: 3 x 3 x c2Out = 3 x 3 x 64 = 576

        self.pooledOutputSize = c2Out * 3 * 3 
        self.fc1 = nn.Linear(self.pooledOutputSize, 400)
        self.fc2 = nn.Linear(400, 250)
        self.fc3 = nn.Linear(250, num_classes) 


    def forward(self, x):  
        x = self.pool(F.relu(self.conv1(x)))  
        x = self.pool(F.relu(self.conv2(x)))  
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))  
        x = F.relu(self.fc2(x)) 
        x = self.fc3(x)
        return x

    #compute the output size after our convolution layers
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net(num_classes_times)

In [22]:
def train_model(model, epochs, dataset):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    trainloader = torch.utils.data.DataLoader(
        dataset, batch_size=32, shuffle=True, num_workers=0)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            if i % 25 == 24:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0

    print('Finished Training')


In [23]:
print( "Training... times old model" )
train_model( net, 1000, train_dataset_times )

Training... times old model
[1,    25] loss: 0.651
[1,    50] loss: 0.625
[2,    25] loss: 0.551
[2,    50] loss: 0.532
[3,    25] loss: 0.506
[3,    50] loss: 0.499
[4,    25] loss: 0.466
[4,    50] loss: 0.442
[5,    25] loss: 0.396
[5,    50] loss: 0.391
[6,    25] loss: 0.359
[6,    50] loss: 0.341
[7,    25] loss: 0.317
[7,    50] loss: 0.327
[8,    25] loss: 0.296
[8,    50] loss: 0.285
[9,    25] loss: 0.261
[9,    50] loss: 0.254
[10,    25] loss: 0.227
[10,    50] loss: 0.234
[11,    25] loss: 0.197
[11,    50] loss: 0.213
[12,    25] loss: 0.182
[12,    50] loss: 0.194
[13,    25] loss: 0.176
[13,    50] loss: 0.175
[14,    25] loss: 0.162
[14,    50] loss: 0.159
[15,    25] loss: 0.150
[15,    50] loss: 0.150
[16,    25] loss: 0.145
[16,    50] loss: 0.140
[17,    25] loss: 0.127
[17,    50] loss: 0.139
[18,    25] loss: 0.122
[18,    50] loss: 0.129
[19,    25] loss: 0.107
[19,    50] loss: 0.127
[20,    25] loss: 0.118
[20,    50] loss: 0.110
[21,    25] loss: 0.102
[21,  

## Step 3: Exploration and Evaluation

### Evaluate the network using cross validation (splitting data into training/testing). What is its accuracy?

In [24]:
def evaluate(model, dataset):
    #load some test data
    testloader = torch.utils.data.DataLoader(dataset, batch_size=32,
                                             shuffle=True, num_workers=0)
    correct = 0
    total = 0

    with torch.no_grad():  # <- Since we are not training, the model does not need to calculate gradients
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Just do a coarse evaluation... how many did we predict correcly?
    print('Accuracy of the network on the test images: %d %%' %
          (100 * correct / total))
    
evaluate(net, test_dataset_times)


Accuracy of the network on the test images: 82 %


### Create and train a different network topology (add more convolution layers, experiment with normalization (batch normalization or dropout), explore other types/sizes of layer). Try to find a topology that works better than the one described above.

In [25]:
class NewNet(nn.Module):
    def __init__(self, num_classes):
        super(NewNet, self).__init__()
        c1Out = 16
        c2Out = 32
        c3Out = 64

        # input size: 20x20x1
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        self.pool = nn.MaxPool2d(2, 2)  
        # output size: 18x18x16

        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        #  output size: 8x8x32

        self.conv3 = nn.Conv2d(c2Out, c3Out, 3)
        # output size: 3x3x64 = 576

        self.pooledOutputSize = c3Out*3*3

        self.fc1 = nn.Linear(self.pooledOutputSize, 400)
        self.fc2 = nn.Linear(400, 250)
        self.fc3 = nn.Linear(250, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # print(x.shape)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

new_net = NewNet(num_classes_times)

In [26]:
print( "Training... times new model" )
train_model( new_net, 1000, train_dataset_times)

Training... times new model
[1,    25] loss: 0.653
[1,    50] loss: 0.636
[2,    25] loss: 0.543
[2,    50] loss: 0.521
[3,    25] loss: 0.495
[3,    50] loss: 0.458
[4,    25] loss: 0.405
[4,    50] loss: 0.381
[5,    25] loss: 0.348
[5,    50] loss: 0.308
[6,    25] loss: 0.284
[6,    50] loss: 0.278
[7,    25] loss: 0.248
[7,    50] loss: 0.238
[8,    25] loss: 0.216
[8,    50] loss: 0.194
[9,    25] loss: 0.188
[9,    50] loss: 0.171
[10,    25] loss: 0.166
[10,    50] loss: 0.157
[11,    25] loss: 0.146
[11,    50] loss: 0.143
[12,    25] loss: 0.130
[12,    50] loss: 0.139
[13,    25] loss: 0.124
[13,    50] loss: 0.124
[14,    25] loss: 0.110
[14,    50] loss: 0.111
[15,    25] loss: 0.105
[15,    50] loss: 0.108
[16,    25] loss: 0.097
[16,    50] loss: 0.100
[17,    25] loss: 0.090
[17,    50] loss: 0.099
[18,    25] loss: 0.075
[18,    50] loss: 0.097
[19,    25] loss: 0.089
[19,    50] loss: 0.077
[20,    25] loss: 0.075
[20,    50] loss: 0.078
[21,    25] loss: 0.070
[21,  

In [27]:
evaluate(new_net, test_dataset_times)


Accuracy of the network on the test images: 85 %


### Test the accuracy of your network with character inputs from a DIFFERENT font set. How does it perform?

In [28]:
# Load the dataframe from the CSV file
df_calibri = pd.read_csv('fonts/CALIBRI.csv')

# Preprocess the dataframe
Xs_calibri, Ys_calibri, char_to_index_calibri, index_to_char_calibri, num_classes_calibri = preprocess_dataframe(
    df_calibri)

# Reshape Xs to match PyTorch's input format
# This indicates that the height and width of the input images are both 20 pixels
Xs_calibri = np.reshape(Xs_calibri, (-1, 1, 20, 20))

# Create a TensorDataset from Xs and Ys
dataset_calibri = TensorDataset(torch.from_numpy(
    Xs_calibri), torch.from_numpy(Ys_calibri))

print("num_classes_calibri:", num_classes_calibri)

# Split the dataset into training and testing sets
X_train_calibri, X_test_calibri, Y_train_calibri, Y_test_calibri = train_test_split(
    Xs_calibri, Ys_calibri, test_size=0.2, random_state=42)

num_classes_calibri: 188


In [32]:
print("Dataset calibri shape:",
      dataset_calibri[0][0].shape, dataset_calibri[0][1].shape)

print("Number of samples in the dataset calibri:", len(dataset_calibri))


Dataset calibri shape: torch.Size([1, 20, 20]) torch.Size([])
Number of samples in the dataset calibri: 1504


In [33]:
evaluate(net, dataset_calibri)


Accuracy of the network on the test images: 32 %


In [34]:
evaluate(new_net, dataset_calibri)


Accuracy of the network on the test images: 36 %
