# HW 6: Character Classification using CNNs with PyTorch

## Step 1: Data Acquisition + Cleanup

In [123]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import re

# Preprocess the dataframe


def preprocess_dataframe(df_times):
    # Regular expression pattern to match the pixel columns
    rxcy_pattern = re.compile(r'r\d+c\d+')
    pixel_columns_times = [
        col for col in df_times.columns if rxcy_pattern.match(col)]

    # Select the 'm_label' column and the pixel value columns
    df_times = df_times[['m_label'] + pixel_columns_times]

    # Get unique characters and create mapping dictionaries
    unique_chars_times = sorted(df_times['m_label'].unique())
    num_classes_times = len(unique_chars_times)
    char_to_index_times = {char: index for index,
                           char in enumerate(unique_chars_times)}
    index_to_char_times = {index: char for index,
                           char in enumerate(unique_chars_times)}

    # Normalize and reshape the pixel values
    Xs_times = df_times.drop('m_label', axis=1).to_numpy(
        dtype=np.float32) / 255.0
    Xs_times = Xs_times.reshape(Xs_times.shape[0], 20, 20)

    # Encode the labels
    Ys_times = np.array([char_to_index_times[char]
                        for char in df_times['m_label']])

    return Xs_times, Ys_times, char_to_index_times, index_to_char_times, num_classes_times


# Load the dataframe from the CSV file
df_times = pd.read_csv('fonts/TIMES.csv')

# Preprocess the dataframe
Xs_times, Ys_times, char_to_index_times, index_to_char_times, num_classes_times = preprocess_dataframe(
    df_times)

# Reshape Xs to match PyTorch's input format
# This indicates that the height and width of the input images are both 20 pixels
Xs_times = np.reshape(Xs_times, (-1, 1, 20, 20))

# Create a TensorDataset from Xs and Ys
dataset_times = TensorDataset(torch.from_numpy(
    Xs_times), torch.from_numpy(Ys_times))

print("num_classes_times:", num_classes_times)

# Split the dataset into training and testing sets
X_train_times, X_test_times, Y_train_times, Y_test_times = train_test_split(
    Xs_times, Ys_times, test_size=0.2, random_state=42)

# Create TensorDatasets for training and testing
train_dataset_times = TensorDataset(
    torch.from_numpy(X_train_times), torch.from_numpy(Y_train_times))
test_dataset_times = TensorDataset(
    torch.from_numpy(X_test_times), torch.from_numpy(Y_test_times))


num_classes_times: 3087


In [124]:
print("Training dataset times shape:",
      train_dataset_times[0][0].shape, train_dataset_times[0][1].shape)
print("Testing dataset times shape:",
      test_dataset_times[0][0].shape, test_dataset_times[0][1].shape)

print("Number of samples in the training dataset times:", len(train_dataset_times))
print("Number of samples in the testing dataset times:", len(test_dataset_times))


Training dataset times shape: torch.Size([1, 20, 20]) torch.Size([])
Testing dataset times shape: torch.Size([1, 20, 20]) torch.Size([])
Number of samples in the training dataset times: 10184
Number of samples in the testing dataset times: 2546


## Step 2: Build a PyTorch Network

In [125]:
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        c1Out = 256  
        c2Out = 512  
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        # after pooling: 9 x 9 x c1Out = 9 x 9 x 256
        self.pool = nn.MaxPool2d(2, 2)  
        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        # after pooling: 3 x 3 x c2Out = 3 x 3 x 512 = 4608

        self.pooledOutputSize = c2Out * 3 * 3 
        self.fc1 = nn.Linear(self.pooledOutputSize, 4000)
        self.fc2 = nn.Linear(4000, 3500)
        self.fc3 = nn.Linear(3500, num_classes_times) 


    def forward(self, x):  
        x = self.pool(F.relu(self.conv1(x)))  
        x = self.pool(F.relu(self.conv2(x)))  
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))  
        x = F.relu(self.fc2(x)) 
        x = self.fc3(x)
        return x

    #compute the output size after our convolution layers
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()

In [126]:
def train_model(model, epochs, dataset):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    trainloader = torch.utils.data.DataLoader(
        dataset, batch_size=32, shuffle=True, num_workers=0)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            if i % 50 == 49:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0

    print('Finished Training')


In [127]:
print( "Training... times old model" )
train_model( net, 50, train_dataset_times )

Training... times old model
[1,    50] loss: 1.981
[1,   100] loss: 1.949
[1,   150] loss: 1.921
[1,   200] loss: 1.865
[1,   250] loss: 1.862
[1,   300] loss: 1.804
[2,    50] loss: 1.627
[2,   100] loss: 1.529
[2,   150] loss: 1.506
[2,   200] loss: 1.469
[2,   250] loss: 1.413
[2,   300] loss: 1.349
[3,    50] loss: 1.041
[3,   100] loss: 0.993
[3,   150] loss: 0.946
[3,   200] loss: 0.910
[3,   250] loss: 0.897
[3,   300] loss: 0.853
[4,    50] loss: 0.658
[4,   100] loss: 0.675
[4,   150] loss: 0.638
[4,   200] loss: 0.664
[4,   250] loss: 0.656
[4,   300] loss: 0.655
[5,    50] loss: 0.509
[5,   100] loss: 0.520
[5,   150] loss: 0.520
[5,   200] loss: 0.517
[5,   250] loss: 0.533
[5,   300] loss: 0.526
[6,    50] loss: 0.404
[6,   100] loss: 0.444
[6,   150] loss: 0.431
[6,   200] loss: 0.455
[6,   250] loss: 0.453
[6,   300] loss: 0.449
[7,    50] loss: 0.358
[7,   100] loss: 0.373
[7,   150] loss: 0.400
[7,   200] loss: 0.412
[7,   250] loss: 0.392
[7,   300] loss: 0.403
[8,   

## Step 3: Exploration and Evaluation

### Evaluate the network using cross validation (splitting data into training/testing). What is its accuracy?

In [128]:
def evaluate(model, dataset):
    #load some test data
    testloader = torch.utils.data.DataLoader(dataset, batch_size=32,
                                             shuffle=True, num_workers=0)
    correct = 0
    total = 0

    with torch.no_grad():  # <- Since we are not training, the model does not need to calculate gradients
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Just do a coarse evaluation... how many did we predict correcly?
    print('Accuracy of the network on the test images: %d %%' %
          (100 * correct / total))
    
evaluate(net, test_dataset_times)


Accuracy of the network on the test images: 35 %


### Create and train a different network topology (add more convolution layers, experiment with normalization (batch normalization or dropout), explore other types/sizes of layer). Try to find a topology that works better than the one described above.

In [129]:
class NewNet(nn.Module):
    def __init__(self):
        super(NewNet, self).__init__()
        c1Out = 64
        c2Out = 128
        c3Out = 256

        # input size: 20x20x1
        self.conv1 = nn.Conv2d(1, c1Out, 3)
        self.pool = nn.MaxPool2d(2, 2)  
        # after pooling output size: 9x9x64

        self.conv2 = nn.Conv2d(c1Out, c2Out, 3)
        #  output size: 7x7x128

        self.conv3 = nn.Conv2d(c2Out, c3Out, 3)
        # output size: 5x5x256 = 6400

        self.pooledOutputSize = c3Out*5*5

        self.fc1 = nn.Linear(self.pooledOutputSize, 5000)
        self.fc2 = nn.Linear(5000, 4000)
        self.fc3 = nn.Linear(4000, num_classes_times)



    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        # print(x.shape)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


new_net = NewNet()


In [130]:
print( "Training... times new model" )
train_model( new_net, 50, train_dataset_times)

Training... times new model
[1,    50] loss: 1.993
[1,   100] loss: 1.940
[1,   150] loss: 1.876
[1,   200] loss: 1.853
[1,   250] loss: 1.816
[1,   300] loss: 1.771
[2,    50] loss: 1.571
[2,   100] loss: 1.510
[2,   150] loss: 1.502
[2,   200] loss: 1.394
[2,   250] loss: 1.367
[2,   300] loss: 1.353
[3,    50] loss: 1.011
[3,   100] loss: 0.995
[3,   150] loss: 0.976
[3,   200] loss: 0.953
[3,   250] loss: 0.910
[3,   300] loss: 0.849
[4,    50] loss: 0.649
[4,   100] loss: 0.686
[4,   150] loss: 0.659
[4,   200] loss: 0.684
[4,   250] loss: 0.631
[4,   300] loss: 0.649
[5,    50] loss: 0.515
[5,   100] loss: 0.497
[5,   150] loss: 0.533
[5,   200] loss: 0.532
[5,   250] loss: 0.526
[5,   300] loss: 0.529
[6,    50] loss: 0.402
[6,   100] loss: 0.416
[6,   150] loss: 0.442
[6,   200] loss: 0.439
[6,   250] loss: 0.457
[6,   300] loss: 0.457
[7,    50] loss: 0.352
[7,   100] loss: 0.380
[7,   150] loss: 0.370
[7,   200] loss: 0.393
[7,   250] loss: 0.406
[7,   300] loss: 0.397
[8,   

In [131]:
evaluate(new_net, test_dataset_times)


Accuracy of the network on the test images: 34 %


### Test the accuracy of your network with character inputs from a DIFFERENT font set. How does it perform?

In [132]:
# Load the dataframe from the CSV file
df_calibri = pd.read_csv('fonts/CALIBRI.csv')

# Preprocess the dataframe
Xs_calibri, Ys_calibri, char_to_index_calibri, index_to_char_calibri, num_classes_calibri = preprocess_dataframe(
    df_calibri)

# Reshape Xs to match PyTorch's input format
# This indicates that the height and width of the input images are both 20 pixels
Xs_calibri = np.reshape(Xs_calibri, (-1, 1, 20, 20))

# Create a TensorDataset from Xs and Ys
dataset_calibri = TensorDataset(torch.from_numpy(
    Xs_calibri), torch.from_numpy(Ys_calibri))

print("num_classes_calibri:", num_classes_calibri)

# Split the dataset into training and testing sets
X_train_calibri, X_test_calibri, Y_train_calibri, Y_test_calibri = train_test_split(
    Xs_calibri, Ys_calibri, test_size=0.2, random_state=42)

# Create TensorDatasets for training and testing
train_dataset_calibri = TensorDataset(
    torch.from_numpy(X_train_calibri), torch.from_numpy(Y_train_calibri))
test_dataset_calibri = TensorDataset(
    torch.from_numpy(X_test_calibri), torch.from_numpy(Y_test_calibri))


num_classes_calibri: 2384


In [133]:
print("Training dataset calibri shape:",
      train_dataset_calibri[0][0].shape, train_dataset_calibri[0][1].shape)
print("Testing dataset calibri shape:",
      test_dataset_calibri[0][0].shape, test_dataset_calibri[0][1].shape)

print("Number of samples in the training dataset calibri:", len(train_dataset_calibri))
print("Number of samples in the testing dataset calibri:", len(test_dataset_calibri))


Training dataset calibri shape: torch.Size([1, 20, 20]) torch.Size([])
Testing dataset calibri shape: torch.Size([1, 20, 20]) torch.Size([])
Number of samples in the training dataset calibri: 15254
Number of samples in the testing dataset calibri: 3814


In [134]:
evaluate(net, test_dataset_calibri)


Accuracy of the network on the test images: 4 %


In [135]:
evaluate(new_net, test_dataset_calibri)


Accuracy of the network on the test images: 5 %
