<a href="https://colab.research.google.com/github/felixsimard/comp551-p3/blob/main/Hassan_Exploration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- Notebook to build CNN image classifier
- Based on https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#training-on-gpu

In [1]:
import pickle
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from google.colab import drive
from typing import List

## Pickle Data to Numpy NDArray

In [2]:
# Felix's load data fn
# Function to return pickle loaded file in an ndarray
def load_data(filename, data_path='/content/drive/MyDrive/data/'):
    drive.mount("/content/drive")
    loaded_pkl = None
    try:
        pkl_buffered = open(data_path+''+filename,'rb')
        loaded_pkl = pickle.load(pkl_buffered)
    except Exception as e:
        print("Error loading data: {}".format(e))
    return loaded_pkl

In [3]:
# loading all data
train_features = load_data("images_l.pkl")
train_labels = load_data("labels_l.pkl")
test = load_data("images_test.pkl")
train_unlabelled = load_data("images_ul.pkl")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
print(train_features.shape, train_features[:1])
print(train_labels.shape, train_labels[:1])

(30000, 56, 56) [[[  0.   0.   0. ... 175.   0.   0.]
  [  0.   0.   0. ...   0.   0.   0.]
  [  0.   0.   0. ...   0. 175.   0.]
  ...
  [  0.   0.   0. ...   0.   0.   0.]
  [  0.   0.   0. ...   0.   0.   0.]
  [  0.   0.   0. ...   0.   0.   0.]]]
(30000, 36) [[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


- `train_features` has 30,000 samples of 56x56 images
- `train_labels` labels of the 56x56 images, a 36-bit binary vector
- The code block below verifies the image data are all in numpy n-dimensional arrays, `np.ndarray`

In [5]:
for data in [train_features, train_labels, train_unlabelled, test]:  
  print(type(data) is np.ndarray)

True
True
True
True


## Tensor DataLoader & Feature Labels

In [26]:
# Transforming the numpy arrays into tensors with the labels
# Concatenating datasets to have Tensor([[image_features], label])
training = DataLoader(TensorDataset(torch.Tensor(train_features).unsqueeze(1),
                                    torch.Tensor(train_labels).unsqueeze(1)))

- The classification task calls for classifying an image that contains:
1. Characters `A-Z` OR `a-z`
2. Numbers `0-9`
- Each image will include any combination of 1 lower OR uppercase character and one number
- Therefore, the labels will have to include every combination of these characters and numbers:
1. 260 different classes: `0-9` AND `A-Z`
2. 260 different classes: `0-9` AND `a-z`
- A total of 520 `labels`

In [7]:
labels = []

# This implementation is from Felix
for l in range(26):
    lowerC, upperC = [0.0 for i in range(26)], [0.0 for i in range(26)] 
    lowerC[l], upperC[l] = 1.0, 1.0
    for d in range(10):
        digits_str = [0.0 for j in range(10)]
        digits_str[d] = 1.0
        Lc = lowerC + digits_str
        Uc = upperC + digits_str
        labels.append(Lc)
        labels.append(Uc)

print(labels[:3], len(labels))


[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] 520


## Conv. NN Class (Implementation of VGG11 Deep CNN)

In [96]:
class CNN(nn.Module):

  # Constructor
  def __init__(self, in_channels=1, num_classes=520):
    super(CNN, self).__init__()         # Access methods in parent class
    self.in_channels = in_channels
    self.num_classes = num_classes
    # convolutional layers 
    self.conv_layers = nn.Sequential(
      nn.Conv2d(self.in_channels, 64, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(64, 128, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(128, 256, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.Conv2d(256, 256, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(256, 512, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.Conv2d(512, 512, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(512, 512, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.Conv2d(512, 512, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2)
      )
        # fully connected linear layers
    self.linear_layers = nn.Sequential(
      nn.Linear(in_features=512, out_features=4096),
      nn.ReLU(),
      nn.Dropout2d(0.5),
      nn.Linear(in_features=4096, out_features=512),
      nn.ReLU(),
      nn.Dropout2d(0.5),
      nn.Linear(in_features=512, out_features=36)
      )
    
  def forward(self, x):
      print(x.shape)
      x = self.conv_layers(x)
      print(x.shape)
      # flatten to prepare for the fully connected layers
      x = x.view(1, 512)
      print(x.shape)
      x = self.linear_layers(x)
      print(x.shape)
      return x
  

# Model Loss, Optimization, & Run with CUDA



In [9]:
epochs = 4
batch = 8
lr = 0.005

In [97]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)
steps = len(training)

In [98]:
for epoch in range(epochs):
    for i, data in enumerate(training, 0):
        inputs, labels = data[0].to(device), data[1].squeeze_().to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
torch.save(model.state_dict(), './cnn.pth')

torch.Size([1, 1, 56, 56])
torch.Size([1, 512, 1, 1])
torch.Size([1, 512])
torch.Size([1, 36])


ValueError: ignored