References:

https://www.youtube.com/watch?v=9OHlgDjaE2I&ab_channel=AI-SPECIALS


In [72]:
import torch
import torch.nn as nn
import numpy as np

import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset

import torch.nn as nn
from torch.optim import Adam

from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split

import gc

In [73]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Copy paste from Q1 to get the LFW data subset

In [74]:
# Download the data, if not already on disk and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2 data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.images
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names

# Split into a training set and a test set using a stratified k fold
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# normalise according to spec
X_train = X_train / 255.0
X_test = X_test / 255.0
X_train = X_train[:, np.newaxis, :, :]
X_test = X_test[:, np.newaxis, :, :]

# Convert to torch floats
x_train_to_tensor = torch.from_numpy(X_train).to(torch.float32)
y_train_to_tensor = torch.from_numpy(y_train).to(torch.long) 
x_test_to_tensor = torch.from_numpy(X_test).to(torch.float32)
y_test_to_tensor = torch.from_numpy(y_test).to(torch.long)

# Convert to tensors
train_dataset = TensorDataset(x_train_to_tensor, y_train_to_tensor)
test_dataset = TensorDataset(x_test_to_tensor, y_test_to_tensor)

In [75]:
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [76]:
inputs, label = next(iter(train_loader))
print(f'Input shape: {inputs.shape}')

Input shape: torch.Size([64, 1, 50, 37])


We can see that for each index in shape:
- 0 tells us batch size
- 1 tells us number of input channels
- 2 tells us height of image
- 3 tells us width of image
So we have a batch size of 64, 1 input channel (grayscale image), 50x37 image shape

## Convolutional Neural Network

First, let's define our constants from the spec sheet:

In [77]:
kernel_size = 3
num_filters = 32

We can determine each shape of the CNN by using the formula:

$$ Output \space Shape = {{{W-K+2P} \over S} + 1} $$

Where:
- W = width
- K = kernel size
- P = padding (default = 0)
- S = stride (default = 1)

So, we can work out the shape of our CNN before doing anything:

Layer 1:
- Input shape = (64, 1, 50, 37)
- Output shape = (64, 32, 48, 35)
    - Height = (50 - 3 + 0) / 1 + 1 = 48
    - Width = (37 - 3 + 1) / 1 + 1 = 35

Layer 2:
- Input shape = (64, 32, 48, 35)
- Output shape = (64, 32, 46, 33)
    - Height = (48 - 3 + 0) / 1 + 1 = 46
    - Width = (35 - 3 + 1) / 1 + 1 = 33

Fully connected layer:
- Shape gets flattened first:
    - Output Shape = (64, 32 * 46 * 33) = (64, 48432)
- LazyLinear infers the output shape based on whatever we input, in this case I used the number of classes, so:
    - Output Shape = (64, 6)

In [78]:
class CNN(nn.Module):
    def __init__(self, num_classes=6):
        super(CNN, self).__init__()

        # First convolutional layer
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=num_filters, kernel_size=kernel_size)

        # Add non-linear activation function
        self.relu1=nn.ReLU()

        # Second convolutional layer
        self.conv2 = nn.Conv2d(in_channels=num_filters, out_channels=num_filters, kernel_size=kernel_size)

        # Add non-linear activation function
        self.relu2=nn.ReLU()

        # Add fully connected layer as per the spec
        self.fc=nn.LazyLinear(out_features=num_classes)

    def forward(self,input):
        output=self.conv1(input)
        output=self.relu1(output)

        output=self.conv2(output)
        output=self.relu2(output)

        output=torch.flatten(output, 1)
        output=self.fc(output)

        return output

In [79]:
# Define hyper-parameters arbitrarily
num_epochs = 20
batch_size = 32
learning_rate = 0.001 # this is a generally accepted value for learning rate
num_classes = 6

# Initialise the model and send to GPU
model = CNN(num_classes=num_classes)
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=0.0001) # Used default weight_decay



In [80]:
for epoch in range(num_epochs):
    for images, labels in train_loader:  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

    print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/20], Loss: 2.0769
Epoch [2/20], Loss: 2.0867
Epoch [3/20], Loss: 1.7113
Epoch [4/20], Loss: 1.4675
Epoch [5/20], Loss: 1.2444
Epoch [6/20], Loss: 0.9398
Epoch [7/20], Loss: 1.0364
Epoch [8/20], Loss: 0.5101
Epoch [9/20], Loss: 0.3737
Epoch [10/20], Loss: 0.5475
Epoch [11/20], Loss: 0.5690
Epoch [12/20], Loss: 0.4076
Epoch [13/20], Loss: 0.3557
Epoch [14/20], Loss: 0.3830
Epoch [15/20], Loss: 0.2110
Epoch [16/20], Loss: 0.2360
Epoch [17/20], Loss: 0.1621
Epoch [18/20], Loss: 0.5368
Epoch [19/20], Loss: 0.0643
Epoch [20/20], Loss: 0.3501


In [81]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print(f'Accuracy of the network on the test images: {100 * correct / total} %')   

Accuracy of the network on the test images: 82.29508196721312 %
