References:

https://www.youtube.com/watch?v=9OHlgDjaE2I&ab_channel=AI-SPECIALS


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import numpy as np

import torchvision.transforms as transforms
from torchvision.datasets import LFWPeople
from torch.utils.data import DataLoader, TensorDataset

import torch.nn as nn
from torch.optim import Adam
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
height = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((height, height)),
    transforms.RandomHorizontalFlip(),  # 0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5, 0.5, 0.5],   # 0-1 to [-1,1], formula (x-mean)/std
                        [0.5, 0.5, 0.5])
])

In [20]:
# Download the data, if not already on disk and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2 data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.images
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names

# Split into a training set and a test set using a stratified k fold
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# normalise
X_train = X_train / 255.0
X_test = X_test / 255.0
X_train = X_train[:, :, :, np.newaxis]
X_test = X_test[:, :, :, np.newaxis]
# print("X_train shape:", X_train.shape)

x_train_to_tensor = torch.from_numpy(X_train).to(torch.float32).permute(0, 3, 1, 2)
y_train_to_tensor = torch.from_numpy(y_train).to(torch.long) 
x_test_to_tensor = torch.from_numpy(X_test).to(torch.float32).permute(0, 3, 1, 2)
y_test_to_tensor = torch.from_numpy(y_test).to(torch.long)

train_dataset = TensorDataset(x_train_to_tensor, y_train_to_tensor)
test_dataset = TensorDataset(x_test_to_tensor, y_test_to_tensor)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# DataLoader
batch_size = 64

# Create train and test data
train_data = LFWPeople(root='./data', split="train", download=True, transform=transform)
test_data = LFWPeople(root='./data', split="test", download=True, transform=transform)

# Create train and test dataloaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
def data_loader(data_dir, batch_size, random_seed=42, valid_size=0.1, shuffle=True, test=False):

    normalize = transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5],
    )

    # define transforms
    transform = transforms.Compose([
            transforms.Resize((100, 100)),
            transforms.ToTensor(),
            normalize,
    ])

    if test:
        dataset = LFWPeople(root='./data', split="test", download=True, transform=transform)

        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

        return data_loader

    # load the dataset
    train_dataset = LFWPeople(root='./data', split="train", download=True, transform=transform)

    valid_dataset = LFWPeople(root='./data', split="train", download=True, transform=transform)

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(42)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


# LFWPeople dataset 
train_loader, valid_loader = data_loader(data_dir='./data', batch_size=64)

test_loader = data_loader(data_dir='./data', batch_size=64, test=True)

In [None]:
print(train_data._get_classes)

In [None]:
print(test_data._get_classes)

In [None]:
train_count = 9525
test_count = 3708
print(train_count+test_count)

In [21]:
dataiter = iter(train_loader)
inputs, label = next(dataiter)
print(f'Input shape: {inputs.shape}')

Input shape: torch.Size([32, 1, 50, 37])


In [None]:
# Create class for CNN
class CNN(nn.Module):
    def __init__(self, num_classes=5749):
        super(CNN, self).__init__()
        # Specify layers in network

        # Input shape= 128, 3, 150, 150 
        # [0] = bs, 
        # [1] = num_channels, 
        # [2] = height,
        # [3] = width

        # Output size after convolution filter
        # ((w - f + 2P)/s) + 1
        # w = 150
        # f = 3 = kernel_size = size of filter
        # P = padding = 1
        # s = stride = 1
        
        # in_channels = num_channels
        # out_channels = number of filters
        # kernel_size = size of convolutional kernel/filter = 3x3
        # Define the convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Shape = 128, 32, 150, 150

        num_channels = 32
        # self.bn1 = nn.BatchNorm2d(num_features=num_channels)
        # # num_features = num_channels
        # # Shape = 128, 32, 150, 150

        # # bring non-linearity
        self.relu1 = nn.ReLU()
        # # Shape = 128, 32, 150, 150

        self.pool = nn.MaxPool2d(kernel_size=2)
        # Reduce image size by factor of 2
        # Shape = 128, 32, 75, 75


        # Add second convolutional layer
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Shape = 128, 32, 75, 75

        # self.bn2 = nn.BatchNorm2d(num_features=32)
        # # num_features = num_channels
        # # Shape = 128, 32, 75, 75

        self.relu2 = nn.ReLU()
        # Shape = 128, 32, 75, 75


        # Add fully connected layer
        self.fc1 = nn.Linear(in_features=32*75*75, out_features=num_classes)
        

    def forward(self, input):
        # First layer
        output = self.conv1(input)
        # output = self.bn1(output)
        # output = self.relu1(output)
        output = self.pool(output)

        # Second layer
        output = self.conv2(output)
        # output = self.bn2(output)
        output = self.relu2(output)

        # Feed into Fully Connected layer
        # Above output will be matrix with shape (128, 32, 75, 75)
        output = output.view(-1, 32*75*75)

        output = self.fc1(output)

        return output

In [22]:
class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=1,out_channels=32,kernel_size=3,stride=1,padding=1)
        # #Shape= (256,12,150,150)
        # self.bn1=nn.BatchNorm2d(num_features=32)
        # #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        # #Shape= (256,12,150,150)
        
        self.pool1=nn.MaxPool2d(kernel_size=2)
        # #Reduce the image size be factor 2
        # #Shape= (256,12,75,75)
        
        self.conv2=nn.Conv2d(in_channels=32,out_channels=32,kernel_size=3,stride=1,padding=1)
        # #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        # #Shape= (256,20,75,75)
        # 
        self.pool2=nn.MaxPool2d(kernel_size=2)  
        
        self.fc=nn.LazyLinear(out_features=num_classes)
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        # output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool1(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
        output=self.pool2(output)
            
        #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=torch.flatten(output, 1)
            
            
        output=self.fc(output)
            
        return output

In [None]:
print(np.unique(y))

In [23]:
num_epochs = 20
batch_size = 32
learning_rate = 0.001
num_classes = 6

model = ConvNet(num_classes=num_classes)
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=0.0001)

In [24]:
import gc
total_step = len(train_loader)

for epoch in range(num_epochs):
    for images, labels in train_loader:  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

    print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/20], Loss: 1.4645
Epoch [2/20], Loss: 1.6790
Epoch [3/20], Loss: 1.5732
Epoch [4/20], Loss: 1.1614
Epoch [5/20], Loss: 0.7813
Epoch [6/20], Loss: 0.5591
Epoch [7/20], Loss: 0.7871
Epoch [8/20], Loss: 0.7232
Epoch [9/20], Loss: 0.4163
Epoch [10/20], Loss: 0.3709
Epoch [11/20], Loss: 0.7150
Epoch [12/20], Loss: 0.3277
Epoch [13/20], Loss: 0.4115
Epoch [14/20], Loss: 0.3474
Epoch [15/20], Loss: 0.1332
Epoch [16/20], Loss: 0.1490
Epoch [17/20], Loss: 0.1781
Epoch [18/20], Loss: 0.1251
Epoch [19/20], Loss: 0.1210
Epoch [20/20], Loss: 0.0410


In [25]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print(f'Accuracy of the network on the test images: {100 * correct / total} %')   

Accuracy of the network on the test images: 83.60655737704919 %
