<a href="https://colab.research.google.com/github/hsiaopat/facial-recognition/blob/main/FacialRecognitionDropout%2BLeakyRELU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Introduction to Neural Networks (CSE 40868/60868), Spring 2023
# University of Notre Dame
# Final Project
# _______________________________________________________________________________
# Patrick Hsiao and Grace Bezold

In [1]:
!pip install retina-face
from retinaface import RetinaFace
from PIL import Image
import cv2
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np
import cv2
import os
import torch.nn as nn
from torchsummary import summary
import torchvision.transforms as transforms
from torchvision.transforms.functional import resize
from torchvision.transforms import Resize, ToTensor
from tqdm import tqdm
from torchvision.datasets import ImageFolder
import torchvision.datasets as datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting retina-face
  Downloading retina_face-0.0.13-py3-none-any.whl (16 kB)
Installing collected packages: retina-face
Successfully installed retina-face-0.0.13


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Set image directory

In [3]:
# Get images from Google Drive
image_dir = '/content/drive/MyDrive/face_rec_data/'

Configure Images to fit only faces
*DO NOT RUN THIS CELL*
we have already uploaded the cropped images in face_rec_data so this does not need to be run

In [None]:
# Loop through all images in the directory
for folder_path in os.listdir(image_dir):
    folders = os.path.join(image_dir, folder_path)

    for file_name in os.listdir(folders):


      # Check if the file is an image
      if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):

        # Get the full path of the image file
        image_path = os.path.join(folders, file_name)

        # Read the image
        image = cv2.imread(image_path)

        # Detect faces in the image
        faces = RetinaFace.detect_faces(image)
        print(faces)

        try:
        # Extract the bounding box of the face
            x1, y1, x2, y2 = faces['face_1']['facial_area']

        # Crop the face from the image
            cropped_image = image[y1:y2, x1:x2]

        # Convert to PIL Image
            pil_image = Image.fromarray(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB))


        # Save the cropped image with a new file name
            cropped_path = os.path.join(folders, file_name)
            pil_image.save(cropped_path)
        except:
            print("No faces were detected in the image")
            continue


KeyboardInterrupt: ignored

In [4]:
# Define the directory containing the images and the transformation to apply to each image
transform = transforms.Compose([
    Resize((256, 256)),
    ToTensor()
])

# Create the ImageFolder dataset
image_dataset = ImageFolder(image_dir, transform=transform)
print(image_dataset)

# Split the dataset into training, validation, and testing sets
train_size = int(len(image_dataset) * 0.8)
val_size = int(len(image_dataset) * 0.1)
test_size = len(image_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(image_dataset, [train_size, val_size, test_size])
classes = ['grace','other','patrick']

# Create data loaders for the training, validation, and testing sets
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Print the shapes of the training, validation, and testing sets
data, labels = next(iter(train_loader))
print("Training set:", data.shape, labels.shape)
data, labels = next(iter(val_loader))
print("Validation set:", data.shape, labels.shape)
data, labels = next(iter(test_loader))
print("Testing set:", data.shape, labels.shape)


Dataset ImageFolder
    Number of datapoints: 311
    Root location: /content/drive/MyDrive/face_rec_data/
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )
Training set: torch.Size([32, 3, 256, 256]) torch.Size([32])
Validation set: torch.Size([31, 3, 256, 256]) torch.Size([31])
Testing set: torch.Size([32, 3, 256, 256]) torch.Size([32])


Convolutional Neural Network

In [5]:
import torch.nn.functional as F

class CNN(nn.Module):

    def __init__(self, numChannels, numClasses):
        super(CNN, self).__init__()
        self.classes = numClasses


        # Convolutional layers:
        self.conv1 = nn.Conv2d(in_channels=numChannels, out_channels=96, kernel_size=(11,11), stride=(4,4))
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=(5,5), stride=(1,1))
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=(3,3), stride=(1,1))
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=(3,3), stride=(1,1))
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=(3,3), stride=(1,1))

        # Activation function:
        self.relu = nn.LeakyReLU()
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.01, inplace=True)


        # Pooling layer:
        self.maxpool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))

        # Batch normalization layers:
        self.batchnorm1 = nn.BatchNorm2d(num_features=96)
        self.batchnorm2 = nn.BatchNorm2d(num_features=256)

        # Fully-connected layers:
        self.fc1 = nn.Linear(in_features=2304, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=3)

        #add drop out
        self.dropout = nn.Dropout()

    # Evaluation function
    def evaluate(self, model, dataloader, classes, device):

        # We need to switch the model into the evaluation mode
        model.eval()

        # Prepare to count predictions for each class
        correct_pred = {classname: 0 for classname in classes}
        total_pred = {classname: 0 for classname in classes}

        # For all test data samples:
        for data in dataloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)

            images = images.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()
            predictions = predictions.detach().cpu().numpy()

            # Count the correct predictions for each class
            for label, prediction in zip(labels, predictions):

                # If you want to see real and predicted labels for all samples:
                # print("Real class: " + classes[label] + ", predicted = " + classes[prediction])

                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

        # Calculate the overall accuracy on the test set
        acc = sum(correct_pred.values()) / sum(total_pred.values())

        return acc

    def forward(self, x):

        x = resize(x, size=[256])

        # Convolutional, LeakyReLU, MaxPooling and BatchNorm layers go first
        x = self.conv1(x)
        x = self.leakyrelu(x)
        x = self.maxpool(x)
        x = self.batchnorm1(x)
        x = self.conv2(x)
        x = self.leakyrelu(x)
        x = self.maxpool(x)
        x = self.batchnorm2(x)
        x = self.conv3(x)
        x = self.leakyrelu(x)
        x = self.conv4(x)
        x = self.leakyrelu(x)
        x = self.conv5(x)
        x = self.leakyrelu(x)
        x = self.maxpool(x)

        # Fully-connected layers, we need to "flatten" our tensors first
        x = torch.flatten(x, 1)

        # Finally, we need our two-layer perceptron (two fully-connected layers) at the end of the network:
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.leakyrelu(x)
        x = self.fc2(x)

        return x


In [7]:
epochs = 10

In [8]:
if __name__ == '__main__':
    # Specify the operation mode:
    # 'train' = training with your train and validation data splits
    # 'eval'  = evaluation of the trained model with your test data split 
    mode = 'train'

    # Path where you plan to save the best model during training
    my_best_model = "/content/drive/MyDrive/face_rec_data/face_recog_best_model.pth"
    print(my_best_model)



    # Set the device (GPU or CPU, depending on availability)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Currently using device: ", device)

    # Initialize the model and print out its configuration
    model = CNN(numChannels = 3, numClasses = 3)
    model.to(device)

    print("\n\nModel summary:\n\n")
    summary(model, input_size=(3, 32, 32))

    if mode == "train":

        print("\n\nTraining starts!\n\n")
        
        model.train()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        
        running_loss = .0
        best_acc = .0
        for epoch in range(epochs):
            print(f"Starting epoch {epoch + 1}")
            for idx, data in tqdm(enumerate(train_loader), total=len(train_loader)):

                # Get the inputs (data is a list of [inputs, labels])
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                loss = loss.detach().cpu().numpy()
                inputs = inputs.detach().cpu().numpy()
                labels = labels.detach().cpu().numpy()
                running_loss += loss

            # Evaluate the accuracy after each epoch
            acc = model.evaluate(model, val_loader, classes, device)
            if acc > best_acc:
                print(f"Better validation accuracy achieved: {acc * 100:.2f}%")
                best_acc = acc
                print(f"Saving this model as: {my_best_model}")
                torch.save(model.state_dict(), my_best_model)

    # And here we evaluate the trained model with the test data
    elif mode == "eval":

        print("\n\nValidating the trained model:")
        print(f"Loading checkpoint from {my_best_model}")
        model.load_state_dict(torch.load(my_best_model))
        acc = model.evaluate(model, test_loader, classes, device)
        print(f"Accuracy on the test (unknown) data: {acc * 100:.2f}%")

    else:
        print("'mode' argument should either be 'train' or 'eval'")

/content/drive/MyDrive/face_rec_data/face_recog_best_model.pth
Currently using device:  cpu


Model summary:


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 62, 62]          34,944
         LeakyReLU-2           [-1, 96, 62, 62]               0
         MaxPool2d-3           [-1, 96, 31, 31]               0
       BatchNorm2d-4           [-1, 96, 31, 31]             192
            Conv2d-5          [-1, 256, 27, 27]         614,656
         LeakyReLU-6          [-1, 256, 27, 27]               0
         MaxPool2d-7          [-1, 256, 13, 13]               0
       BatchNorm2d-8          [-1, 256, 13, 13]             512
            Conv2d-9          [-1, 384, 11, 11]         885,120
        LeakyReLU-10          [-1, 384, 11, 11]               0
           Conv2d-11            [-1, 384, 9, 9]       1,327,488
        LeakyReLU-12            [-1, 384, 9, 9]         

100%|██████████| 8/8 [01:22<00:00, 10.36s/it]


Better validation accuracy achieved: 32.26%
Saving this model as: /content/drive/MyDrive/face_rec_data/face_recog_best_model.pth
Starting epoch 2


100%|██████████| 8/8 [00:28<00:00,  3.56s/it]


Better validation accuracy achieved: 38.71%
Saving this model as: /content/drive/MyDrive/face_rec_data/face_recog_best_model.pth
Starting epoch 3


100%|██████████| 8/8 [00:28<00:00,  3.58s/it]


Better validation accuracy achieved: 51.61%
Saving this model as: /content/drive/MyDrive/face_rec_data/face_recog_best_model.pth
Starting epoch 4


100%|██████████| 8/8 [00:28<00:00,  3.60s/it]


Better validation accuracy achieved: 54.84%
Saving this model as: /content/drive/MyDrive/face_rec_data/face_recog_best_model.pth
Starting epoch 5


100%|██████████| 8/8 [00:28<00:00,  3.54s/it]


Starting epoch 6


100%|██████████| 8/8 [00:29<00:00,  3.67s/it]


Starting epoch 7


100%|██████████| 8/8 [00:42<00:00,  5.27s/it]


Starting epoch 8


100%|██████████| 8/8 [00:44<00:00,  5.56s/it]


Starting epoch 9


100%|██████████| 8/8 [00:38<00:00,  4.86s/it]


Starting epoch 10


100%|██████████| 8/8 [00:36<00:00,  4.58s/it]


In [9]:
if __name__ == '__main__':
    # Specify the operation mode:
    # 'train' = training with your train and validation data splits
    # 'eval'  = evaluation of the trained model with your test data split 
    mode = 'eval'

    # Path where you plan to save the best model during training
    my_best_model = "/content/drive/MyDrive/face_rec_data/face_recog_best_model.pth"
    print(my_best_model)



    # Set the device (GPU or CPU, depending on availability)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Currently using device: ", device)

    # Initialize the model and print out its configuration
    model = CNN(numChannels = 3, numClasses = 3)
    model.to(device)

    print("\n\nModel summary:\n\n")
    summary(model, input_size=(3, 32, 32))

    if mode == "train":

        print("\n\nTraining starts!\n\n")
        
        model.train()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        
        running_loss = .0
        best_acc = .0
        for epoch in range(epochs):
            print(f"Starting epoch {epoch + 1}")
            for idx, data in tqdm(enumerate(train_loader), total=len(train_loader)):

                # Get the inputs (data is a list of [inputs, labels])
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                loss = loss.detach().cpu().numpy()
                inputs = inputs.detach().cpu().numpy()
                labels = labels.detach().cpu().numpy()
                running_loss += loss

            # Evaluate the accuracy after each epoch
            acc = model.evaluate(model, val_loader, classes, device)
            if acc > best_acc:
                print(f"Better validation accuracy achieved: {acc * 100:.2f}%")
                best_acc = acc
                print(f"Saving this model as: {my_best_model}")
                torch.save(model.state_dict(), my_best_model)

    # And here we evaluate the trained model with the test data
    elif mode == "eval":

        print("\n\nValidating the trained model:")
        print(f"Loading checkpoint from {my_best_model}")
        model.load_state_dict(torch.load(my_best_model))
        acc = model.evaluate(model, test_loader, classes, device)
        print(f"Accuracy on the test (unknown) data: {acc * 100:.2f}%")

    else:
        print("'mode' argument should either be 'train' or 'eval'")

/content/drive/MyDrive/face_rec_data/face_recog_best_model.pth
Currently using device:  cpu


Model summary:


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 62, 62]          34,944
         LeakyReLU-2           [-1, 96, 62, 62]               0
         MaxPool2d-3           [-1, 96, 31, 31]               0
       BatchNorm2d-4           [-1, 96, 31, 31]             192
            Conv2d-5          [-1, 256, 27, 27]         614,656
         LeakyReLU-6          [-1, 256, 27, 27]               0
         MaxPool2d-7          [-1, 256, 13, 13]               0
       BatchNorm2d-8          [-1, 256, 13, 13]             512
            Conv2d-9          [-1, 384, 11, 11]         885,120
        LeakyReLU-10          [-1, 384, 11, 11]               0
           Conv2d-11            [-1, 384, 9, 9]       1,327,488
        LeakyReLU-12            [-1, 384, 9, 9]         