# Face and Gestures Analysis: Face Identification Challenge

Students:
* Manuel Parma (255570)
* Àlex Montoya (242873)
* Marina Riba (229240)

In [1]:
import os
import numpy as np
# from imageio import imread
from imageio.v2 import imread
from scipy.io import loadmat
import random
import time
import itertools

Functions

In [2]:
def CHALL_AGC_ComputeRecognScores(auto_ids, true_ids):
    """Compute the face recognition score (F-beta with beta = 1).

    INPUTS
      - auto_ids: The results of the automatic face recognition
        algorithm, one integer id per image (-1 means "no identity").

      - true_ids: The ground truth ids, one integer per image
        (-1 means the image shows none of the known identities).

    OUTPUT
      - FR_score: The final recognition score. 0.0 is returned when there
        are no true positives, false positives or false negatives at all
        (e.g. empty inputs), where the ratio would otherwise be 0/0.

    RAISES
      - ValueError: if the two inputs do not have the same length.

    --------------------------------------------------------------------
    AGC Challenge
    Universitat Pompeu Fabra
    """
    # The original `assert ('...')` asserted a non-empty string, which is
    # always true, so mismatched inputs were never rejected.
    if len(auto_ids) != len(true_ids):
        raise ValueError('Inputs must be of the same len')

    f_beta = 1

    nTP = 0  # correct identity predicted for a known identity
    nFP = 0  # an identity was predicted, but it is not the true one
    nFN = 0  # nothing predicted although a known identity is present
    for auto_id, true_id in zip(auto_ids, true_ids):
        if auto_id == true_id:
            # Agreeing on -1 is a true negative and does not enter the score.
            if true_id != -1:
                nTP += 1
        elif auto_id == -1:
            # auto_id != true_id here, hence true_id is a real identity.
            nFN += 1
        else:
            nFP += 1

    denominator = (1 + f_beta ** 2) * nTP + f_beta ** 2 * nFN + nFP
    if denominator == 0:
        return 0.0

    FR_score = (1 + f_beta ** 2) * nTP / denominator

    return FR_score

In [3]:
import cv2 as cv
import torch
import torch.nn as nn
import torchvision.transforms as tf
from PIL import Image
import pickle


In [4]:
# Face detection setup: load the frontal-face Haar cascade XML that is looked
# up in the "data" folder next to the installed cv2 package.
classifier_file = os.path.join(os.path.dirname(cv.__file__), "data", "haarcascade_frontalface_alt.xml")
face_cascade = cv.CascadeClassifier(classifier_file)

# Fail early with a clear message if the cascade could not be loaded; otherwise
# the problem would only surface later, inside the detection call.
if face_cascade.empty():
    raise RuntimeError(f"Could not load Haar cascade from {classifier_file}")

def face_detection(img, scaleFactor=1.1, minNeighbors=6, minSize=[100, 100]):
    """
    Detect faces with the Viola-Jones cascade and crop the largest one.

    :param img: Image array to search; a 3-dimensional array is converted to
        grayscale first, anything else is passed to the detector unchanged.
    :param scaleFactor: Forwarded to ``face_cascade.detectMultiScale``.
    :param minNeighbors: Forwarded to ``face_cascade.detectMultiScale``.
    :param minSize: Forwarded to ``face_cascade.detectMultiScale``.
    :return: The region of ``img`` inside the detection with the largest
        area (w * h), or None if no face is detected.
    """
    # NOTE(review): the notebook reads images with imageio (RGB channel order)
    # while the conversion code below is the BGR one - confirm whether
    # COLOR_RGB2GRAY was intended.
    has_channels = len(img.shape) == 3
    gray_image = cv.cvtColor(img, cv.COLOR_BGR2GRAY) if has_channels else img

    # we run multiple tests for the three parameters and got the best performance
    # (approx. 85% accuracy) with these values.
    detections = face_cascade.detectMultiScale(
        gray_image,
        scaleFactor=scaleFactor,
        minNeighbors=minNeighbors,
        minSize=minSize,
    )

    if len(detections) == 0:
        return None

    # Each detection is (x, y, w, h); keep the one with the biggest area
    # (the first one wins on ties).
    x, y, w, h = max(detections, key=lambda rect: rect[2] * rect[3])
    return img[y:y+h, x:x+w]

In [5]:
class VGGSimple5(nn.Module):
    """Small VGG-style CNN: two stages of three 3x3 convolutions, then two
    fully connected layers.

    Each stage ends with a 5x5 / stride-5 max-pool followed by dropout. The
    first fully connected layer expects 8*8*128 features, so the input must
    shrink to 8x8 after the two pools (e.g. 224x224 -> 44x44 -> 8x8).

    :param num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # All convolutions share the same "same-size" 3x3 configuration.
        conv_opts = dict(kernel_size=3, stride=1, padding=1)

        # Stage 1: 3 -> 64 -> 64 -> 64 channels
        self.conv11 = nn.Conv2d(3, 64, **conv_opts)
        self.conv12 = nn.Conv2d(64, 64, **conv_opts)
        self.conv13 = nn.Conv2d(64, 64, **conv_opts)

        # Stage 2: 64 -> 64 -> 128 -> 128 channels
        self.conv20 = nn.Conv2d(64, 64, **conv_opts)
        self.conv21 = nn.Conv2d(64, 128, **conv_opts)
        self.conv22 = nn.Conv2d(128, 128, **conv_opts)

        # Shared by both stages (no learnable parameters).
        self.maxpool = nn.MaxPool2d(kernel_size=5, stride=5)

        # Classifier head
        self.fc1 = nn.Linear(8 * 8 * 128, 80)
        self.fc2 = nn.Linear(80, num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.15)

    def _conv_stage(self, x, convs):
        """Apply conv + ReLU for each layer in ``convs``, then max-pool and dropout."""
        for conv in convs:
            x = self.relu(conv(x))
        return self.dropout(self.maxpool(x))

    def forward(self, x):
        """Return the raw class logits (no softmax) for a batch of images."""
        features = self._conv_stage(x, (self.conv11, self.conv12, self.conv13))
        features = self._conv_stage(features, (self.conv20, self.conv21, self.conv22))

        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)

        # There is no non-linearity between the two linear layers, only dropout.
        hidden = self.dropout(self.fc1(flat))
        return self.fc2(hidden)

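Our face recognition function below first runs our face detection algorithm (Viola-Jones) to decide whether a face is present. If no face is found, it returns -1. Otherwise, the cropped face is passed through our trained CNN to predict the identity; predictions whose highest softmax probability is below a threshold are also rejected and reported as -1.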

In [6]:
# --- Inference setup: trained network, preprocessing and label mapping ---
best_model_path = './best_model/'
model_weights = 'VGGSimple5_95_acc.ckpt'

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# 81 output classes must match the checkpoint being loaded
# (presumably the 80 identities plus one extra class - TODO confirm).
our_model = VGGSimple5(num_classes=81)

# Weights are first mapped to the CPU so the checkpoint loads on any machine,
# then the whole model is moved to the selected device.
our_model.load_state_dict(
    torch.load(f"{best_model_path}{model_weights}", map_location=torch.device('cpu'))
)
our_model.eval()  # inference mode: disables dropout
our_model.to(device)

print(device)

# transformations for normalizing input: fixed 224x224 size, tensor
# conversion, then per-channel normalization with the given mean/std
tr = tf.Compose(
    [
        tf.Resize((224, 224)),
        tf.ToTensor(),
        tf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# labels dictionary (pickle executes code on load: only use trusted files)
with open(f"{best_model_path}label_dicts.pk", "rb") as f:
    labels_dict = pickle.load(f)

labels_to_number = labels_dict["labels_to_number"]
number_to_labels = labels_dict["number_to_labels"]

# Predictions whose top softmax probability is below this value are rejected.
softmax = nn.Softmax(dim=1)
SOFTMAX_THRESHOLD = 0.8

def my_face_recognition_function(A):
    """Predict the identity shown in image ``A``.

    The image is first cropped to the largest detected face; the crop is then
    preprocessed with ``tr`` and classified by ``our_model``.

    :param A: Image array as accepted by ``face_detection``.
    :return: The integer label looked up in ``number_to_labels`` for the most
        probable class, or -1 when no face is detected or when the highest
        softmax probability is below ``SOFTMAX_THRESHOLD``.
    """
    face = face_detection(A, scaleFactor=1.1, minNeighbors=6, minSize=[50, 50])

    if face is None:
        return -1

    # Apply the preprocessing and add the batch dimension expected by the model
    face_image = Image.fromarray(face).convert('RGB')
    batch = tr(face_image).unsqueeze(0).to(device)

    # Pure inference: disabling gradient tracking avoids building an autograd
    # graph for every image (same outputs, less memory and time).
    with torch.no_grad():
        predicted_values = softmax(our_model(batch)).tolist()[0]

    # filter cases where the predicted class has a low probability
    max_softmax = max(predicted_values)
    if max_softmax < SOFTMAX_THRESHOLD:
        return -1

    predicted_label = int(number_to_labels[predicted_values.index(max_softmax)])

    return predicted_label


cuda:0


In [7]:
# count the parameters of the model
def count_parameters(model):
    """Return the total number of elements in the parameters of ``model``
    that have ``requires_grad`` set (i.e. the trainable ones)."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the size of the loaded recognition model.
print(f"Model parameters: {count_parameters(our_model)}")

Model parameters: 996017


In [8]:
# predict an example
# Sanity check: run the full pipeline (detection + recognition) on a single
# training image and print the returned id (-1 means no identity assigned).
img = imread('./TRAINING_ORIGINAL/image_A0032.jpg')
print(my_face_recognition_function(img))

7


 Basic script for Face Recognition Challenge
 --------------------------------------------------------------------
AGC Challenge
Universitat Pompeu Fabra

In [9]:
# Load challenge Training data: read the .mat file and squeeze the structure
# stored under the 'AGC_Challenge3_TRAINING' key.
AGC_Challenge3_TRAINING = np.squeeze(
    loadmat("AGC_Challenge3_Training.mat")['AGC_Challenge3_TRAINING']
)

# Each field holds one nested entry per image; flatten them into plain lists.
imageName = [name for entry in AGC_Challenge3_TRAINING['imageName'] for name in entry]

ids = np.concatenate(list(AGC_Challenge3_TRAINING['id'])).ravel().tolist()

faceBox = [box for entry in AGC_Challenge3_TRAINING['faceBox'] for box in entry]

# Folder containing the training images referenced by imageName
imgPath = "./TRAINING_ORIGINAL/"

In [10]:
# Initialize results structure
AutoRecognSTR = []

# Initialize timer accumulator (seconds spent inside the recognition function)
total_time = 0

# Load your FRModel
# my_FRmodel = " "

for idx, im in enumerate(imageName):

    if (idx + 1) % 100 == 0:
        print(f"Seen {idx + 1} images")

    A = imread(imgPath + im)

    try:
        # Timer on (perf_counter is the clock intended for measuring intervals)
        ti = time.perf_counter()
        ###############################################################
        # Your face recognition function goes here. The challenge template
        # expects it to accept the input image A and the recognition model;
        # here the model is a module-level object, so only A is passed.

        # It must return a single integer number as output, which can be:

        # a) A number between 1 and 80 (representing one of the identities in the training set)
        # b) A "-1" indicating that none of the 80 users is present in the input image

        autom_id = my_face_recognition_function(A)

        # Timer off: only successful calls are accumulated
        tt = time.perf_counter() - ti
        total_time = total_time + tt
    except Exception as err:
        # If the face recognition function fails, it is assumed that no user
        # was detected for this input image. The failure is reported instead
        # of being silently replaced by a random id (which was unseeded, hence
        # not reproducible, and could be 0, which is not a valid identity).
        # Catching Exception rather than everything lets Ctrl-C stop the loop.
        print(f"Recognition failed for {im}: {err!r}")
        autom_id = -1

    AutoRecognSTR.append(autom_id)

FR_score = CHALL_AGC_ComputeRecognScores(AutoRecognSTR, ids)
# Report the whole duration as minutes + seconds; taking the remainder modulo
# one hour first would silently drop full hours from the reported time.
minutes, seconds = divmod(total_time, 60)
print('F1-score: %.2f, Total time: %2d m %.2f s' % (100 * FR_score, int(minutes), seconds))

Seen 100 images
Seen 200 images
Seen 300 images
Seen 400 images
Seen 500 images
Seen 600 images
Seen 700 images
Seen 800 images
Seen 900 images
Seen 1000 images
Seen 1100 images
Seen 1200 images
F1-score: 83.64, Total time:  0 m 48.61 s


In [11]:
# Break the recognition errors down by type to see which cases to improve:
#   - false positive: an identity was predicted but the ground truth is -1
#   - false negative: -1 was predicted but the ground truth is an identity
#   - wrong class:    both are identities, but they differ
np.set_printoptions(suppress=True)
false_positives = false_negatives = wrong_class = 0
for real_id, pred_id in zip(ids, AutoRecognSTR):
    if real_id == pred_id:
        continue  # correct prediction, nothing to count

    if real_id == -1:
        false_positives += 1
    elif pred_id == -1:
        false_negatives += 1
    else:
        wrong_class += 1

print("False positives:", false_positives)
print("False negatives:", false_negatives)
print("Wrong class:", wrong_class)
print("Total wrong:", false_positives + false_negatives + wrong_class)

False positives: 32
False negatives: 88
Wrong class: 24
Total wrong: 144
