# Face Recognition

<a target="_blank" href="https://colab.research.google.com/github/harrow-css/2023-multithreading/blob/main/ai/week-6/face-recognition.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

Train a basic face recognition model using convolutional neural networks!

![ConvNet](https://miro.medium.com/max/3944/1*YejW73f36BGhNGhrtbz67g.png)

Copyright 2023 Team Enigma

In [None]:
# Copyright 2023 Team Enigma

#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Importing libraries

We will be using [TensorFlow](https://tensorflow.org/), a great Machine Learning library from Google!

**Spoiler Alert!**

Creating a machine learning project from scratch takes a lot of time and gets you stuck in nitty-gritty details instead of the more interesting high-level overview (which is what this lecture series is all about!). Most machine learning researchers nowadays use standard libraries like TensorFlow and PyTorch anyway, so there's nothing embarrassing about starting off with libraries.

In [None]:
import tensorflow as tf         # Open source machine learning library
import numpy as np              # Python's scientific computing library
import cv2                      # Open source computer vision platform

from google.colab import drive  # For accessing data on Google Drive
from google.colab import files  # File utilities for Google Colab
import pickle as pkl            # For saving Python objects (classes)

import os                       # File access utilities
import matplotlib.pyplot as plt # Plotting and display utilities

In [None]:
# Mounting Google Drive to access our shared data!
# It is mounted at /content/drive, the folder that PARAMS['ROOT_PATH'] points into.
drive.mount('/content/drive')

## Defining hyperparameters

In [None]:
# Settings shared by the data pipeline, the model definition and training.
PARAMS = {
    # Folder holding the cascade XML file and the `train` / `test` image folders
    'ROOT_PATH': '/content/drive/MyDrive/23AITHREAD_FaceRecDat',
    # Size every detected face is resized to; also used as the model input size
    'IMG_SIZE': (150, 150),
    # Number of epochs passed to `model.fit`
    'EPOCHS': 10
}

## Data Processing

In [None]:
class FaceExtractor(object):
    '''
    Using OpenCV's cascade classifiers to detect faces

    Arguments:
        xml_path:      path to the trained cascade description (XML file)
        scale_factor:  forwarded to `detectMultiScale` as `scaleFactor`
                       (how much the image is shrunk at each scale step)
        min_neighbors: forwarded as `minNeighbors` (how many neighbouring
                       candidates a detection needs in order to be kept)
        min_size:      forwarded as `minSize` (smallest object considered)

    Raises:
        IOError: if no classifier could be loaded from `xml_path`
    '''
    def __init__(self, xml_path, scale_factor=1.2, min_neighbors=5,
                 min_size=(30, 30)):
        self.classifier = cv2.CascadeClassifier(xml_path)

        # OpenCV does not raise when the file is missing or invalid; it just
        # returns an empty classifier that only fails later, inside
        # `detectMultiScale`, with a hard-to-read error. Fail early instead.
        if self.classifier.empty():
            raise IOError(
                'Could not load a cascade classifier from {!r}'.format(xml_path)
            )

        self.scale_factor = scale_factor
        self.min_neighbors = min_neighbors
        self.min_size = min_size

    def detect(self, image):
        '''
        Runs the classifier on an image and returns the detected
        rectangles, each given as (x, y, w, h)
        '''
        return self.classifier.detectMultiScale(
            image,
            scaleFactor  = self.scale_factor,
            minNeighbors = self.min_neighbors,
            minSize      = self.min_size,
            flags        = cv2.CASCADE_SCALE_IMAGE
        )

In [None]:
# Face detector used by `preproc`, built from the cascade XML stored under ROOT_PATH
extractor = FaceExtractor(os.path.join(
    PARAMS['ROOT_PATH'], 'haarcascade_frontalface_default.xml'
))

In [None]:
def crop(image, face_coords):
    '''
    Cuts every rectangle in `face_coords` out of `image`.

    Each rectangle is given as (x, y, w, h). The result is a list with
    one cropped pixel array per rectangle, in the same order.
    '''

    return [
        image[top: top + height, left: left + width]
        for (left, top, width, height) in face_coords
    ]

def resize(images, size):
    '''
    Resizes a list of images to a given size.

    `size` is passed to `cv2.resize` unchanged. Returns a new list
    holding the resized images in the original order.
    '''

    # Compare pixel counts rather than the raw tuples: tuple comparison is
    # lexicographic and says little about which image is actually larger.
    target_area = size[0] * size[1]

    resized_images = []

    for img in images:
        height, width = img.shape[:2]

        # INTER_AREA is the recommended filter for shrinking an image,
        # INTER_CUBIC for enlarging it.
        if height * width > target_area:
            interpolation = cv2.INTER_AREA
        else:
            interpolation = cv2.INTER_CUBIC

        resz = cv2.resize(img, size, interpolation=interpolation)
        resized_images.append(resz)

    return resized_images

def normalize(images):
    '''
    Centres pixel values around zero: 0..255 is mapped to -0.5..0.5

    (127.5 is subtracted and the result is divided by 255, so the output
    spans one unit in total. `denormalize` is the exact inverse.)
    '''

    return (images - 127.5) / 255

def denormalize(images):
    '''
    Inverse of `normalize`: scales the values by 255 and then adds 127.5,
    recovering the original pixel values
    '''

    rescaled = images * 255
    return rescaled + 127.5

In [None]:
def preproc(image, size):
    '''
    Full preprocessing pipeline for one cv2 (BGR) image:
    grayscale conversion -> face detection -> cropping ->
    resizing to `size` -> normalization.

    Returns a float32 array with one entry per detected face.
    '''

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect on the grayscale image and crop from that same image
    face_crops = crop(grayscale, extractor.detect(grayscale))

    batch = np.array(resize(face_crops, size), np.float32)

    return normalize(batch)

In [None]:
def get_dataset(path, size):
    '''
    Creates a dataset from a given directory,
    treating subdirectory as classes.

    It applies `preproc` on all images, and returns
    an array of images, an array of corresponding labels
    (classes represented as integers, stored as float32),
    a mapping from labels to class names, and the number of classes.

    Entries of `path` that are not directories are ignored, and files
    that OpenCV cannot read as images are skipped with a message.
    '''

    images = []
    labels = []
    labels_dict = {}

    # Only sub-directories are classes. A stray file in the root (for
    # example a hidden OS file) would otherwise crash `os.listdir` below
    # and shift the label ids.
    people = sorted(
        person for person in os.listdir(path)
        if os.path.isdir(os.path.join(path, person))
    )

    for i, person in enumerate(people):
        print('Processing images of {}'.format(person))

        labels_dict[i] = person
        person_dir = os.path.join(path, person)

        for image_name in os.listdir(person_dir):
            img = cv2.imread(os.path.join(person_dir, image_name), 1)

            # `cv2.imread` returns None instead of raising when the file
            # is not a readable image; `preproc` cannot handle that.
            if img is None:
                print('Skipping unreadable file {}'.format(image_name))
                continue

            faces = preproc(img, size)

            for f in faces:
                # Add the trailing channel axis the model expects
                f = tf.expand_dims(f, -1)
                if f.shape == (size[0], size[1], 1):
                    images.append(f)
                    labels.append(i)

    return (np.asarray(images, np.float32),
            np.asarray(labels, np.float32),
            labels_dict,
            len(people))

In [None]:
# Build the training set from the `train` folder under ROOT_PATH
train_data, train_labels, labels_dict, num_ppl = get_dataset(
    os.path.join(PARAMS['ROOT_PATH'], 'train'),
    PARAMS['IMG_SIZE'],
)

# Blank line first, then the shapes of what was loaded
print(f'\nTraining data shape: {train_data.shape}')
print(f'Training labels shape: {train_labels.shape}')

## Model Construction

### Architecture

Here, we will design a standard convolutional architecture. Refer to the diagram below if you're stuck!

![ConvDiagram](https://miro.medium.com/max/2510/1*vkQ0hXDaQv57sALXAJquxA.jpeg)


In [None]:
# The complete convolutional model architecture:
# two convolution + max-pooling stages, then three fully connected layers.

model = tf.keras.Sequential([

    # First convolution: 128 filters of 5x5 over a single-channel
    # (grayscale) image of size IMG_SIZE
    tf.keras.layers.Conv2D(
        filters     = 128,
        kernel_size = (5, 5),
        activation  = 'relu',
        input_shape = (PARAMS['IMG_SIZE'][0],
                       PARAMS['IMG_SIZE'][1],
                       1)
    ),

    # Max pooling over 2x2 windows
    tf.keras.layers.MaxPool2D(
        pool_size = (2, 2)
    ),

    # Second convolution: 64 filters of 3x3
    tf.keras.layers.Conv2D(
        filters     = 64,
        kernel_size = (3, 3),
        activation  = 'relu'
    ),

    # Max pooling over 2x2 windows
    tf.keras.layers.MaxPool2D(
        pool_size = (2, 2)
    ),

    # Flatten the feature maps into a vector for the dense layers
    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(128, activation='relu'),

    tf.keras.layers.Dense(64, activation='relu'),

    # Output layer: one softmax unit per person (class)
    tf.keras.layers.Dense(num_ppl, activation='softmax')

])

In [None]:
# Prints a summary of the model architecture

model.summary()

### Optimizers and Loss Functions

In [None]:
# Defines an optimizer and a loss function for the model.
# The "sparse" cross-entropy takes the labels as class ids (as produced
# by `get_dataset`) rather than as one-hot vectors.
model.compile(optimizer = 'adam',
              loss      = 'sparse_categorical_crossentropy',
              metrics   = ['accuracy'])

### Training!

In [None]:
# Trains the model on our data for PARAMS['EPOCHS'] epochs
model.fit(train_data, train_labels, epochs = PARAMS['EPOCHS'])

In [None]:
# Saves the trained model and our predefined hyperparameters
model.save('face.h5')

# PARAMS and the label -> name mapping are needed again for standalone
# prediction. `with` guarantees the file is closed even if pickling fails.
with open('face.aux', 'wb') as file:
    pkl.dump((PARAMS, labels_dict), file)

In [None]:
# Downloads files for standalone prediction:
# `face.aux` (PARAMS + label names) and `face.h5` (the saved model)
files.download('face.aux')
files.download('face.h5')

## Testing

In [None]:
# Create test dataset from the `test` folder under ROOT_PATH
test_data, test_labels, test_labels_dict, _ = get_dataset(
    os.path.join(PARAMS['ROOT_PATH'], 'test'),
    PARAMS['IMG_SIZE'],
)

# Blank line first, then the shapes of what was loaded
print(f'\nTesting data shape: {test_data.shape}')
print(f'Testing labels shape: {test_labels.shape}')

# Train and test folders must map label ids to the same names
assert test_labels_dict == labels_dict

In [None]:
# Evaluate model accuracy on unseen test data
# (the return value is not needed here, so it is discarded)
_ = model.evaluate(test_data, test_labels)

In [None]:
# Visualize model performance on the test dataset
for sample_img, sample_label in zip(test_data, test_labels):

    # Undo the normalization so the pixels are back in display range
    img_to_show = denormalize(sample_img)

    plt.imshow(tf.squeeze(img_to_show), cmap='gray')
    plt.grid(False)
    plt.show()

    # Add a leading batch axis of size 1 before predicting
    predictions = model.predict(tf.expand_dims(sample_img, 0))

    # `labels_dict` is keyed by plain Python ints, while the labels are
    # stored as float32 and argmax yields a NumPy integer. Convert both
    # explicitly instead of relying on cross-type hashing in the lookup.
    pred_id = int(np.argmax(predictions[0]))
    true_id = int(sample_label)
    print(pred_id)

    print('Predicted: ' + labels_dict[pred_id])
    print('Label: ' + labels_dict[true_id] + '\n\n')

## Standalone Webcam Prediction Code

Once you've trained your model, how cool would it be to keep it forever and show it off? Well, you're in for a treat, because this tool allows you to use your **webcam** to make predictions.

Make sure you have your `face.aux` and `face.h5` available in your `/content` directory (the default directory when Colab starts up), and you're all set to go!

*When you just want to make predictions on your webcam, you can simply run this cell on its own without anything before it (provided `face.aux` and `face.h5` are uploaded).*


In [None]:
import tensorflow as tf
import numpy as np
import cv2
from IPython.display import display, Javascript, Image, clear_output
from google.colab.output import eval_js
from base64 import b64decode
from google.colab import drive
from google.colab import files
import pickle as pkl
import os
import time
import matplotlib.pyplot as plt

# Drive is still needed here: the cascade XML is read from PARAMS['ROOT_PATH']
drive.mount('/content/drive')

# Restore the hyperparameters and the label -> name mapping.
# NOTE: unpickling can execute arbitrary code, so only load a `face.aux`
# file that you produced yourself.
with open('face.aux', 'rb') as aux_file:
    PARAMS, labels_dict = pkl.load(aux_file)

model = tf.keras.models.load_model('face.h5')

class FaceExtractor(object):
    '''
    Detects faces with one of OpenCV's cascade classifiers.

    Arguments:
        xml_path:      path to the trained cascade description (XML file)
        scale_factor:  forwarded to `detectMultiScale` as `scaleFactor`
        min_neighbors: forwarded to `detectMultiScale` as `minNeighbors`
        min_size:      forwarded to `detectMultiScale` as `minSize`

    Raises:
        IOError: if no classifier could be loaded from `xml_path`
    '''
    def __init__(self, xml_path, scale_factor=1.2, min_neighbors=5,
                 min_size=(30, 30)):
        self.classifier = cv2.CascadeClassifier(xml_path)

        # A missing or invalid file yields an empty classifier rather than
        # an exception, so check for that here where the cause is obvious.
        if self.classifier.empty():
            raise IOError(
                'Could not load a cascade classifier from {!r}'.format(xml_path)
            )

        self.scale_factor = scale_factor
        self.min_neighbors = min_neighbors
        self.min_size = min_size

    def detect(self, image):
        '''
        Returns the rectangles found in `image`, each as (x, y, w, h)
        '''
        return self.classifier.detectMultiScale(
            image,
            scaleFactor  = self.scale_factor,
            minNeighbors = self.min_neighbors,
            minSize      = self.min_size,
            flags        = cv2.CASCADE_SCALE_IMAGE
        )

# Face detector used by `preproc`, built from the cascade XML stored under ROOT_PATH
extractor = FaceExtractor(os.path.join(
    PARAMS['ROOT_PATH'], 'haarcascade_frontalface_default.xml'
))

def crop(image, face_coords):
    '''
    Returns one cropped pixel array per (x, y, w, h) rectangle in
    `face_coords`, in the same order.
    '''
    return [
        image[top: top + height, left: left + width]
        for (left, top, width, height) in face_coords
    ]

def resize(images, size):
    '''
    Resizes a list of images to a given size.

    `size` is passed to `cv2.resize` unchanged. Returns a new list
    holding the resized images in the original order.
    '''
    # Compare pixel counts; comparing the shape tuple with `size` directly
    # would be a lexicographic comparison.
    target_area = size[0] * size[1]

    resized_images = []

    for img in images:
        height, width = img.shape[:2]

        # INTER_AREA is the recommended filter for shrinking an image,
        # INTER_CUBIC for enlarging it.
        if height * width > target_area:
            interpolation = cv2.INTER_AREA
        else:
            interpolation = cv2.INTER_CUBIC

        resz = cv2.resize(img, size, interpolation=interpolation)
        resized_images.append(resz)

    return resized_images

def normalize(images):
    '''
    Subtracts 127.5 and divides by 255, so that pixel values
    0..255 end up in -0.5..0.5
    '''
    shifted = images - 127.5
    return shifted / 255

def denormalize(images):
    '''
    Inverse of `normalize`: multiplies by 255 and then adds 127.5
    '''
    rescaled = images * 255
    return rescaled + 127.5

def preproc(image, size):
    '''
    Turns a cv2 (BGR) image into grayscale, detects and crops the faces,
    resizes them to `size` and normalizes them.

    Returns a float32 array with one entry per detected face.
    '''
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    face_crops = crop(grayscale, extractor.detect(grayscale))
    batch = np.array(resize(face_crops, size), np.float32)

    return normalize(batch)

def take_photo(filename='photo.jpg', quality=0.8):
    '''
    Captures one frame from the browser's webcam and saves it to a file.

    A JavaScript snippet is displayed that shows a live video preview
    with a "Take photo" button. Once the button is clicked, the current
    frame is drawn to a canvas and handed back as a JPEG data URL, whose
    decoded bytes are written to `filename`.

    Arguments:
        filename: path the captured image is written to
        quality:  passed to `canvas.toDataURL('image/jpeg', quality)`

    Returns `filename`.
    '''
    js = Javascript('''
        async function takePhoto(quality) {
        const div = document.createElement('div');
        const capture = document.createElement('button');
        capture.textContent = 'Take photo';
        div.appendChild(capture);

        const video = document.createElement('video');
        video.style.display = 'block';
        const stream = await navigator.mediaDevices.getUserMedia({video: true});

        document.body.appendChild(div);
        div.appendChild(video);
        video.srcObject = stream;
        await video.play();

        google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

        await new Promise((resolve) => capture.onclick = resolve);

        const canvas = document.createElement('canvas');
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        canvas.getContext('2d').drawImage(video, 0, 0);
        stream.getVideoTracks()[0].stop();
        div.remove();
        return canvas.toDataURL('image/jpeg', quality);
        }
        ''')
    # Make `takePhoto` available in the output cell, then call it
    display(js)
    data = eval_js('takePhoto({})'.format(quality))
    # Keep only the part after the comma (the base64 payload) and decode it
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename

# Take a photo, predict who is in it, and repeat until the user declines
while True:
    clear_output()
    filename = take_photo()

    img = cv2.imread(filename, 1)
    faces = preproc(img, PARAMS['IMG_SIZE'])

    if len(faces) == 0:
        # `preproc` returns an empty array when the detector finds nothing;
        # indexing it would raise IndexError and end the session.
        print('No face detected in the photo.')
    else:
        # Only the first detected face is used
        img = faces[0]
        img_to_show = denormalize(img)

        plt.imshow(tf.squeeze(img_to_show), cmap='gray')
        plt.grid(False)
        plt.show()

        # Shape the single face as a batch of one with one channel
        img = tf.reshape(img, (1, img.shape[0], img.shape[1], 1))
        predictions = model.predict(img)
        pred_id = np.argmax(predictions[0])
        print('Prediction: ' + labels_dict[pred_id])

    # The prompt shows [Y] as the default, so an empty answer means "yes"
    inp = input('\nAgain? [Y]/n \n')
    if inp.strip().lower() not in ('', 'y'):
        break