# Character Impact Project (Part 6) - Facial Recognition
## OpenCV Facial Recognition
### Alden Chico

---

## Preparing Data for the Facial Recognition Model

In [1]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time

from glob import glob
from collections import defaultdict

In [2]:
def create_img(img_path):
    """Load an image from the project directory into the program.

    Parameters
    ----------
    img_path : path of the image file to read.

    Returns
    -------
    Whatever ``cv2.imread`` produces for ``img_path``.
    """
    return cv2.imread(img_path)

In [3]:
def associated_character(img_path, split_on):
    """Return the component of ``img_path`` that names the character.

    Edit this function to customize how the character name is retrieved
    from an image path.

    Parameters
    ----------
    img_path : path of an image file.
    split_on : index of the path component holding the character name
        (negative values count from the end, as with any list index).

    Returns
    -------
    The path component found at position ``split_on``.

    Raises
    ------
    IndexError if the path has no component at ``split_on``.
    """
    # Treat backslashes as separators too, so that paths using Windows-style
    # or mixed separators are split the same way as forward-slash paths.
    components = img_path.replace('\\', '/').split('/')
    return components[split_on]

In [4]:
# Default LBP cascade used for face detection
_DEFAULT_CASCADE_PATH = 'reference/6-Facial_Recognition/lbpcascade_frontalface.xml'

# Loaded cascade classifiers keyed by XML path, so the file is parsed once
# instead of once per image
_CASCADE_CACHE = {}


def _load_cascade(cascade_path):
    """Return the cascade classifier for ``cascade_path``, loading it on first use."""
    classifier = _CASCADE_CACHE.get(cascade_path)
    if classifier is None:
        classifier = cv2.CascadeClassifier(cascade_path)
        # Only remember classifiers that actually loaded, so a path that was
        # wrong at first can be retried once the file is in place
        if not classifier.empty():
            _CASCADE_CACHE[cascade_path] = classifier
    return classifier


def detect_faces(img, cascade_path=_DEFAULT_CASCADE_PATH):
    """Detect faces in ``img`` with an LBP cascade classifier.

    Each detected face is cropped out of the grayscale version of the image.

    Parameters
    ----------
    img : image array; 3-channel (BGR) and 4-channel (BGRA) images are
        converted to grayscale first, anything else is used as-is.
    cascade_path : path of the cascade XML file to detect with.

    Returns
    -------
    (gray_faces_list, rect_coord_list)
        gray_faces_list : list of grayscale crops, one per detected face.
        rect_coord_list : list of the matching ``(x, y, w, h)`` rectangles.
        Both are ``None`` when no face is detected.
    """
    # Convert img to grayscale if it's in color. Checking ndim (rather than
    # only the last axis) keeps a 3-pixel-wide grayscale image from being
    # mistaken for a color one.
    if img.ndim == 3 and img.shape[2] == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    elif img.ndim == 3 and img.shape[2] == 4:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    else:
        gray_img = img

    # Run facial detection on the image
    classifier = _load_cascade(cascade_path)
    faces = classifier.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=5)
    if len(faces) == 0:
        return None, None

    # Rows of the image array run along y (height) and columns along x
    # (width), so the crop is [y:y+h, x:x+w]
    gray_faces_list = [gray_img[y:y + h, x:x + w] for (x, y, w, h) in faces]
    rect_coord_list = list(faces)

    # Return grayscale face images and coordinates where the faces were found
    return gray_faces_list, rect_coord_list

In [5]:
def prepare_data(img_path, character_split):
    """Load the image data from the project directory into the program.

    Parameters
    ----------
    img_path : glob pattern matching the image files to load.
    character_split : index of the path component that holds the character
        name (passed on to ``associated_character``).

    Returns
    -------
    (character_dict, original_list, label_list)
        character_dict : dictionary mapping each integer label to its
            character name, numbered in order of first appearance.
        original_list : list of the loaded images.
        label_list : integer NumPy array with the label of each image in
            ``original_list``, in the same order.

    Images whose character component is ``'test'`` are left out: ``'test'``
    is never given a label, so there is no correct label to pair them with.
    """
    character_dict = {}
    label_by_character = {}
    original_list = []
    labels = []

    for path in glob(img_path):
        character = associated_character(path, character_split)

        # 'test' images carry no character label; skipping them avoids
        # pairing them with whatever label the previous image had
        if character == 'test':
            continue

        # Give the character the next free label the first time it is seen
        if character not in label_by_character:
            new_label = len(label_by_character)
            label_by_character[character] = new_label
            character_dict[new_label] = character

        original_list.append(create_img(path))
        labels.append(label_by_character[character])

    label_list = np.array(labels, dtype=int)
    return character_dict, original_list, label_list

---

## Creating a Custom Predict Function

In [6]:
def predict(img, face_recognizer):
    """Detect the faces in ``img`` and predict a label for each of them.

    Parameters
    ----------
    img : image to search for faces (handed to ``detect_faces``).
    face_recognizer : object whose ``predict(face)`` result has the
        predicted label as its first element.

    Returns
    -------
    (pred_label_list, rect_list)
        pred_label_list : one predicted label per detected face, or ``[-1]``
            when no face was detected.
        rect_list : the rectangle list returned by ``detect_faces`` (``None``
            when no face was detected).
    """
    face_list, rect_list = detect_faces(img)

    # No face found: flag the image with the sentinel label -1
    if face_list is None:
        return [-1], rect_list

    # Keep only the label part of each prediction
    pred_label_list = [face_recognizer.predict(face)[0]
                       for face, _rect in zip(face_list, rect_list)]
    return pred_label_list, rect_list

In [7]:
def draw_character_labels(img, character_dict, label_list, rect_list):
    """Draw the predicted character names on a copy of ``img``.

    Parameters
    ----------
    img : image to annotate; it is copied, not modified.
    character_dict : dictionary mapping labels to character names.
    label_list : predicted labels; ``-1`` marks "no face detected".
    rect_list : ``(x, y, w, h)`` rectangle for each label, or ``None`` when
        there are no rectangles (the no-face result of ``predict``).

    Returns
    -------
    The annotated copy: a green rectangle and name for every real label, and
    a red 'No Face Detected' caption for every ``-1`` label.
    """
    img_copy = img.copy()
    green = (0, 255, 0)
    red = (0, 0, 255)

    # With no rectangles at all, pad with placeholders so that zip() still
    # yields the -1 label(s) instead of failing on None
    if rect_list is None:
        rect_list = [None] * len(label_list)

    for label, rect in zip(label_list, rect_list):
        if label == -1:
            cv2.putText(img_copy, 'No Face Detected', (20, 50),
                        cv2.FONT_HERSHEY_PLAIN, fontScale=1.5, color=red, thickness=2)
            continue

        (x, y, w, h) = rect
        character = character_dict[label]
        cv2.rectangle(img_copy, (x, y), (x + w, y + h), green, 2)
        # The name is written slightly above the top-left corner of the box
        cv2.putText(img_copy, character, (x, y - 15),
                    cv2.FONT_HERSHEY_PLAIN, fontScale=1.5, color=green, thickness=2)
    return img_copy

---

## Preparing Utility Functions for Cross Validation

In [8]:
def shuffle_original(original_list, label_list, random_state=None):
    """Shuffle images and their labels with one shared random permutation.

    Parameters
    ----------
    original_list : sequence of images.
    label_list : sequence of labels, aligned with ``original_list``.
    random_state : seed given to ``np.random.seed`` before shuffling.

    Returns
    -------
    (shuffled images as a list, shuffled labels as an integer NumPy array)
    """
    # Seeding NumPy's global generator fixes the permutation drawn below
    np.random.seed(random_state)
    order = np.arange(len(original_list))
    np.random.shuffle(order)

    # Apply the same ordering to both sequences so pairs stay aligned
    shuffled_images = [original_list[position] for position in order]
    shuffled_labels = np.array([label_list[position] for position in order]).astype(int)
    return shuffled_images, shuffled_labels

In [9]:
def generate_confusion_matrix(character_dict, test_img_list,
                              test_label_list, face_recognizer):
    """Predict every test image and tally the results in a confusion matrix.

    The confusion matrix can be used to generate f1-scores to gauge the
    validity of a given model.

    Parameters
    ----------
    character_dict : dictionary mapping labels to character names.
    test_img_list : test images. 3-channel color images go through
        ``predict`` (face detection first); any other image is treated as an
        already-cropped grayscale face and given to the recognizer directly.
    test_label_list : true label of each test image, in the same order.
    face_recognizer : recognizer whose ``predict`` result has the predicted
        label as its first element.

    Returns
    -------
    (confusion_matrix, pred_img_df)
        Both are DataFrames with predicted characters as rows and actual
        characters as columns. ``confusion_matrix`` holds integer counts;
        each cell of ``pred_img_df`` holds the list of images that fell into
        it (color images with the prediction drawn on them, grayscale faces
        unchanged).

    Color images in which no face is detected cannot be assigned to any
    predicted character and are left out of both results. For color images
    with several faces only the first prediction is counted.
    """
    character_list = list(character_dict.values())
    position = {name: idx for idx, name in enumerate(character_list)}
    size = len(character_list)

    # Tally in plain arrays and build the DataFrames once at the end; the
    # object array gives every cell its own independent list
    counts = np.zeros((size, size), dtype=int)
    image_cells = np.empty((size, size), dtype=object)
    for row in range(size):
        for col in range(size):
            image_cells[row, col] = []

    for test_img, true_label in zip(test_img_list, test_label_list):
        is_color = test_img.ndim == 3 and test_img.shape[-1] == 3

        if is_color:
            pred_label_list, pred_rect_list = predict(test_img, face_recognizer)
            # No face detected: there is no prediction to record
            if pred_rect_list is None:
                continue
            pred_label = pred_label_list[0]
            shown_img = draw_character_labels(test_img, character_dict,
                                              pred_label_list, pred_rect_list)
        else:
            # Predict on the grayscale face provided
            pred_label = face_recognizer.predict(test_img)[0]
            shown_img = test_img

        # Populate both tables based on the prediction
        actual_character = character_dict[int(true_label)]
        pred_character = character_dict[pred_label]
        row, col = position[pred_character], position[actual_character]
        counts[row, col] += 1
        image_cells[row, col].append(shown_img)

    confusion_matrix = pd.DataFrame(counts, index=character_list, columns=character_list)
    pred_img_df = pd.DataFrame(image_cells, index=character_list, columns=character_list)
    return confusion_matrix, pred_img_df

In [10]:
def generate_f1_scores(confusion_matrix, character_dict):
    """Compute one F1 score per character from a confusion matrix.

    The F1 score is the harmonic mean of precision and recall, summarizing
    both metrics in a single number.

    Parameters
    ----------
    confusion_matrix : DataFrame indexed by character name on both axes;
        a character's row total is used for precision and its column total
        for recall.
    character_dict : dictionary whose values are the character names.

    Returns
    -------
    List of F1 scores, in the order of ``character_dict``. A character whose
    diagonal count is zero gets a score of 0.
    """
    row_totals = confusion_matrix.sum(axis=1)
    column_totals = confusion_matrix.sum(axis=0)

    def score_for(name):
        hits = confusion_matrix.loc[name, name]
        # Without a single correct prediction all three metrics are zero
        if hits == 0:
            return 0
        precision = hits / row_totals[name]
        recall = hits / column_totals[name]
        return 2 * (precision * recall) / (precision + recall)

    return [score_for(name) for name in character_dict.values()]

In [11]:
def cross_validate(original_list, label_list, face_recognizer, character_dict, folds=5, random_state=None):
    """Evaluate ``face_recognizer`` with per-character k-fold cross validation.

    The images are shuffled, and those in which a face is detected are
    grouped by label. For every fold, each character contributes an equally
    sized slice of its images as test data, while the faces cropped from its
    remaining images are used for training. The recognizer is trained on the
    faces and then scored on the held-out original images.

    Parameters
    ----------
    original_list : list of original images.
    label_list : label of each image, aligned with ``original_list``.
    face_recognizer : recognizer offering ``train(faces, labels)`` and
        ``predict(face)``.
    character_dict : dictionary mapping labels to character names.
    folds : number of folds.
    random_state : seed used for shuffling.

    Returns
    -------
    (pred_img_df_list, confusion_matrix_list, f1_average_list, cv_average_f1)
        The first three hold one entry per fold: the DataFrame of predicted
        images, the confusion matrix and the mean F1 score over characters.
        ``cv_average_f1`` is the mean of ``f1_average_list``.
    """
    data_dict = defaultdict(list)
    face_dict = defaultdict(list)

    f1_average_list = []
    confusion_matrix_list = []
    pred_img_df_list = []

    # Shuffle the original images together with their labels
    cv_img_list, cv_label_list = shuffle_original(original_list, label_list, random_state)

    # Group the images with a detectable face, and the first face found in
    # each of them, by label; both dicts stay index-aligned per label
    for cv_img, cv_label in zip(cv_img_list, cv_label_list):
        faces, _ = detect_faces(cv_img)
        if faces is not None:
            data_dict[cv_label].append(cv_img)
            face_dict[cv_label].append(faces[0])

    for fold in range(folds):

        test_img_list = []
        test_labels = []
        train_face_list = []
        train_labels = []

        for label, img_list in data_dict.items():
            face_list = face_dict[label]

            # Integer division avoids float rounding shrinking the fold
            # (e.g. int(1/49 * 49) == 0). Images beyond folds * fold_size
            # are never tested and always remain in the training data.
            fold_size = len(img_list) // folds
            start, stop = fold * fold_size, (fold + 1) * fold_size

            test_split = img_list[start:stop]
            test_img_list.extend(test_split)
            test_labels.extend([label] * len(test_split))

            train_split = face_list[:start] + face_list[stop:]
            train_face_list.extend(train_split)
            train_labels.extend([label] * len(train_split))

        test_label_list = np.array(test_labels, dtype=int)
        train_label_list = np.array(train_labels, dtype=int)
        face_recognizer.train(train_face_list, train_label_list)

        # Generate the confusion matrix by predicting test images
        confusion_matrix, pred_img_df = generate_confusion_matrix(character_dict, test_img_list,
                                                                  test_label_list, face_recognizer)
        confusion_matrix_list.append(confusion_matrix)
        pred_img_df_list.append(pred_img_df)

        # Gather f1 score information from data gathered in testing phase
        f1_score_list = generate_f1_scores(confusion_matrix, character_dict)
        f1_average_list.append(np.mean(f1_score_list))

    # Average the f1 scores together
    cv_average_f1 = np.mean(f1_average_list)

    return pred_img_df_list, confusion_matrix_list, f1_average_list, cv_average_f1