In [3]:
%matplotlib inline
import cv2
import matplotlib.pyplot as plt
from skimage import io
from skimage.color import rgb2gray, gray2rgb, rgb2hsv
from scipy import ndimage
import numpy as np
from PIL import Image
import os
import pandas as pd
import dlib

### Facial detection

In [2]:
def facial_detection_haar(filename):
    """Detect frontal faces with an OpenCV Haar cascade and outline them.

    Parameters
    ----------
    filename : str
        Path (or URL) of the image, read with ``skimage.io.imread``.

    Returns
    -------
    tuple
        ``(img, count)`` where ``img`` is the loaded image with a green
        rectangle drawn around every detection and ``count`` is the number
        of detections.

    Notes
    -----
    The cascade definition is loaded from ``haarcascade_frontalface_default.xml``
    relative to the current working directory.
    """
    img = io.imread(filename)
    if img.ndim == 2:
        # Already single-channel: nothing to convert.
        gray = img
    else:
        # skimage returns colour images in RGB order (OpenCV's own reader
        # would give BGR), so the RGB conversion code is the right one here.
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=3,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE)
    for (x, y, w, h) in faces:
        # Draw in place on the loaded image; (0, 255, 0) is green in RGB and BGR alike.
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 10)
    return img, len(faces)

### Face Recognition

In [4]:
import face_recognition

In [208]:
def facial_recognition(filename, cnn=True):
    """Load an image and locate the faces in it via ``face_recognition``.

    Parameters
    ----------
    filename : str
        Path of the image file to load.
    cnn : bool, optional
        When true (the default) the detector is called with ``model='cnn'``;
        otherwise the library's default model is used.

    Returns
    -------
    tuple
        ``(image, face_count, face_locations)`` — the loaded image, the
        number of detected faces and the detections themselves.
    """
    picture = face_recognition.load_image_file(filename)
    # Only pass the model keyword when the CNN detector is requested.
    detector_options = {'model': 'cnn'} if cnn else {}
    boxes = face_recognition.face_locations(picture, **detector_options)
    return picture, len(boxes), boxes

### Facial Analysis

In [5]:
from deepface import DeepFace

Using TensorFlow backend.


In [216]:
def facial_analysis(face_locations,image,config='age, gender, race, emotion'):
    """Run DeepFace on every detected face of an image.

    Parameters
    ----------
    face_locations : sequence
        One box per face, indexed as ``(top, right, bottom, left)``.
    image : array-like
        The full image the boxes refer to; converted with ``Image.fromarray``.
    config : str, optional
        Comma-separated list of analyses to request; surrounding whitespace
        around each entry is stripped before it is handed to DeepFace.

    Returns
    -------
    list
        One ``DeepFace.analyze`` result per box, in input order; an empty
        list when no boxes are given.
    """
    # Nothing detected: skip image conversion and analysis altogether.
    if len(face_locations) == 0:
        return []

    actions = [entry.strip() for entry in config.split(',')]
    canvas = Image.fromarray(image)
    analyses = []
    for box in face_locations:
        top, right, bottom, left = box[0], box[1], box[2], box[3]
        # PIL crops with (left, upper, right, lower), hence the reordering.
        cropped = np.asarray(canvas.crop((left, top, right, bottom)))
        analyses.append(DeepFace.analyze(cropped, actions))
    return analyses

In [290]:
def facial_percentage(face_locations,image):
    """Return the fraction of the image area covered by each face box.

    Parameters
    ----------
    face_locations : sequence
        One ``(top, right, bottom, left)`` box per face.
    image : array-like
        The image the boxes refer to; only ``image.shape[0]`` (rows) and
        ``image.shape[1]`` (columns) are read.

    Returns
    -------
    list of float
        ``box_area / image_area`` for every box, in input order (a fraction
        in the 0-1 range for boxes inside the image, not a 0-100 percentage).
    """
    # Use the `image` argument; the function must not depend on a global
    # that happens to exist in the notebook kernel.
    img_pixelcount = image.shape[0] * image.shape[1]
    return [
        ((bottom - top) * (right - left)) / img_pixelcount
        for (top, right, bottom, left) in face_locations
    ]

In [4]:
def create_feature_database(IMG_DIR):
    """Build a table of face features with one row per file in a directory.

    Every entry returned by ``os.listdir(IMG_DIR)`` is run through
    ``facial_recognition``, ``facial_analysis`` and ``facial_percentage``.

    Parameters
    ----------
    IMG_DIR : str
        Directory containing the images; entries are not filtered by type.

    Returns
    -------
    pandas.DataFrame
        Columns ``videoId`` (file name without its extension), ``numFaces``,
        ``emotions``, ``face_locations``, ``face_percent``, ``age``,
        ``gender`` and ``race``. The per-face columns hold one list entry per
        detected face; ``emotions``/``age``/``gender``/``race`` are NaN for
        images without faces.
    """
    cols = ['videoId','numFaces','emotions','face_locations','face_percent',
            'age','gender','race']
    # Collect plain dicts and build the frame once: growing a DataFrame row
    # by row is quadratic, and DataFrame.append no longer exists in pandas 2.
    records = []
    for filename in os.listdir(IMG_DIR):
        image_obj, num_faces, face_coords = facial_recognition(os.path.join(IMG_DIR, filename))
        # face locations coordinates are (top, right, bottom, left)
        analysis = facial_analysis(face_coords, image_obj)
        if len(analysis) > 0:
            # `analysis` is a list with one result dict per face, so read the
            # fields from each element rather than indexing the list with it.
            emotions = [face['dominant_emotion'] for face in analysis]
            age = [face['age'] for face in analysis]
            gender = [face['gender'] for face in analysis]
            race = [face['dominant_race'] for face in analysis]
        else:
            emotions = age = gender = race = np.nan

        face_percent = facial_percentage(face_coords, image_obj)
        records.append({
            # splitext copes with extensions of any length (.jpg, .jpeg, ...).
            'videoId': os.path.splitext(filename)[0],
            'numFaces': num_faces,
            'emotions': emotions,
            'age': age,
            'gender': gender,
            'race': race,
            'face_locations': face_coords,
            'face_percent': face_percent,
        })
    return pd.DataFrame(records, columns=cols)

In [6]:
# NOTE(review): dlib.DLIB_USE_CUDA appears to report how dlib was compiled;
# assigning it here presumably does not switch GPU support on — confirm
# against the installed dlib build before relying on it.
dlib.DLIB_USE_CUDA = True
import time

In [8]:
def create_feature_database_batches(IMG_DIR, batch_size=250):
    """Build the face-feature table by processing a directory in batches.

    Files from ``os.listdir(IMG_DIR)`` are loaded in groups of ``batch_size``.
    Each group is passed to ``face_recognition.batch_face_locations``; every
    detected face is cropped and all crops of the group are analysed with a
    single ``DeepFace.analyze`` call. Images without any detected face are
    left out of the result.

    Parameters
    ----------
    IMG_DIR : str
        Directory containing the images; entries are not filtered by type.
    batch_size : int, optional
        Maximum number of images per batch (default 250).

    Returns
    -------
    pandas.DataFrame
        Columns ``videoId`` (file name without its extension), ``numFaces``,
        ``face_locations``, ``age``, ``gender``, ``emotions`` and ``race``;
        the per-face columns hold one list entry per detected face.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.

    Notes
    -----
    NOTE(review): ``batch_face_locations`` presumably needs a CUDA-enabled
    dlib and equally sized images within a batch — confirm against the
    face_recognition documentation.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    columns = ['videoId','numFaces','face_locations','age','gender','emotions','race']
    # List the directory once so batching cannot disagree with itself, and an
    # empty directory simply yields an empty table.
    filenames = os.listdir(IMG_DIR)
    records = []

    for num_batch, start in enumerate(range(0, len(filenames), batch_size)):
        chunk = filenames[start:start + batch_size]
        images = [face_recognition.load_image_file(os.path.join(IMG_DIR, name))
                  for name in chunk]
        print('Batch {0} Start!'.format(num_batch))
        locations = face_recognition.batch_face_locations(
            images, number_of_times_to_upsample=1, batch_size=len(images))

        # Keep only the images in which at least one face was found.
        detected = [
            (os.path.splitext(name)[0], img, boxes)
            for name, img, boxes in zip(chunk, images, locations)
            if len(boxes) > 0
        ]

        # Crop every face; boxes are (top, right, bottom, left) while PIL
        # expects (left, upper, right, lower).
        faces = []
        for _video_id, img, boxes in detected:
            pil_image = Image.fromarray(img)
            for (top, right, bottom, left) in boxes:
                faces.append(np.asarray(pil_image.crop((left, top, right, bottom))))

        # Skip the analysis entirely when the batch contains no faces.
        if faces:
            analysis = DeepFace.analyze(faces)
            # Results are keyed 'instance_1', 'instance_2', ... in the order
            # the crops were submitted, so a running counter maps them back.
            analysis_counter = 0
            for video_id, _img, boxes in detected:
                emotions, age, gender, race = [], [], [], []
                for _box in boxes:
                    analysis_counter += 1
                    curr_analysis = analysis['instance_' + str(analysis_counter)]
                    emotions.append(curr_analysis['dominant_emotion'])
                    age.append(curr_analysis['age'])
                    gender.append(curr_analysis['gender'])
                    race.append(curr_analysis['dominant_race'])
                records.append({'videoId': video_id, 'numFaces': len(boxes),
                                'emotions': emotions, 'age': age,
                                'gender': gender, 'race': race,
                                'face_locations': boxes})
        print('Batch {0} Done!'.format(num_batch))

    # Build the frame once instead of appending row by row.
    return pd.DataFrame(records, columns=columns)