## Preprocessing of faces and landmarks

In [1]:
# imports
import face_alignment
import cv2
import os
from matplotlib import pyplot as plt
import time
import random
import math

In [2]:
DATA_FOLDER = 'train_sample_videos'

time_to_detect_landmarks = 0
time_to_detect_tracking = 0
time_to_render_bounds = 0
time_to_read = 0
time_to_write = 0
time_to_resize = 0

In [3]:
# create the directory for each video and set it as the current directory to save each frame inside
def create_directory(video_file, file_type='.mp4'):
    name = video_file.split(file_type)[0]
    os.mkdir('preprocessed_data/{}'.format(name))
    return 'preprocessed_data/{}'.format(name)

Landmarks from face_alignment are located as follows:\n
    Landmark | indices\n
    face:      0,  17\n
    eyebrow1:  17, 22\n
    eyebrow2:  22, 27 
    nose:      27, 31
    nostril:   31, 36
    eye1:      36, 42
    eye2:      42, 48
    lips:      48, 60
    teeth:     60, 68

# Methods for extracting the bounding box for the landmarks

In [4]:
def landmark_bounding_box(landmarks, offset, resize_info):
    eye1_range = list(range(17, 21)) + list(range(36, 41))
    eye2_range = list(range(22, 26)) + list(range(42, 47))
    nose_range = list(range(27, 35))
    mouth_range = list(range(48, 59))
    
    eye1_rectangle = get_bounding_box(eye1_range, landmarks, offset, resize_info)
    eye1_rectangle = normalize_landmark(eye1_rectangle)
    
    eye2_rectangle = get_bounding_box(eye2_range, landmarks, offset, resize_info)
    eye2_rectangle = normalize_landmark(eye2_rectangle)
    
    nose_rectangle = get_bounding_box(nose_range, landmarks, offset, resize_info)
    nose_rectangle = normalize_landmark(nose_rectangle)
    
    mouth_rectangle = get_bounding_box(mouth_range, landmarks, offset, resize_info)
    mouth_rectangle = normalize_landmark(mouth_rectangle, size=(64, 128))
    
    return eye1_rectangle, eye2_rectangle, nose_rectangle, mouth_rectangle

def get_bounding_box(index_range, landmarks, offset, resize_info):
    x1 = y1 = math.inf
    x2 = y2 = 0
    
    for i in index_range:
        x1 = min(landmarks[i][0] + offset[0], x1)
        y1 = min(landmarks[i][1] + offset[1], y1)
        x2 = max(landmarks[i][0] + offset[0], x2)
        y2 = max(landmarks[i][1] + offset[1], y2)
    
    x_cushion = int((x2-x1)*resize_info[0]*0.08) if x2-x1 > 9 else 10
    y_cushion = int((y2-y1)*resize_info[0]*0.08) if y2-y1 > 9 else 10
    
    return [int(x1*resize_info[0])-x_cushion, 
            int(y1*resize_info[1])-y_cushion, 
            int(x2*resize_info[0])+x_cushion, 
            int(y2*resize_info[1])+y_cushion]

def normalize_landmark(bounding_box, size=(28, 28)):
    
    w = bounding_box[2]-bounding_box[0]
    h = bounding_box[3]-bounding_box[1]
    
    if w < size[1] and h < size[0]:
        bounding_box[0] -= int(math.floor((size[1] - w )/ 2))
        bounding_box[2] += int(math.ceil((size[1] - w) / 2))
        bounding_box[1] -= int(math.floor((size[0] - h) / 2))
        bounding_box[3] += int(math.ceil((size[0] - h) / 2))
    elif w < size[1]:
        #increase width
        to_increase = ((size[1] / size[0]) * h) - w
        bounding_box[0] -= int(math.floor(to_increase / 2))
        bounding_box[2] += int(math.ceil(to_increase / 2))
    elif h < size[0]:
        #increase height
        to_increase = (w / (size[1] / size[0])) - h
        bounding_box[1] -= int(math.floor(to_increase / 2))
        bounding_box[3] += int(math.ceil(to_increase / 2))
    else:
        # both height and width are bigger than the desired size
        wid_rat = size[1] / w
        hei_rat = size[0] / h
        if wid_rat > hei_rat:
            #increase width
            to_increase = ((size[1] / size[0]) * h) - w
            bounding_box[0] -= int(math.floor(to_increase / 2))
            bounding_box[2] += int(math.ceil(to_increase / 2))
        else:
            #increase height
            to_increase = (w / (size[1] / size[0])) - h
            bounding_box[1] -= int(math.floor(to_increase / 2))
            bounding_box[3] += int(math.ceil(to_increase / 2))
            
    return bounding_box

In [5]:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    global time_to_resize
    start = time.time()
    dim = None
    (h, w) = image.shape[:2]
    
    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image

    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)

    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))
    
    # resize the image
    resized = cv2.resize(image, dim, interpolation = inter)

    # return the resized image
    time_to_resize += time.time() - start
    return resized, (w/dim[0], h/dim[1])



In [6]:
def face_track(frame, fa, last_bounding_box=None):
    '''
    we want to limit our search space based on the last bounding box to speed up landmark detection
    
    frame: frame of video (already resized)
    last_bounding_box: rectangluar endpoints of last detected face (x1, y1, x2, y2)
    fa: pretrained detection model
    '''
    global time_to_detect_landmarks
    
    if last_bounding_box is None:
        start = time.time()
        preds = fa.get_landmarks(frame)
        time_to_detect_landmarks += time.time() - start
        return preds, (0, 0)
    
    # expand the last bounding box in all directions
    x1 = int(max(0, last_bounding_box[0]-20))                # new x1
    y1 = int(max(0, last_bounding_box[1]-20))                # new y1
    x2 = int(min(frame.shape[1]-1, last_bounding_box[2]+20)) # new x2
    y2 = int(min(frame.shape[0]-1, last_bounding_box[3]+20)) # new y2

    start = time.time()
    preds = fa.get_landmarks(frame[y1:y2, x1:x2, :]) # only predict in the area where we last searched
    time_to_detect_landmarks += time.time() - start
    
    # return the predictions and offset
    return preds, (x1, y1)

In [7]:
def detect_landmarks(path, file_name, fa, folder_to_write):
    '''
    method to take each frame of a video and save the face and landmarks from each frame
    
    path: directory of video
    file_name: name of the video
    fa: model for face and landmark detections
    '''
    capture = cv2.VideoCapture(os.path.join(path, file_name))
    width  = int(capture.get(3))
    height = int(capture.get(4))
    
    global time_to_detect_tracking, time_to_render_bounds, time_to_read, time_to_write, time_to_resize
    
    face_box = None
    frame_num = 1
    
    while capture.isOpened():
        start = time.time()
        success, orig_frame = capture.read()
        time_to_read += time.time() - start
        if not success:
            # we have reached the end of the video
            break
        start = time.time()
        frame, resize_info = image_resize(orig_frame, width=400)
        time_to_resize += time.time() - start
        
        start = time.time()        
        preds, offset = face_track(frame, fa, face_box)
        time_to_detect_tracking += time.time() - start
        
        if preds:
            for i in range(len(preds[1])):
                face_box = preds[1][i]
                
                face_box[0] += offset[0]
                face_box[2] += offset[0]
                face_box[1] += offset[1]
                face_box[3] += offset[1]
                start = time.time()
                eye1, eye2, nose, mouth = landmark_bounding_box(preds[0][i], offset, resize_info)
                time_to_render_bounds += time.time()-start
                
#                 cv2.rectangle(orig_frame, (eye1[0], eye1[1]), (eye1[2], eye1[3]), (255, 255, 255))
#                 cv2.rectangle(orig_frame, (eye2[0], eye2[1]), (eye2[2], eye2[3]), (255, 255, 255))
#                 cv2.rectangle(orig_frame, (nose[0], nose[1]), (nose[2], nose[3]), (0, 0, 255))
#                 cv2.rectangle(orig_frame, (mouth[0], mouth[1]), (mouth[2], mouth[3]), (0, 255, 0))
#                 plt.figure(1)
#                 plt.imshow(orig_frame)
#                 plt.show()

                face = orig_frame[int(face_box[1]*resize_info[1]):int(face_box[3]*resize_info[1]), 
                                  int(face_box[0]*resize_info[0]):int(face_box[2]*resize_info[0]), :]
                
                eye1, _ = image_resize(orig_frame[eye1[1]:eye1[3], eye1[0]:eye1[2], :], width=28)
                eye2, _ = image_resize(orig_frame[eye2[1]:eye2[3], eye2[0]:eye2[2], :], width=28)
                nose, _ = image_resize(orig_frame[nose[1]:nose[3], nose[0]:nose[2], :], width=28)
                mouth, _ = image_resize(orig_frame[mouth[1]:mouth[3], mouth[0]:mouth[2], :], width=128)
                
                start = time.time()
                cv2.imwrite(folder_to_write + '/face_frame{}.jpg'.format(frame_num), orig_frame[int(face_box[1]*resize_info[1]):int(face_box[3]*resize_info[1]), 
                                                                                                int(face_box[0]*resize_info[0]):int(face_box[2]*resize_info[0]), :])
                cv2.imwrite(folder_to_write + '/eye1_frame{}.jpg'.format(frame_num), eye1)
                cv2.imwrite(folder_to_write + '/eye2_frame{}.jpg'.format(frame_num), eye2)
                cv2.imwrite(folder_to_write + '/nose_frame{}.jpg'.format(frame_num), nose)
                cv2.imwrite(folder_to_write + '/mouth_frame{}.jpg'.format(frame_num), mouth)
                time_to_write += time.time() - start

        frame_num += 1
        
    capture.release()

In [8]:
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D)

vids = os.listdir(DATA_FOLDER)
start = time.time()
for vid in vids:
    try:
        folder2write = create_directory(vid)
        detect_landmarks(DATA_FOLDER, vid, fa, folder2write)
    except:
        print('Unsuccessful. Either the folder has already been created or the detect_landmarks failed')
        continue
    print('Finished processing video {}'.format(vid))
    print('Time: {} s'.format(time.time()-start))
    
process_time = time.time() - start
print('PROCESS TIME: {} s for {} videos'.format(process_time, len(vids)))
print('\tLandmark Detection: {} s'.format(time_to_detect_landmarks))
print('\tTracking time: {} s'.format(time_to_detect_tracking))
print('\tMake rectangles: {} s'.format(time_to_render_bounds))
print('\tRead Time: {} s'.format(time_to_read))
print('\tWrite Time: {} s'.format(time_to_write))
print('\tResize: {} s'.format(time_to_resize))

Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder has already been created or the detect_landmarks failed
Unsuccessful. Either the folder 

Finished processing video erlvuvjsjf.mp4
Time: 66.76971650123596 s
Finished processing video dhjmzhrcav.mp4
Time: 111.06208896636963 s
Finished processing video agqphdxmwt.mp4
Time: 154.86982488632202 s
Finished processing video bqkdbcqjvb.mp4
Time: 199.87752485275269 s
Finished processing video bwuwstvsbw.mp4
Time: 246.84643006324768 s
Finished processing video avmjormvsx.mp4
Time: 289.36387276649475 s
Finished processing video ekkdjkirzq.mp4
Time: 340.59872341156006 s
Finished processing video ahfazfbntc.mp4
Time: 383.2388651371002 s
Finished processing video cxttmymlbn.mp4
Time: 426.35219645500183 s
Finished processing video btugrnoton.mp4
Time: 470.342342376709 s
Finished processing video avnqydkqjj.mp4
Time: 521.1570062637329 s
Finished processing video ccmonzqfrz.mp4
Time: 566.2170481681824 s
Finished processing video bvzjkezkms.mp4
Time: 613.8295328617096 s
Finished processing video dhxctgyoqj.mp4
Time: 657.9787566661835 s
Finished processing video erqgqacbqe.mp4
Time: 703.60339

Finished processing video dhkwmjxwrn.mp4
Time: 3417.112284183502 s
Finished processing video andaxzscny.mp4
Time: 3458.3161282539368 s
Finished processing video eebrkicpry.mp4
Time: 3499.7316834926605 s
Finished processing video cfxkpiweqt.mp4
Time: 3541.568264722824 s
Finished processing video drcyabprvt.mp4
Time: 3585.0323679447174 s
Finished processing video bsqgziaylx.mp4
Time: 3626.5360078811646 s
Finished processing video drtbksnpol.mp4
Time: 3667.7807807922363 s
Finished processing video djxdyjopjd.mp4
Time: 3709.24018740654 s
Finished processing video metadata.json
Time: 3709.2520220279694 s
Finished processing video bqhtpqmmqp.mp4
Time: 3751.5687053203583 s
Finished processing video ckkuyewywx.mp4
Time: 3792.127441883087 s
PROCESS TIME: 3792.1279804706573 s for 401 videos
	Landmark Detection: 3016.7212538719177 s
	Tracking time: 3018.0961623191833 s
	Make rectangles: 14.84348440170288 s
	Read Time: 143.53834795951843 s
	Write Time: 345.24220395088196 s
	Resize: 527.28730797767