### Install face_alignment (https://github.com/1adrianb/face-alignment); it is used here for face detection and landmark detection.

In [1]:
import cv2
import face_alignment
import numpy as np
from PIL import Image
from skimage import io
from skimage import transform as trans
from torchvision import transforms

In [2]:
def get_scale_center(bb, scale_=220.0):
    """Derive the crop scale and the box midpoint from a bounding box.

    Args:
        bb: Indexable whose first four entries are the two box corners,
            read as ``bb[0], bb[1]`` and ``bb[2], bb[3]``; extra entries
            are ignored.
        scale_: Divisor applied to (width + height) to obtain the scale.

    Returns:
        ``(scale, center)`` where ``center`` is a length-2 numpy array holding
        the midpoint of the box along each axis.
    """
    x0, y0, x1, y1 = bb[0], bb[1], bb[2], bb[3]
    mid_x = x1 - (x1 - x0) / 2
    mid_y = y1 - (y1 - y0) / 2
    span_sum = x1 - x0 + y1 - y0

    return span_sum / scale_, np.array([mid_x, mid_y])

In [3]:
def get_transform(center, scale, res, rot=0):
    """Build the 3x3 matrix that maps image coordinates into a crop.

    The source region is a square of side ``200 * scale`` centred on
    ``center``; it is mapped onto an output of size ``res``.

    Args:
        center: Indexable with the region centre (first entry pairs with
            ``res[1]``, second with ``res[0]``).
        scale: Region size in units of 200 pixels.
        res: Output resolution, indexed as ``(res[0], res[1])``.
        rot: Rotation in degrees applied about the middle of the output;
            ``0`` skips the rotation entirely.

    Returns:
        A 3x3 float numpy array.
    """
    box = 200 * scale
    out_h, out_w = res[0], res[1]
    affine = np.array([
        [float(out_w) / box, 0.0, out_w * (-float(center[0]) / box + .5)],
        [0.0, float(out_h) / box, out_h * (-float(center[1]) / box + .5)],
        [0.0, 0.0, 1.0],
    ])

    if rot == 0:
        return affine

    # Sign is flipped to match the direction of rotation used when cropping.
    angle = -rot * np.pi / 180
    sn, cs = np.sin(angle), np.cos(angle)
    rotation = np.array([
        [cs, -sn, 0.0],
        [sn, cs, 0.0],
        [0.0, 0.0, 1.0],
    ])

    # Rotate about the middle of the output: shift it to the origin,
    # rotate, then shift back.
    to_origin = np.eye(3)
    to_origin[0, 2] = -out_w / 2
    to_origin[1, 2] = -out_h / 2
    from_origin = to_origin.copy()
    from_origin[:2, 2] *= -1
    return np.dot(from_origin, np.dot(rotation, np.dot(to_origin, affine)))

In [4]:
def crop(img,rects):
    """Warp the region described by ``rects`` into a 256x256 patch.

    Args:
        img: Image array; it is copied before warping and the warped patch
            is converted with ``cv2.COLOR_BGR2RGB``.
        rects: Indexable whose first four entries are the bounding box.

    Returns:
        ``(bb, patch)``: the four-entry box slice and the 256x256 patch.
    """
    side = 256
    box = rects[:4]
    scale, center = get_scale_center(box, scale_=260)

    # Augmentation hooks kept at their neutral values: zero relative shift
    # of the centre on both axes and zero rotation.
    shift = (0, 0)
    for axis, frac in enumerate(shift):
        center[axis] += frac * center[axis]

    warp = get_transform(center, scale, (side, side), 0)[:2]
    patch = cv2.warpAffine(img.copy(), warp, (side, side))
    return box, cv2.cvtColor(patch, cv2.COLOR_BGR2RGB)

In [43]:
# video to img
def video2img(src,dst):
    """Write every frame of a video to ``dst`` as numbered JPEG files.

    Frames are saved in decoding order as ``000000.jpg``, ``000001.jpg``, ...

    Args:
        src: Path of the video, handed to ``cv2.VideoCapture``.
        dst: Output folder; created if it does not exist.

    Raises:
        IOError: If OpenCV cannot open ``src``.
    """
    import os

    os.makedirs(dst, exist_ok=True)
    vid = cv2.VideoCapture(src)
    try:
        if not vid.isOpened():
            raise IOError(f'cannot open video: {src}')
        index = 0
        while True:
            ok, frame = vid.read()
            # read() reports the end of the stream with ok == False / frame is None.
            if not ok or frame is None:
                break
            # Format only the file name so a '%' in the folder name is harmless.
            cv2.imwrite(os.path.join(dst, '%06d.jpg' % index), frame)
            index += 1
    finally:
        # Always free the decoder handle, even if writing a frame fails.
        vid.release()

In [46]:
def caculate_pts5(pred):
    """Reduce a landmark array to five reference points.

    Args:
        pred: 2-D array of landmarks, one point per row, with at least 55
            rows (presumably the 68-point face layout -- confirm).

    Returns:
        Array with five rows: the mean of rows 36-41 (left eye), the mean of
        rows 42-47 (right eye), then rows 33, 48 and 54 unchanged.
    """
    eye_spans = ((36, 42), (42, 48))
    single_rows = (33, 48, 54)

    points = [np.mean(pred[start:stop, :], axis=0) for start, stop in eye_spans]
    points.extend(pred[row, :] for row in single_rows)
    return np.array(points)

In [78]:
def AU_detection(model,src,dst):
    """Run AU detection on a folder of frames and save annotated copies.

    For each ``*.jpg`` frame in ``src`` (processed in sorted order) the first
    detected face is cropped to 256x256 and passed to ``model`` together with
    the frames 10 positions before and after it, each warped onto the crop
    with a similarity transform estimated from five landmark points. The
    bounding box and one coloured marker per AU are drawn on the original
    frame, which is written to ``dst`` as ``%06d.png``.

    A neighbour that is out of range, or in which no face is found, is
    replaced by the current crop. A frame without a detected face is written
    unannotated so the output sequence has no gaps.

    Relies on the notebook-level ``transform_val`` preprocessing and on
    ``get_scale_center``, ``get_transform`` and ``caculate_pts5``.

    Args:
        model: ``torch.nn.Module`` called as ``model(current, previous, next)``;
            inputs are moved to the device of its parameters.
        src: Folder containing the input ``*.jpg`` frames.
        dst: Folder for the annotated ``*.png`` frames; created if missing.

    Raises:
        ValueError: If ``src`` contains no ``*.jpg`` files.
        IOError: If a frame file cannot be decoded.
    """
    import glob
    import os

    import torch

    frame_paths = sorted(glob.glob(os.path.join(src, '*.jpg')))
    if not frame_paths:
        raise ValueError(f'no .jpg frames found in {src!r}')
    os.makedirs(dst, exist_ok=True)

    im_w = 256          # side of the square face crop fed to the model
    neighbour_gap = 10  # temporal offset (in frames) of the two context frames
    au_indices = (1,2,4,6,7,9,10,12,14,15,17,23,24,25,26)
    device = next(model.parameters()).device
    tform = trans.SimilarityTransform()
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, face_detector='sfd')

    for idx, path in enumerate(frame_paths):
        im_1 = _read_frame(path)
        frame = im_1.copy()
        out_path = os.path.join(dst, '%06d.png' % idx)

        # NOTE(review): frames are passed to the detector in OpenCV's BGR
        # order, as before -- confirm this is what face_alignment expects.
        lmks_1, _, rects_1 = fa.get_landmarks(im_1, return_bboxes=True)
        if lmks_1 is None or len(lmks_1) == 0:
            cv2.imwrite(out_path, frame)
            continue
        pts_1 = caculate_pts5(lmks_1[0])

        # Crop the face area based on the bounding box.
        bbox = rects_1[0][:4]
        scale, center = get_scale_center(bbox, scale_=260)
        mat = get_transform(center, scale, (im_w, im_w), 0)[:2]
        crop_1 = cv2.warpAffine(im_1, mat, (im_w, im_w))

        # Landmarks in crop coordinates: homogeneous points times the 2x3 matrix.
        ones = np.ones((pts_1.shape[0], 1))
        pts_1 = np.dot(np.concatenate((pts_1, ones), axis=1), mat.T)

        # Temporal neighbours, aligned to the crop of the current frame.
        crop_0 = crop_2 = None
        if idx - neighbour_gap >= 0:
            crop_0 = _align_neighbour(fa, tform, frame_paths[idx - neighbour_gap], pts_1, im_w)
        if idx + neighbour_gap < len(frame_paths):
            crop_2 = _align_neighbour(fa, tform, frame_paths[idx + neighbour_gap], pts_1, im_w)
        if crop_0 is None:
            crop_0 = crop_1
        if crop_2 is None:
            crop_2 = crop_1

        # AU detection
        with torch.no_grad():
            pred = model(_to_batch(crop_1, device),
                         _to_batch(crop_0, device),
                         _to_batch(crop_2, device))
            probs = torch.sigmoid(pred[0]).cpu().numpy()

        _draw_au_overlay(frame, bbox, au_indices, probs)
        cv2.imwrite(out_path, frame)


def _read_frame(path):
    """Load an image with OpenCV; raise instead of returning None on failure."""
    img = cv2.imread(path)
    if img is None:
        raise IOError(f'cannot read frame: {path}')
    return img


def _align_neighbour(fa, tform, path, ref_pts, size):
    """Warp the frame at ``path`` onto ``ref_pts``; return None if no face is found."""
    img = _read_frame(path)
    lmks = fa.get_landmarks(img)
    if lmks is None or len(lmks) == 0:
        return None
    tform.estimate(caculate_pts5(lmks[0]), ref_pts)
    return cv2.warpAffine(img, tform.params[0:2, :], (size, size), borderValue=0.0)


def _to_batch(img_bgr, device):
    """Convert a BGR crop to RGB, apply ``transform_val``, add a batch axis, move to ``device``."""
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return transform_val(Image.fromarray(rgb)).unsqueeze(0).to(device)


def _draw_au_overlay(frame, bbox, au_indices, probs):
    """Draw the face box and one probability-coloured marker plus label per AU, in place."""
    # Plain Python ints keep OpenCV's point parsing happy.
    x0, y0, x1, y1 = (int(v) for v in bbox)
    cv2.rectangle(frame, (x0, y0), (x1, y1), color=(0, 0, 255), thickness=2)
    for loc, (au_idx, au_prob) in enumerate(zip(au_indices, probs)):
        au_prob = float(au_prob)
        # Second channel grows and third channel shrinks as the probability rises.
        colour = (0, round(255 * au_prob), round(255 * (1 - au_prob)))
        row_y = y0 + 15 * loc
        cv2.circle(frame, (x1 + 10, row_y + 5),
                   radius=5, thickness=-1, color=colour, lineType=cv2.LINE_AA)
        cv2.putText(frame, f'AU {au_idx}', (x1 + 20, row_y + 10),
                    cv2.FONT_HERSHEY_DUPLEX, 0.45, colour, lineType=cv2.LINE_AA)


In [2]:
def img2video(src, dst='./res_video2.mp4', fps=10):
    """Assemble the ``*.png`` frames of a folder into a video file.

    Args:
        src: Folder containing the frames; they are used in sorted name order.
        dst: Path of the video to write (default keeps the original
            ``./res_video2.mp4``).
        fps: Frame rate of the resulting clip.

    Raises:
        ValueError: If ``src`` contains no ``*.png`` files.
    """
    import glob
    import os

    im_list = sorted(glob.glob(os.path.join(src, '*.png')))
    if not im_list:
        raise ValueError(f'no .png frames found in {src!r}')

    # Imported after the input check so a bad folder is reported before
    # moviepy is even needed.
    from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

    clip = ImageSequenceClip(im_list, fps=fps)
    # Sanity output: array shape of the frame returned for t = 3.
    print(clip.get_frame(3).shape)
    clip.write_videofile(dst)

(1280, 720, 3)
Moviepy - Building video ./res_video2.mp4.
Moviepy - Writing video ./res_video2.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ./res_video2.mp4


In [5]:
# Build the AU model and load the pretrained weights into its predictor.
# NOTE(review): `transforms` is not imported in this cell; it must already be
# in scope (torchvision import in the first cell) when this cell runs.
import torch
from models.aunet import AU_NET
# Preprocessing applied to each PIL crop before it is fed to the model.
transform_val = transforms.Compose([transforms.ToTensor()])
model = AU_NET(alpha=0.9, beta=0.1, n_classes=15)
# 'cpu' is passed as torch.load's second positional argument (map_location).
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
pre_trained = torch.load('./cross_model.pth', 'cpu')
pretrained_items = list(pre_trained.items())
current_items = model.predictor.state_dict()
# Weights are copied by POSITION, not by name: the i-th checkpoint entry
# replaces the i-th predictor entry and the checkpoint's key names are ignored.
# NOTE(review): assumes both orderings correspond and that the checkpoint has
# at least as many entries as the predictor (otherwise IndexError) -- confirm.
count = 0
for key, value in current_items.items():
    layer_name, weights = pretrained_items[count]
    current_items[key] = weights
    count = count + 1
# Keys always match here because they come from the predictor itself.
model.predictor.load_state_dict(current_items, strict=True)
model = model.cuda()
model = model.eval()

In [None]:
# End-to-end demo: video -> frames -> annotated frames -> result video.
src, dst, res = 'demo.mp4', './demos_imgs/', './demos_res'
video2img(src, dst)             # called with the video and the frame folder
AU_detection(model, dst, res)   # called with the frame folder and the result folder
img2video(res)                  # called with the result folder