In [1]:
import insightface
import urllib
import urllib.request
import cv2
import numpy as np
from numpy import linalg
from matplotlib import pyplot as plt
from IPython.display import clear_output
import math
import os

In [2]:
# Notebook-wide configuration values.

# Presumably a cosine-similarity cut-off for comparing face embeddings;
# it is not referenced in the cells visible here — TODO confirm usage.
cosine_threshold = 0.4
# Colour triple; presumably in OpenCV's BGR order (i.e. red) for drawing.
# Not referenced in the cells visible here.
color = (0, 0, 255)
# Forwarded to model.prepare(ctx_id=...) when the insightface model is set up.
ctx_id = 0
# OpenCV font face constant; not referenced in the cells visible here.
font = cv2.FONT_HERSHEY_SIMPLEX

In [13]:
# Open the input video. The capture object is consumed by the processing loop,
# so this cell has to be re-run before the loop is run a second time.
video_path = 'videos/video1.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # An unopened capture only ever yields (False, None) from read(); fail
    # here with a clear message instead of letting later cells misbehave.
    raise IOError('Could not open video: ' + video_path)

# Frame dimensions in pixels, read through the named property ids
# (CAP_PROP_FRAME_WIDTH == 3, CAP_PROP_FRAME_HEIGHT == 4).
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
size = (frame_width, frame_height)

In [4]:
# Build the insightface face-analysis pipeline; the main loop calls
# model.get(frame) and reads .bbox, .landmark and .embedding from each result.
model = insightface.app.FaceAnalysis()
# ctx_id comes from the configuration cell; nms is presumably the detector's
# non-maximum-suppression threshold.
# NOTE(review): prepare()'s keyword arguments depend on the installed
# insightface release — confirm that `nms` is still accepted.
model.prepare(ctx_id = ctx_id, nms=0.4)

[32, 16, 8] {'32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': (1.0,), 'ALLOWED_BORDER': 9999}, '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': (1.0,), 'ALLOWED_BORDER': 9999}, '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': (1.0,), 'ALLOWED_BORDER': 9999}}
use_landmarks True


In [5]:
def is_rotation_matrix(R):
    """Tell whether ``R`` is (numerically) an orthonormal 3x3 matrix.

    The check is ``|| I - R^T R || < 1e-6`` using NumPy's default matrix
    norm. The determinant is not examined, so only orthonormality is tested.

    Args:
        R: 3x3 NumPy array (its ``dtype`` is used to build the identity).

    Returns:
        Truth value of the comparison above.
    """
    gram = np.dot(np.transpose(R), R)
    deviation = np.identity(3, dtype=R.dtype) - gram
    return np.linalg.norm(deviation) < 1e-6

def rotation_vector_to_euler_angles(rotation_vector):
    """Convert a Rodrigues rotation vector into Euler angles in degrees.

    The vector is expanded to a 3x3 matrix with ``cv2.Rodrigues`` and then
    decomposed into angles about the x, y and z axes.

    Args:
        rotation_vector: rotation vector as accepted by ``cv2.Rodrigues``
            (e.g. the one returned by ``cv2.solvePnP``).

    Returns:
        ``np.ndarray`` ``[x, y, z]`` in degrees, or ``None`` when the expanded
        matrix does not pass ``is_rotation_matrix``.
    """
    rot, _ = cv2.Rodrigues(rotation_vector)
    if not is_rotation_matrix(rot):
        return None

    r00, r10 = rot[0, 0], rot[1, 0]
    sy = math.sqrt(r00 * r00 + r10 * r10)

    # The y angle has the same expression in the regular and singular cases.
    y = math.atan2(-rot[2, 0], sy)

    if sy < 1e-6:
        # Singular configuration: x and z cannot be separated, so z is fixed
        # at 0 and the whole in-plane rotation is attributed to x.
        x = math.atan2(-rot[1, 2], rot[1, 1])
        z = 0
    else:
        x = math.atan2(rot[2, 1], rot[2, 2])
        z = math.atan2(r10, r00)

    return np.rad2deg(np.array([x, y, z]))

In [6]:
def chin_calc(bbox, fivepointlandmarks):
    """Estimate a 2-D chin point from a face box and the two mouth corners.

    The chin is placed straight below the midpoint of the mouth corners, at a
    distance equal to that midpoint's perpendicular distance from the bottom
    edge of the bounding box.

    Args:
        bbox: indexable read as ``[x1, y1, x2, y2]``; only indices 0, 2 and 3
            are used (the bottom edge runs from ``(x2, y2)`` to ``(x1, y2)``).
        fivepointlandmarks: flat indexable of landmark coordinates; indices
            6/7 and 8/9 are read as the (x, y) of the two mouth corners.

    Returns:
        ``[x, y]`` list holding the estimated chin position.
    """
    p1 = np.asarray([bbox[2], bbox[3]])
    p2 = np.asarray([bbox[0], bbox[3]])
    # Midpoint of the mouth corners, truncated to whole pixels.
    p3 = np.asarray([int((fivepointlandmarks[6] + fivepointlandmarks[8]) / 2),
                     int((fivepointlandmarks[7] + fivepointlandmarks[9]) / 2)])

    edge = p2 - p1
    to_mouth = p1 - p3
    edge_length = linalg.norm(edge)

    if edge_length == 0:
        # Zero-width box: the point-to-line formula would divide by zero.
        # The bottom edge is horizontal by construction, so the distance to
        # it is simply the vertical offset.
        chindistfromp3 = int(abs(to_mouth[1]))
    else:
        # z-component of the 2-D cross product, written out by hand because
        # np.cross on 2-element vectors is deprecated since NumPy 2.0.
        cross_z = edge[0] * to_mouth[1] - edge[1] * to_mouth[0]
        chindistfromp3 = int(abs(cross_z) / edge_length)

    chin = [p3[0], p3[1] + chindistfromp3]

    # Return a 2-D point representing the chin of the given face
    return chin

def six_point_of_landmarks(bbox, fivepointlandmarks):
    """Assemble the six 2-D image points used for head-pose estimation.

    Five points come straight from the flat landmark array; the sixth (chin)
    is derived by ``chin_calc``. The row order is the one the pose solver's
    3-D model points use: nose, chin, two eyes, two mouth corners.

    Args:
        bbox: face bounding box, forwarded to ``chin_calc``.
        fivepointlandmarks: flat indexable of ten coordinates
            ``[x0, y0, ..., x4, y4]``.

    Returns:
        ``np.ndarray`` of shape (6, 2) and dtype double.
    """
    lm = fivepointlandmarks
    chin_x, chin_y = chin_calc(bbox, lm)

    rows = [
        (lm[4], lm[5]),    # Nose tip
        (chin_x, chin_y),  # Chin
        (lm[0], lm[1]),    # Left eye left corner
        (lm[2], lm[3]),    # Right eye right corner
        (lm[6], lm[7]),    # Left mouth corner
        (lm[8], lm[9]),    # Right mouth corner
    ]
    return np.array(rows, dtype="double")

def est_head_pose(face, imsize):
    """Estimate the head pose of a detected face with ``cv2.solvePnP``.

    Six 2-D image points (from ``six_point_of_landmarks``) are matched against
    a fixed six-point 3-D face model. The camera is approximated from the
    image size alone, with no lens distortion.

    Args:
        face: detection result exposing ``bbox`` and ``landmark`` NumPy arrays.
        imsize: image shape; index 0 is used as the height and index 1 as the
            width (the caller passes ``frame.shape``).

    Returns:
        Tuple ``(p1, p2, bbox, euler_angles)``:
        ``p1`` is the nose tip in integer pixel coordinates, ``p2`` is the
        projection of a point 1000 model units in front of the nose (so
        ``p1 -> p2`` is a line indicating where the head points), ``bbox`` is
        the flattened integer bounding box, and ``euler_angles`` is the result
        of ``rotation_vector_to_euler_angles`` (may be ``None``).
    """
    # The builtin int replaces the np.int alias, which NumPy 1.24 removed.
    bbox = face.bbox.astype(int).flatten()
    fivepointlandmarks = face.landmark.astype(int).flatten()
    image_points = six_point_of_landmarks(bbox, fivepointlandmarks)

    # 3D model points, in the same order as the rows of image_points.
    model_points = np.array([
                                (0.0, 0.0, 0.0),             # Nose tip
                                (0.0, -330.0, -65.0),        # Chin
                                (-210.0, 170.0, -135.0),     # Left eye left corner
                                (210.0, 170.0, -135.0),      # Right eye right corner
                                (-150.0, -150.0, -125.0),    # Left mouth corner
                                (150.0, -150.0, -125.0)      # Right mouth corner
                            ])

    # Camera internals: focal length approximated by the image width and the
    # principal point placed at the image centre.
    focal_length = imsize[1]
    center = (imsize[1]/2, imsize[0]/2)
    camera_matrix = np.array(
                            [[focal_length, 0, center[0]],
                            [0, focal_length, center[1]],
                            [0, 0, 1]], dtype = "double"
                            )

    dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion
    # The flag is read from the top-level cv2 namespace; the cv2.cv2 submodule
    # is not available in current opencv-python builds.
    # NOTE: the success flag is not checked; a failed solve is passed on as is.
    (_success, rotation_vector, translation_vector) = cv2.solvePnP(model_points,
                                                                   image_points,
                                                                   camera_matrix,
                                                                   dist_coeffs,
                                                                   flags=cv2.SOLVEPNP_ITERATIVE)
    # Project a point straight ahead of the nose to get the pose line's end.
    (nose_end_point2D, _) = cv2.projectPoints(np.array([(0.0, 0.0, 1000.0)]),
                                              rotation_vector,
                                              translation_vector,
                                              camera_matrix,
                                              dist_coeffs)

    p1 = (int(image_points[0][0]), int(image_points[0][1]))
    p2 = (int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))

    euler_angles = rotation_vector_to_euler_angles(rotation_vector)

    # Return a line that points out the current pose of the head
    return p1, p2, bbox, euler_angles

In [11]:
def face_filter(raw_euler_angles):
    """Decide whether a head pose falls inside the accepted angle windows.

    The angles are truncated to integers and their absolute values are taken.
    A pose is accepted when all three conditions hold:

    * angle 0 (about x) is within [0, 4] or [170, 180] degrees,
    * angle 2 (about z) is within [0, 4] or [170, 180] degrees,
    * angle 1 (about y) is within [30, 55] degrees.

    Args:
        raw_euler_angles: ``[x, y, z]`` angles in degrees as produced by
            ``rotation_vector_to_euler_angles``, or ``None`` when that
            function could not produce angles.

    Returns:
        ``True`` if the pose is accepted, ``False`` otherwise (including when
        ``raw_euler_angles`` is ``None``).
    """
    if raw_euler_angles is None:
        # No valid rotation was recovered for this face: reject it rather
        # than failing on None.astype.
        return False

    euler_angles = np.absolute(np.asarray(raw_euler_angles).astype(int))

    def near_axis(angle):
        # Within 4 degrees of 0, or within 10 degrees of 180.
        return 0 <= angle <= 4 or 170 <= angle <= 180

    x_ok = near_axis(euler_angles[0])
    z_ok = near_axis(euler_angles[2])
    y_ok = 30 <= euler_angles[1] <= 55
    return bool(x_ok and y_ok and z_ok)

In [12]:
def save_face(face, index):
    """Write a face crop to ``filtered_faces/FaceNo<index>.png``.

    Args:
        face: image array accepted by ``cv2.imwrite``.
        index: number embedded in the file name.

    Raises:
        IOError: if OpenCV reports that the file could not be written.
    """
    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs('filtered_faces', exist_ok=True)
    path = 'filtered_faces/FaceNo' + str(index) + '.png'
    if not cv2.imwrite(path, face):
        raise IOError('Could not write ' + path)
    
def save_vector(vector, index):
    """Dump an array to ``filtered_faces/VectorNo<index>.dat``.

    The data is written with ``ndarray.tofile`` in its default (raw binary)
    mode, so the file carries no dtype or shape information.

    Args:
        vector: NumPy array to store.
        index: number embedded in the file name.
    """
    # tofile raises FileNotFoundError if the target directory is missing.
    os.makedirs('filtered_faces', exist_ok=True)
    vector.tofile('filtered_faces/VectorNo' + str(index) + '.dat')

In [14]:
# Walk through the video frame by frame, and for every detected face whose
# head pose passes face_filter, save the cropped face image together with its
# embedding vector under a shared running index.
faceNo = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of the stream (or a read failure): stop instead of looping
            # forever on empty reads.
            break

        frame_h, frame_w = frame.shape[:2]
        for face in model.get(frame):
            _p1, _p2, bbox, euler_angles = est_head_pose(face, frame.shape)
            if euler_angles is None or not face_filter(euler_angles):
                continue

            # Clip the box to the frame: detector boxes can extend past the
            # image border, and negative indices would wrap around in the
            # slice below.
            x1, y1 = max(int(bbox[0]), 0), max(int(bbox[1]), 0)
            x2, y2 = min(int(bbox[2]), frame_w), min(int(bbox[3]), frame_h)
            if x2 <= x1 or y2 <= y1:
                continue  # nothing of the face lies inside the frame

            # The builtin float replaces the np.float alias, which NumPy 1.24
            # removed.
            embedding = face.embedding.astype(float).flatten()
            cropped_face = frame[y1:y2, x1:x2]
            save_face(cropped_face, faceNo)
            save_vector(embedding, faceNo)
            faceNo += 1
            print(faceNo)
        # Replace the cell output once per frame so the counter does not
        # flood the notebook.
        clear_output(wait=True)
finally:
    # Release the capture even if the loop is interrupted or fails.
    cap.release()
print('Render completed!')

KeyboardInterrupt: 