In [1]:
# import the necessary packages
import numpy as np
from imutils import face_utils
import dlib
import cv2
import math
import imutils
import face_recognition

from locations import landmarks

In [2]:
# initializing dlib's face detector and encoder, creating predictor
p = "../data/shape_predictor_194_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(p)

In [3]:
def calc_angle(v1, v2):
    '''
    计算两个向量的夹角：
    AB = [5,-1,1,-3]
    CD = [4,1,4.5,4.5]
    print(angle_new(AB, CD),type(AB))
    '''
    dx1 = v1[2] - v1[0]
    dy1 = v1[3] - v1[1]
    dx2 = v2[2] - v2[0]
    dy2 = v2[3] - v2[1]
    angle1 = math.atan2(dy1, dx1)
    angle1 = int(angle1 * 180/math.pi)
    # print(angle1)
    angle2 = math.atan2(dy2, dx2)
    angle2 = int(angle2 * 180/math.pi)
    # print(angle2)
    if angle1*angle2 >= 0:
        included_angle = abs(angle1-angle2)
    else:
        included_angle = abs(angle1) + abs(angle2)
        if included_angle > 180:
            included_angle = 360 - included_angle
    return included_angle

def calc_wuguan_std(center_list):
    """
    输入一张脸在一个视频中的的五官中心点列表
    """
    if len(center_list) == 1:
        return np.array([0])
    elif len(center_list) > 1:
        return np.array([np.linalg.norm(np.array(center_list[i])-np.array(center_list[i+1]),axis=1).std() \
                for i in range(len(center_list)-1)])

In [4]:
# calc features
def feature_calc(video_path, n_frames=30):
    # Create video reader and find length
    v_cap = cv2.VideoCapture(video_path)
    v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # pick n frames
    sample = np.linspace(0, v_len - 1, n_frames).astype(int)

    # by frame index
    face_color_avgs = []
    face_color_stds = []
    yanpi_face_avgs = []
    lip_color_avgs = []
    face_fluctuation = []
    brow_dists = []
    brow2nose_dists = []
    brow2eye_dists = []
    brow2nose_angles = []
    brow_thicks = []
    face_rects = []

    # part counter indexes
    r_eye_index = landmarks().get_polygons_index('R_EYE')
    l_eye_index = landmarks().get_polygons_index('L_EYE')
    xiaba_index = landmarks().get_polygons_index('XIABA')
    l_brow_index = landmarks().get_polygons_index('L_BROW')
    r_brow_index = landmarks().get_polygons_index('R_BROW')
    nose_index = landmarks().get_polygons_index('NOSE')
    u_lip_index = landmarks().get_polygons_index('U_LIP')
    d_lip_index = landmarks().get_polygons_index('D_LIP')
    
    latest_encodings = [] # init a list of the lastest encodings of each cluster of faces
    flag = 0 # set a detector flag in one video while looping
    
    for j in range(v_len):
        success = v_cap.grab()
        if j in sample:
#             print('=========第{}帧========'.format(j))
            success, frame = v_cap.retrieve()
            if not success:
                continue
            frame = imutils.resize(frame, width = 1000)
            
            # detect faces in the grayscale image
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            rects = detector(gray, 0)
            
            # get encodings list of faces
            known_face_locations = [(rect.top(), rect.right(), rect.bottom(), rect.left()) for rect in rects]
            biden_encoding = face_recognition.face_encodings(face_image=frame, 
                                                             known_face_locations=known_face_locations)
                        
            for (i, rect) in enumerate(rects):
                f_encoding = biden_encoding[i] # the encoding of i-th face
                
                # find the face index of this rect update latest_encodings list
                if latest_encodings:
                    results = face_recognition.compare_faces(latest_encodings, f_encoding)
                    if sum(results) == 0:
                        flag = 1
                    else:
                        true_index = results.index(True)
                        latest_encodings[true_index] = f_encoding
                else:
                    flag = 1
                
                if flag:
                    latest_encodings.append(f_encoding)
                    true_index = len(latest_encodings) - 1
                    face_color_avgs.append([])
                    face_color_stds.append([])
                    yanpi_face_avgs.append([])
                    lip_color_avgs.append([])
                    face_fluctuation.append([])
                    brow_dists.append([])
                    brow2nose_dists.append([])
                    brow2eye_dists.append([])
                    brow2nose_angles.append([[],[]])
                    brow_thicks.append([])
                    face_rects.append([])
                    flag = 0
                
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                    
                xia_lian = abs(shape[0][1] - shape[109][1])
                to_append_pnt = [[shape[0][0], int(shape[0][1] + xia_lian * 0.4)], 
                                   [shape[134][0], int(shape[134][1] + xia_lian * 0.4)]]

                # part counter points
                l_eye = shape[l_eye_index]
                r_eye = shape[r_eye_index]
                xiaba = np.concatenate((shape[xiaba_index], to_append_pnt), axis=0)
                u_lip = shape[u_lip_index]
                d_lip = shape[d_lip_index]
                l_brow = shape[l_brow_index]
                r_brow = shape[r_brow_index]
                nose = shape[nose_index]

                # make masks with coordinates
                mask_xiaba = np.zeros((frame.shape[0], frame.shape[1]))
                mask_eye = np.zeros((frame.shape[0], frame.shape[1]))
                mask_lip = np.zeros((frame.shape[0], frame.shape[1]))

                cv2.fillPoly(mask_xiaba, [xiaba], 1)
                cv2.fillPoly(mask_eye, [l_eye, r_eye], 1)
                cv2.fillPoly(mask_lip, [u_lip, d_lip], 1)

                mask_xiaba = mask_xiaba.astype(np.bool)
                xiaba_out = np.zeros_like(gray)
                xiaba_out[mask_xiaba] = gray[mask_xiaba]
                mask_eye = mask_eye.astype(np.bool)
                eye_out = np.zeros_like(gray)
                eye_out[mask_eye] = gray[mask_eye]
                mask_lip = mask_lip.astype(np.bool)
                lip_out = np.zeros_like(gray)
                lip_out[mask_lip] = gray[mask_lip]

                l_x, l_y, l_w, l_h = cv2.boundingRect(l_eye)
                l_eye_rectangle = gray[int(l_y-0.25*l_h):l_y+l_h, l_x:l_x+l_w]
                r_x, r_y, r_w, r_h = cv2.boundingRect(r_eye)
                r_eye_rectangle = gray[int(r_y-0.25*r_h):r_y+r_h, r_x:r_x+r_w]

                # distances
                center = [l_brow.mean(axis=0).tolist(),
                          r_brow.mean(axis=0).tolist(),
                          l_eye.mean(axis=0).tolist(),
                          r_eye.mean(axis=0).tolist(),
                          nose.mean(axis=0).tolist(),
                          np.concatenate((u_lip,d_lip)).mean(axis=0).tolist()]
                brow_dist = np.linalg.norm(shape[70] - shape[82])
                brow2nose_dist = np.linalg.norm(np.concatenate((shape[70],shape[82])).mean(axis=0) - shape[143])
                brow2eye_dist = (np.linalg.norm(np.array(center[0])-np.array(center[2])) + np.linalg.norm(np.array(center[1])-np.array(center[3]))) / 2
                brow_thick = (np.linalg.norm(shape[96] - shape[110]) + np.linalg.norm(shape[74] - shape[88])) / 2

                # angels
                nose_vec = np.array([shape[139],shape[147]]).mean(axis=0).tolist() + np.array([shape[135],shape[151]]).mean(axis=0).tolist()
                l_brow_vec = shape[103].tolist() + shape[113].tolist()
                r_brow_vec = shape[81].tolist() + shape[91].tolist()
                l_brow2nose_angel = calc_angle(nose_vec,l_brow_vec)
                r_brow2nose_angel = calc_angle(nose_vec,r_brow_vec)


                # calc feature and drop them into corresponding face index
                face_color_avg = xiaba_out.sum() / mask_xiaba.sum()
                face_color_avgs[true_index].append(face_color_avg)
                face_color_stds[true_index].append(xiaba_out.std())
                yanpi_avg = (l_eye_rectangle.sum() + r_eye_rectangle.sum() - eye_out.sum()) / ((l_eye_rectangle.shape[0] + \
                        r_eye_rectangle.shape[0]) * (l_eye_rectangle.shape[1] + r_eye_rectangle.shape[1]) - mask_eye.sum())
                yanpi_face_avgs[true_index].append(yanpi_avg)
                lip_color_avg = lip_out.sum() / mask_lip.sum()
                lip_color_avgs[true_index].append(lip_color_avg)
                face_fluctuation[true_index].append(center)
                brow_dists[true_index].append(brow_dist)
                brow2nose_dists[true_index].append(brow2nose_dist)
                brow2eye_dists[true_index].append(brow2eye_dist)
                brow2nose_angles[true_index][0].append(l_brow2nose_angel)
                brow2nose_angles[true_index][1].append(r_brow2nose_angel)
                brow_thicks[true_index].append(brow_thick)
                face_rects[true_index].append(rect.left())

    v_cap.release() # release video
    
    face_cnt = len(face_color_avgs) # 脸的数量
    feature_face_color = [[np.array(face_color_avgs[i]).std()] for i in range(face_cnt)] # 面部色度标准差
    feature_face_maxdiff = [[max(face_color_stds[i])] for i in range(face_cnt)] # 面部色度最大差异（最大标准差）
    feature_yanpi_face_diff = [[np.array(yanpi_face_avgs[i]).std()] for i in range(face_cnt)] # 眼皮面部色度差的标准差
    feature_lip_color = [[np.array(lip_color_avgs[i]).std()] for i in range(face_cnt)] # 嘴唇色度标准差
    feature_face_fluctuation = [[max(calc_wuguan_std(face_fluctuation[i]))] for i in range(face_cnt)] # 面部特征移动相对距离
    feature_brow_dist = [[np.array(brow_dists[i]).std()] for i in range(face_cnt)] # 眉毛距离标准差
    feature_brow2nose_dist = [[np.array(brow2nose_dists[i]).std()] for i in range(face_cnt)] # 眉心到鼻尖距离标准差
    feature_brow2eye_dist = [[np.array(brow2eye_dists[i]).std()] for i in range(face_cnt)] # 眉毛到眼睛距离标准差
    feature_brow2nose_angle = [[np.array(brow2nose_angles[i][0]).std(),np.array(brow2nose_angles[i][1]).std()] 
                               for i in range(face_cnt)] # 眉毛与鼻子夹角，左右眉毛都做了计算
    feature_brow_thick = [[np.array(brow_thicks[i]).std()] for i in range(face_cnt)] # 眉毛粗细
    rect_left = [[np.array(face_rects[i]).mean()] for i in range(face_cnt)] # 计算每张脸与左边框的距离，以此来打标签

    feature = []
    for i in range(face_cnt):
        feature.append(feature_face_color[i] +
                       feature_face_maxdiff[i] +
                       feature_yanpi_face_diff[i] +
                       feature_lip_color[i] +
                       feature_face_fluctuation[i] +
                      feature_brow_dist[i] +
                      feature_brow2nose_dist[i] +
                      feature_brow2eye_dist[i] +
                      feature_brow2nose_angle[i] +
                      feature_brow_thick[i] + 
                      rect_left[i])
    print(feature)
    return feature

In [5]:
def main():
    return feature_calc('../data/exbxfmqqpx.mp4')

In [6]:
import time
t1 = time.time()
main()
print(time.time()-t1)

[[3.146701306650596, 5.021132139396925, 1.1200151690175546, 4.762820441642136, 3.8737907586797324, 2.1723619695792027, 6.842880249126811, 0.9643196159810988, 3.103250761581506, 2.372796933843307, 0.5015199231916689, 654.0909090909091], [0.0, 7.874720743840239, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 194.0]]
3.948089122772217


In [None]:
# make csv
import pandas as pd
import json
import os

metadata = json.loads(open('./data/metadata.json').read())
features = []
for video in metadata.keys():
    for k in range(10):
        if os.path.exists(os.path.join('./data/dfdc_train_part_{}'.format(k),video)):
            video_path = os.path.join('./data/dfdc_train_part_{}'.format(k),video)
            if metadata[video]['label'] == 'FAKE':
                label = 0
            else:
                label = 1
            feature = feature_calc(video_path)
            if feature:
                for i in range(len(feature)):
                    features.append([video] + feature[i] + [label])
                    print([video] + feature[i] + [label])
        break
                    
res = pd.DataFrame(features,columns = ['video',
                                       'face_color',
                                       'face_maxdiff',
                                       'yanpi_face_diff',
                                       'lip_color',
                                       'face_fluctuation',
                                       'brow_dist',
                                       'brow2nose_dist',
                                       'brow2eye_dist',
                                       'l_brow2nose_angle',
                                       'r_brow2nose_angle',
                                       'brow_thick',
                                       'label'])

# outpath = '../feature.csv'
# df.to_csv(outputpath,sep=',',index=False,header=True)