In [1]:
############################## Import Packages ##############################
import os
import tensorflow as tf
from deepgaze.head_pose_estimation import CnnHeadPoseEstimator
from scipy.spatial import distance as dist
from imutils.video import FileVideoStream
from imutils.video import VideoStream
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from imutils import face_utils
import numpy as np
import imutils
import dlib
import cv2
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt

[DEEPGAZE] head_pose_estimation.py: the dlib library is installed.


Using TensorFlow backend.


In [2]:
def rect_to_bb(rect):
    """Convert a rectangle object into an ``(x, y, w, h)`` bounding-box tuple.

    Parameters
    ----------
    rect : object
        Any object exposing ``left()``, ``top()``, ``right()`` and
        ``bottom()`` accessors.

    Returns
    -------
    tuple
        ``(x, y, w, h)`` where ``x``/``y`` are the left/top values and
        ``w``/``h`` are ``right - left`` and ``bottom - top``.
    """
    left, top = rect.left(), rect.top()
    width = rect.right() - left
    height = rect.bottom() - top
    return (left, top, width, height)



In [3]:
def _crop_eye_patch(frame, eye, pad=15, size=(60, 36)):
    """Return the padded eye region of ``frame`` resized to ``size``, or ``None``.

    ``eye`` is the block of eye landmarks sliced from the landmark array.
    The box runs horizontally from ``eye[0]`` to ``eye[3]`` and vertically
    from ``eye[1]`` to ``eye[5]``, grown by ``pad`` pixels on every side.
    ``size`` is ``(width, height)`` as passed to ``cv2.resize``.

    Bounds are clamped at 0 so that a box reaching past the top/left image
    border does not become a negative (wrap-around) slice index. ``None`` is
    returned when the clamped box is empty, since ``cv2.resize`` cannot
    handle an empty image.
    """
    x1 = max(int(eye[0, 0]) - pad, 0)
    y1 = max(int(eye[1, 1]) - pad, 0)
    x2 = max(int(eye[3, 0]) + pad, 0)
    y2 = max(int(eye[5, 1]) + pad, 0)
    crop = frame[y1:y2, x1:x2]
    if crop.size == 0:
        return None
    return cv2.resize(crop, size)


def eyes_data(videofile, outputfile):
    """Crop both eye regions from every frame of a video and save them.

    Each frame is searched for faces with the dlib frontal face detector and
    landmarks are predicted with the model file ``"lol.dat"``. The left and
    right eye regions are cropped with 15 px of padding and resized to
    60x36 (width x height). If several faces are found, the last one wins.

    Exactly one patch per eye is stored per frame. Frames where no face is
    found (or where the eye box is empty) contribute an all-zero patch, so
    the position in the saved arrays matches the frame number. Previously
    such frames either raised ``NameError`` (first frame) or silently
    re-used the crop of an earlier frame.

    Parameters
    ----------
    videofile : str
        Path of the video opened with ``cv2.VideoCapture``.
    outputfile : str
        Output prefix; the function writes
        ``<outputfile>_left_eye_data.npy`` and
        ``<outputfile>_right_eye_data.npy``.

    Returns
    -------
    None
    """
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("lol.dat")

    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    left_arr = []
    right_arr = []

    i = 0
    vid = cv2.VideoCapture(videofile)
    try:
        while True:
            ret, frame = vid.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Placeholder with the same layout as a resized patch
            # (36 rows x 60 columns, same channels/dtype as the frame).
            blank = np.zeros((36, 60) + frame.shape[2:], dtype=frame.dtype)

            left_patch = None
            right_patch = None
            for rect in detector(gray, 0):
                shape = face_utils.shape_to_np(predictor(gray, rect))
                left_patch = _crop_eye_patch(frame, shape[lStart:lEnd])
                right_patch = _crop_eye_patch(frame, shape[rStart:rEnd])

            left_arr.append(blank if left_patch is None else left_patch)
            right_arr.append(blank if right_patch is None else right_patch)
            i += 1
    finally:
        # Always free the decoder handle, even if a frame fails to process.
        vid.release()

    left_data = np.array(left_arr)
    right_data = np.array(right_arr)
    print(i, left_data.shape, right_data.shape)

    np.save(outputfile + '_left_eye_data.npy', left_data)
    np.save(outputfile + '_right_eye_data.npy', right_data)
    


In [4]:
def get_pose(videofile, outputfile):
    """Estimate roll, pitch and yaw of the head for every frame of a video.

    Each frame is resized to a width of 450 px and searched for faces. The
    pixels inside the landmark hull are copied onto a black canvas, the
    region around the landmarks is cropped, resized to 200x200 and passed to
    the deepgaze ``CnnHeadPoseEstimator``.

    Changes relative to the earlier version:

    * The frame counter now advances on frames without a face too. It used
      to be skipped by ``continue``, so indices stalled and repeated.
    * ``np.bool`` (removed from recent NumPy releases) is replaced by the
      builtin ``bool``.
    * Crop bounds are clamped at 0 so a face near the top/left border does
      not produce a wrap-around slice.
    * The TensorFlow session and the video capture are always released.

    Parameters
    ----------
    videofile : str
        Path of the video opened with ``cv2.VideoCapture``.
    outputfile : str
        Currently unused (saving to disk is disabled); kept so the
        signature matches the other extractors.

    Returns
    -------
    numpy.ndarray
        One row per frame: ``[frame_index, roll, pitch, yaw]``. Frames where
        no face is detected get ``[frame_index, 0, 0, 0]``. The array is
        empty when no frame could be read.
    """
    sess = tf.Session()  # Launch the graph in a session.
    vid = None
    arr = []
    i = 0
    try:
        my_head_pose_estimator = CnnHeadPoseEstimator(sess)  # Head pose estimation object
        my_head_pose_estimator.load_roll_variables(os.path.realpath("/ssd_scratch/cvit/isha2/DGM_final2/deepgaze/etc/tensorflow/head_pose/roll/cnn_cccdd_30k.tf"))
        my_head_pose_estimator.load_pitch_variables(os.path.realpath("/ssd_scratch/cvit/isha2/DGM_final2/deepgaze/etc/tensorflow/head_pose/pitch/cnn_cccdd_30k.tf"))
        my_head_pose_estimator.load_yaw_variables(os.path.realpath("/ssd_scratch/cvit/isha2/DGM_final2/deepgaze/etc/tensorflow/head_pose/yaw/cnn_cccdd_30k.tf"))

        detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor("lol.dat")

        vid = cv2.VideoCapture(videofile)

        while True:
            ret, frame = vid.read()
            if not ret:
                break

            frame = imutils.resize(frame, width=450)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            out_face = np.zeros_like(frame)
            # Stays all-zero when no face is found; that is what the
            # "face not detected" test below relies on.
            crop_img = out_face

            for rect in detector(gray, 0):
                shape = face_utils.shape_to_np(predictor(gray, rect))

                hull = cv2.convexHull(shape)
                cv2.drawContours(frame, [hull], -1, (0, 255, 0), 1)

                # NOTE(review): only the first 27 hull vertices are filled,
                # as in the original code - confirm this is intentional.
                feature_mask = np.zeros(frame.shape[:2], dtype=np.uint8)
                cv2.fillConvexPoly(feature_mask, hull[0:27], 1)
                feature_mask = feature_mask.astype(bool)
                out_face[feature_mask] = frame[feature_mask]

                # Box = landmark extent with 20 px margin before and 30 px
                # after (the original x-20 .. x-20+width+50), clamped at 0
                # so negative values cannot wrap around when slicing.
                x1 = max(int(shape[:, 0].min()) - 20, 0)
                y1 = max(int(shape[:, 1].min()) - 20, 0)
                x2 = max(int(shape[:, 0].max()) + 30, 0)
                y2 = max(int(shape[:, 1].max()) + 30, 0)
                crop_img = out_face[y1:y2, x1:x2]

            if np.sum(crop_img) == 0:
                print('face not detected')
                arr.append([i, 0, 0, 0])
            else:
                image = cv2.resize(crop_img, (200, 200))

                roll = my_head_pose_estimator.return_roll(image)  # Evaluate the roll angle using a CNN
                pitch = my_head_pose_estimator.return_pitch(image)  # Evaluate the pitch angle using a CNN
                yaw = my_head_pose_estimator.return_yaw(image)  # Evaluate the yaw angle using a CNN

                arr.append([i, roll[0, 0, 0], pitch[0, 0, 0], yaw[0, 0, 0]])

            # Advance on every frame so column 0 is the true frame number.
            i += 1
    finally:
        if vid is not None:
            vid.release()
        sess.close()

    result = np.array(arr)
    print(i, result.shape)
    return result


In [5]:
def _eye_center(eye):
    """Return the integer ``(x, y)`` centre of an eye landmark block.

    The centre is the midpoint between the midpoint of landmarks 1 and 2
    and the midpoint of landmarks 5 and 4, truncated to ``int``.
    """
    upper_x = (eye[1, 0] + eye[2, 0]) / 2.0
    upper_y = (eye[1, 1] + eye[2, 1]) / 2.0
    lower_x = (eye[5, 0] + eye[4, 0]) / 2.0
    lower_y = (eye[5, 1] + eye[4, 1]) / 2.0
    return int((upper_x + lower_x) / 2.0), int((upper_y + lower_y) / 2.0)


def get_pupil_location(videofile, outputfile):
    """Extract eye centres, a face-box area and the nose point per frame.

    For every frame the dlib detector and the ``"lol.dat"`` landmark model
    are run. From the landmarks of the (last) detected face it computes:

    * the centre of each eye (see ``_eye_center``),
    * ``area = (shape[8].y - shape[19].y) * (shape[16].x - shape[0].x)``,
    * the coordinates of landmark 33 (stored as ``nose_x``, ``nose_y``).

    Exactly one row is produced per frame. Frames without a detected face
    yield an all-zero feature row, mirroring what ``get_pose`` does.
    Previously such frames raised ``NameError`` (first frame) or silently
    repeated the values of an earlier frame. The debug-only drawing calls
    were dropped because the annotated frame was never used.

    Parameters
    ----------
    videofile : str
        Path of the video opened with ``cv2.VideoCapture``.
    outputfile : str
        Currently unused (saving to disk is disabled); kept so the
        signature matches the other extractors.

    Returns
    -------
    numpy.ndarray
        One row per frame:
        ``[frame_index, left_x, left_y, right_x, right_y, area, nose_x, nose_y]``.
        The array is empty when no frame could be read.
    """
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("lol.dat")

    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    i = 0
    arr = []
    vid = cv2.VideoCapture(videofile)
    try:
        while True:
            ret, frame = vid.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Default feature values for a frame without a detected face.
            pt_x = pt_y = ptr_x = ptr_y = 0
            area = 0
            nose_x = nose_y = 0

            for rect in detector(gray, 0):
                shape = face_utils.shape_to_np(predictor(gray, rect))

                tl_x, tl_y = shape[0, 0], shape[19, 1]
                br_x, br_y = shape[16, 0], shape[8, 1]
                area = (br_y - tl_y) * (br_x - tl_x)

                nose_x, nose_y = shape[33, 0], shape[33, 1]

                pt_x, pt_y = _eye_center(shape[lStart:lEnd])
                ptr_x, ptr_y = _eye_center(shape[rStart:rEnd])

            arr.append([i, pt_x, pt_y, ptr_x, ptr_y, area, nose_x, nose_y])
            i += 1
    finally:
        # Always free the decoder handle, even if a frame fails to process.
        vid.release()

    result = np.array(arr)
    print(i, result.shape)
    return result


In [6]:
# Driver: extract eye crops, head pose and eye/nose features for every sample
# video of one user and store them next to the dataset.
ur = 14  # id of the user whose samples are processed
n_samples = 112  # sample videos are named sample_1.avi .. sample_<n_samples>.avi

# Single definition of the per-user dataset root (was repeated in two literals).
user_dir = "/ssd_scratch/cvit/isha2/DGM_final2/dataset_samples_callibrated/user" + str(ur) + "/"
directory = user_dir + "explicit_face_features_game/"
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(directory, exist_ok=True)

for i in range(0, n_samples):
    print(ur, i)
    video_name = user_dir + "driver_view_cropped/sample_" + str(i + 1) + ".avi"
    dest_folder = directory + "sample" + str(i + 1)

    eyes_data(video_name, dest_folder)
    headpose = get_pose(video_name, dest_folder)
    pupil = get_pupil_location(video_name, dest_folder)

    # Nothing to combine when the video yielded no frames.
    if headpose.shape[0] == 0 or pupil.shape[0] == 0:
        continue

    # np.concatenate needs matching row counts; skip the sample instead of
    # aborting the whole batch if the two extractors ever disagree.
    if headpose.shape[0] != pupil.shape[0]:
        print('row count mismatch, skipping sample', i + 1, headpose.shape, pupil.shape)
        continue

    # Drop pupil's own frame-index column; headpose already carries one.
    headpose_pupil = np.concatenate((headpose, pupil[:, 1:]), axis=1)
    print(headpose_pupil.shape)
    np.save(dest_folder + '_headpose_pupil.npy', headpose_pupil)
    
    

14 0


KeyboardInterrupt: 