## DGAZE: Driver Gaze Mapping on the Road

In [1]:
import os, sys
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Merge, Dropout
from keras.layers import BatchNormalization
from keras.optimizers import SGD, Adam, Adamax
from keras.models import model_from_yaml
from keras.regularizers import l1, l2
from load_dataset import get_data, dataset
from utils import print_metadata, get_dgaze_frames_count, split_data, plot_gaze_points, save_model, load_model

from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
import copy
import cv2 

# Seed every random number generator used in this notebook (NumPy, TensorFlow,
# Python's `random`) so that repeated runs start from the same state.
from numpy.random import seed
seed(1)

from tensorflow import set_random_seed
set_random_seed(2)

import random 
random.seed(3)

# Expose only the CUDA device with index 2 to this process.
# NOTE(review): this is set after `keras` has already been imported above;
# presumably the backend has not initialised the GPU yet at this point -- confirm.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# Disabled: single-threaded CPU session configuration (kept for reference).
# from keras import backend as k
# import tensorflow as tf
# config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1,
# allow_soft_placement=True, device_count = {'CPU': 1})
# sess = tf.Session(graph=tf.get_default_graph(),config=config)
# k.set_session(sess)

# Silence every warning for the rest of the notebook.
import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


### Load Dataset

In [3]:
# Dataset root: one sub-directory per driver (listed below via os.listdir).
data_path = '/ssd_scratch/cvit/isha/eye_gaze_mapping/DGM_final2/dataset_samples_callibrated/'
# Location passed to save_model / load_model for the trained weights.
model_save = '/ssd_scratch/cvit/isha/DGAZE_pretrained_weights/weights_IDGAZE_proposed_approach2'
drivers = os.listdir(data_path)
ndrivers = len(drivers)
# Arguments forwarded to get_data below.
frames_per_seq = 50
sequences = 112
# Training hyper-parameters.
batch_size = 8
learning_rate = 0.001
nepochs = 300

# driver_data is a dict containing the drivers (user1, user2, ... etc.). For each driver we have 112 sequences,
# and for each sequence we have features like ['face_location', 'headpose_pupil', 'left_eye', 'gaze_point', 'right_eye'].
driver_data = get_data(data_path, drivers, sequences, frames_per_seq)

# Print the total number of frames in the dataset
get_dgaze_frames_count(driver_data, drivers)

# Print the DGAZE metadata (here only the list of features; the commented-out
# call also lists drivers and sequences).
#print_metadata(driver_data, ['drivers', 'sequences', 'features'])
print_metadata(driver_data, ['features'])



100%|██████████| 20/20 [00:13<00:00,  1.17it/s]


Total frames in DGAZE dataset is 227178
List of Features: ['left_eye', 'headpose_pupil', 'face_location', 'gaze_point', 'right_eye']






### Dataset split

In [None]:
# Sequences 10..`sequences` take part in the train/val/test split; sequences
# 1-9 are left out here and are used further down as per-driver calibration data.
seq_range = np.arange(10, sequences+1)
nsequences = len(seq_range)
ndrivers = len(drivers)

# Split sizes: 80/10/10 over drivers and 70/15/15 over sequences.
# NOTE(review): int() truncates, so the three counts can sum to less than the
# total (with sequences = 112 there are 103 sequences -> 72 + 15 + 15 = 102).
# How split_data treats the remainder is not visible here -- TODO confirm.
dsplit = [int(0.8*ndrivers),int(0.1*ndrivers), int(0.1*ndrivers)]
gp_split = [int(0.7*nsequences),int(0.15*nsequences), int(0.15*nsequences)]
data_split = split_data(drivers, seq_range, dsplit, gp_split)

In [None]:
# Each split is built from its own set of drivers and its own set of sequences,
# both taken from `data_split`.

# Training dataset
train = dataset(driver_data, data_split['drivers_train'], data_split['sequence_train'])

# Validation dataset
val = dataset(driver_data, data_split['drivers_val'], data_split['sequence_val'])

# Test dataset
test = dataset(driver_data, data_split['drivers_test'], data_split['sequence_test'])

In [None]:
# Report the array shape of every feature, one line per split (train, val, test).
for _split in (train, val, test):
    print(_split['left_eye'].shape, _split['right_eye'].shape, _split['headpose_pupil'].shape,
          _split['face_location'].shape, _split['face_features'].shape, _split['gaze_point'].shape)

# Frames are counted via the first dimension of each split's gaze_point array.
_total_frames = sum(_split['gaze_point'].shape[0] for _split in (train, val, test))
print("Total number of frames -->", _total_frames)

In [None]:
# Largest value in the training left-eye array (quick check of the input value range).
train['left_eye'].max()

## Plot gaze point distribution on road image

In [None]:
# Plot the gaze points of each split in turn: train, then val, then test.
for _split in (train, val, test):
    plot_gaze_points(data_path, _split['gaze_point'])


## Normalize face features

In [None]:
# Sanity check of the face annotations: for every validation driver, draw the
# face box and three landmark points on the first frame of a driver-view video.
for d in data_split['drivers_val']:
    # NOTE(review): the annotations come from sequence 12 while the frame is
    # read from sample_10.avi -- confirm that this mismatch is intended.
    data_calibrate = dataset(driver_data, [d], np.arange(12,13))
    x = data_calibrate['face_location']
    y = data_calibrate['headpose_pupil']

    cap = cv2.VideoCapture(data_path + d + '/driver_view/sample_10.avi')
    try:
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if decoding fails.
        cap.release()

    if not ret or frame is None:
        # Missing or unreadable video: skip this driver instead of crashing in cv2.
        print("Could not read a frame for driver " + str(d))
        continue

    # OpenCV decodes frames as BGR while matplotlib expects RGB. Convert before
    # drawing so the image colours are right and the overlay colours below are
    # displayed exactly as their tuples are written.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    plt.figure()
    # face_location columns are used as (col2, col0) -> (col3, col1) corner
    # points; presumably stored as [y1, y2, x1, x2] -- TODO confirm.
    # OpenCV drawing functions need plain Python ints for coordinates.
    cv2.rectangle(frame, (int(x[0,2]), int(x[0,0])), (int(x[0,3]), int(x[0,1])), (255, 255, 255), 6)
    # Three (x, y) pairs taken from headpose_pupil columns 6-7, 4-5 and 9-10.
    cv2.circle(frame,(int(y[0,6]), int(y[0,7])),3,(255,255,0),40)
    cv2.circle(frame,(int(y[0,4]), int(y[0,5])),3,(255,255,0),40)
    cv2.circle(frame,(int(y[0,9]), int(y[0,10])),3,(255,255,0),40)
    plt.imshow(frame)
    plt.show()

In [None]:
# Min-max normalization of the face features is currently DISABLED: no `scaler`
# object is created by this cell, and the model below is trained on the raw
# face_features values.
# scaler = preprocessing.MinMaxScaler()
# train['face_features'] = scaler.fit_transform(train['face_features'])
# val['face_features'] = scaler.transform(val['face_features'])
# test['face_features'] = scaler.transform(test['face_features'])


## I-DGAZE: Predicting driver gaze on road

In [None]:
# Eye branch: small CNN over 36x60 3-channel left-eye crops, flattened at the end.
model_lefteye = Sequential()
model_lefteye.add(Conv2D(20, kernel_size=(3, 3), activation='relu', input_shape=(36, 60, 3)))
model_lefteye.add(MaxPooling2D(pool_size=(2, 2)))
model_lefteye.add(Dropout(0.5))
model_lefteye.add(Conv2D(50, (3, 3), activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
model_lefteye.add(MaxPooling2D(pool_size=(2, 2)))
model_lefteye.add(Flatten())

# Face-feature branch: a single dense layer over the 14-dimensional feature vector.
model_facefeatures = Sequential()
model_facefeatures.add(Dense(16, activation='relu', input_dim=14))

# Fusion head: concatenate both branches, then regress two output values
# (the training cell uses the first two gaze_point columns as targets).
# NOTE(review): `Merge` is a legacy Keras layer that only exists in older
# releases -- confirm the pinned Keras version before upgrading.
model_merge = Sequential()
model_merge.add(Merge([model_lefteye, model_facefeatures], mode='concat'))

model_merge.add(Dense(512, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
model_merge.add(Dense(2))

# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
model_merge.summary()

In [None]:
# Train the merged model with MAE loss.
# The learning rate comes from the configuration cell instead of a duplicated literal.
opt = Adam(lr=learning_rate)
model_merge.compile(loss='mae', optimizer=opt)

# Stop once val_loss has failed to improve by at least 1 (in loss units) for
# 3 consecutive epochs.
earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1, patience=3,
                                              verbose=0, mode='auto')

# Inputs: left-eye crops + face features; targets: first two gaze_point columns.
# The batch size of 32 used here is independent of the configured `batch_size`.
history = model_merge.fit([train['left_eye'], train['face_features']], train['gaze_point'][:, :2],
                          validation_data=([val['left_eye'], val['face_features']], val['gaze_point'][:, :2]),
                          epochs=nepochs, batch_size=32, callbacks=[earlystopping], verbose=1, shuffle=True)

# Persist the trained model so the calibration cells can reload it.
save_model(model_save, model_merge)

print(history.history.keys())

In [None]:
# Training vs. validation loss per epoch. Labels and a legend are needed because
# the two curves are otherwise indistinguishable when the figure is read on its own.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.xlabel('epoch')
plt.ylabel('loss (MAE)')
plt.title('I-DGAZE training history')
plt.legend()
plt.show()

## Quantitative Results

In [None]:
def gaze_error(model, data):
    """Evaluate `model` on one dataset split and return whatever `evaluate` yields.

    The model receives the left-eye images and the face features as its two
    inputs; the targets are the first two columns of `data['gaze_point']`.
    """
    network_inputs = [data['left_eye'], data['face_features']]
    gaze_targets = data['gaze_point'][:, :2]
    return model.evaluate(network_inputs, gaze_targets)


###  ==> Before Calibration

In [None]:
# Evaluate the freshly trained model on all three splits (train, val, test, in
# that order) before printing any of the results.
train_error, val_error, test_error = (
    gaze_error(model_merge, split) for split in (train, val, test)
)

print("Train Error ==> ", train_error)
print("Val Error ==> ",  val_error)
print("Test Error ==> " ,test_error)

### ==> After Calibration

In [None]:
# Calibration dataset: sequences 1-9 of the test drivers. These sequences are
# outside seq_range (which starts at 10), so they are not part of `test`.
test_calibrate = dataset(driver_data, data_split['drivers_test'], np.arange(1,10))
# Disabled together with the face-feature scaler:
#test_calibrate['face_features'] = scaler.transform(test_calibrate['face_features'])

# Shapes of every feature array in the calibration set.
print(test_calibrate['left_eye'].shape, test_calibrate['right_eye'].shape, test_calibrate['headpose_pupil'].shape, \
      test_calibrate['face_location'].shape, test_calibrate['face_features'].shape, test_calibrate['gaze_point'].shape)

In [None]:
# Calibration: reload the saved model and fine-tune it on the calibration
# sequences of the test drivers. Learning rate and epoch budget come from the
# configuration cell instead of duplicated literals.
opt = Adam(lr=learning_rate)
model_merge = load_model(model_save)
model_merge.compile(loss='mae', optimizer=opt)

# NOTE(review): validation_data is the *test* split and `earlystopping`
# monitors val_loss, so the stopping epoch is chosen on the same data whose
# error is reported afterwards. Consider holding out part of the calibration
# sequences for early stopping instead; left unchanged here because it would
# alter the reported numbers.
model_merge.fit([test_calibrate['left_eye'], test_calibrate['face_features']],
                test_calibrate['gaze_point'][:, :2],
                validation_data=([test['left_eye'], test['face_features']], test['gaze_point'][:, :2]),
                epochs=nepochs, batch_size=batch_size, callbacks=[earlystopping], verbose=1, shuffle=True)


In [None]:
# Test error of the calibrated model; this overwrites the pre-calibration
# value stored in `test_error`.
test_error = gaze_error(model_merge, test)
print("Test Error ==> " ,test_error)

## Qualitative Results

In [None]:
from copy import copy 

def plot_data(driver_path, driver_data, model_save, driver, seq, n):
    """Show ground-truth vs. predicted gaze for one driver sequence, before and after calibration.

    The model stored at `model_save` is applied twice to sequence `seq` of
    `driver`: once as loaded, and once after a freshly loaded copy has been
    fine-tuned for 20 epochs on that driver's sequences 1-9. Five panels are
    concatenated side by side, downscaled by 4 and displayed:

      1. driver-view frame
      2. road frame + ground-truth gaze at the middle index
      3. road frame + ground truth and uncalibrated prediction at the middle index
      4. road frame + ground truth and uncalibrated predictions for the first `n` frames
      5. road frame + ground truth and calibrated predictions for the first `n` frames

    Ground truth is drawn with colour (0, 255, 0) and predictions with
    (0, 0, 255), both in OpenCV's BGR order.

    Face features are fed to the model unscaled, matching how the model is
    trained in this notebook (the MinMax scaler cell is commented out, so no
    `scaler` object exists).

    Args:
        driver_path: dataset root containing one directory per driver.
        driver_data: dictionary returned by `get_data`.
        model_save: location understood by `load_model`.
        driver: name of the driver directory.
        seq: sequence number; selects both the data and the `sample_<seq>.avi` videos.
        n: number of frames to overlay; frame number int(n/2) of the videos is
            used as the background image.

    Returns:
        The concatenated, downscaled panel image in BGR channel order, or None
        when a video is missing, no frame can be decoded, or the sequence has
        no data.
    """
    radius = 70
    truth_color = (0, 255, 0)
    pred_color = (0, 0, 255)

    road_video = driver_path + "/" + driver + "/original_road_view/sample_" + str(seq) + ".avi"
    driver_video = driver_path + "/" + driver + "/driver_view/sample_" + str(seq) + ".avi"

    # Check for the videos first so that nothing is loaded or trained for a
    # sequence that cannot be visualised anyway.
    if not (os.path.exists(road_video) and os.path.exists(driver_video)):
        return None

    data = dataset(driver_data, [driver], np.arange(seq, seq + 1))
    truth = data['gaze_point']
    model_inputs = [data['left_eye'], data['face_features']]

    # Before calibration: predictions of the model exactly as saved.
    model = load_model(model_save)
    model.compile(loss='mae', optimizer=Adam(lr=0.0001))
    pred = model.predict(model_inputs).astype(int)

    # After calibration: fine-tune a freshly loaded copy on sequences 1-9 of
    # the same driver, then predict the same frames again.
    data_calibrate = dataset(driver_data, [driver], np.arange(1, 10))
    model = load_model(model_save)
    model.compile(loss='mae', optimizer=Adam(lr=0.0001))
    model.fit([data_calibrate['left_eye'], data_calibrate['face_features']],
              data_calibrate['gaze_point'][:, :2], epochs=20, batch_size=batch_size,
              verbose=1, shuffle=True)
    pred_calibrate = model.predict(model_inputs).astype(int)

    # Never index past the frames that actually exist for this sequence.
    n_points = min(n, len(truth), len(pred), len(pred_calibrate))
    if n_points < 1:
        return None
    mid = min(int(n / 2), n_points - 1)

    # Advance both videos to frame int(n/2); keep the last frame that decoded
    # successfully if a video turns out to be shorter than that.
    road_frame = None
    driver_frame = None
    cap1 = cv2.VideoCapture(road_video)    # road video
    cap2 = cv2.VideoCapture(driver_video)  # driver video
    try:
        for _ in range(max(1, int(n / 2))):
            ok_road, next_road = cap1.read()
            ok_driver, next_driver = cap2.read()
            if not (ok_road and ok_driver):
                break
            road_frame, driver_frame = next_road, next_driver
    finally:
        cap1.release()
        cap2.release()

    if road_frame is None or driver_frame is None:
        return None

    def as_point(row):
        # OpenCV drawing functions need plain Python ints for coordinates.
        return (int(row[0]), int(row[1]))

    def overlay(indices, predictions=None):
        # Copy of the road frame with ground truth (and optionally predictions)
        # drawn for every index, in index order.
        canvas = road_frame.copy()
        for idx in indices:
            cv2.circle(canvas, as_point(truth[idx]), radius, truth_color, -1)
            if predictions is not None:
                cv2.circle(canvas, as_point(predictions[idx]), radius, pred_color, -1)
        return canvas

    frame1 = driver_frame
    frame2 = overlay([mid])
    frame3 = overlay([mid], pred)
    frame4 = overlay(range(n_points), pred)
    frame5 = overlay(range(n_points), pred_calibrate)

    frame_array = np.concatenate((frame1, frame2, frame3, frame4, frame5), axis=1)
    frame_array = cv2.resize(frame_array, (int(frame_array.shape[1] / 4), int(frame_array.shape[0] / 4)))

    plt.figure(figsize=(20, 10))
    plt.axis('off')
    # Frames are BGR; convert only for display so the returned array stays in
    # the channel order OpenCV functions such as cv2.imwrite expect.
    plt.imshow(cv2.cvtColor(frame_array, cv2.COLOR_BGR2RGB))
    plt.show()

    return frame_array
    

  

In [None]:
# Qualitative results for the test and validation drivers: for every base
# sequence 10, 20, ..., 90 the i-th user is shown on sequence base + i.
users = data_split['drivers_test'] + data_split['drivers_val'] 
k = 0
for j in range(10, 100, 10):
    k += 1
  #  image_name = 'qualitative_results'+str(k)+'.png'
    for i, user in enumerate(users):
        frame_array = plot_data(data_path, driver_data, model_save, user, j + i, 50)

#         print(frame_array.shape)
#         if(i ==0):
#             frame_array2 = frame_array
#         else:
#             frame_array2 = np.concatenate((frame_array2, frame_array), axis =0)
#    # cv2.imwrite(image_name, frame_array2)

In [None]:
# Display the dataset root directory used by this notebook.
data_path

In [None]:
# Display which drivers ended up in the test split.
data_split['drivers_test'] 