In [None]:
from keras.models import Sequential, Model
from keras.layers.core import Dense, Lambda, Flatten, Reshape, Dropout
from keras.layers.convolutional import Conv2D, UpSampling2D,Cropping2D,ZeroPadding2D
from keras.layers.pooling import MaxPooling2D, AveragePooling2D, MaxPool2D
from keras.layers.merge import Concatenate, Add
from keras.layers import Input
from keras import optimizers
from keras import losses
from keras import metrics
from keras.layers.normalization import BatchNormalization
from keras.layers import Activation
from keras.layers import TimeDistributed
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers import LSTM
from keras import regularizers
import keras.backend as K
import keras

from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input

from keras.utils import plot_model
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, Callback
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf

from pathlib import Path
import numpy as np
import h5py
import cv2
import datetime
import time
from sklearn.metrics import confusion_matrix
from sklearn.metrics import hamming_loss



In [None]:
class LoggingTensorBoard(TensorBoard):
    """TensorBoard callback that additionally carries a description of the run settings.

    The settings object is only kept on the instance as ``settings_str``;
    nothing in this class writes it to the TensorBoard event files.
    """

    def __init__(self, log_dir, settings_str_to_log, **kwargs):
        """Pass ``log_dir`` and any extra options to ``TensorBoard`` and remember the settings.

        Args:
            log_dir: directory handed to the ``TensorBoard`` base class.
            settings_str_to_log: object describing the run settings; stored as-is.
            **kwargs: forwarded unchanged to ``TensorBoard.__init__``.
        """
        super().__init__(log_dir, **kwargs)
        self.settings_str = settings_str_to_log

    def on_train_begin(self, logs=None):
        """Run the base-class start-of-training hook without adding anything to it."""
        super().on_train_begin(logs=logs)

In [None]:
# Location of the HDF5 file that holds all three splits.
HDF5_PATH = "./Data_10sec.hdf5"

# Open read-only; the handle stays open because later cells read from it.
hdf = h5py.File(HDF5_PATH, "r")
pframes = hdf["Validation_Set/Total_Video"]

# Sanity check: display a single frame of the validation video.
_, preview_ax = plt.subplots()
preview_ax.imshow(pframes[44], interpolation='nearest')
plt.show()

In [None]:
# Number of samples in each split, measured on the split's gaze dataset
train_len, test_len, val_len = (
    len(hdf[split_name + "/Total_Gaze"])
    for split_name in ("Training_Set", "Testing_Set", "Validation_Set")
)


In [None]:
def myGenerator(set_name,batch_size,timesteps):
    """Endlessly yield ``([gaze, frames], labels)`` batches from one split of the HDF5 file.

    Each sample starts at an index ``idx`` and consists of the ``timesteps``
    consecutive gaze rows and video frames ``idx .. idx + timesteps - 1``; its
    label is the label row at ``idx + timesteps`` (the row right after the window).

    Args:
        set_name: HDF5 group to read, e.g. ``"Training_Set"``.
        batch_size: number of samples per yielded batch.
        timesteps: number of consecutive rows/frames per sample.

    Yields:
        A tuple ``([gaze, frames], labels)`` of numpy arrays where
        ``gaze`` is reshaped to ``(batch_size, -1, 2, 1)`` (columns 1:3 of the
        last axis of ``Total_Gaze``), ``frames`` stacks the frame windows
        (leading axes ``batch_size, timesteps``) and ``labels`` stacks columns
        1:10 of ``Total_Label`` (one row per sample).

    Note:
        One pass over the data yields ``(n_rows // batch_size) - 1`` batches and
        then starts again from the first sample, so the generator never ends.
    """
    # The context manager releases the HDF5 handle once the generator is closed
    # or garbage-collected; a bare h5py.File(...) call would leave it open.
    with h5py.File(HDF5_PATH, "r") as hdf:
        # Gaze and labels are read into memory once; frames stay on disk and
        # are sliced per sample.
        pgaze = hdf[set_name + "/Total_Gaze"][:, :, 1:3]
        pframes = hdf[set_name + "/Total_Video"]
        pLabels = hdf[set_name + "/Total_Label"][:, 1:10]

        len_train = pgaze.shape[0]
        # Valid window start indices. The order is sequential: shuffling
        # (np.random.shuffle(sample_order) at the start of each pass) is disabled.
        sample_order = list(range(len_train - timesteps))
        batches_per_pass = (len_train // batch_size) - 1

        while True:
            for batch_no in range(batches_per_pass):
                first = batch_no * batch_size
                # Index one by one (not by slice) so an out-of-range start
                # raises instead of silently producing a short batch.
                starts = [sample_order[first + offset] for offset in range(batch_size)]

                gaze = [pgaze[start:start + timesteps] for start in starts]
                frames = [pframes[start:start + timesteps] for start in starts]
                labels = [pLabels[start + timesteps] for start in starts]

                yield [np.array(gaze).reshape(batch_size, -1, 2, 1), np.array(frames)], np.array(labels)
            

In [None]:
# Hyperparameters
NUM_CLASSES = 9          # width of the sigmoid output layer
epochs = 150
batch_size = 15
timesteps = 60 // 5      # rows/frames per input window (evaluates to 12)

# Generator over the training split, consumed by the inspection cells below
g = myGenerator('Training_Set', batch_size, timesteps)

In [None]:
# Overlay the gaze points on every frame of one sample of the next training batch.
x, y = next(g)
i = 3  # index of the inspected sample inside the batch

# myGenerator flattens the gaze of each sample to (timesteps * points, 2, 1).
# Restore the per-timestep grouping so that frame `idx` of sample `i` is drawn
# with its own gaze points (indexing x[0] with `idx` would pick a different
# sample and plot the points of all its timesteps at once).
sample_gaze = x[0][i].reshape(timesteps, -1, 2)

for idx in range(timesteps):
    img = x[1][i][idx]
    gaze = sample_gaze[idx]
    plt.imshow(img)
    # Scaling by the image size presumes gaze coordinates normalised to [0, 1]
    # (column 0 horizontal, column 1 vertical) -- TODO confirm against the dataset.
    plt.scatter(gaze[:,0]*img.shape[1], gaze[:,1]*img.shape[0], c="r")
    plt.show()

In [None]:
# Shapes of the two model inputs (gaze first, frames second), read off the sample batch
gazeShape, imgShape = (batch_part.shape for batch_part in x)

print(y.shape, gazeShape, imgShape)

# Model

In [None]:
with tf.device('/device:GPU:0'):
    # TF1 session options: log op placement, allow falling back to another
    # device, and configure GPU memory (grow on demand, 0.7 per-process
    # fraction, BFC allocator).
    config = tf.ConfigProto(log_device_placement = True, allow_soft_placement = True)
    config.gpu_options.allow_growth=True
    config.gpu_options.per_process_gpu_memory_fraction = 0.7
    config.gpu_options.allocator_type = 'BFC'
    
    with tf.Session(config=config):
        # Timestamped run name shared by the TensorBoard folder and the checkpoint files.
        readable_timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
        file_name = "Team3_" + readable_timestamp
        print(file_name)
        tensorboardFolder = "/srv/share/tensorboardfiles/" + file_name

        # Create the checkpoint directory up front so the first checkpoint
        # save does not fail on a missing directory.
        models_dir = Path.home() / "models"
        models_dir.mkdir(parents=True, exist_ok=True)
        
        optimizer=optimizers.Adam(lr = 0.0001)

        # --- Scene branch: frozen ImageNet VGG19 applied to every frame, then an LSTM ---
        cnn_base = VGG19(input_shape=(imgShape[2], imgShape[3], imgShape[4]), weights="imagenet", include_top=False)
        cnn_out = GlobalAveragePooling2D()(cnn_base.output)
        cnn = Model(inputs=cnn_base.input, outputs=cnn_out) #VGG19 pretrained model
        for layer in cnn.layers: 
            layer.trainable = False   # keep the pretrained weights fixed
            
        scene_input = Input(shape=(timesteps,imgShape[2], imgShape[3], imgShape[4]), name='scene_input')
        scene_frames = TimeDistributed(cnn)(scene_input) #Timedistributed wrapper around VGG19 model
        scene_flatten_TD = TimeDistributed(Flatten())(scene_frames)
        scene_flatten = LSTM(50, dropout=0.7, recurrent_dropout=0.7)(scene_flatten_TD)

        # --- Gaze branch: two Conv2D + max-pooling stages over the flattened gaze sequence ---
        gaze_input = Input(shape = (gazeShape[1], gazeShape[2],gazeShape[3]),name = "gaze_input") #reshaped gaze input
        gaze_Conv2D_1 = Conv2D(filters = 64, kernel_size = (4,1), activation ='relu',
                               kernel_initializer= 'glorot_normal')(gaze_input)
        gaze_Maxpool = MaxPool2D(pool_size=(10,1))(gaze_Conv2D_1)
       
        gaze_Conv2D_2 = Conv2D(filters = 32, kernel_size = (4,1), activation ='relu',
                               kernel_initializer= 'glorot_normal')(gaze_Maxpool)
        gaze_Maxpool2 = MaxPool2D(pool_size=(10,1))(gaze_Conv2D_2)
        
        gaze_flatten = Flatten()(gaze_Maxpool2)

        # --- Head: concatenate both branches, two regularised dense layers, sigmoid outputs ---
        Merge = Concatenate()([gaze_flatten, scene_flatten]) # concatenate intermediate outputs from gaze and scene model

        Dense1 = Dense(256,activation='relu',kernel_regularizer=regularizers.l2(0.01))(Merge)
        Dense1 = Dropout(0.3)(Dense1)
        Dense2 = Dense(128,activation='relu',kernel_regularizer=regularizers.l2(0.01))(Dense1)
        Dense2 = Dropout(0.3)(Dense2)
        Out = Dense(NUM_CLASSES, activation='sigmoid')(Dense2) # final output layer

        def multitask_loss(y_true, y_pred):
            """Binary cross-entropy summed over the outputs and averaged over the batch."""
            # Keep predictions away from exactly 0 and 1 so both logarithms stay finite
            y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
            return K.mean(K.sum(- y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred), axis=1))

        def acc(y_true, y_pred):
            """Per-output binary accuracy (predictions thresholded at 0.5).

            With a custom loss and more than one output unit, the string "acc"
            is resolved by Keras to categorical accuracy (argmax comparison),
            which does not describe independent sigmoid outputs. The function
            is deliberately named ``acc`` so the reported keys stay ``acc`` /
            ``val_acc``. Loading a saved model needs this function and
            ``multitask_loss`` passed as ``custom_objects``.
            """
            return metrics.binary_accuracy(y_true, y_pred)
       
        Combined_model = Model(inputs=[gaze_input, scene_input], outputs=[Out])
        Combined_model.compile(optimizer=optimizer, loss=multitask_loss, metrics = [multitask_loss, acc])
        Combined_model.summary()
        
        callbacks = [
            # Keep the full model whenever the validation accuracy improves.
            ModelCheckpoint(str(Path.home()) + "/models/"+ file_name + ".{epoch:04d}-{val_loss:.2f}.h5",
                            monitor='val_acc',
                            verbose=0,
                            save_best_only=True,
                            save_weights_only=False
                           ),
            # Broadcast progress to the tensorboard.
            LoggingTensorBoard(settings_str_to_log=config,
                               log_dir=tensorboardFolder,
                               histogram_freq=0,
                               write_graph=True,
                               write_images=True,
                               update_freq='epoch'
                              ),
            # Learning Rate: multiply by 0.9 after 10 epochs without val_loss improvement
            ReduceLROnPlateau(monitor='val_loss',
                              patience=10,
                              verbose=1,
                              factor=0.9,
                              min_lr=0.00001),
        ]

        # Train on the training split; the "Testing_Set" split is used for validation.
        history = Combined_model.fit_generator(myGenerator("Training_Set",batch_size,timesteps),
                                steps_per_epoch=train_len // batch_size,
                                epochs=epochs,
                                verbose=1,
                                validation_data= myGenerator("Testing_Set",batch_size,timesteps),
                                validation_steps=test_len // batch_size,
                                callbacks = callbacks)

        # Predict on the "Validation_Set" split.
        # NOTE: myGenerator yields (val_len // batch_size) - 1 batches per pass and
        # then restarts, so the last of these `steps` batches repeats the first one.
        result = Combined_model.predict_generator(myGenerator("Validation_Set",batch_size,timesteps),
                                                steps=val_len // batch_size)

# Results

In [None]:
#Ground truth labels of validation set, aligned row by row with `result`.
# myGenerator labels the sample starting at row `idx` with label row
# `idx + timesteps`, walks the samples in order, and restarts from the first
# sample after (val_len // batch_size) - 1 batches. Rebuild exactly that
# sequence instead of relying on a hard-coded row range.
val_labels_all = hdf["Validation_Set/Total_Label"][:, 1:10]
samples_per_pass = ((val_len // batch_size) - 1) * batch_size
sample_starts = np.arange(len(result)) % samples_per_pass
val_label = val_labels_all[sample_starts + timesteps]
print(val_label.shape)

#Convert the probabilities to predicted label vectors (threshold 0.5)
result = (result >= 0.5).astype(result.dtype)
print(hamming_loss(val_label,result))


In [None]:
# Loss and accuracy get separate axes; drawing both on one pyplot figure
# would mix the curves and let the second title/legend overwrite the first.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))

#summarize history for loss
ax_loss.plot(history.history['multitask_loss'])
ax_loss.plot(history.history['val_multitask_loss'])
ax_loss.set_title('model loss')
ax_loss.set_ylabel('loss')
ax_loss.set_xlabel('epoch')
ax_loss.legend(['train', 'validation'], loc='upper left')

# summarize history for accuracy
ax_acc.plot(history.history['acc'])
ax_acc.plot(history.history['val_acc'])
ax_acc.set_title('model accuracy')
ax_acc.set_ylabel('accuracy')
ax_acc.set_xlabel('epoch')
ax_acc.legend(['train', 'validation'], loc='upper left')

plt.show()

In [None]:
#Confusion matrix of each activity
# Loop over the label columns that exist (columns 1:10 give 9 activities);
# a fixed range(10) would index one column past the end.
for activity in range(val_label.shape[1]):
    print("Activity ",activity+1,confusion_matrix(val_label[:,activity], result[:,activity]))
    

In [None]:
#Confusion matrix in percentage
# Loop over the label columns that exist (columns 1:10 give 9 activities);
# a fixed range(10) would index one column past the end.
for val in range(val_label.shape[1]):
    print("Activity", val+1) 
    confusion_mat = confusion_matrix(val_label[:,val], result[:,val])
    mat_toarray = np.asarray(confusion_mat)
    # Express every cell as a share of all samples of this activity column
    array_sum = np.sum(mat_toarray)
    print((mat_toarray/array_sum)*100)