Problem Statement
--------------------------------

Imagine you are working as a data scientist at a home electronics company which manufactures state of the art smart televisions. You want to develop a cool feature in the smart-TV that can recognise five different gestures performed by the user which will help users control the TV without using a remote.

The gestures are continuously monitored by the webcam mounted on the TV. Each gesture corresponds to a specific command:

Thumbs up:  Increase the volume
Thumbs down: Decrease the volume
Left swipe: 'Jump' backwards 10 seconds
Right swipe: 'Jump' forward 10 seconds  
Stop: Pause the movie
 

Each video is a sequence of 30 frames (or images). In the next couple of lectures, our subject matter expert Snehansu will walk you through the structure of the dataset.

In [80]:
# Importing the necessary libraries

import numpy as np
import os
from scipy.misc import imread, imresize
import datetime
import os
import warnings
warnings.filterwarnings("ignore")
from sys import getsizeof
import abc

# We set the random seed so that the results don't vary drastically.

In [81]:
np.random.seed(30)
import random as rn
rn.seed(30)
from keras import backend as K
import tensorflow as tf
tf.random.set_seed(30)

In [82]:
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [83]:
train_doc = np.random.permutation(open('/home/chaitanya/Gesture_Recognition/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/home/chaitanya//Gesture_Recognition/Project_data/val.csv').readlines())
batch_size = 10

In [84]:
#Proj_folder = '/home/chaitanya/Gesture_Recognition/Project_data/'

In [85]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, BatchNormalization, Activation
from keras.layers.convolutional import Conv3D, MaxPooling3D, MaxPooling2D, Conv2D
from keras.layers.recurrent import LSTM
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras import optimizers
from keras.layers import Dropout

In [86]:
Proj_folder = '/home/chaitanya/Gesture_Recognition/Project_data/'

In [87]:
# We need to plot the training and validation plot accuracies.
# Need to mention the losses

def plot(history):
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15,5))
    axes[0].plot(history.history['loss'])
    axes[0].plot(history.history['vald_loss'])
    axes[0].plot(history.history['loss', 'vald_loss'])
    
    axes[1].plot(history.history['categorical_accuracy'])
    axes[1].plot(history.history['vald_categorical_accuracy'])
    axes[1].plot(history.history['categorical_accuracy', 'vald_categorical_accuracy'])

GENERATOR:

The generator should be able to take a batch of videos as input without any error. Steps like cropping, resizing and normalization should be performed successfully. Here we are going to pre-process the images with different dimensions, to create video frames

In [95]:
#ef generator(source_path, folder_list, batch_size):
 #   print( 'Source path = ', source_path, '; batch size =', batch_size)
  #  img_idx = #create a list of image numbers you want to use for a particular video
   # while True:
    #    t = np.random.permutation(folder_list)
     #   num_batches = # calculate the number of batches
      #  for batch in range(num_batches): # we iterate over the number of batches
       #     batch_data = np.zeros((batch_size,x,y,z,3)) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
        #    batch_labels = np.zeros((batch_size,5)) # batch_labels is the one hot representation of the output
         #   for folder in range(batch_size): # iterate over the batch_size
          #      imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0]) # read all the images in the folder
           #     for idx,item in enumerate(img_idx): #  Iterate iver the frames/images of a folder to read them in
            #        image = imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item]).astype(np.float32)
             #       
              #      #crop the images and resize them. Note that the images are of 2 different shape 
               #     #and the conv3D will throw error if the inputs in a batch have different shapes
                    
                #    batch_data[folder,idx,:,:,0] = #normalise and feed in the image
                 #   batch_data[folder,idx,:,:,1] = #normalise and feed in the image
                  #  batch_data[folder,idx,:,:,2] = #normalise and feed in the image
                    
                #batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
            #yield batch_data, batch_labels #you yield the batch_data and the batch_labels, remember what does yield do

        


class ModelBuilder(metaclass=abc.ABCMeta):
    def initialize_path(self, proj_folder):
        self.train_doc = np.random.permutation(open(proj_folder + '/' + 'train.csv').readlines())
        self.train_doc = np.random.permutation(open(proj_folder + '/' + 'val.csv').readlines())
        self.train_path = proj_folder + '/' + 'train'
        self.val_path = proj_folder + '/' + 'val'
        self.num_train_sequences = len(self.train_doc)
        self.num_val_sequences = len(self.val_doc)
        
# Initializing image properties
    def initialize_image_properties(self,image_height=100, image_width=100):
        self.image_height=image_height
        self.image_width=image_width
        self.channels=3
        self.num_channels=5
        self.total_frames=30
        
# Initialising the frames  tp  sample ,batchsize & the generator function, epochs
    

    def initialize_hyperparams(self, frames_to_sample=30, batch_size=20, num_epochs=10):
        self.frames_to_sample=frames_to_sample
        self.batch_size=batch_size
        self.num_epochs=num_epochs
        
    def generator(self,source_path, folder_list, augment=False):
        img_idx = np.round(np.linspace(0,self.total_frames-1,self.frames_to_sample)).astype(int)
        batch_size=self.batch_size
        while True:
            x = np.random.permutation(folder_list)
            num_batches = len(t)//batch_size
            for batch in range(num_batches):
                batch_data, batch_labels=self.one_batch_data(source_path,t,batch,batch_size,img_idx,augment)
                yield batch_data, batch_labels 

            remaining_seq=len(t)%batch_size
        
            if (remaining_seq != 0):
                batch_data, batch_labels= self.one_batch_data(source_path,t,num_batches,batch_size,img_idx,augment,remaining_seq)
                yield batch_data, batch_labels 
    
    
    def one_batch_data(self,source_path,t,batch,batch_size,img_idx,augment,remaining_seq=0):
    
        seq_len = remaining_seq if remaining_seq else batch_size
    
        batch_data = np.zeros((seq_len,len(img_idx),self.image_height,self.image_width,self.channels)) 
        batch_labels = np.zeros((seq_len,self.num_classes)) 
    
        if (augment): batch_data_aug = np.zeros((seq_len,len(img_idx),self.image_height,self.image_width,self.channels))

        
        for folder in range(seq_len): 
            imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0]) 
            for idx,item in enumerate(img_idx):
                
                image = imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item]).astype(np.float32)
                image_resized=imresize(image,(self.image_height,self.image_width,3))
            
                # Normalizing images
                
                batch_data[folder,idx,:,:,0] = (image_resized[:,:,0])/255
                batch_data[folder,idx,:,:,1] = (image_resized[:,:,1])/255
                batch_data[folder,idx,:,:,2] = (image_resized[:,:,2])/255
            
                if (augment):
                    shifted = cv2.warpAffine(image, 
                                             np.float32([[1, 0, np.random.randint(-30,30)],[0, 1, np.random.randint(-30,30)]]), 
                                            (image.shape[1], image.shape[0]))
                    
                    gray = cv2.cvtColor(shifted,cv2.COLOR_BGR2GRAY)

                    x0, y0 = np.argwhere(gray > 0).min(axis=0)
                    x1, y1 = np.argwhere(gray > 0).max(axis=0) 

                #cropping of images
                
                    cropped=shifted[x0:x1,y0:y1,:]
                    
                    image_resized=imresize(cropped,(self.image_height,self.image_width,3))
                                #      (image_resized.shape[1], image_resized.shape[0])
            
                    batch_data_aug[folder,idx,:,:,0] = (image_resized[:,:,0])/255
                    batch_data_aug[folder,idx,:,:,1] = (image_resized[:,:,1])/255
                    batch_data_aug[folder,idx,:,:,2] = (image_resized[:,:,2])/255
                
            
            batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
            
    
        if (augment):
            batch_data=np.concatenate([batch_data,batch_data_aug])
            batch_labels=np.concatenate([batch_labels,batch_labels])

        
        return(batch_data,batch_labels)
    
    
    def train_model(self, model, augment_data=False):
        train_generator = self.generator(self.train_path, self.train_doc,augment=augment_data)
        val_generator = self.generator(self.val_path, self.val_doc)

        model_name = 'model_init' + '_' + str(datetime.datetime.now()).replace(' ','').replace(':','_') + '/'
    
        if not os.path.exists(model_name):
            #os.mkdr(model_name)
             os.mkdir(model_name)
        filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

        checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
        LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)
        
        earlystop = EarlyStopping( monitor="val_loss", min_delta=0,patience=10,verbose=1)
        callbacks_list = [checkpoint, LR, earlystop]

        if (self.num_train_sequences%self.batch_size) == 0:
            steps_per_epoch = int(self.num_train_sequences/self.batch_size)
        else:
            steps_per_epoch = (self.num_train_sequences//self.batch_size) + 1

        if (self.num_val_sequences%self.batch_size) == 0:
            validation_steps = int(self.num_val_sequences/self.batch_size)
        else:
            validation_steps = (self.num_val_sequences//self.batch_size) + 1
    
        history=model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=self.num_epochs, verbose=1, 
                            callbacks=callbacks_list, validation_data=val_generator, 
                            validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)
        return history

        
    @abc.abstractmethod
    def define_model(self):
        pass


MODEL:

Here you make the model using different functionalities that Keras provides. Remember to use Conv3D and MaxPooling3D and not Conv2D and Maxpooling2D for a 3D convolution model. You would want to use TimeDistributed while building a Conv2D + RNN model. Also remember that the last layer is the softmax. Design the network in such a way that the model is able to give good accuracy on the least number of parameters so that it can fit in the memory of the webcam.

In [96]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers

In [97]:
class ModelConv3D1(ModelBuilder):
    #def define_model()
    
    #class ModelConv3D1(ModelBuilder):
    def define_model(self):
        model = Sequential()
        model.add(Conv3D(16, (3, 3, 3), Padding = 'same',
                         input_shape=(self.frames_to_sample,self.image_height, self_image_width, self.channels)))
        
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(2, 2, 2)))
        
        model.add(Conv3D(32, (2, 2, 2), Padding = 'same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(2, 2, 2)))
        model.add(Conv3D(64, (2, 2, 2), Padding = 'same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(2, 2, 2)))
        
        model.add(Conv3D(128, (2, 2, 2), Padding = 'same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(2, 2, 2)))
        
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        
        model.add(Dense(64, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.25))
        
        model.add(Dense(self.num_classes, activation='sotmax'))
        
        optimiser = optimizers.Adam()
        model.compile(optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
        return model    

In [98]:
conv_3d1=ModelConv3D1()
conv_3d1.initialize_path(Proj_folder)
conv_3d1.initialize_image_properties(image_height=160,image_width=160)
conv_3d1.initialize_hyperparams(frames_to_sample=16,batch_size=8,num_epochs=1)
conv_3d1_model=conv_3d1.define_model()
conv_3d1_model.summary()

AttributeError: 'ModelConv3D1' object has no attribute 'val_doc'

In [None]:
#sample cropping

test_generator=ModelConv3D1()
test_generator.initialize_path(Proj_folder)
test_generator.initialize_image_properties(image_height= 150, image_width = 150)
test_generator.initialize_hyperparams(frame_to_sample=16,batch_size=3,num_epochs=1)
x=test_generator.generator(test_generator.val_path,test_generator.val_doc,augment=True)
batch_data, batch_labels=next(x)
fig, axes = plt.subplots(nrows=1, ncols=2)
axes[0].imshow(batch_data[0, 15,:, :, :])
axes[1].imshow(batch_data[0, 15,:, :, :])
plt.show()


In [None]:
conv_3d1.train_model(conv_3d1_model)

In [None]:

test_generator=ModelConv3D1()
test_generator.initialize_path(Proj_folder)
test_generator.initialize_image_properties(image_height= 150, image_width = 150)
test_generator.initialize_hyperparams(frame_to_sample=16,batch_size=4,num_epochs=3)

conv_3d1_model=conv_3d1.define_model()
print("total params:", conv_3d1_model.count_params())
conv_3d1.train_model(conv_3d1_model)

In [None]:
test_generator=ModelConv3D1()
test_generator.initialize_path(Proj_folder)
test_generator.initialize_image_properties(image_height= 100, image_width = 100)
test_generator.initialize_hyperparams(frame_to_sample=16,batch_size=8,num_epochs=3)

conv_3d1_model=conv_3d1.define_model()
print("total params:", conv_3d1_model.count_params())
conv_3d1.train_model(conv_3d1_model)

In [None]:
test_generator=ModelConv3D1()
test_generator.initialize_path(Proj_folder)
test_generator.initialize_image_properties(image_height= 160, image_width = 160)
test_generator.initialize_hyperparams(frame_to_sample=16,batch_size=16,num_epochs=3)

conv_3d1_model=conv_3d1.define_model()
print("total params:", conv_3d1_model.count_params())
conv_3d1.train_model(conv_3d1_model)

In [None]:
test_generator=ModelConv3D1()
test_generator.initialize_path(Proj_folder)
test_generator.initialize_image_properties(image_height= 160, image_width = 160)
test_generator.initialize_hyperparams(frame_to_sample=16,batch_size=32,num_epochs=3)

conv_3d1_model=conv_3d1.define_model()
print("total params:", conv_3d1_model.count_params())
conv_3d1.train_model(conv_3d1_model)

# As per my point of view, Based on the experiments "Image resolution and number of frames in sequence" have more impact on training time

# My point of view is to chane the resolution for each and every experiment to anylyze the model performance.