# Gesture Recognition
The goal is to build a 3D convolutional (Conv3D) model that correctly classifies the 5 gestures.

### <font color='cyan'> Sections in this notebook: </font>
I. Prerequisites
    
    I.1. Importing all the necessary modules
    I.2. Shuffle the data
    
II. Custom Generator


    

# <font color='goldenrod'> I. Prerequisites </font>

### <font color='skyblue'>  I.1. Importing all the necessary modules </font> 

In [1]:
import numpy as np
import os
import cv2
from cv2 import imread, resize
import matplotlib.pyplot as plt
import random as rn
from keras import backend as K
import tensorflow as tf
import datetime
import os

$\Rightarrow$ <font color="asparagus"> We set the random seed so that the results don't vary drastically. </font> 

In [2]:
# Seed NumPy, Python's `random` module (imported as `rn`) and TensorFlow
# with the same fixed value (30).
np.random.seed(30)
rn.seed(30)
tf.random.set_seed(30)

### <font color='skyblue'>  I.2. Shuffle the data </font> 

$\Rightarrow$ <font color="asparagus"> Read all the lines in the csv and randomly permute them. </font>

$\Rightarrow$ <font color="red "> TODO: REMOVE THIS COMMENT !! </font> <br>In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

In [3]:
train_path = r"D:\DDownloads\UpGrad\NeuralNetwork\CaseStudy\Project_data\train"
val_path = r"D:\DDownloads\UpGrad\NeuralNetwork\CaseStudy\Project_data\val"

In [4]:
trainCSV = r"D:\DDownloads\UpGrad\NeuralNetwork\CaseStudy\Project_data\train.csv"
valCSV = r"D:\DDownloads\UpGrad\NeuralNetwork\CaseStudy\Project_data\val.csv"

In [5]:
# Read every row of the train/validation CSV files and shuffle the rows.
# The `with` blocks close the file handles as soon as the rows are read;
# a bare `open(...).readlines()` leaves them open until garbage collection.
with open(trainCSV) as train_file:
    train_doc = np.random.permutation(train_file.readlines())
with open(valCSV) as val_file:
    val_doc = np.random.permutation(val_file.readlines())


In [6]:
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)

# training sequences = 663


In [7]:
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)

# validation sequences = 100


$\Rightarrow$ <font color="asparagus"> Set the batch size. </font>

In [8]:
batch_size = 75

$\Rightarrow$ <font color="red "> TODO: REMOVE THIS SECTION... Used for debugging cv2 </font>

In [9]:
img_name = os.listdir(train_path + "\\" + train_doc[0].split(';')[0])[0]

In [10]:
img_name

'WIN_20180926_16_54_08_Pro_00006.png'

In [11]:
x = imread(train_path + "\\" + train_doc[0].split(';')[0] + "\\" + img_name)

In [12]:
x.shape[0], x.shape[1]

(120, 160)

In [13]:
dim = (224, 224)

In [14]:
resized_img = resize(x, dim, interpolation = cv2.INTER_AREA)

In [15]:
cv2.imshow("img", resized_img)
cv2.waitKey(0)

-1

In [16]:
cv2.imshow("Orig", x)
cv2.waitKey(0)

-1

In [17]:
b ,g, r = cv2.split(resized_img)

$\Rightarrow$ <font color="red "> TODO: Up to this part</font>

# <font color='goldenrod'> II. Custom Generator </font>

$\Rightarrow$ <font color="asparagus"> Class Names/Labels:  </font> <br>
- Left to Right : 0 <br>
- Right to Left: 1<br>
- Stop: 2<br>
- Thumbs down: 3<br>
- Thumbs up: 4

In [18]:
batch_size = 75

In [19]:
class Generator:
    """Endless batch generator over folders of video frames.

    Each entry of ``folder_list`` is a CSV row of the form
    ``<folder name>;<gesture name>;<label index>``. For every video the
    frames selected by ``imgIdxList`` are read from
    ``source_path/<folder name>``, resized to ``width`` x ``height``,
    scaled to the [0, 1] range and stacked into one batch tensor.

    Args:
        folder_list: CSV rows describing the videos; shuffled once on
            construction.
        imgIdxList: Indices of the frames to take from each (sorted)
            folder listing.
        width: Target frame width in pixels.
        height: Target frame height in pixels.
        source_path: Directory containing one sub-folder per video.
        batch_size: Number of videos per yielded batch.
    """

    numChannels = 3  # frames are read as 3-channel images (b, g, r order)
    numClasses = 5   # length of the one-hot label vector

    # Instance attributes; all of them are set in __init__.
    width = None
    height = None
    source_path = None
    vectorList = None
    batch_size = None
    frameIdxList = None
    numFramesInVideo = None
    numVideos = None
    numOfBatches = None

    def __init__(self,
                 folder_list,
                 imgIdxList,
                 width=224,
                 height=224,
                 source_path=r"D:\DDownloads\UpGrad\NeuralNetwork\CaseStudy\Project_data\train",
                 batch_size=75):
        # Shuffle the rows once and keep them as an array
        self.vectorList = np.random.permutation(folder_list)
        print(self.vectorList)
        self.frameIdxList = imgIdxList
        self.numFramesInVideo = len(imgIdxList)
        self.numVideos = len(folder_list)
        self.source_path = source_path
        self.batch_size = batch_size
        self.width = width
        self.height = height
        # Number of *full* batches; a trailing partial batch is handled
        # separately in generator().
        self.numOfBatches = self.numVideos // self.batch_size

    def __getBatchData(self, batch, curr_batch_size):
        """Load the videos of one batch.

        Args:
            batch: Zero-based batch index; the batch starts at row
                ``batch * self.batch_size`` of the shuffled list.
            curr_batch_size: Number of videos to actually load
                (smaller than ``self.batch_size`` for the last partial
                batch).

        Returns:
            Tuple ``(batch_data, batch_labels)`` where ``batch_data`` has
            shape ``(batch_size, numFramesInVideo, height, width,
            numChannels)`` and ``batch_labels`` is the one-hot label
            matrix of shape ``(batch_size, numClasses)``. Both arrays are
            always allocated for a full batch, so slots beyond
            ``curr_batch_size`` stay all zeros.

        Raises:
            OSError: If a selected frame cannot be read as an image.
        """
        # Always use the instance's batch size (not a notebook global) so
        # that allocation and indexing agree with each other.
        # cv2.resize returns (height, width, channels), hence the axis order.
        batch_data = np.zeros((self.batch_size, self.numFramesInVideo,
                               self.height, self.width, self.numChannels))
        batch_labels = np.zeros((self.batch_size, self.numClasses))
        first_row = batch * self.batch_size
        for folderIdx in range(curr_batch_size):
            fields = self.vectorList[first_row + folderIdx].strip().split(';')
            vectorName = fields[0]
            label = int(fields[2])
            folder = os.path.join(self.source_path, vectorName)
            # os.listdir returns entries in arbitrary order; sort so that
            # the frame indices always select the same frames.
            imgs = sorted(os.listdir(folder))
            # Iterate over the selected frames of the folder
            for idx, item in enumerate(self.frameIdxList):
                img_path = os.path.join(folder, imgs[item])
                image = imread(img_path)
                if image is None:
                    # imread signals failure by returning None
                    raise OSError("Could not read image: " + img_path)
                # Resize (dsize is given as (width, height)); cropping is
                # still a TODO, we only resize for now.
                resized_img = resize(image.astype(np.float32),
                                     (self.width, self.height),
                                     interpolation=cv2.INTER_AREA)
                # Normalize to [0, 1] and store all channels at once
                batch_data[folderIdx, idx] = resized_img / 255.0
            # One hot encoding
            batch_labels[folderIdx, label] = 1
        return batch_data, batch_labels

    # Public method, call this to get generator object
    def generator(self):
        """Yield ``(batch_data, batch_labels)`` tuples forever.

        Every pass over the data yields all full batches followed by one
        partial batch if the number of videos is not a multiple of the
        batch size. No extra empty batch is produced when it divides
        evenly.

        Raises:
            ValueError: If the generator was built with no videos.
        """
        if self.numVideos == 0:
            raise ValueError("Generator was created with an empty folder_list")
        rem_batch_size = self.numVideos % self.batch_size
        while True:
            for batch in range(self.numOfBatches):
                yield self.__getBatchData(batch, self.batch_size)
            # Remaining videos that do not fill a whole batch
            if rem_batch_size:
                yield self.__getBatchData(self.numOfBatches, rem_batch_size)

$\Rightarrow$ <font color="red "> TODO: REMOVE THIS COMMENT !! </font> <br> Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

$\Rightarrow$ <font color="asparagus"> Some global constants </font>

In [20]:
imgIdxList = list(range(0,30,2))
batch_size = 75
# (width, height) is the final size of the input images 
# numChannels = 3 (RGB)
width = 224
height = 224
numChannels = 3

$\Rightarrow$ <font color="asparagus"> For testing </font>

In [21]:
batch_size = 3

In [22]:
train_doc[:4]

array(['WIN_20180926_16_54_08_Pro_Right_Swipe_new;Right_Swipe_new;1\n',
       'WIN_20180925_18_02_58_Pro_Thumbs_Down_new;Thumbs_Down_new;3\n',
       'WIN_20180925_17_33_08_Pro_Left_Swipe_new;Left_Swipe_new;0\n',
       'WIN_20180925_17_51_17_Pro_Thumbs_Up_new;Thumbs_Up_new;4\n'],
      dtype='<U88')

In [23]:
gen = Generator(folder_list=train_doc[:4], 
                      imgIdxList=imgIdxList, 
                      width=width, 
                      height=height, 
                      source_path=train_path, batch_size=batch_size)

['WIN_20180925_18_02_58_Pro_Thumbs_Down_new;Thumbs_Down_new;3\n'
 'WIN_20180925_17_51_17_Pro_Thumbs_Up_new;Thumbs_Up_new;4\n'
 'WIN_20180925_17_33_08_Pro_Left_Swipe_new;Left_Swipe_new;0\n'
 'WIN_20180926_16_54_08_Pro_Right_Swipe_new;Right_Swipe_new;1\n']


In [24]:
train_generator = gen.generator() # Create generator Class' instance

In [25]:
batch_data, batch_label =  next(train_generator)

In [26]:
batch_data.shape # VideoIdx, FrameIdxInVideo, width, height, numChannels

(3, 15, 224, 224, 3)

In [27]:
batch_data[0].shape

(15, 224, 224, 3)

In [28]:
batch_data[0][0].shape

(224, 224, 3)

$\Rightarrow$ <font color="asparagus"> Change the first index to 0, 1, 2, .., (batch_size -1) to view the image. </font>

In [29]:
cv2.imshow("First", batch_data[2][0])
cv2.waitKey(0)

-1

In [30]:
batch_data, batch_label =  next(train_generator)

In [31]:
cv2.imshow("First", batch_data[0][0])
cv2.waitKey(0)

-1

$\Rightarrow$ <font color="asparagus"> If you look at batch_data[1][0], it should be all zeros: the batch size is 3 and the number of videos is 4, so the second next(train_generator) call loads only one video. The other two slots of the tensor will be all zeros because of: <br> batch_data = np.zeros((batch_size, self.numFramesInVideo, self.width, self.height, self.numChannels)) </font>

## Model
Here you make the model using different functionalities that Keras provides. Remember to use `Conv3D` and `MaxPooling3D` and not `Conv2D` and `MaxPooling2D` for a 3D convolution model. You would want to use `TimeDistributed` while building a Conv2D + RNN model. Also remember that the last layer is the softmax. Design the network in such a way that the model gives good accuracy with the fewest parameters, so that it can fit in the memory of the webcam.

In [None]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers

#write your model here

Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [None]:
optimiser = #write your optimizer
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
print (model.summary())

Let us create the `train_generator` and the `val_generator` which will be used in `.fit_generator`.

In [None]:
# Build the batch generators from the Generator class defined in this
# notebook (there is no free function called `generator`). Each
# `.generator()` call returns an endless Python generator of
# (batch_data, batch_labels) tuples.
train_generator = Generator(folder_list=train_doc,
                            imgIdxList=imgIdxList,
                            width=width,
                            height=height,
                            source_path=train_path,
                            batch_size=batch_size).generator()
val_generator = Generator(folder_list=val_doc,
                          imgIdxList=imgIdxList,
                          width=width,
                          height=height,
                          source_path=val_path,
                          batch_size=batch_size).generator()

In [None]:
curr_dt_time = datetime.datetime.now()

In [None]:
# Directory name for this run's checkpoints, derived from the start time
# (spaces removed, ':' replaced because it is not valid in Windows paths).
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True replaces the separate "exists?" check, so there is no gap
# between checking for the directory and creating it.
os.makedirs(model_name, exist_ok=True)
        
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)

LR = # write the REducelronplateau code here
callbacks_list = [checkpoint, LR]

The `steps_per_epoch` and `validation_steps` are used by `fit_generator` to decide the number of next() calls it needs to make.

In [None]:
# Ceiling division: the number of full batches, plus one more step when a
# partial batch is left over (bool(...) contributes 0 or 1).
steps_per_epoch = (num_train_sequences // batch_size
                   + bool(num_train_sequences % batch_size))

validation_steps = (num_val_sequences // batch_size
                    + bool(num_val_sequences % batch_size))

Let us now fit the model. This will start training the model and with the help of the checkpoints, you'll be able to save the model at the end of each epoch.

In [None]:
num_epochs = 100
print ('# epochs =', num_epochs)

In [None]:
# Train the model from the two batch generators: `steps_per_epoch` batches
# are drawn from train_generator per epoch and `validation_steps` batches
# from val_generator for validation; the callbacks in callbacks_list are
# passed along to the training loop.
# NOTE(review): `fit_generator` is reported as deprecated in newer
# TensorFlow/Keras releases in favour of `model.fit` — confirm against the
# installed version before running.
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)