# Train a model with sampel data #
This code is based on the code here: https://github.com/anasmorahhib/3D-CNN-Gesture-recognition/blob/master/main.ipynb

In [1]:
#################### Imports ####################

import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from datetime import datetime

import os
import math
import pandas as pd
import matplotlib.image as img
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


### Utility Functions ###

In [2]:
# Free the RAM
def release_list(a):
   del a[:]
   del a


In [3]:
# Read the metadata file, which is a csv with 2 columns.
# The first column being the id of the video, and the second
# column being the label of the video

def read_metadata(file_path):
    data = pd.read_csv(file_path, header = None, sep = ";")
    data = data.set_index(0)[1].to_dict()
    return data


In [4]:
# Make sure all vidoes have the same number of frames
def get_unify_frames(video_path, hm_frames = 30):
    offset = 0
    # pick frames
    frames = os.listdir(video_path)
    frames_count = len(frames)
    # unify number of frames 
    if hm_frames > frames_count:
        # duplicate last frame if video is shorter than necessary
        frames += [frames[-1]] * (hm_frames - frames_count)
    elif hm_frames < frames_count:
        # If there are more frames, then sample starting offset
        #diff = (frames_count - hm_frames)
        #offset = diff-1 
        frames = frames[0:hm_frames]
    return frames  


In [5]:
# Resize frames
def resize_frame(frame):
    frame = img.imread(frame)
    frame = cv2.resize(frame, (64, 64))
    return frame


In [6]:
# Normalize the data

def normalize(data):
    print('old mean', data.mean())
    scaler = StandardScaler()
    scaled_images  = scaler.fit_transform(data.reshape(-1, 15*64*64))
    print('new mean', scaled_images.mean())
    scaled_images  = scaled_images.reshape(-1, 15, 64, 64, 1)
    print(scaled_images.shape)
    return scaled_images


In [7]:
def load_video_frames(parent_dir_for_videos, labels, metadata, show_img = False):
    dirs = os.listdir(parent_dir_for_videos)

    # Adjust training data
    training_targets = [] # training targets 
    new_frames = [] # training data after resize & unify
    for directory in dirs:
        new_frame = [] # one training
        # Frames in each folder
        frames = get_unify_frames(os.path.join(parent_dir_for_videos, directory))
        for frame in frames:
            frame = resize_frame(os.path.join(parent_dir_for_videos, directory, frame))
            new_frame.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
            if len(new_frame) == 15: # partition each training on two trainings.
                new_frames.append(new_frame) # append each partition to training data
                training_targets.append(labels.index(metadata[int(directory)]))
                new_frame = []

    if show_img:
        #show data
        fig = plt.figure()
        for i in range(2, 4):
            for num, frame in enumerate(new_frames[i][0:18]):
                y = fig.add_subplot(4, 5, num + 1)
                y.imshow(frame, cmap = 'gray')
            fig = plt.figure()
        plt.show()

    new_frames_in_narray = np.array(new_frames[:], dtype = np.float32)
    release_list(new_frames)
    scaled_images = normalize(new_frames_in_narray)

    return scaled_images, training_targets


### Constants ###

In [8]:
BASE_DIR = '/home/clin/Springboard/Capstone/Jester/Samples'
TRAIN_DATA = BASE_DIR + '/train.csv'
TRAIN_VIDS = BASE_DIR + '/train'
VALID_DATA = BASE_DIR + '/validation.csv'
VALID_VIDS = BASE_DIR + '/validation'


### Pre-Processing ###

In [9]:
train_metadata = read_metadata(TRAIN_DATA)
valid_metadata = read_metadata(VALID_DATA)
labels = list(set(train_metadata.values()))

# Get the data directories

train_images, train_labels = load_video_frames(TRAIN_VIDS, labels, train_metadata)
valid_images, valid_labels = load_video_frames(VALID_VIDS, labels, valid_metadata)

x_train = np.array(train_images)
y_train = np.array(train_labels)
x_val = np.array(valid_images)
y_val = np.array(valid_labels)


old mean 108.18764
new mean -6.7296924e-09
(16492, 15, 64, 64, 1)
old mean 105.743706
new mean -8.016216e-09
(1960, 15, 64, 64, 1)


### Build & Train Model ###

In [10]:
# My model
class Conv3DModel(tf.keras.Model):
  def __init__(self):
    super(Conv3DModel, self).__init__()
    # Convolutions
    self.conv1 = tf.compat.v2.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', name="conv1", data_format='channels_last')
    self.pool1 = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2), data_format='channels_last')
    self.conv2 = tf.compat.v2.keras.layers.Conv3D(64, (3, 3, 3), activation='relu', name="conv1", data_format='channels_last')
    self.pool2 = tf.keras.layers.MaxPool3D(pool_size=(2, 2,2), data_format='channels_last')
   
    # LSTM & Flatten
    self.convLSTM =tf.keras.layers.ConvLSTM2D(40, (3, 3))
    self.flatten =  tf.keras.layers.Flatten(name="flatten")

    # Dense layers
    self.d1 = tf.keras.layers.Dense(128, activation='relu', name="d1")
    self.out = tf.keras.layers.Dense(4, activation='softmax', name="output")
    

  def call(self, x):
    x = self.conv1(x)
    x = self.pool1(x)
    x = self.conv2(x)
    x = self.pool2(x)
    x = self.convLSTM(x)
    #x = self.pool2(x)
    #x = self.conv3(x)
    #x = self.pool3(x)
    x = self.flatten(x)
    x = self.d1(x)
    return self.out(x)


In [11]:
# Training

todays_date = datetime.now().strftime('%Y%m%d')
model = Conv3DModel()

# choose the loss and optimizer methods
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(),
              metrics = ['accuracy'])




# include the epoch in the file name. (uses `str.format`)
checkpoint_path = "training_" + todays_date + "/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, verbose=1, save_weights_only=True)


# Run the training 
history = model.fit(x_train, y_train,
                    callbacks = [cp_callback],
                    validation_data=(x_val, y_val),
                    batch_size=32,
                    epochs=10)


# save the model for use in the application
model.save_weights('weights_{}/my_weights'.format(todays_date), save_format='tf')


Epoch 1/10
Epoch 00001: saving model to training_20200714/cp-0001.ckpt
Epoch 2/10
Epoch 00002: saving model to training_20200714/cp-0002.ckpt
Epoch 3/10
Epoch 00003: saving model to training_20200714/cp-0003.ckpt
Epoch 4/10
Epoch 00004: saving model to training_20200714/cp-0004.ckpt
Epoch 5/10
Epoch 00005: saving model to training_20200714/cp-0005.ckpt
Epoch 6/10
Epoch 00006: saving model to training_20200714/cp-0006.ckpt
Epoch 7/10
Epoch 00007: saving model to training_20200714/cp-0007.ckpt
Epoch 8/10
Epoch 00008: saving model to training_20200714/cp-0008.ckpt
Epoch 9/10
Epoch 00009: saving model to training_20200714/cp-0009.ckpt
Epoch 10/10
Epoch 00010: saving model to training_20200714/cp-0010.ckpt
