In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
from tqdm import tqdm

In [2]:
import seaborn as sns
import matplotlib.image as img
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

  import pandas.util.testing as tm


In [3]:
import os
import gc
import math

In [4]:
LABELS = {
    "Swiping Right": 0,
    "Swiping Left": 1,
    "Stop Sign": 2,
    "Thumb Up": 3,
}

## Loading Data

In [5]:
BASE_PATH = 'D:\Jester'
TRAIN_DATA_CSV = BASE_PATH + '/Train.csv'
TEST_DATA_CSV = BASE_PATH + '/Test.csv'
VAL_DATA_CSV = BASE_PATH + '/Validation.csv'

TRAIN_SAMPLES_PATH = BASE_PATH + '/Train/'
TEST_SAMPLES_PATH = BASE_PATH + '/Test/'
VAL_SAMPLES_PATH = BASE_PATH + '/Validation/'

In [6]:
train_csv=pd.read_csv(TRAIN_DATA_CSV)

In [7]:
train_csv.head(10)

Unnamed: 0,video_id,label,frames,label_id,shape,format
0,1,Doing other things,37,0,"(100, 176)",JPEG
1,3,Pushing Two Fingers Away,37,6,"(100, 176)",JPEG
2,6,Drumming Fingers,37,1,"(100, 176)",JPEG
3,11,Sliding Two Fingers Down,37,10,"(100, 176)",JPEG
4,14,Pushing Hand Away,37,5,"(100, 176)",JPEG
5,17,Shaking Hand,37,9,"(100, 176)",JPEG
6,20,Doing other things,37,0,"(100, 176)",JPEG
7,28,Pulling Two Fingers In,37,4,"(100, 176)",JPEG
8,31,Stop Sign,37,14,"(100, 176)",JPEG
9,34,Zooming In With Two Fingers,37,24,"(100, 176)",JPEG


Training targets, you can use your custom csv file if you already created it before

In [8]:
targets = pd.read_csv(TRAIN_DATA_CSV)
targets = targets[targets['label'].isin(LABELS.keys())]
targets['label'] = targets['label'].map(LABELS)
targets = targets[['video_id', 'label']]
targets = targets.reset_index()
targets

Unnamed: 0,index,video_id,label
0,8,31,2
1,16,51,0
2,20,59,2
3,34,95,0
4,35,100,0
...,...,...,...
7149,50404,148053,0
7150,50406,148059,3
7151,50408,148061,0
7152,50410,148070,0


Validation targets, you can use your custom csv file if you already created it before

In [9]:
targets_validation = pd.read_csv(VAL_DATA_CSV)
targets_validation = targets_validation[targets_validation['label'].isin(LABELS.keys())]
targets_validation['label'] = targets_validation['label'].map(LABELS)
targets_validation = targets_validation[['video_id', 'label']]
targets_validation = targets_validation.reset_index()
targets_validation

Unnamed: 0,index,video_id,label
0,4,96,0
1,10,234,2
2,17,420,3
3,20,475,2
4,26,610,3
...,...,...,...
970,7010,147180,3
971,7019,147378,1
972,7020,147395,3
973,7036,147804,2


## Preprocessing

In [10]:
def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140])

In [11]:
def resize_frame(frame):
    frame = img.imread(frame)
    frame = cv2.resize(frame, (64, 64))
    return frame

In [12]:
hm_frames = 30 # number of frames
def get_unify_frames(path):
    offset = 0
    # pick frames
    frames = os.listdir(path)
    frames_count = len(frames)
    # unify number of frames 
    if hm_frames > frames_count:
        # duplicate last frame if video is shorter than necessary
        frames += [frames[-1]] * (hm_frames - frames_count)
    elif hm_frames < frames_count:
        # If there are more frames, then sample starting offset
        # diff = (frames_count - hm_frames)
        # offset = diff-1 
        frames = frames[0:hm_frames]
    return frames

In [13]:
# Adjust training data
train_targets = [] # training targets 
test_targets = [] # testing targets

new_frames = [] # training data after resize & unify
new_frames_test = [] # testing data after resize & unify

for idx, row in tqdm(targets.iterrows(), total=len(targets)):
    if idx % 4 == 0:
        continue
    
    partition = [] 
    # Frames in each folder
    frames = get_unify_frames(TRAIN_SAMPLES_PATH + str(row['video_id']))
    if len(frames) == hm_frames: 
        for frame in frames:
            frame = resize_frame(TRAIN_SAMPLES_PATH + str(row['video_id']) + '/' + frame)
            partition.append(rgb2gray(frame))
            if len(partition) == 15: # partition each training on two trainings.
                if idx % 6 == 0:
                    new_frames_test.append(partition) # append each partition to training data
                    test_targets.append(row['label'])
                else:
                    new_frames.append(partition) # append each partition to test data
                    train_targets.append(row['label'])
                partition = []

train_data = np.asarray(new_frames, dtype=np.float16)
del new_frames[:]
del new_frames

test_data = np.asarray(new_frames_test, dtype=np.float16)
del new_frames_test[:]
del new_frames_test

gc.collect()

100%|██████████| 7154/7154 [19:41<00:00,  6.05it/s]


7

In [14]:
# we do the same for the validation data
cv_targets = []
new_frames_cv = []
for idx, row in tqdm(targets_validation.iterrows(), total=len(targets_validation)):
    if idx % 4 == 0:
        continue

    partition = []
    # Frames in each folder
    frames = get_unify_frames(VAL_SAMPLES_PATH+str(row["video_id"]))
    for frame in frames:
        frame = resize_frame(VAL_SAMPLES_PATH+str(row["video_id"])+'/'+frame)
        partition.append(rgb2gray(frame))
        if len(partition) == 15:
            new_frames_cv.append(partition)
            cv_targets.append(row['label'])
            partition = []
                
cv_data = np.array(new_frames_cv, dtype=np.float16)
del new_frames_cv[:]
del new_frames_cv
gc.collect()

100%|██████████| 975/975 [05:37<00:00,  2.89it/s]


0

In [15]:
print(f"Training = {len(train_data)}/{len(train_targets)} samples/labels")
print(f"Test = {len(test_data)}/{len(test_targets)} samples/labels")
print(f"Validation = {len(cv_data)}/{len(cv_targets)} samples/labels")

Training = 9538/9538 samples/labels
Test = 1192/1192 samples/labels
Validation = 1462/1462 samples/labels


Feature scaling

In [16]:
# Normalisation: training
print('old mean', train_data.mean())

scaler = StandardScaler(copy=False)
scaled_images  = scaler.fit_transform(train_data.reshape(-1, 15*64*64))
del train_data
print('new mean', scaled_images.mean())

scaled_images  = scaled_images.reshape(-1, 15, 64, 64, 1)
print(scaled_images.shape)

old mean 115.6
new mean -2.4e-07
(9538, 15, 64, 64, 1)


In [17]:
# Normalisation: test
print('old mean', test_data.mean())

scaler = StandardScaler(copy=False)
scaled_images_test = scaler.fit_transform(test_data.reshape(-1, 15*64*64))
del test_data
print('new mean', scaled_images_test.mean())

scaled_images_test = scaled_images_test.reshape(-1, 15, 64, 64, 1)
print(scaled_images_test.shape)

old mean 115.8
new mean -1e-07
(1192, 15, 64, 64, 1)


In [18]:
# Normalisation: validation
print('old mean', cv_data.mean())

scaler = StandardScaler(copy=False)
scaled_images_cv  = scaler.fit_transform(cv_data.reshape(-1, 15*64*64))
del cv_data
print('new mean',scaled_images_cv.mean())

scaled_images_cv  = scaled_images_cv.reshape(-1, 15, 64, 64, 1)
print(scaled_images_cv.shape)

old mean 111.6
new mean -2.4e-07
(1462, 15, 64, 64, 1)


In [19]:
del scaler

## Make sets

In [20]:
y_train = np.array(train_targets, dtype=np.int8)
y_test = np.array(test_targets, dtype=np.int8)
y_val = np.array(cv_targets, dtype=np.int8)
del train_targets
del test_targets
del cv_targets

In [21]:
x_train = scaled_images
x_test = scaled_images_test
x_val = scaled_images_cv
del scaled_images
del scaled_images_test
del scaled_images_cv

In [22]:
gc.collect()

160

In [23]:
x_train

array([[[[[-7.1436e-01],
          [-5.3711e-01],
          [-5.6445e-01],
          ...,
          [-1.0010e+00],
          [-9.9316e-01],
          [-9.7559e-01]],

         [[-6.5234e-01],
          [-5.5029e-01],
          [-5.6152e-01],
          ...,
          [-9.9902e-01],
          [-1.0820e+00],
          [-9.8340e-01]],

         [[-6.2988e-01],
          [-5.5566e-01],
          [-5.9668e-01],
          ...,
          [-1.0342e+00],
          [-1.0693e+00],
          [-1.0127e+00]],

         ...,

         [[-3.1958e-01],
          [-2.7832e-01],
          [-3.7744e-01],
          ...,
          [-2.0972e-01],
          [-2.7393e-01],
          [-3.4937e-01]],

         [[-2.8711e-01],
          [-2.8662e-01],
          [-3.4399e-01],
          ...,
          [-1.3367e-01],
          [-2.1863e-01],
          [-3.0225e-01]],

         [[-2.4744e-01],
          [-2.7222e-01],
          [-3.3276e-01],
          ...,
          [-1.4502e-01],
          [-1.9214e-01],
          

## Model

In [31]:
class Conv3DModel(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
    
        # Convolutions
        self.conv1 = tf.compat.v2.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', name="conv1", data_format='channels_last')
        self.pool1 = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2), data_format='channels_last')
        self.conv2 = tf.compat.v2.keras.layers.Conv3D(64, (3, 3, 3), activation='relu', name="conv2", data_format='channels_last')
        self.pool2 = tf.keras.layers.MaxPool3D(pool_size=(2, 2,2), data_format='channels_last')
   
        # LSTM & Flatten
        self.convLSTM =tf.keras.layers.ConvLSTM2D(40, (3, 3))
        self.flatten =  tf.keras.layers.Flatten(name="flatten")

        # Dense layers
        self.d1 = tf.keras.layers.Dense(128, activation='relu', name="d1")
        self.out = tf.keras.layers.Dense(4, activation='softmax', name="output")

    def call(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.convLSTM(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.out(x)

In [32]:
model = Conv3DModel()

In [33]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(),
              metrics = ['accuracy'])

In [27]:
history = model.fit(x_train, y_train,
                    validation_data=(x_val, y_val),
                    batch_size=32,
                    epochs=5)

Train on 9538 samples, validate on 1462 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [28]:
model.save_weights('weights/w.tf', save_format='tf')