This is the main training and prediction file.
Run augment.ipynb before training to augment the training data.

In [1]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Conv2D, MaxPooling2D, Cropping2D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from sklearn.model_selection import train_test_split
import pickle
import numpy as np
import h5py
import matplotlib.pyplot as plt
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
# Load data
# The training set is a pickled dict of arrays. The context manager makes sure
# the file handle is closed once the data has been read.
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
with open('a2_dataTrain.pkl', 'rb') as train_file:
    data = pickle.load(train_file)

# Print the shape of every entry as a quick sanity check
for k in data.keys():
    print(k, data[k].shape)

gestureLabels (77421,)
segmentation (77421, 120, 90, 3)
subjectLabels (77421,)
rgb (77421, 120, 90, 3)
depth (77421, 120, 90)


In [3]:
# Training hyper-parameters
num_classes = 20   # number of target classes (width of the softmax output layer)
batch_size = 128   # samples per gradient update passed to model.fit
epochs = 100       # upper bound on epochs; early stopping may end training sooner

In [4]:
# Stack all input modalities into a single numpy array
def prep_data(data):
    """Concatenate the RGB, segmentation and depth arrays along axis 3.

    Parameters
    ----------
    data : mapping
        Must provide 'rgb', 'segmentation' and 'depth' entries. 'depth' is
        indexed as a 3-D array and gets a trailing axis of length 1 so it can
        be joined with the other two, which must therefore be 4-D.

    Returns
    -------
    numpy.ndarray
        The three inputs joined on the last (fourth) axis, in the order
        rgb, segmentation, depth.
    """
    rgb = data['rgb']
    segmentation = data['segmentation']
    # Turn the single-channel depth map into an explicit 1-channel array
    depth = data['depth'][:, :, :, np.newaxis]
    return np.concatenate((rgb, segmentation, depth), axis=3)

# Build the label vector and the stacked feature array, then drop the raw
# dict so its arrays can be garbage-collected
Y = data['gestureLabels']
X = prep_data(data)
del data

In [5]:
# Hold out 10% of the samples as a test set, stratified on the labels.
# The fixed random_state makes the split reproducible. It is the same value as
# in augment.ipynb, which is crucial to reproduce the same test set!
split = train_test_split(X, Y, test_size=0.1, random_state=1337, stratify=Y)
X_train, X_test, Y_train, Y_test = split
del X, Y, split

print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

69678 train samples
7743 test samples


In [6]:
# Swap the original training split for the pre-computed augmented set.
# Skip this cell to train on the original (non-augmented) training data.

# Release the original training arrays before the augmented ones are loaded
del X_train, Y_train

# Number of samples in augmented training data, should be the same as in augment.ipynb
num_aug = 300000

# The augmented samples and labels are opened as read-only uint8 memory maps;
# each sample has the same shape as a test sample
sample_shape = X_test.shape[1:]
X_train = np.memmap('aug_train.pkl', dtype='uint8', mode='r', shape=(num_aug,) + sample_shape)
Y_train = np.memmap('aug_train_y.pkl', dtype='uint8', mode='r', shape=(num_aug,))

print(X_train.shape[0], 'augmentation samples')

# Copy the samples from the memory map into RAM and rebind X_train to the
# in-memory copy (the rebinding drops the reference to the memory map)
X_train_mem = X_train.copy()
X_train = X_train_mem

300000 augmentation samples


In [7]:
# Calculate class weights to compensate for non-uniform distribution.
# Each class gets weight n_samples / (num_classes * n_samples_in_class).
# Keras documents `class_weight` as a dict mapping class index -> weight, so the
# weights are stored in that form rather than as a bare array.
# minlength keeps one entry per class even if the highest labels never occur;
# empty classes are counted as 1 to avoid a division by zero (their weight is
# never looked up because no sample carries that label).
class_counts = np.bincount(Y_train, minlength=num_classes)
class_weights = {
    cls: float(Y_train.shape[0]) / (num_classes * max(int(count), 1))
    for cls, count in enumerate(class_counts)
}

In [8]:
# One-hot encode the integer class labels of both splits
Y_train, Y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (Y_train, Y_test)
)

In [9]:
# Shape of a single sample (everything after the sample axis); used as the
# input_shape of the first model layer
X_shape = tuple(X_train.shape[1:])

In [11]:
# Model definition
#
# The network is described as an ordered list of layers and handed to
# Sequential in one go. The shape comments give the activation size after each
# stage and assume a channels-last 120x90x7 input (TODO confirm against X_shape).

layer_stack = [
    # input: 120x90x7 -> crop one column on each side of the second spatial axis
    Cropping2D(cropping=((0, 0), (1, 1)), input_shape=X_shape),

    # stage 1 -> 120x88x32
    Conv2D(32, kernel_size=(5, 5), padding='same'),
    Activation('relu'),
    BatchNormalization(),

    # stage 2 -> 60x44x64
    Conv2D(64, kernel_size=(3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # stage 3 -> 30x22x64
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # stage 4 -> 15x11x64
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # stage 5 -> 8x6x128 ('same' pooling rounds the odd 15x11 map up)
    Conv2D(128, (3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    BatchNormalization(),

    # flatten -> 8 * 6 * 128 = 6144 features
    Flatten(),

    # fully connected head; dropout only after the first dense layer
    Dense(4096),
    Activation('relu'),
    BatchNormalization(),
    Dropout(0.5),

    Dense(2048),
    Activation('relu'),
    BatchNormalization(),

    # one softmax output per class
    Dense(num_classes, activation='softmax'),
]

model = Sequential(layer_stack)

# Categorical cross-entropy on the one-hot labels, Adam with default settings
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

# Choose filename for saved models here.
# A checkpoint is written only when validation accuracy beats the best value so far.
checkpoint = ModelCheckpoint('model10.{epoch:02d}-{val_acc:.4f}.hdf5',
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

# Stop training once validation accuracy has not improved for 3 epochs.
# val_acc is a quantity to maximise, so mode must be 'max'. With 'min' the
# callback treats a *drop* in accuracy as an improvement and stops training
# while the model is still getting better.
earlystop = EarlyStopping(monitor='val_acc', min_delta=0, patience=3, verbose=0, mode='max')

In [None]:
# Train model
# NOTE: the held-out test split doubles as validation data here, so checkpoint
# selection and early stopping are both driven by the split that is evaluated
# after training.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    shuffle=True,
    class_weight=class_weights,
    validation_data=(X_test, Y_test),
    callbacks=[checkpoint, earlystop],
)
model.fit(X_train, Y_train, **fit_options)

Train on 300000 samples, validate on 7743 samples
Epoch 1/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
 45696/300000 [===>..........................] - ETA: 781s - loss: 0.1036 - acc: 0.9652

In [None]:
# Evaluate on the held-out split; with the single 'accuracy' metric the
# result holds the loss first and the accuracy second
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [1]:
# Results
# Convnet v1, RGB mean, 10 epochs: 0.915 / 0.645        (model.10-0.28.hdf5)
# Convnet v1, all features, 46 epochs: 0.954 / 0.700    (model.45-0.21.hdf5)
# Convnet v1.1, bnorm before relu, 32 epochs: 0.936 / 0.683 (model.28-0.24.hdf5)
# Convnet v1.2, dropout, 50 epochs: 0.897 / 0.771 (model3.47-0.90.hdf5)
# Convnet v2: 9 epochs: 0.93 / 0.73 (model3.09-0.93.hdf5)
# Convnet v2: 26 epochs: 0.97 / 0.75 (model3.26-0.97.hdf5)
# Convnet v3: 39 epochs: 0.97 / (model4.39-0.97.hdf5)
# Convnet v3, all data, 71 epochs: 0.96 /  (a2/model5.71-0.96.hdf5)
# Convnet v3.1, no dropout in convolutions, last conv d=64, 27 epochs: 0.97 /   (model9.27-0.9735.hdf5)
# Convnet v3.1, augmented data: 0.96 / 0.87 (model10.32-0.9598.hdf5)

In [27]:
# Load model for prediction
model_file = 'model10.32-0.9598.hdf5'

# Read the data to predict on. The context manager closes the file handle once
# the pickle has been read.
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
with open('a2_dataTest.pkl', 'rb') as test_file:
    real_data = pickle.load(test_file)
real_X = prep_data(real_data)

# Restore the weights of the chosen checkpoint into the existing model object
model.load_weights(model_file)

In [29]:
# Do prediction
# Pick the most probable class per sample from the softmax output. This yields
# the same labels as Sequential.predict_classes without depending on that
# helper, which later Keras releases no longer provide.
real_Y = np.argmax(model.predict(real_X), axis=-1)



In [30]:
# Save predictions as a two-column CSV: 1-based sample id and predicted class.
# The with-block guarantees the file is flushed and closed even if a write fails.
with open(model_file + '.pred.csv', 'w') as f:
    print('Id,Prediction', file=f)
    for sample_id, p in enumerate(real_Y, start=1):
        print('%d,%d' % (sample_id, p), file=f)