In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import time
from datetime import timedelta
import math
import pickle
import numpy.random as random

import prettytensor as pt

In [2]:
def plot(imgs, title=None):
    # make sure input is a list
    if not isinstance(imgs, list):
        imgs = [imgs]
    
    plt.figure()
        
    for i in range(len(imgs)):
        if(title is not None): 
            plt.suptitle(title)
        plt.subplot(1, len(imgs), i+1)
        plt.axis("off")
        # fix channels for rgb
        if len(imgs[i].shape) > 2:
            plt.imshow(imgs[i][:,:,[2,1,0]])
        else:
            plt.imshow(imgs[i])

def plot_images(images, cls_true, cls_pred=None):
    assert len(images) == len(cls_true) == 9
    
    # Create figure with 3x3 sub-plots.
    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=0.3, wspace=0.3)

    for i, ax in enumerate(axes.flat):
        # Plot image.
        ax.imshow(images[i].reshape(img_shape), cmap='binary')

        # Show true and predicted classes.
        if cls_pred is None:
            xlabel = "True: {0}".format(cls_true[i])
        else:
            xlabel = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])

        # Show the classes as the label on the x-axis.
        ax.set_xlabel(xlabel)
        
        # Remove ticks from the plot.
        ax.set_xticks([])
        ax.set_yticks([])
    
    # Ensure the plot is shown correctly with multiple plots
    # in a single Notebook cell.
    plt.show()

In [3]:
img_size = 96

# Tuple with height and width of images used to reshape arrays.
img_shape = (img_size, img_size)

# Number of colour channels for the images
n_channels = 5

# Number of classes, one class for each of the gestures.
n_classes = 20

In [4]:
def transform_data(data_in):
    
    n = data_in.shape[0]
    d = np.zeros(shape=n, dtype=[('x', np.uint8, (img_size, img_size, n_channels)), ('y', np.uint8, (20,))])

    for i, (x, y) in enumerate(d):

        x[...,:3] = data_in['rgb'][i]
        x[..., 3] = data_in['dep'][i]
        x[..., 4] = data_in['seg'][i]

        y[data_in['lbl'][i]] = 1
        
    return d

In [5]:
data_in = pickle.load(open("./data_pp/train_sa_new.pkl", 'rb'))
n_samples = data_in.shape[0]
print('n_samples: {}'.format(n_samples))
print('done')

n_samples: 77421
done


In [6]:
data = transform_data(data_in)
print('done')

done


In [7]:
# separate data into training and validation set
by_subjects = True

# min number of samples in validation set
valid_min = data_in.shape[0]//8

if by_subjects:
    subjects = np.array(list(set(data_in['sub'])))
    random.shuffle(subjects)
    counts = [(data_in['sub']==i).sum() for i in subjects]

    index = 0
    remaining = valid_min

    while remaining > 0:
        remaining -= counts[index]
        index += 1
        
    print('subs in valid set: {}/{}'.format(index, subjects.shape[0]))
    
    train = data[np.in1d(data_in['sub'], subjects[:index])]
    valid = data[np.in1d(data_in['sub'], subjects[index:])]
    
else:
    random.shuffle(data)
    
    train = data[:valid_min]
    valid = data[valid_min:]

n_samples_train = train.shape[0]
n_samples_valid = valid.shape[0]
               
print('training set:\t{0: >5} samples'.format(n_samples_train))
print('validation set:\t{0: >5} samples'.format(n_samples_valid))
print('done')

subs in valid set: 25/200
training set:	 9754 samples
validation set:	67667 samples
done


In [8]:
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, n_channels], name='x')

In [9]:
y_true = tf.placeholder(tf.float32, shape=[None, 20], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)

In [10]:
x_pretty = pt.wrap(x)

In [11]:
with pt.defaults_scope(activation_fn=tf.nn.relu, l2loss=1e-4):
    y_pred, loss = x_pretty.\
        conv2d  (kernel=7, stride=1, depth= 32).\
        max_pool(kernel=2, stride=2).\
        conv2d  (kernel=5, stride=1, depth= 64).\
        max_pool(kernel=2, stride=2).\
        conv2d  (kernel=5, stride=1, depth=128).\
        max_pool(kernel=2, stride=2).\
        conv2d  (kernel=3, stride=1, depth=128).\
        max_pool(kernel=2, stride=2).\
        conv2d  (kernel=3, stride=1, depth= 64).\
        flatten().\
        dropout(keep_prob=0.5).\
        fully_connected(size=1024).\
        dropout(keep_prob=0.5).\
        fully_connected(size=128).\
        softmax_classifier(num_classes=n_classes, labels=y_true)

In [12]:
def get_weights_variable(layer_name):
    # Retrieve an existing variable named 'weights' in the scope
    # with the given layer_name.
    # This is awkward because the TensorFlow function was
    # really intended for another purpose.

    with tf.variable_scope(layer_name, reuse=True):
        variable = tf.get_variable('weights')

    return variable

In [14]:
total_iterations = 0

In [15]:
train_batch_size = 64

In [25]:
learning_rate = 3e-4

In [17]:
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

y_pred_cls = tf.argmax(y_pred, dimension=1)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [18]:
session = tf.Session()

In [19]:
session.run(tf.global_variables_initializer())

In [20]:
def optimize(n_iterations):
    # ensure we update the global variable rather than a local copy.
    global total_iterations

    start_time = time.time()

    for i in range(total_iterations, total_iterations + n_iterations):

        # draw a random batch of training samples
        draw = random.choice(a=train, size=train_batch_size, replace=False)
    
        batch = {x: draw['x'], y_true: draw['y']}
        
        # run the optimizer on the batch
        session.run(optimizer, feed_dict=batch)

        if i % 1000 == 0:
            print_valid_accuracy()
        
        if i % 200 == 0:
            # calculate training accuracy
            acc = session.run(accuracy, feed_dict=batch)

            msg = "iteration: {0:>6}, accuracy (training): {1:>6.1%}, learning rate: {2}"
            rate = learning_rate if type(learning_rate) == float else session.run(learning_rate)
            
            print(msg.format(i, acc, rate))

    total_iterations += n_iterations
    
    print_valid_accuracy()
    
    end_time = time.time()
    time_dif = end_time - start_time

    print("elapsed: " + str(timedelta(seconds=int(round(time_dif)))))

In [21]:
def plot_example_errors(cls_pred, correct):
    # This function is called from print_valid_accuracy() below.

    # Negate the boolean array.
    incorrect = (correct == False)
    
    # get misclassified images from the valid-set, prediction and correct value
    images = valid['x'][incorrect][...,0]
    cls_pred = cls_pred[incorrect]
    cls_true = valid['y'][incorrect].argmax(axis=1)
    
    # Plot the first 9 images.
    plot_images(images=images[:9,...], cls_true=cls_true[:9,...], cls_pred=cls_pred[:9])

In [22]:
# Split the valid-set into smaller batches of this size.
valid_batch_size = 256

def print_valid_accuracy(show_example_errors=False):

    # Allocate array for the prediction
    cls_pred = np.zeros(shape=n_samples_valid, dtype=np.int)
    
    i = 0

    while i < n_samples_valid:
        # The ending index for the next batch is denoted j.
        j = min(i + valid_batch_size, n_samples_valid)

        # Get the images and labels from the valid-set between index i and j.
        images = valid['x'][i:j]
        labels = valid['y'][i:j]

        # Calculate the predicted class
        cls_pred[i:j] = session.run(y_pred_cls, feed_dict={x: images, y_true: labels})

        # move indices to next batch
        i = j

    cls_true = valid['y'].argmax(axis=1)

    correct = (cls_true == cls_pred)
    correct_sum = correct.sum()
    
    acc = float(correct_sum) / n_samples_valid
    
    msg = "accuracy (valid): {0:.1%} ({1} / {2})"
    print(msg.format(acc, correct_sum, n_samples_valid))
    
    if show_example_errors:
        print("example errors:")
        plot_example_errors(cls_pred=cls_pred, correct=correct)

In [24]:
optimize(n_iterations=5000)

accuracy (valid): 5.7% (3873 / 67667)
iteration:      0, accuracy (training):   9.4%, learning rate: 0.0001
iteration:    200, accuracy (training):   6.2%, learning rate: 0.0001
iteration:    400, accuracy (training):   6.2%, learning rate: 0.0001
iteration:    600, accuracy (training):   9.4%, learning rate: 0.0001
iteration:    800, accuracy (training):  15.6%, learning rate: 0.0001
accuracy (valid): 11.0% (7467 / 67667)
iteration:   1000, accuracy (training):  14.1%, learning rate: 0.0001
iteration:   1200, accuracy (training):  10.9%, learning rate: 0.0001
iteration:   1400, accuracy (training):  12.5%, learning rate: 0.0001
iteration:   1600, accuracy (training):  15.6%, learning rate: 0.0001
iteration:   1800, accuracy (training):  18.8%, learning rate: 0.0001
accuracy (valid): 14.6% (9888 / 67667)
iteration:   2000, accuracy (training):  14.1%, learning rate: 0.0001
iteration:   2200, accuracy (training):  15.6%, learning rate: 0.0001
iteration:   2400, accuracy (training):  17.

In [26]:
optimize(n_iterations=1000)

accuracy (valid): 23.0% (15544 / 67667)
iteration:   5000, accuracy (training):  37.5%, learning rate: 0.0003
iteration:   5200, accuracy (training):  40.6%, learning rate: 0.0003
iteration:   5400, accuracy (training):  46.9%, learning rate: 0.0003
iteration:   5600, accuracy (training):  46.9%, learning rate: 0.0003
iteration:   5800, accuracy (training):  50.0%, learning rate: 0.0003
accuracy (valid): 24.3% (16454 / 67667)
elapsed: 0:02:23


In [None]:
optimize(n_iterations=1000)

In [None]:
print_valid_accuracy()

In [None]:
print_valid_accuracy(show_example_errors=True)

In [None]:
# add validation data to data for final training run
train = np.concatenate((train, valid))
n_samples_train = train.shape[0]
print('done')

In [None]:
data_test = pickle.load(open("./data_pp/test_sa_new.pkl", 'rb'))
n_samples_test = len(data_test['dep'])
print('n_samples: {}'.format(n_samples_test))
print('done')

In [None]:
test = transform_data(data_test)
print('done')

In [None]:
test_batch_size = 256

cls_pred = np.zeros(shape=n_samples_test, dtype=np.int)

i = 0

while i < n_samples_test:
    # The ending index for the next batch is denoted j.
    j = min(i + test_batch_size, n_samples_test)

    # Get the images from the test-set between index i and j.
    images = test['x'][i:j]

    # Create a feed-dict with these images and labels.
    batch = {x: images}

    # Calculate the predicted class using TensorFlow.
    cls_pred[i:j] = session.run(y_pred_cls, feed_dict=batch)

    # Set the start-index for the next batch to the
    # end-index of the current batch.
    i = j
    
print('done')

In [None]:
prediction = np.zeros((n_samples_test, 2))
prediction[:, 0] = np.arange(n_samples_test)+1
prediction[:, 1] = cls_pred
print('done')

In [None]:
with open('prediction.csv', 'wb') as f:
    f.write(b'ID,Prediction\n')
    np.savetxt(f, prediction, fmt='%i', delimiter=",")
print('done')