In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import time
from datetime import timedelta
import math
import pickle
import numpy.random as random

import prettytensor as pt

In [2]:
def plot(imgs, title=None):
    """Show one or more images side by side in a single figure row.

    Parameters
    ----------
    imgs : image, or list/tuple of images
        A single image is wrapped into a one-element list. Every image must
        expose ``.shape``. Images with more than two dimensions are shown
        with their first three channels in reversed order (2, 1, 0).
    title : str, optional
        Figure-level title; nothing is drawn when it is ``None``.
    """
    # Accept a bare image as well as a list or tuple of images.
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]

    plt.figure()

    # The title belongs to the figure, so set it once rather than per image.
    if title is not None and len(imgs) > 0:
        plt.suptitle(title)

    n_imgs = len(imgs)
    for idx, img in enumerate(imgs):
        plt.subplot(1, n_imgs, idx + 1)
        plt.axis("off")
        if len(img.shape) > 2:
            # Reverse the channel order (presumably BGR -> RGB; confirm
            # against the image source).
            plt.imshow(img[:, :, [2, 1, 0]])
        else:
            plt.imshow(img)

def plot_images(images, cls_true, cls_pred=None):
    """Plot up to nine images in a 3x3 grid, labelled with their classes.

    Parameters
    ----------
    images : sequence of arrays
        At most nine images; each one is reshaped to ``img_shape`` before
        being drawn.
    cls_true : sequence
        True class of every image; must have the same length as ``images``.
    cls_pred : sequence, optional
        Predicted class of every image. When given, it must hold at least
        one entry per image and is shown next to the true class.

    Raises
    ------
    AssertionError
        If the lengths disagree or more than nine images are passed.
    """
    n_images = len(images)
    assert n_images == len(cls_true) <= 9
    assert cls_pred is None or len(cls_pred) >= n_images

    # Create figure with 3x3 sub-plots.
    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=0.3, wspace=0.3)

    for i, ax in enumerate(axes.flat):
        # Remove ticks from every cell of the grid.
        ax.set_xticks([])
        ax.set_yticks([])

        # Fewer than nine images were supplied: leave the spare cells blank.
        if i >= n_images:
            ax.axis("off")
            continue

        ax.imshow(images[i].reshape(img_shape), cmap='binary')

        # Show the true (and, if available, predicted) class below the image.
        if cls_pred is None:
            xlabel = "True: {0}".format(cls_true[i])
        else:
            xlabel = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])
        ax.set_xlabel(xlabel)

    # Ensure the plot is shown correctly with multiple plots
    # in a single Notebook cell.
    plt.show()

In [5]:
# Side length used for both image dimensions (see img_shape below).
img_size = 72

# Tuple with height and width of images used to reshape arrays.
img_shape = (img_size, img_size)

# Number of channels per sample. transform_data() fills them as
# 3 rgb channels + 1 depth channel + 1 segmentation channel.
n_channels = 5

# Number of classes, one class for each of the gestures.
n_classes = 20

In [6]:
def transform_data(data_in):
    """Pack raw samples into a structured array of inputs and one-hot labels.

    Parameters
    ----------
    data_in : object exposing ``.shape`` and the fields 'rgb', 'dep', 'seg', 'lbl'
        ``data_in.shape[0]`` is taken as the number of samples. For sample
        ``i`` the rgb data fills channels 0-2, the depth data channel 3 and
        the first channel of the segmentation data channel 4.
        ``data_in['lbl'][i]`` is used as the index of the one-hot label.

    Returns
    -------
    numpy.ndarray
        Structured array with one record per sample and two fields:
        'x' (uint8, shape ``(img_size, img_size, n_channels)``) and
        'y' (uint8, shape ``(n_classes,)``).
    """
    n = data_in.shape[0]

    # The label width follows n_classes instead of a hard-coded 20 so the
    # array stays consistent with the rest of the notebook configuration.
    sample_dtype = [('x', np.uint8, (img_size, img_size, n_channels)),
                    ('y', np.uint8, (n_classes,))]
    d = np.zeros(shape=n, dtype=sample_dtype)

    # Unpacking a record yields its two sub-array fields; writes to them go
    # through to d.
    for i, (image, onehot) in enumerate(d):
        image[..., :3] = data_in['rgb'][i]
        image[..., 3] = data_in['dep'][i]
        image[..., 4] = data_in['seg'][i][..., 0]

        onehot[data_in['lbl'][i]] = 1

    return d

In [8]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The context manager closes the file handle as soon as loading is finished.
with open("./data_pp/train_sa.pkl", 'rb') as f:
    data_in = pickle.load(f)

n_samples = data_in.shape[0]
print('n_samples: {}'.format(n_samples))
print('done')

n_samples: 77421
done


In [9]:
# Convert the raw training samples into the structured ('x', 'y') array.
data = transform_data(data_in)
print('done')

done


In [10]:
# Shuffle in place with a dedicated, seeded generator so that a fresh
# "Restart & Run All" always produces the same train/validation split.
split_seed = 0
random.RandomState(split_seed).shuffle(data)

# use fraction as validation set
cut = int(0.9*n_samples)

# Both parts are slices (views) of the shuffled array, not copies.
train = data[:cut]
valid = data[cut:]

n_samples_train = train.shape[0]
n_samples_valid = valid.shape[0]

print('training set:\t{0: >5} samples'.format(n_samples_train))
print('validation set:\t{0: >5} samples'.format(n_samples_valid))
print('done')

training set:	69678 samples
validation set:	 7743 samples
done


In [11]:
# Input placeholder: a batch of images of shape (img_size, img_size, n_channels);
# the leading None leaves the batch size open.
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, n_channels], name='x')

In [12]:
# One-hot encoded true labels, one row per sample. The width follows
# n_classes instead of a hard-coded 20 so it cannot drift from the
# classifier's num_classes.
y_true = tf.placeholder(tf.float32, shape=[None, n_classes], name='y_true')

# Index of the true class of every sample.
y_true_cls = tf.argmax(y_true, dimension=1)

In [13]:
# Wrap the input placeholder so that Pretty Tensor layers can be chained onto it.
x_pretty = pt.wrap(x)

In [14]:
# Keep probability shared by both dropout layers. It defaults to the training
# value, so every existing session.run call behaves exactly as before.
# NOTE(review): the dropout layers are built without a phase argument, so they
# presumably stay active during validation/test forward passes as well; feed
# {keep_prob: 1.0} there to switch them off - confirm against Pretty Tensor's
# dropout/phase semantics.
keep_prob = tf.placeholder_with_default(0.6, shape=[], name='keep_prob')

# Network: five convolutions with two max-pool stages, followed by two
# fully connected layers with dropout and a softmax classifier.
with pt.defaults_scope(activation_fn=tf.nn.relu, l2loss=1e-5):
    y_pred, loss = (
        x_pretty
        .conv2d(kernel=5, stride=1, depth=32)
        .conv2d(kernel=5, stride=1, depth=32)
        .max_pool(kernel=2, stride=2)
        .conv2d(kernel=3, stride=1, depth=64)
        .max_pool(kernel=2, stride=2)
        .conv2d(kernel=3, stride=2, depth=64)
        .conv2d(kernel=3, stride=2, depth=128)
        .flatten()
        .fully_connected(size=256)
        .dropout(keep_prob=keep_prob)
        .fully_connected(size=512)
        .dropout(keep_prob=keep_prob)
        .softmax_classifier(num_classes=n_classes, labels=y_true)
    )

In [15]:
def get_weights_variable(layer_name):
    """Return the existing variable named 'weights' from scope ``layer_name``.

    The scope is re-entered with ``reuse=True`` so that ``tf.get_variable``
    looks the variable up instead of creating a new one. This is a slightly
    awkward use of an API that was really intended for another purpose.
    """
    with tf.variable_scope(layer_name, reuse=True):
        return tf.get_variable('weights')

In [16]:
# weights_conv1 = get_weights_variable(layer_name='layer_conv1')
# weights_conv2 = get_weights_variable(layer_name='layer_conv2')
# weights_conv3 = get_weights_variable(layer_name='layer_conv3')
# weights_conv4 = get_weights_variable(layer_name='layer_conv4')
# weights_conv5 = get_weights_variable(layer_name='layer_conv5')

In [17]:
# Running count of training iterations performed so far; updated by optimize().
total_iterations = 0

In [18]:
# Number of training samples drawn for each optimization step in optimize().
train_batch_size = 128

In [19]:
# Learning rate handed to the RMSProp optimizer.
learning_rate = 1e-5

In [20]:
# Training op: one RMSProp step that minimizes the classifier loss.
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

# Predicted class = index of the largest entry of y_pred for each sample.
y_pred_cls = tf.argmax(y_pred, dimension=1)
# Per-sample boolean: does the predicted class match the true class?
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
# Fraction of correct predictions in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# TensorFlow session used by all training and evaluation calls below.
session = tf.Session()

In [None]:
# Initialise all graph variables before the first training step.
session.run(tf.global_variables_initializer())

In [20]:
def optimize(n_iterations):
    """Run ``n_iterations`` training steps on random mini-batches.

    Iteration numbering continues from the global ``total_iterations``, which
    is increased by ``n_iterations`` when the loop has finished. Validation
    accuracy is printed whenever the iteration number is a multiple of 1000
    and once more at the end; the accuracy on the current training batch is
    printed whenever the iteration number is a multiple of 200. Finally the
    elapsed wall-clock time is printed.

    Parameters
    ----------
    n_iterations : int
        Number of optimization steps to perform in this call.
    """
    # Ensure we update the global variable rather than a local copy.
    global total_iterations

    # Start-time used for printing time-usage below.
    start_time = time.time()

    first_iteration = total_iterations
    for i in range(first_iteration, first_iteration + n_iterations):

        # Draw a mini-batch of distinct training samples.
        draw = random.choice(a=train, size=train_batch_size, replace=False)
        batch = {x: draw['x'], y_true: draw['y']}

        # Run one optimizer step on this batch.
        session.run(optimizer, feed_dict=batch)

        if i % 1000 == 0:
            print_valid_accuracy()

        # Print status every so often
        if i % 200 == 0:
            # Accuracy on the batch that was just used for the update.
            acc = session.run(accuracy, feed_dict=batch)

            msg = "iteration: {0:>6}, accuracy (training): {1:>6.1%}, learning rate: {2}"

            # A plain Python number is printed directly; anything else is
            # treated as a graph tensor and evaluated. isinstance also
            # accepts ints and float subclasses, which the former exact
            # type comparison sent to session.run by mistake.
            if isinstance(learning_rate, (int, float)):
                rate = learning_rate
            else:
                rate = session.run(learning_rate)

            print(msg.format(i, acc, rate))

    # Update the total number of iterations performed.
    total_iterations += n_iterations

    # Wall-clock time spent in the loop above.
    time_dif = time.time() - start_time

    # Print final validation accuracy
    print_valid_accuracy()

    # Print the time-usage.
    print("elapsed: " + str(timedelta(seconds=int(round(time_dif)))))

In [21]:
def plot_example_errors(cls_pred, correct):
    """Plot the first nine misclassified validation samples.

    This function is called from print_valid_accuracy().

    Parameters
    ----------
    cls_pred : numpy.ndarray
        Predicted class for every sample of ``valid``.
    correct : array of bool
        Per-sample flag, True where the prediction matches the true class.
    """
    # Element-wise negation of the boolean flags.
    incorrect = np.logical_not(correct)

    # First channel of the validation images that were misclassified.
    images = valid['x'][incorrect][..., 0]

    # Predicted and true classes of those images.
    cls_pred = cls_pred[incorrect]
    cls_true = valid['y'][incorrect].argmax(axis=1)

    # Plot the first 9 images.
    plot_images(images=images[:9], cls_true=cls_true[:9], cls_pred=cls_pred[:9])

In [22]:
# Split the valid-set into smaller batches of this size.
valid_batch_size = 256

def print_valid_accuracy(show_example_errors=False):
    """Predict the classes of the validation set and print the accuracy.

    The validation set is pushed through the network in batches of
    ``valid_batch_size`` samples.

    Parameters
    ----------
    show_example_errors : bool, optional
        When True, also plot some misclassified samples via
        plot_example_errors().
    """
    # Predicted classes, filled in batch by batch. The builtin int replaces
    # the np.int alias, which has been removed from NumPy.
    cls_pred = np.zeros(shape=n_samples_valid, dtype=int)

    # NOTE(review): the graph's dropout layers are not switched off for this
    # forward pass unless the feed overrides their keep probability -
    # confirm against Pretty Tensor's dropout/phase semantics.
    for i in range(0, n_samples_valid, valid_batch_size):
        # The last batch may be shorter than valid_batch_size.
        j = min(i + valid_batch_size, n_samples_valid)

        feed = {x: valid['x'][i:j], y_true: valid['y'][i:j]}
        cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed)

    # True class = position of the 1 in each one-hot label.
    cls_true = valid['y'].argmax(axis=1)

    correct = (cls_true == cls_pred)
    correct_sum = correct.sum()

    acc = float(correct_sum) / n_samples_valid

    msg = "accuracy (valid): {0:.1%} ({1} / {2})"
    print(msg.format(acc, correct_sum, n_samples_valid))

    if show_example_errors:
        print("example errors:")
        plot_example_errors(cls_pred=cls_pred, correct=correct)

In [42]:
# Train for another 10000 iterations; numbering continues from total_iterations.
optimize(n_iterations=10000)

accuracy (valid): 89.2% (6908 / 7743)
iteration:  30000, accuracy (training):  95.3%, learning rate: 1e-05
iteration:  30200, accuracy (training):  98.4%, learning rate: 1e-05
iteration:  30400, accuracy (training):  99.2%, learning rate: 1e-05
iteration:  30600, accuracy (training):  99.2%, learning rate: 1e-05
iteration:  30800, accuracy (training):  97.7%, learning rate: 1e-05
accuracy (valid): 91.8% (7105 / 7743)
iteration:  31000, accuracy (training):  99.2%, learning rate: 1e-05
iteration:  31200, accuracy (training):  99.2%, learning rate: 1e-05
iteration:  31400, accuracy (training): 100.0%, learning rate: 1e-05
iteration:  31600, accuracy (training):  97.7%, learning rate: 1e-05
iteration:  31800, accuracy (training):  99.2%, learning rate: 1e-05
accuracy (valid): 91.9% (7114 / 7743)
iteration:  32000, accuracy (training):  96.9%, learning rate: 1e-05
iteration:  32200, accuracy (training):  96.9%, learning rate: 1e-05
iteration:  32400, accuracy (training):  96.9%, learning r

In [None]:
# Report the accuracy on the validation set.
print_valid_accuracy()

In [None]:
# Report the validation accuracy and plot some misclassified samples.
print_valid_accuracy(show_example_errors=True)

In [24]:
# add validation data to data for final training run
# train and valid are the two consecutive slices of `data`, so their
# concatenation is `data` itself. Binding it directly keeps this cell
# idempotent (re-running it cannot append the validation samples a second
# time) and avoids copying the whole data set.
# NOTE: once this cell has run, training draws from the validation samples
# too, so the accuracy printed by print_valid_accuracy() is no longer a
# held-out estimate.
train = data
n_samples_train = train.shape[0]
print('done')

done


In [26]:
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The context manager closes the file handle as soon as loading is finished.
with open("./data_pp/test.pkl", 'rb') as f:
    data_test = pickle.load(f)

n_samples_test = len(data_test['dep'])
print('n_samples: {}'.format(n_samples_test))
print('done')

n_samples: 13340
done


In [27]:
# Convert the raw test samples into the same structured ('x', 'y') array layout.
test = transform_data(data_test)
print('done')

done


In [43]:
# Number of test samples pushed through the network per session.run call.
test_batch_size = 256

# Predicted class per test sample, filled in batch by batch. The builtin int
# replaces the np.int alias, which has been removed from NumPy.
cls_pred = np.zeros(shape=n_samples_test, dtype=int)

# NOTE(review): the graph's dropout layers are not switched off for this
# forward pass unless the feed overrides their keep probability - confirm
# against Pretty Tensor's dropout/phase semantics.
for i in range(0, n_samples_test, test_batch_size):
    # The last batch may be shorter than test_batch_size.
    j = min(i + test_batch_size, n_samples_test)

    # Get the images from the test-set between index i and j.
    images = test['x'][i:j]

    # Only the images are fed; the predicted class does not need labels.
    batch = {x: images}

    # Calculate the predicted class using TensorFlow.
    cls_pred[i:j] = session.run(y_pred_cls, feed_dict=batch)

print('done')

done


In [44]:
# Two-column table for the submission file:
# column 0 holds the 1-based sample ID, column 1 the predicted class.
prediction = np.zeros(shape=(n_samples_test, 2))
prediction[:, 0] = np.arange(1, n_samples_test + 1)
prediction[:, 1] = cls_pred
print('done')

done


In [45]:
# Write the submission file: a header line followed by one "ID,Prediction"
# row per test sample, formatted as integers.
with open('prediction.csv', 'wb') as f:
    f.write(b'ID,Prediction\n')
    np.savetxt(f, prediction, fmt='%i', delimiter=",")
print('done')

done
