In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell

from sklearn.model_selection import train_test_split
from util import minibatcher, RNN

## Define the training constants

In [2]:
# Which classifier this notebook evaluates. The data-loading cell treats
# 'motion_type' as the multi-class activity task and any other value as the
# robot-vs-human task.
model_type = 'robot'

# Network / optimisation hyper-parameters.
n_hidden = 100           # size of the LSTM hidden layer
batch_size = 8           # number of data points per batch
learning_rate = 0.01     # learning rate handed to the optimizer
dropout_keep_prob = 0.8  # presumably the dropout keep probability used in training; not referenced by the visible cells


## Import the data, and split it into training and testing sets

In [4]:
# Recorded samples with their labels, plus the extra samples that are treated
# as the "robot" class below.
clean_data = np.load("gmail/clean_data.npy")
data_labels = np.load("gmail/labels.npy")
fake_data = np.load("mix.npy")

if model_type != 'motion_type':
    # Robot-vs-human task: every recorded sample is class 1 ("human") and
    # every sample from mix.npy is class 0 ("robot").
    classes = ["robot", "human"]
    n_human, n_robot = clean_data.shape[0], fake_data.shape[0]
    input_motion_data = np.concatenate((clean_data, fake_data), axis=0)
    output_motion_data = np.concatenate((np.ones(n_human), np.zeros(n_robot))).astype(int)
else:
    # Activity task: label 0 marks unlabeled samples, so keep only the
    # non-zero labels and shift them down by one to get 0-based class ids.
    classes = ["walking", "sitting", "table", "stairs", "car"]
    labeled = data_labels != 0
    input_motion_data = clean_data[labeled]
    output_motion_data = data_labels[labeled] - 1

# Dimensions the graph-building cells depend on.
n_samples, n_steps, n_input = input_motion_data.shape[:3]
n_classes = np.max(output_motion_data) + 1

In [5]:
# Load the train/test arrays from the saved .npy files. They are not split in
# this notebook; presumably they were produced elsewhere (TODO confirm).
X_train, Y_train = np.load("X_train.npy"), np.load("Y_train.npy")
X_test, Y_test = np.load("X_test.npy"), np.load("Y_test.npy")

In [6]:
# Number of training samples per class. Y_train is reduced to class indices
# with argmax over axis 1 (presumably one-hot rows -- TODO confirm).
# minlength guarantees one bin per entry in `classes`; without it a class with
# no samples at the high end would be silently dropped by zip().
# list(...) keeps the cell output readable where zip() is lazy.
list(zip(classes, np.bincount(np.argmax(Y_train, axis=1), minlength=len(classes))))

[('robot', 439), ('human', 439)]

## Define the placeholders and variables to be optimized

In [7]:
# Graph inputs:
#   x         -- batch of sequences, shape [batch, n_steps, n_input]
#   y         -- per-sample targets, shape [batch, n_classes]
#                (presumably one-hot rows -- TODO confirm)
#   keep_prob -- value forwarded to RNN(); the evaluation cells feed 1.0
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
keep_prob = tf.placeholder(tf.float32)
# Define weights
# Both dicts are initialised from a random normal here; the "Load the trained
# model" cell later restores the variables from a checkpoint.
# NOTE(review): the order in which these tf.Variable objects are created may
# matter for matching the checkpoint -- avoid reordering without checking.
# NOTE(review): 'hidden' is [n_hidden, n_hidden]; how RNN() uses it is defined
# in util and not visible here -- confirm the intended shape.
weights = {
    'hidden': tf.Variable(tf.random_normal([n_hidden, n_hidden])),
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

## Define the various graphs: notably cost, optimizer, and accuracy

In [8]:
# Network output for a batch, one score per class. RNN() comes from util, and
# its result is used as logits by the loss below.
pred = RNN(x, weights, biases, n_input, n_steps, n_hidden, keep_prob)

# Mean softmax cross-entropy over the batch. The arguments are passed by
# keyword so logits and labels cannot be swapped by accident; newer TensorFlow
# releases reject the positional form.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model: a prediction is correct when the arg-max class of the
# network output equals the arg-max class of the target row.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## Load the trained model

In [9]:
# Add ops to save and restore all the variables.
saver = tf.train.Saver()

# Launch a session and load the trained variable values from disk so the
# evaluation cells below run against the trained model.
sess = tf.Session()
# NOTE(review): `model_name` is not assigned in any visible cell (execution
# count 3 is missing from this notebook). Define it before running this cell.
# The checkpoint path is "<model_name>.ckpt".
saver.restore(sess, model_name + ".ckpt")

In [10]:
# Accuracy on the test split, evaluated with keep_prob = 1.0.
test_feed = {x: X_test, y: Y_test, keep_prob: 1.0}
sess.run(accuracy, feed_dict=test_feed)

0.84545457

In [18]:
# Predicted class index for every test sample: run the network, then take the
# arg-max over the class axis.
test_outputs = sess.run(pred, feed_dict={x: X_test, y: Y_test, keep_prob: 1.0})
test_outputs.argmax(axis=1)

array([1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0])

In [19]:
# Ground-truth class index for every test sample, for comparison with the
# predictions shown above.
Y_test.argmax(axis=1)

array([1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0])

## Try model on some new robot data

In [36]:
robots = np.load('more_var.npy')

# Predicted class index for each new sample.
robot_predictions = np.argmax(sess.run(pred, feed_dict={x: robots, keep_prob: 1.0}), axis=1)
# With the two classes ["robot", "human"], "human" is index 1, so summing the
# predicted indices counts the samples wrongly marked as human.
human_count = sum(robot_predictions)
# All input samples are robots, so everything should be marked as class 0.
# The result gets its own name so it does not overwrite the TensorFlow
# `accuracy` tensor that the test-set evaluation cell runs.
robot_accuracy = 100 - (human_count / float(robots.shape[0]) * 100)
print(robot_accuracy)

90.6


## Count which classes were confused 

In [20]:
from collections import Counter

# Boolean mask of the test samples whose predicted class differs from the
# actual class.
test_predicted = sess.run(pred, feed_dict={x: X_test, keep_prob: 1.0}).argmax(axis=1)
test_actual = Y_test.argmax(axis=1)
wrong_predictions = test_predicted != test_actual

# Each mistake is a (predicted, actual) pair of class indices. The pair itself
# is sorted so that mistaking class 1 for class 3 is counted together with
# mistaking class 3 for class 1.
mistakes = sorted(
    sorted(pair)
    for pair in zip(test_predicted[wrong_predictions], test_actual[wrong_predictions])
)
# Convert class numbers to class names
mistakes = list((classes[first], classes[second]) for first, second in mistakes)

# Report each confused pair of classes, most frequent first.
for confused_pair, occurrences in Counter(mistakes).most_common():
    print("%s: %d" % (str(confused_pair), occurrences))


('robot', 'human'): 8


## Classify the unlabeled data

In [None]:
# Label 0 marks the samples that were never labeled.
unlabeled = data_labels == 0
# Stored under its own name so the `input_motion_data` built by the
# data-loading cell is not overwritten.
unlabeled_motion_data = clean_data[unlabeled]

# Predicted class index for each unlabeled sample.
Y = np.argmax(sess.run(pred, feed_dict={x: unlabeled_motion_data, keep_prob: 1.0}), axis=1)
# minlength keeps one bin per class, so classes that were never predicted are
# reported as 0 instead of being dropped by zip().
class_count = zip(classes, np.bincount(Y, minlength=len(classes)))

for c in class_count:
    print("%s: %d" % (c[0], c[1]))
