In [1]:
from collections import Counter

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from sklearn.model_selection import train_test_split

from util import minibatcher, RNN

## Indicate all of the training constants

In [4]:
# Task selector: 'motion_type' picks the five motion classes; any other value
# picks the robot-vs-human setup in the data-loading cell.
model_type = 'motion_type'

# Size of the LSTM hidden layer
n_hidden = 100
# Number of data points in a batch
batch_size = 8
# Learning rate of the optimizer
learning_rate = 0.01
# Keep probability intended for training; evaluation cells feed 1.0 instead.
dropout_keep_prob = 0.8

# Base name shared by the saved train/test splits and the checkpoint file.
model_name = "spy_model"
# File holding the generated ("fake") sequences.
model_fake_data = "hmm.npy"

## Import the data, and split it into training and testing sets

In [5]:
# Recorded sequences, their integer labels, and the generated sequences.
clean_data = np.load("gmail/clean_data.npy")
data_labels = np.load("gmail/labels.npy")
fake_data = np.load(model_fake_data)

if model_type != 'motion_type':
    # Robot-vs-human task: recorded data is class 1, generated data is class 0.
    classes = ["robot", "human"]
    n_human, n_robot = clean_data.shape[0], fake_data.shape[0]
    input_motion_data = np.concatenate((clean_data, fake_data), axis=0)
    output_motion_data = np.concatenate((np.ones(n_human), np.zeros(n_robot))).astype(int)
else:
    # Motion-type task: label 0 means "unlabeled", so keep only non-zero labels.
    classes = ["walking", "sitting", "table", "stairs", "car"]
    labeled = data_labels != 0
    input_motion_data = clean_data[labeled]
    # Shift labels down by one so class ids start at 0 once the 0s are removed.
    output_motion_data = data_labels[labeled] - 1

# Dimensions used to build the placeholders: (samples, time steps, features).
n_samples, n_steps, n_input = input_motion_data.shape[:3]
n_classes = output_motion_data.max() + 1

In [6]:
# The splits were saved per model; every file name ends with the model name.
split_suffix = "_" + model_name + ".npy"
X_train = np.load("npy/X_train" + split_suffix)
Y_train = np.load("npy/Y_train" + split_suffix)
X_test = np.load("npy/X_test" + split_suffix)
Y_test = np.load("npy/Y_test" + split_suffix)

In [7]:
# Number of training samples per class. minlength pads the counts so that a
# class with no samples still appears with a 0 instead of being dropped by zip.
zip(classes, np.bincount(np.argmax(Y_train, axis=1), minlength=len(classes)))

[('walking', 22), ('sitting', 22), ('table', 22), ('stairs', 22), ('car', 22)]

## Define the placeholders and variables to be optimized

In [8]:
# Graph inputs. x receives batches of sequences (n_steps x n_input each),
# y receives the matching targets with one column per class, and keep_prob is
# a scalar fed at run time (the evaluation cells feed 1.0).
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
keep_prob = tf.placeholder(tf.float32)
# Randomly initialised weights and biases handed to the RNN helper from util.
# NOTE(review): these variables are unnamed and are later restored from a
# checkpoint, so their creation order presumably has to match the order used
# at training time - confirm before reordering anything in this cell.
weights = {
    'hidden': tf.Variable(tf.random_normal([n_hidden, n_hidden])),
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

## Define the various graphs: notably cost, optimizer, and accuracy

In [9]:
# Per-class scores from the RNN helper defined in util; they are treated as
# unnormalised logits below.
pred = RNN(x, weights, biases, n_input, n_steps, n_hidden, keep_prob)

# Mean softmax cross-entropy over the batch. The arguments are passed by
# keyword so logits and labels cannot be swapped silently, and so the call
# also works on TensorFlow versions that reject positional arguments here.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model: a sample is correct when the highest-scoring class matches
# the highest entry of its target row; accuracy is the mean over the batch.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## Load the trained model

In [10]:
# Saver covering all variables created so far; used here only for restoring.
saver = tf.train.Saver()

# One session is opened and kept alive for every evaluation cell below.
sess = tf.Session()

# Load the trained variable values from the checkpoint named after the model.
checkpoint_path = "ckpt/" + model_name + ".ckpt"
saver.restore(sess, checkpoint_path)

In [11]:
# Accuracy on the held-out split, with keep_prob fed as 1.0.
test_feed = {x: X_test, y: Y_test, keep_prob: 1.0}
sess.run(accuracy, feed_dict=test_feed)

0.73134327

## FP vs FN

In [None]:
# Confusion counts for the two-class case, where class 0 is the negative class
# and every other class id counts as positive.
predictions = np.argmax(sess.run(pred, feed_dict={x: X_test, keep_prob: 1.0}), axis=1)
actual = np.argmax(Y_test, axis=1)

test_size = predictions.shape[0]

# Boolean masks replace the per-sample loop. The old loop used `y` as its loop
# variable, which overwrote the `y` placeholder and broke any later cell that
# feeds it.
predicted_negative = predictions == 0
actual_negative = actual == 0

tns = int(np.sum(predicted_negative & actual_negative))
fns = int(np.sum(predicted_negative & ~actual_negative))
fps = int(np.sum(~predicted_negative & actual_negative))
tps = int(np.sum(~predicted_negative & ~actual_negative))

# Both figures are percentages of the whole test set, not of the actual
# negatives / positives.
print("FP rate: %f" % (fps / float(test_size) * 100))
print("FN rate: %f" % (fns / float(test_size) * 100))

## Try model on some new robot data

In [None]:
robots = np.load('np/hmm.npy')

robot_predicted = np.argmax(sess.run(pred, feed_dict={x: robots, keep_prob: 1.0}), axis=1)
# All input samples are robots, so everything should be marked as class 0;
# any non-zero prediction is a miss. Counting non-zero entries (rather than
# summing class ids) stays correct even with more than two classes.
human_count = np.count_nonzero(robot_predicted)

# Stored under its own name so the `accuracy` tensor of the graph is not
# overwritten by a plain float.
robot_accuracy = 100 - (human_count / float(robots.shape[0]) * 100)
print(robot_accuracy)

## Count which classes were confused 

In [13]:
# Get indices of incorrect predictions in the test set
test_predicted = np.argmax(sess.run(pred, feed_dict={x: X_test, keep_prob: 1.0}), axis=1)
test_actual = np.argmax(Y_test, axis=1)
wrong_predictions = test_predicted != test_actual

mistakes = zip(test_predicted[wrong_predictions], test_actual[wrong_predictions])
# Convert classs number to class names
mistakes = map(lambda p: (classes[p[0]], classes[p[1]]), mistakes)

from collections import Counter
for m, n in Counter(mistakes).most_common():
    print "%s: %d" % (str(m), n)


('car', 'walking'): 8
('stairs', 'walking'): 4
('car', 'sitting'): 3
('walking', 'stairs'): 1
('walking', 'car'): 1
('stairs', 'car'): 1


## Classify the unlabeled data

In [14]:
# Samples whose label is 0 have no class assigned; predict one for each.
unlabeled = data_labels == 0
# Kept under its own name so the data selected in the loading cell is not
# overwritten by this cell.
unlabeled_motion_data = clean_data[unlabeled]

unlabeled_predicted = np.argmax(
    sess.run(pred, feed_dict={x: unlabeled_motion_data, keep_prob: 1.0}), axis=1)
# minlength keeps one count per class, so a class that was never predicted is
# printed with 0 instead of being dropped by zip.
class_count = list(zip(classes, np.bincount(unlabeled_predicted, minlength=len(classes))))

for class_name, count in class_count:
    print("%s: %d" % (class_name, count))


walking: 43
sitting: 86
table: 19
stairs: 6
car: 59


In [None]:
def hash_sequence(time_steps, num_bins=500):
    """Return a coarse fingerprint of a sequence based on its per-feature mean.

    The sequence is averaged over its first axis, each of the six feature
    means is clamped to a fixed range, quantised into ``num_bins`` equal-width
    bins, and the resulting bin vector is hashed. Two sequences whose means
    fall into the same bins therefore get the same value.

    Args:
        time_steps: array-like of shape (steps, 6).
        num_bins: number of bins per feature between the fixed bounds.

    Returns:
        The built-in ``hash`` of the printed bin vector (an int). It is only
        meant to be compared with other values produced in the same session.
    """
    # Fixed lower / upper bounds for each of the six features.
    min_possible = np.array([-15, -15, -50, -400, -400, -250])
    max_possible = np.array([20, 30, 5, 400, 500, 300])
    # float() instead of np.float, which recent NumPy releases no longer provide.
    bin_sizes = (max_possible - min_possible) / float(num_bins)

    feature_means = np.mean(time_steps, axis=0)
    # Keep everything within the above bounds
    clamped = np.clip(feature_means, min_possible, max_possible)
    # Determine the bin index of each feature
    binned = np.round((clamped - min_possible) / bin_sizes)
    return hash(str(binned))

# Fingerprints seen so far; a set gives constant-time membership checks.
hashes = set()
# Recorded samples that the model places in class 0.
fn = 0
# Samples skipped because an earlier sample had the same fingerprint.
hash_hits = 0

for sample in clean_data:
    h = hash_sequence(sample)
    if h in hashes:
        hash_hits += 1
        continue
    # The model expects a batch, so wrap the single sample in a batch of one.
    batch = np.array([sample])
    klass = np.argmax(sess.run(pred, feed_dict={x: batch, keep_prob: 1.0}))
    if klass == 0:
        fn += 1
    hashes.add(h)

print("%d %d" % (fn, hash_hits))

In [None]:
# Start from the fingerprints collected on the recorded data, without
# modifying that collection; a set gives constant-time membership checks.
new_hashes = set(hashes)

robots = np.load('more_hmm.npy')
print(robots.shape)
# Samples whose fingerprint was already seen.
hash_hits = 0
# Seen-fingerprint samples that the model nevertheless places in class 1.
hash_hit_class_miss = 0
# New-fingerprint samples classified as non-zero (fps) or as class 0 (tns).
fps = 0
tns = 0
for sample in robots:
    h = hash_sequence(sample)
    # Every sample is classified exactly once, whichever branch it ends up in.
    batch = np.array([sample])
    klass = np.argmax(sess.run(pred, feed_dict={x: batch, keep_prob: 1.0}))
    if h in new_hashes:
        hash_hits += 1
        if klass == 1:
            hash_hit_class_miss += 1
        continue
    if klass == 0:
        tns += 1
    else:
        fps += 1
    new_hashes.add(h)

print("%d %d %d %d" % (hash_hits, hash_hit_class_miss, fps, tns))