In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas
import math
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn

from create_train_test_val_maps import *

In [2]:
num_windows = 10
window_size = 10
train_combined, val_combined, test_combined = load_maps(num_windows, window_size)
selected_codes = [0,45021, 44004, 43004, 45008, 45002, 45007]

In [3]:
# get global index from (row, col) index
def sub2ind(array_shape, row, col):
    ind = row*array_shape[1] + col
    if row < 0 or row >= array_shape[0]:
        ind = -1
    if col < 0 or col >= array_shape[1]:
        ind = -1
    return ind

# get (row, col) index from global index
def ind2sub(array_shape, ind):
    row = int(ind) / array_shape[1]
    col = ind % array_shape[1]
    if ind < 0:
        row = -1
        col = -1
    if ind >=  array_shape[0]*array_shape[1]:
        row = -1
        col = -1
    return (row, col)

def one_hot_labels(labels, num_classes):
    one_hot_labels = np.zeros((labels.size, num_classes))
    one_hot_labels[np.arange(labels.size),labels.astype(int)] = 1
    return one_hot_labels

In [4]:
def get_data(selected_codes, data_map, num_windows):
    X = []
    Y = []
    for vehicleID in data_map.keys():
        for ATA6code in data_map[vehicleID].keys():
            if ATA6code not in selected_codes:
                continue
            for window in data_map[vehicleID][ATA6code].keys():
                for sequence in data_map[vehicleID][ATA6code][window]:
                    Y.append(sub2ind((len(selected_codes),num_windows),selected_codes.index(ATA6code), window))
                    X.append(sequence.as_matrix()) 
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

In [5]:
fieldsnaps,results = get_data(selected_codes, train_combined, num_windows)
print('shape: (number of samples) x (number of time steps) x (number of features)')
print("fieldsnaps shape: ", fieldsnaps.shape)
print("results.shape: ", results.shape)
one_hot_results = one_hot_labels(results, 70)
print("one_hot_results.shape: ", one_hot_results.shape)

shape: (number of samples) x (number of time steps) x (number of features)
fieldsnaps shape:  (1807, 10, 61)
results.shape:  (1807,)
one_hot_results.shape:  (1807, 70)


In [6]:
# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 139 # since 1807/139 = 13.0
display_step = 200

# Network Parameters
num_input = 61 # MNIST data input (img shape: 28*28)
timesteps = 10 # timesteps
num_hidden = 139 # hidden layer num of features
num_classes = 70 # MNIST total classes (0-69 digits)

# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes]) # num_classes

In [7]:
# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}

In [9]:
def RNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [10]:
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [None]:
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)
    
    # Start training
    for step in range(1, training_steps+1):
        batch_x = fieldsnaps[step:step + batch_size]
        batch_y = one_hot_results[step:step + batch_size]
        # Reshape data to get 28 seq of 28 elements
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for 139 sequences
    #test_len = 139
    #test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    #test_label = mnist.test.labels[:test_len]
    #print("Testing Accuracy:", \
    #    sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))

Step 1, Minibatch Loss= 12.4039, Training Accuracy= 0.000
Step 200, Minibatch Loss= 0.1090, Training Accuracy= 0.993
Step 400, Minibatch Loss= 2.2088, Training Accuracy= 0.763