In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas
import math
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn

from create_train_test_val_maps import *

In [2]:
num_windows = 10
window_size = 10
train_combined, val_combined, test_combined = load_maps(num_windows, window_size)
selected_codes = [0,45021, 44004, 43004, 45008, 45002, 45007]

In [3]:
# get global index from (row, col) index
def sub2ind(array_shape, row, col):
    ind = row*array_shape[1] + col
    if row < 0 or row >= array_shape[0]:
        ind = -1
    if col < 0 or col >= array_shape[1]:
        ind = -1
    return ind

# get (row, col) index from global index
def ind2sub(array_shape, ind):
    row = int(ind) / array_shape[1]
    col = ind % array_shape[1]
    if ind < 0:
        row = -1
        col = -1
    if ind >=  array_shape[0]*array_shape[1]:
        row = -1
        col = -1
    return (row, col)

def one_hot_labels(labels, num_classes):
    one_hot_labels = np.zeros((labels.size, num_classes))
    one_hot_labels[np.arange(labels.size),labels.astype(int)] = 1
    return one_hot_labels

In [4]:
def get_data(selected_codes, data_map, num_windows):
    X = []
    Y = []
    for vehicleID in data_map.keys():
        for ATA6code in data_map[vehicleID].keys():
            if ATA6code not in selected_codes:
                continue
            for window in data_map[vehicleID][ATA6code].keys():
                for sequence in data_map[vehicleID][ATA6code][window]:
                    Y.append(sub2ind((len(selected_codes),num_windows),selected_codes.index(ATA6code), window))
                    X.append(sequence.as_matrix()) 
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

In [5]:
# Get data
train_fieldsnaps,train_results = get_data(selected_codes, train_combined, num_windows)
validation_fieldsnaps,validation_results = get_data(selected_codes, val_combined, num_windows)
test_fieldsnaps,test_results = get_data(selected_codes, test_combined, num_windows)

# Turn into One Hot Labels
one_hot_train_results = one_hot_labels(train_results, 70)
one_hot_validation_results = one_hot_labels(validation_results, 70)
one_hot_test_results = one_hot_labels(test_results, 70)

print('shape: (number of samples) x (number of time steps) x (number of features)')
print("train_fieldsnaps shape: ", train_fieldsnaps.shape)
print("train_results.shape: ", train_results.shape)
print("validation_fieldsnaps shape: ", validation_fieldsnaps.shape)
print("validation_results.shape: ", validation_results.shape)


print("one_hot_train_results.shape: ", one_hot_train_results.shape)
print("one_hot_validation_results.shape: ", one_hot_validation_results.shape)
print("one_hot_validation_results.shape: ", one_hot_test_results.shape)

shape: (number of samples) x (number of time steps) x (number of features)
train_fieldsnaps shape:  (1907, 10, 61)
train_results.shape:  (1907,)
validation_fieldsnaps shape:  (397, 10, 61)
validation_results.shape:  (397,)
one_hot_train_results.shape:  (1907, 70)
one_hot_validation_results.shape:  (397, 70)
one_hot_validation_results.shape:  (280, 70)


In [6]:
#print(np.count_nonzero(train_results))
print('number of validation samples: ', len(validation_results))
print('number of non repairs in validation set: ',len(validation_results) - np.count_nonzero(validation_results))
print('percent of non repairsin validation set: ',(len(validation_results) - np.count_nonzero(validation_results))/len(validation_results))
#print(np.count_nonzero(test_results))
print('number of test samples: ',len(test_results))
print('number of non repairs in test set: ',len(test_results) - np.count_nonzero(test_results))
print('percent of non repairsin test set: ',(len(test_results) - np.count_nonzero(test_results))/len(test_results))

number of validation samples:  397
number of non repairs in validation set:  186
percent of non repairsin validation set:  0.46851385390428213
number of test samples:  280
number of non repairs in test set:  138
percent of non repairsin test set:  0.4928571428571429


In [7]:
# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 139 # since 1807/139 = 13.0
display_step = 200
num_epochs = 10
iters_per_epoch = 13

# Network Parameters
num_input = 61 # MNIST data input (img shape: 28*28)
timesteps = 10 # timesteps
num_hidden = 139 # hidden layer num of features
num_classes = 70 # MNIST total classes (0-69 digits)

# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes]) # num_classes

In [8]:
# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}

In [9]:
def RNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [10]:
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [11]:
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)
    
    # Start training
    for epoch in range(0,num_epochs):
        for iter_ in range(0, iters_per_epoch):
            batch_x = train_fieldsnaps[iter_:iter_ + batch_size]
            #print(iter_)
            #print("batch_x.shape: ", batch_x.shape)
            batch_y = one_hot_train_results[iter_:iter_ + batch_size]
            #print("batch_y.shape: ", batch_y.shape)
            # Reshape data to get 28 seq of 28 elements
            batch_x = batch_x.reshape((batch_size, timesteps, num_input))
            # Run optimization op (backprop)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
           
        #print("batch_x.shape: ", batch_x.shape)
        #print("batch_y.shape: ", batch_y.shape)
        # Calculate batch loss and accuracy
        train_loss, train_acc = sess.run([loss_op, accuracy], feed_dict={X: train_fieldsnaps,
                                                             Y: one_hot_train_results})
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: validation_fieldsnaps,
                                                             Y: one_hot_validation_results})
        print("epoch " + str(epoch) + '\n' + "Epoch Training Loss= " + \
              "{:.4f}".format(train_loss) + ", Epoch Training Accuracy= " + \
              "{:.3f}".format(train_acc) + '\n' + "Epoch Validation Loss= " + \
             "{:.4f}".format(val_loss) + ", Epoch Validation Accuracy= " + \
              "{:.3f}".format(val_acc))


    print("Optimization Finished!")

    # Calculate accuracy for 139 sequences
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_fieldsnaps, Y: one_hot_test_results}))

epoch 0
Epoch Training Loss= 9.0019, Epoch Training Accuracy= 0.023
Epoch Validation Loss= 9.5036, Epoch Validation Accuracy= 0.015
epoch 1
Epoch Training Loss= 6.9872, Epoch Training Accuracy= 0.098
Epoch Validation Loss= 7.9215, Epoch Validation Accuracy= 0.123
epoch 2
Epoch Training Loss= 7.2516, Epoch Training Accuracy= 0.499
Epoch Validation Loss= 8.2876, Epoch Validation Accuracy= 0.441
epoch 3
Epoch Training Loss= 7.3818, Epoch Training Accuracy= 0.538
Epoch Validation Loss= 8.4932, Epoch Validation Accuracy= 0.446
epoch 4
Epoch Training Loss= 7.2407, Epoch Training Accuracy= 0.542
Epoch Validation Loss= 8.3713, Epoch Validation Accuracy= 0.451
epoch 5
Epoch Training Loss= 7.0317, Epoch Training Accuracy= 0.545
Epoch Validation Loss= 8.1329, Epoch Validation Accuracy= 0.453
epoch 6
Epoch Training Loss= 7.2444, Epoch Training Accuracy= 0.545
Epoch Validation Loss= 8.3545, Epoch Validation Accuracy= 0.458
epoch 7
Epoch Training Loss= 7.2479, Epoch Training Accuracy= 0.545
Epoch Va