In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf  # Version 1.0.0 (some previous versions are used in past commits)
from sklearn import metrics
import random, time, os, json, glob, re, time
from random import randint
import pandas as pd

In [2]:
# Number of time steps per sequence. LSTM_RNN splits its input into this many
# per-step tensors, so it must equal axis 1 of the loaded sample arrays.
n_steps = 37

# Label files, given relative to the notebook's working directory.
train_labels = '../labels/train.csv'
val_labels = '../labels/validation.csv'
all_labels = '../labels/labels.csv'

# Headerless ';'-separated file; the first column becomes the index
# (presumably the sample id -- confirm against the CSV).
labels = pd.read_csv(train_labels, index_col=0, sep=';', header=None)
# Round-trip through JSON so the index values become *string* keys (get_class
# looks ids up as strings). '1' selects the column labelled 1, leaving a plain
# dict of {index-as-string: value in column 1}.
labels = json.loads(labels.to_json())['1']

# Same treatment for the validation labels.
validation_labels = pd.read_csv(val_labels, index_col=0, sep=';', header=None)
validation_labels = json.loads(validation_labels.to_json())['1']

# Merge the validation entries into the training dict so a single table covers
# both files; on a duplicate key the validation value overwrites the training one.
labels.update(validation_labels)
id_label_dict = labels  # alias of the merged dict, not a copy
# reset_index() adds an 'index' column holding each row's position in the file.
df = pd.read_csv(all_labels, header = None).reset_index()
# Maps each value of column 0 to its row position, which get_class returns as the class.
label_class_dict = dict(df[[0,'index']].values)

def get_class(file):
    """Return the class for a ``<digits>.npy`` sample file.

    The id is the run of digits immediately before the trailing ``.npy``
    extension. It is looked up in ``id_label_dict`` to get the label, and the
    label is looked up in ``label_class_dict`` to get the class.

    Args:
        file: Path (or bare file name) ending in ``<digits>.npy``.

    Returns:
        The value stored in ``label_class_dict`` for the file's label.

    Raises:
        ValueError: If ``file`` does not end in ``<digits>.npy``.
        KeyError: If the id, or its label, is missing from the lookup dicts.
    """
    # Require at least one digit, a literal dot and the extension at the very
    # end of the path. A looser pattern (any char instead of the dot, zero or
    # more digits) can match earlier in the path, e.g. inside a directory name
    # such as 'data_npy', and hand back an empty id.
    match = re.search(r'([0-9]+)\.npy$', file)
    if match is None:
        raise ValueError("cannot extract a numeric id from file name: {!r}".format(file))

    f_label = id_label_dict[match.group(1)]
    return label_class_dict[f_label]



In [3]:
# glob returns matches in arbitrary, filesystem-dependent order. Sort them so
# the file list (and every array built from it) has the same order on every run.
files = sorted(glob.glob('../../data/train/*.npy'))
len(files)

12023

In [4]:
# Class of every file, kept as an (n_files, 1) column: that is the label layout
# extract_batch_size and one_hot work with.
y_data = np.array([[get_class(file)] for file in files])

# Stack the per-file arrays along a new leading axis -> (n_files, ...).
# np.stack raises immediately if any file has a different shape, rather than
# letting a malformed sample slip into the training data.
X_data = np.stack([np.load(file) for file in files])

In [5]:
# Train/test split configuration.
SPLIT_SEED = 0    # fixed seed so the split is identical after every kernel restart
N_TRAIN = 10000   # samples used for training; the remainder is held out for testing

assert X_data.shape[0] == y_data.shape[0], "samples and labels are misaligned"

# One shared permutation keeps every sample aligned with its label. A dedicated
# RandomState is used so NumPy's global random state is left untouched.
permute = np.random.RandomState(SPLIT_SEED).permutation(y_data.shape[0])
y_data = y_data[permute]
X_data = X_data[permute]

y_train, y_test = y_data[:N_TRAIN], y_data[N_TRAIN:]
X_train, X_test = X_data[:N_TRAIN], X_data[N_TRAIN:]

In [6]:
# Sanity check of the split: labels and samples must agree on the first axis.
y_train.shape, X_train.shape, y_test.shape

((10000, 1), (10000, 37, 120), (2023, 1))

In [7]:
# Input Data
tf.reset_default_graph()  # start from an empty default graph before the model is built

training_data_count = len(X_train)  # number of training sequences
test_data_count = len(X_test)  # number of held-out test sequences
n_input = len(X_train[0][0])  # num input parameters per timestep

# n_hidden[0]: width of the first dense layer and of both LSTM cells;
# n_hidden[1]: width of the dense layer that follows the LSTM stack.
n_hidden = [90,50]
n_classes = 27

# Learning-rate decay. The graph cell feeds tf.train.exponential_decay with
# (global_step * batch_size), i.e. the number of samples processed so far:
#   decayed_learning_rate = init_learning_rate * decay_rate ^ (samples_seen / decay_steps)
decaying_learning_rate = True
learning_rate = 0.0025 #used if decaying_learning_rate set to False
init_learning_rate = 0.005
decay_rate = 0.96 #the base of the exponential in the decay
decay_steps = 100000 # samples processed between two successive decays

global_step = tf.Variable(0, trainable=False)
lambda_loss_amount = 0.0015  # weight of the L2 penalty added to the loss

epochs = 300
training_iters = training_data_count * epochs  # total samples to train on: `epochs` passes over the training set
batch_size = 512
display_iter = batch_size*64 # evaluate on the test set every 64 batches

# Note: the tuple below reports the *training* X shape next to *test* label
# shape and *test* statistics.
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print((X_train.shape, y_test.shape, np.mean(X_test), np.std(X_test)))
print("\nThe dataset has not been preprocessed, is not normalised etc")



(X shape, y shape, every X's mean, every X's standard deviation)
((10000, 37, 120), (2023, 1), 26.679430246985433, 40.321621301793)

The dataset has not been preprocessed, is not normalised etc


In [8]:
def LSTM_RNN(_X):
    """Build the classifier graph and return its logits.

    Pipeline: per-timestep dense ReLU layer -> two stacked LSTM cells ->
    dense ReLU layer on the last LSTM output -> linear output layer.
    Reads the module-level n_input, n_steps, n_hidden and n_classes.

    Args:
        _X: Input tensor laid out as (batch, n_steps, n_input), as implied by
            the transpose and reshape below.

    Returns:
        Tensor with n_classes columns holding unnormalised class scores
        (no activation is applied by the final layer).
    """
    # model architecture based on "guillaume-chevalier" and "aymericdamien" under the MIT license.
    _X = tf.transpose(_X, [1, 0, 2])  # swap the batch and n_steps axes -> (n_steps, batch, n_input)
    _X = tf.reshape(_X, [-1, n_input])  # flatten to (n_steps * batch, n_input), time-major
    # First dense layer, applied to every time step, with a Rectified Linear Unit activation
    y_1 = tf.layers.dense(_X, n_hidden[0], 
                          bias_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                          kernel_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                          activation=tf.nn.relu, name='layer_1')
    
    # Why transpose before reshaping: static_rnn takes a list of n_steps
    # tensors, each of size batch x num_features. Making the data time-major
    # first means the rows are ordered as all 1st frames, then all 2nd frames,
    # etc., so an even split along axis 0 yields exactly one tensor per time step.
    _X_split = tf.split(y_1, n_steps, 0) 

    
    # Define two stacked LSTM cells (two recurrent layers deep) with tensorflow.
    # Both cells use n_hidden[0] units.
    lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(n_hidden[0], forget_bias=1.0, state_is_tuple=True)
    lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(n_hidden[0], forget_bias=1.0, state_is_tuple=True)
    lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X_split, dtype=tf.float32)
    
    # "Many to one" classifier: only the output of the last time step is kept.
    # See http://karpathy.github.io/2015/05/21/rnn-effectiveness/ for details.
    lstm_last_output = outputs[-1]
    
    # Second dense ReLU layer, of width n_hidden[1], on top of the last LSTM output.
    y_2 = tf.layers.dense(lstm_last_output, n_hidden[1], 
                          bias_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                          kernel_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                          activation=tf.nn.relu, name='layer_2')
    
    
    # Linear output layer: one score per class, no activation.
    output = tf.layers.dense(y_2, n_classes, 
                                  bias_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                                  kernel_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                                  activation=None, 
                                  name='final_layer')
    return output


def extract_batch_size(_train, _labels, _unsampled, batch_size):
    """Draw a random batch, without replacement, from the not-yet-sampled indices.

    Args:
        _train: Array of samples, indexed along its first axis.
        _labels: Array of labels aligned with ``_train``; either one value per
            sample or an (n, 1) column.
        _unsampled: List of indices still available for sampling. It is
            modified IN PLACE: the drawn indices are removed and the relative
            order of the remaining ones is kept.
        batch_size: Number of samples to draw.

    Returns:
        Tuple ``(batch_s, batch_labels, _unsampled)`` where ``batch_s`` is a
        float64 array of shape ``(batch_size,) + _train.shape[1:]``,
        ``batch_labels`` is a float64 array of shape ``(batch_size, 1)`` and
        ``_unsampled`` is the very same list object that was passed in.

    Raises:
        ValueError: If fewer than ``batch_size`` indices are left. The check
            runs before anything is drawn, so ``_unsampled`` is left untouched.
    """
    if batch_size > len(_unsampled):
        raise ValueError(
            "cannot draw {} samples from {} remaining indices".format(batch_size, len(_unsampled))
        )

    # One draw for the whole batch, then a single filtering pass over the pool.
    chosen = random.sample(_unsampled, batch_size)
    chosen_set = set(chosen)
    # Slice assignment keeps the caller's list object, so callers that ignore
    # the returned list still see the updated pool.
    _unsampled[:] = [index for index in _unsampled if index not in chosen_set]

    picks = np.asarray(chosen, dtype=np.intp)
    batch_s = np.asarray(_train)[picks].astype(np.float64)
    batch_labels = np.asarray(_labels)[picks].reshape(batch_size, 1).astype(np.float64)

    return batch_s, batch_labels, _unsampled


def one_hot(y_, n_values=None):
    """One-hot encode a column of integer class ids.

    e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]

    Args:
        y_: Array of class ids with one id per row, shaped (n,) or (n, 1).
            Values are truncated to int32.
        n_values: Number of columns (classes) in the result. When None (the
            default) the width is inferred as ``max(y_) + 1``, which means it
            depends on which ids happen to be present; pass the real number
            of classes to get a fixed width. Required for empty input.

    Returns:
        Float array of shape (n, n_values) with a single 1.0 per row.
    """
    y_ = y_.reshape(len(y_))
    if n_values is None:
        n_values = int(np.max(y_)) + 1
    return np.eye(n_values)[np.array(y_, dtype=np.int32)]  # Returns FLOATS


In [9]:
# Graph input/output
# x: batches of sequences, (batch, n_steps, n_input); y: one-hot labels, (batch, n_classes).
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

pred = LSTM_RNN(x)  # logits, one column per class

# Loss, optimizer and evaluation
# The penalty sums over every trainable variable in the graph, scaled by lambda_loss_amount.
l2 = lambda_loss_amount * sum(
    tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
) # L2 penalty to keep this oversized network from overfitting the data
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=pred)) + l2 # Softmax loss
# NOTE: when decay is enabled this rebinds the module-level float `learning_rate`
# to a tensor. The decay is driven by global_step*batch_size (samples processed),
# and staircase=True makes it change in discrete jumps every decay_steps samples.
if decaying_learning_rate:
    learning_rate = tf.train.exponential_decay(init_learning_rate, global_step*batch_size, decay_steps, decay_rate, staircase=True)


#decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) #exponentially decayed learning rate
# Passing global_step lets minimize() advance it on every training step.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost,global_step=global_step) # Adam Optimizer

# Fraction of rows whose highest-scoring class matches the one-hot label.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


# Draw one sample batch for inspection. This removes batch_size indices from
# unsampled_indices; the training cell rebuilds the list before it starts.
unsampled_indices = list(range(0,len(X_train)))
sample_X, _, _ = extract_batch_size(X_train, y_train, unsampled_indices, batch_size)



In [None]:
# Loss/accuracy history: train_* get one entry per training step,
# test_* one entry per evaluation (plus the final one).
test_losses = []
test_accuracies = []
train_losses = []
train_accuracies = []


def _encode_labels(raw_labels):
    """One-hot encode class ids and zero-pad the columns up to n_classes.

    one_hot() sizes its output from the largest id it receives, so a set of
    labels that happens to miss the highest classes comes back narrower than
    the `y` placeholder. Padding makes the width always n_classes.
    """
    encoded = one_hot(raw_labels)
    if encoded.shape[1] < n_classes:
        padded = np.zeros((encoded.shape[0], n_classes))
        padded[:, :encoded.shape[1]] = encoded
        encoded = padded
    return encoded


# The test labels never change: encode (and pad) them once, outside the loop.
test_ys = _encode_labels(y_test)

sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
init = tf.global_variables_initializer()
sess.run(init)

# Perform Training steps with "batch_size" amount of data at each loop.
# Elements of each batch are chosen randomly, without replacement, from X_train,
# restarting when remaining datapoints < batch_size
step = 1
time_start = time.time()
unsampled_indices = list(range(0,len(X_train)))

while step * batch_size <= training_iters:
    # Refill the pool once it can no longer supply a full batch.
    if len(unsampled_indices) < batch_size:
        unsampled_indices = list(range(0,len(X_train)))
    batch_xs, raw_labels, unsampled_indices = extract_batch_size(X_train, y_train, unsampled_indices, batch_size)
    batch_ys = _encode_labels(raw_labels)

    # Fit training using batch data
    _, loss, acc = sess.run(
        [optimizer, cost, accuracy],
        feed_dict={
            x: batch_xs,
            y: batch_ys
        }
    )
    train_losses.append(loss)
    train_accuracies.append(acc)

    # Evaluate network only at some steps for faster training:
    if (step*batch_size % display_iter == 0) or (step == 1):

        # `learning_rate` is a tensor only when decay is enabled; otherwise it
        # is the plain float from the configuration cell and must not be run.
        if isinstance(learning_rate, tf.Tensor):
            current_lr = sess.run(learning_rate)
        else:
            current_lr = learning_rate

        # To not spam console, show training accuracy/loss in this "if"
        print(("Iter #" + str(step*batch_size) + \
              ":  Learning rate = " + "{:.6f}".format(current_lr) + \
              ":   Batch Loss = " + "{:.6f}".format(loss) + \
              ", Accuracy = {}".format(acc)))

        # Evaluation on the test set (no learning made here - just evaluation for diagnosis)
        loss, acc = sess.run(
            [cost, accuracy],
            feed_dict={
                x: X_test,
                y: test_ys
            }
        )
        test_losses.append(loss)
        test_accuracies.append(acc)
        print(("PERFORMANCE ON TEST SET:             " + \
              "Batch Loss = {}".format(loss) + \
              ", Accuracy = {}".format(acc)))

    step += 1
print("Optimization Finished!")

# Accuracy for test data.
# The results get their own names so the `accuracy` tensor is not overwritten
# by a number, which would make this cell fail when run a second time.
one_hot_predictions, final_accuracy, final_loss = sess.run(
    [pred, accuracy, cost],
    feed_dict={
        x: X_test,
        y: test_ys
    }
)

test_losses.append(final_loss)
test_accuracies.append(final_accuracy)

print(("FINAL RESULT: " + \
      "Batch Loss = {}".format(final_loss) + \
      ", Accuracy = {}".format(final_accuracy)))
time_stop = time.time()
print(("TOTAL TIME:  {}".format(time_stop - time_start)))

Iter #512:  Learning rate = 0.005000:   Batch Loss = 3.765934, Accuracy = 0.025390625
PERFORMANCE ON TEST SET:             Batch Loss = 3.7118875980377197, Accuracy = 0.05388037487864494
Iter #32768:  Learning rate = 0.005000:   Batch Loss = 3.015604, Accuracy = 0.16796875
PERFORMANCE ON TEST SET:             Batch Loss = 3.0834388732910156, Accuracy = 0.14434008300304413
Iter #65536:  Learning rate = 0.005000:   Batch Loss = 2.823562, Accuracy = 0.212890625
PERFORMANCE ON TEST SET:             Batch Loss = 2.8091678619384766, Accuracy = 0.23035097122192383
Iter #98304:  Learning rate = 0.005000:   Batch Loss = 2.550646, Accuracy = 0.298828125
PERFORMANCE ON TEST SET:             Batch Loss = 2.611361265182495, Accuracy = 0.2753336727619171


In [None]:
# Scratch check: inspect the type of the entries returned by glob.
type(files[0])

In [None]:
# Scratch: draw a sample batch for the shape experiments in the following cells.
# Repeats the last two statements of the graph-construction cell; it rebinds the
# module-level unsampled_indices to a fresh index list and then removes
# batch_size entries from it.
unsampled_indices = list(range(0,len(X_train)))
sample_X, _, _ = extract_batch_size(X_train, y_train, unsampled_indices, batch_size)

In [None]:
# Scratch check of the transpose -> reshape -> split steps used at the top of
# LSTM_RNN, run on one concrete batch.
# Names are kept distinct from `x` (the input placeholder) and `sess` (the
# training session) so that running this cell cannot clobber either of them.
x_const = tf.constant(sample_X)
xt = tf.transpose(x_const, [1,0,2])   # (n_steps, batch, n_input)
x1 = tf.reshape(xt, [-1, n_input])    # (n_steps * batch, n_input), time-major

# (the dense ReLU layer sits here in the real model; it is skipped so the
#  split pieces can be compared with the raw transposed values)

xs = tf.split(x1, n_steps)            # list of n_steps tensors, each (batch, n_input)

# Nothing here uses variables, so no initializer is needed; the context manager
# closes the throwaway session when the check is done.
with tf.Session() as check_sess:
    x_split, x_tran = check_sess.run([xs,xt])

In [None]:
# Shape of the stacked per-step pieces next to the shape of the batch they were
# built from. sample_X is the input of the experiment (presumably what the
# undefined name `x_org` was meant to be -- confirm).
np.array(x_split).shape, np.array(sample_X).shape

In [None]:
# True when stacking the split pieces reproduces the transposed batch, i.e.
# piece i holds time step i of every sample.
np.array_equal(x_split,x_tran)