<a href="https://colab.research.google.com/github/gmxavier/TEP-meets-LSTM/blob/master/tep-meets-lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Step 0 - Setup and helper functions

In [None]:
# Setup

# NOTE: Uncomment the lines below in order to run the notebook in Colab (RECOMMENDED)
#from google.colab import drive
#drive.mount('/content/drive/', force_remount=True) # follow the instructions to get the key
#%cd drive
#%cd MyDrive
#!git clone https://github.com/gmxavier/TEP-meets-LSTM.git # clone the repo
#%cd TEP-meets-LSTM
#!ls # check the repo folder contents
#%tensorflow_version 1.x # set the Colab tf version

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import metrics
import os
from functools import reduce


# Names of the input signals (each signal is stored in its own "<name>.txt" file)

INPUT_SIGNAL_TYPES = ["XMV({})".format(number) for number in range(1, 12)]


# Display names of the output classes

LABELS = [
    "NORMAL ",
    "FAULT 1",
    "FAULT 2",
    "FAULT 3",
    "FAULT 4",
    "FAULT 5",
    "FAULT 7",
]


# Locations of the dataset files

DATA_PATH = "tep/input/"
DATASET_PATH = "{}tep_dataset/".format(DATA_PATH)

TRAIN = "train/"
TEST = "test/"

# One features file per input signal, for each of the two splits
X_train_signals_paths = [
    "".join((DATASET_PATH, TRAIN, name, ".txt")) for name in INPUT_SIGNAL_TYPES
]
X_test_signals_paths = [
    "".join((DATASET_PATH, TEST, name, ".txt")) for name in INPUT_SIGNAL_TYPES
]

# One labels file per split
y_train_path = "".join((DATASET_PATH, TRAIN, "idv.txt"))
y_test_path = "".join((DATASET_PATH, TEST, "idv.txt"))


# Helper functions

def load_X(X_signals_paths):
    """Load the input signals and stack them into a single features tensor.

    Every path must point to a text file that holds one sequence per line,
    the samples of a sequence being separated by whitespace. Blank lines
    are ignored.

    Args:
        X_signals_paths: Iterable of file paths, one file per input signal.

    Returns:
        A float32 array of shape (n_sequences, n_steps, n_signals).

    Raises:
        OSError: If one of the files cannot be opened.
        ValueError: If a value cannot be parsed as a float or if the files
            do not all hold the same number of rows and columns.
    """
    X_signals = []
    for signal_type_path in X_signals_paths:
        # The context manager closes the file even when a row fails to parse
        with open(signal_type_path, 'r') as signal_file:
            X_signals.append([
                # split() without argument copes with repeated blanks and
                # with the trailing newline
                np.array(row.split(), dtype=np.float32)
                for row in signal_file
                if row.strip()
            ])

    # (n_signals, n_sequences, n_steps) -> (n_sequences, n_steps, n_signals)
    return np.transpose(np.array(X_signals), (1, 2, 0))


def load_y(y_path):
    """Load the fault labels stored in a text file.

    The file must hold one row of whitespace-separated integers per line.
    Blank lines are ignored.

    Args:
        y_path: Path of the labels file.

    Returns:
        An int32 array with one row per non-blank line of the file and one
        column per value found on a line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a value cannot be parsed as an integer.
    """
    # The context manager closes the file even when parsing fails later on
    with open(y_path, 'r') as labels_file:
        # split() without argument copes with repeated blanks and with the
        # trailing newline
        rows = [row.split() for row in labels_file if row.strip()]

    return np.array(rows, dtype=np.int32)


def LSTM_RNN(_X, _weights, _biases):
    # Function builds the graph of a "many to one" classifier and returns its
    # output (logits) tensor: a ReLU input layer, two stacked LSTM cells and a
    # linear output layer applied to the last time step.
    # It reads the module-level n_input, n_steps and n_hidden.
    # Note, some code of this notebook is inspired from a slightly different
    # RNN architecture used on another dataset, some of the credits goes to
    # "aymericdamien" under the MIT license.

    # Incoming layout: (batch_size, n_steps, n_input).
    # NOTE: this step could be avoided by shaping the dataset once beforehand.
    # Bring the time axis first, then flatten to (n_steps*batch_size, n_input)
    # so that the input layer processes every time step in one matmul.
    time_major = tf.transpose(_X, [1, 0, 2])
    flat_inputs = tf.reshape(time_major, [-1, n_input])

    # Input layer with ReLU activation
    pre_activation = tf.matmul(flat_inputs, _weights['hidden']) + _biases['hidden']
    hidden = tf.nn.relu(pre_activation)

    # static_rnn consumes a list of inputs, one per time step:
    # n_steps * (batch_size, n_hidden)
    step_inputs = tf.split(hidden, n_steps, 0)

    # Two recurrent layers deep: a pair of identical LSTM cells, stacked
    rnn = tf.contrib.rnn
    cells = [
        rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
        for _ in range(2)
    ]
    stacked_cells = rnn.MultiRNNCell(cells, state_is_tuple=True)
    outputs, _ = rnn.static_rnn(stacked_cells, step_inputs, dtype=tf.float32)

    # Only the output of the last time step feeds the linear output layer
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']


def extract_batch_size(_train, step, batch_size):
    # Function returns the batch number "step" (counted from 1) made of
    # "batch_size" consecutive rows of "_train", wrapping around to the first
    # row whenever the end of the data is reached.

    n_rows = len(_train)
    first = (step - 1) * batch_size
    row_ids = [(first + offset) % n_rows for offset in range(batch_size)]

    # np.empty uses its default float dtype, hence the batch holds floats
    # whatever the dtype of "_train"
    batch = np.empty([batch_size] + list(_train.shape[1:]))
    batch[:] = _train[np.array(row_ids, dtype=int)]

    return batch


def one_hot(y_, n_values=None):
    """One-hot encode a vector of class indexes.

    e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]

    Args:
        y_: Array of class indexes holding one value per row, e.g. of shape
            (n,) or (n, 1). Float values are truncated to integers.
        n_values: Number of columns of the encoding. When None (default) it
            is inferred as the largest index of "y_" plus one, which makes
            the width depend on the data passed in; give it explicitly when
            encoding a subset (e.g. a batch) that may lack the largest class.

    Returns:
        A float array of shape (n, n_values).

    Raises:
        IndexError: If an index is not smaller than "n_values".
    """
    labels = np.asarray(y_)
    labels = labels.reshape(len(labels)).astype(np.int32)

    if n_values is None:
        # An empty input has no largest index: encode it with zero columns
        n_values = int(labels.max()) + 1 if labels.size else 0

    return np.eye(n_values)[labels]  # Returns FLOATS


def model_size():
    """Print the total number of elements held by the trainable variables.

    The count is the sum, over tf.trainable_variables(), of the product of
    each variable's dimensions.
    """

    def n_elements(variable):
        # The initial value 1 keeps reduce() from raising on a scalar
        # variable, whose list of dimensions is empty
        return reduce(lambda x, y: x * y, variable.get_shape().as_list(), 1)

    n = sum(n_elements(v) for v in tf.trainable_variables())
    print("Overall model size: %d" % (n,))

    
def parameter_size():
    # Function prints a header line followed by the shape of every trainable
    # variable, one shape per line

    print("Parameters sizes:")
    shapes = [variable.shape for variable in tf.trainable_variables()]
    for shape in shapes:
        print(shape)

## Step 1 - Load the data

In [None]:
# Features tensors and fault labels of both splits

X_train = load_X(X_train_signals_paths)
X_test = load_X(X_test_signals_paths)

y_train = load_y(y_train_path)
y_test = load_y(y_test_path)


# Some debugging info: shapes and overall statistics of the test split

print("Some useful info to get an insight on dataset's shape and normalisation:")
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")
print("")

# Number of sequences per fault label, for each split
for split_name, split_labels in (("training", y_train), ("test", y_test)):
    unique_elements, counts_elements = np.unique(split_labels, return_counts=True)
    print('Faults distribution in the {} set:'.format(split_name))
    print(np.asarray((unique_elements, counts_elements)))


# Input tensor data: X is laid out as (sequences, timesteps, features)

training_data_count, n_steps, n_input = X_train.shape
test_data_count = X_test.shape[0]

## Step 2 - Build the LSTM network

In [None]:
# LSTM internal structure

n_hidden = 32 # Number of units of the input layer and of each LSTM cell
n_classes = 8 # Width of the one-hot labels: one_hot() uses (largest label + 1)
              # columns, so it should be 8 not 7, as fault 6 is omitted


# Training hyperparameters

learning_rate = 0.0025  # Step size given to the Adam optimizer
lambda_loss_amount = 0.0015  # Weight of the L2 penalty added to the cost
training_iters = training_data_count * 300  # Number of examples fed to the network per training run
batch_size = 1500  # Number of examples per training step
display_iter = 30000  # Evaluate on held-out data each time the number of fed examples is a multiple of this


# Graph input/output
# (the leading None lets the number of examples vary between training
# batches and evaluation sets)

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])


# Graph weights
# (all of them start from random normal values)

weights = {
    'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Input (hidden) layer weights
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0)) # Output layer weights
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Output (logits) of the network, one row of n_classes values per example
pred = LSTM_RNN(x, weights, biases)


# Loss, optimizer and evaluation
# The L2 term is computed after the network is built, over every trainable
# variable that exists at this point (biases included)
l2 = lambda_loss_amount * sum(
    tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
) # L2 penalty, meant to keep this oversized network from overfitting the data
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2 # Mean softmax cross-entropy plus L2 penalty
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer

# An example is correct when the largest logit matches the one-hot label
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## Step 3 - Train the LSTM network

In [None]:
# To keep track of training's performance
# (the four lists accumulate the values of all the cross-validation folds)
test_losses = []
test_accuracies = []
train_losses = []
train_accuracies = []

# Pool both splits: the cross-validation folds are drawn from the pooled data
X_ = np.append(X_train, X_test, axis=0)
y_ = np.append(y_train, y_test, axis=0)

# Encode all the labels at once, so that every batch gets the same number of
# columns even when it does not contain the largest label
y_one_hot = one_hot(y_)

nfold = 5
dr = []  # One entry per fold: per-class share (%) of correctly classified examples
ks = np.array_split(np.arange(len(y_)), nfold)  # Held-out indices of each fold

for k in ks:

    # Split the pooled data once per fold (np.delete copies the whole array,
    # so it must not be repeated at every training step)
    X_fold_train = np.delete(X_, k, axis=0)
    y_fold_train = np.delete(y_one_hot, k, axis=0)
    X_fold_test = X_[k]
    y_fold_test = y_one_hot[k]

    # Launch the graph
    sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
    try:
        # Re-initialise the variables so that every fold trains from scratch
        init = tf.global_variables_initializer()
        sess.run(init)

        # Some useful info
        print("Some useful info ...")
        model_size()
        parameter_size()
        print("")
        print("Starting training ...")

        # Perform Training steps with "batch_size" amount of example data at each loop
        step = 1
        while step * batch_size <= training_iters:
            batch_xs = extract_batch_size(X_fold_train, step, batch_size)
            batch_ys = extract_batch_size(y_fold_train, step, batch_size)

            # Fit training using batch data
            _, loss, acc = sess.run(
                [optimizer, cost, accuracy],
                feed_dict={
                    x: batch_xs,
                    y: batch_ys
                }
            )
            train_losses.append(loss)
            train_accuracies.append(acc)

            # Evaluate network only at some steps for faster training
            # (the evaluation of the last step is done once the loop is over)
            if (step*batch_size % display_iter == 0) or (step == 1):

                # To not spam console, show training accuracy/loss in this "if"
                print("Iteration #" + str(step*batch_size) + "\n" + \
                      "TRAINING SET: " + \
                      "Batch Loss = {:.6f}".format(loss) + \
                      ", Accuracy = {:.6f}".format(acc))

                # Evaluation on the held-out fold (no learning made here - just evaluation for diagnosis)
                loss, acc = sess.run(
                    [cost, accuracy],
                    feed_dict={
                        x: X_fold_test,
                        y: y_fold_test
                    }
                )
                test_losses.append(loss)
                test_accuracies.append(acc)
                print("    TEST SET: " + \
                      "Batch Loss = {:.6f}".format(loss) + \
                      ", Accuracy = {:.6f}".format(acc))

            step += 1

        print("Optimization finished!")

        # Accuracy for the held-out fold

        one_hot_predictions, final_acc, final_loss = sess.run(
            [pred, accuracy, cost],
            feed_dict={
                x: X_fold_test,
                y: y_fold_test
            }
        )

        test_losses.append(final_loss)
        test_accuracies.append(final_acc)

        print("FINAL RESULT: " + \
              "Batch Loss = {:.6f}".format(final_loss) + \
              ", Accuracy = {:.6f}".format(final_acc))

        # Share of each class that is correctly classified; the tiny constant
        # avoids a division by zero for a class absent from the fold
        predictions = one_hot_predictions.argmax(1)
        aux = metrics.confusion_matrix(y_[k], predictions, labels = np.unique(y_))
        dr.append(100*aux.diagonal()/(np.sum(aux, axis = 1)+1e-12))

        print("Cross-validation fold #" + str(len(dr)) + " of " + str(nfold))
    finally:
        # Release the session even when the fold is interrupted by an error
        sess.close()

## Step 4 - Plot the training progress

In [None]:
# (Inline plots: )
%matplotlib inline

font = {
    'family' : 'Bitstream Vera Sans',
    'weight' : 'bold',
    'size'   : 18
}
matplotlib.rc('font', **font)

width = 12
height = 12
plt.figure(figsize=(width, height))

indep_train_axis = np.array(range(batch_size, (len(train_losses)+1)*batch_size, batch_size))
plt.plot(indep_train_axis, np.array(train_losses),     "b--", label="Train losses")
plt.plot(indep_train_axis, np.array(train_accuracies), "g--", label="Train accuracies")

indep_test_axis = np.append(
    np.array(range(batch_size, len(test_losses)*display_iter, display_iter)[:-1]),
    [training_iters]
)
plt.plot(indep_test_axis, np.array(test_losses),     "b-", label="Test losses")
plt.plot(indep_test_axis, np.array(test_accuracies), "g-", label="Test accuracies")

plt.title("Training session's progress over iterations and folds")
plt.legend(loc='upper right', shadow=True)
plt.ylabel('Training Progress (Loss or Accuracy values)')
plt.xlabel('Training iteration')

plt.show()

## Step 5 - Print and plot the final results

In [None]:
# Print results
# NOTE: "k", "final_acc" and "one_hot_predictions" hold the values left by the
# last fold of the training loop, so the figures below describe that fold only

predictions = one_hot_predictions.argmax(1)
fold_truth = np.ravel(y_[k])

# Classes present in the whole dataset, in increasing order; passing them
# explicitly keeps the confusion matrix layout fixed whatever the classes
# that happen to appear in the fold or among the predictions
class_ids = np.unique(y_)

print("Testing accuracy: {:.2f}%".format(100*final_acc))

print("")
print("Precision: {:.2f}%".format(100*metrics.precision_score(fold_truth, predictions, average="weighted")))
print("Recall: {:.2f}%".format(100*metrics.recall_score(fold_truth, predictions, average="weighted")))
print("f1_score: {:.2f}%".format(100*metrics.f1_score(fold_truth, predictions, average="weighted")))

print("")
print("Confusion matrix:")
confusion_matrix = metrics.confusion_matrix(fold_truth, predictions, labels=class_ids)
print(confusion_matrix)

print("")
print("Confusion matrix (normalised to % of total test data):")
normalised_confusion_matrix = np.array(confusion_matrix, dtype=np.float32)/np.sum(confusion_matrix)*100
print(np.array_str(normalised_confusion_matrix, precision=2, suppress_small=True))


# Plot results

width = 12
height = 12
plt.figure(figsize=(width, height))

res = plt.imshow(np.array(confusion_matrix), cmap=plt.cm.summer, interpolation='nearest')
# Write the count inside every non-empty cell
for i, row in enumerate(confusion_matrix):
    for j, c in enumerate(row):
        if c>0:
            plt.text(j-.2, i+.1, c, fontsize=16)

# One tick per row/column of the matrix (not one per network output: the
# matrix has one row per class id). Fall back to the raw class ids if LABELS
# does not have exactly one name per class.
tick_positions = range(len(class_ids))
tick_labels = LABELS if len(LABELS) == len(class_ids) else [str(c) for c in class_ids]

plt.title('Confusion Matrix')
plt.colorbar()
_ = plt.xticks(tick_positions, tick_labels, rotation=90)
_ = plt.yticks(tick_positions, tick_labels)
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()