# Training Time Series Model with LSTM

# What is LSTM

The Long Short-Term Memory (LSTM) network is a recurrent neural network (RNN) that is trained using backpropagation through time, i.e. by unrolling the network along the time dimension. This network can overcome the [vanishing gradient](https://en.wikipedia.org/wiki/Vanishing_gradient_problem) problem found in earlier RNNs.
Instead of plain neurons, LSTM networks have memory blocks, which make them "smarter" because they can store information about recent sequences. These LSTM blocks are connected through layers. Each block contains gates that manage its state and output.


# Problem

In this lab, we will use an LSTM network to train several time series models on 3 different datasets.

# Loading modules


In [None]:
import mxnet as mx
import numpy as np
import pandas
import matplotlib.pyplot as plt

# Switch the root logger to DEBUG so that progress output is shown during training
import logging

logger = logging.getLogger()  # no name given -> root logger
logger.setLevel(level=logging.DEBUG)

# Utility functions

In [None]:
def load_data(filename, seq_len, normalise_window, cols, reverse=True):
    """Load one CSV column and slice it into train/test sliding windows.

    The first of the columns selected by ``cols`` is cut into overlapping
    windows of ``seq_len + 1`` consecutive values. The first ``seq_len``
    values of a window are the model input, the last value is the target.
    The first 90% of the windows (rounded) become the training set and the
    remaining windows the test set.

    Args:
        filename: Path of the CSV file, handed to ``pandas.read_csv``.
        seq_len: Number of input values in each window.
        normalise_window: If truthy, the windows are passed through
            ``normalise_windows`` before being split.
        cols: Column selection forwarded to ``read_csv`` as ``usecols``;
            only the first selected column is used.
        reverse: If true (the default), the rows are reversed before
            windowing so that a newest-first file becomes chronological.

    Returns:
        The list ``[x_train, y_train, x_test, y_test]``. The ``x_*`` arrays
        have shape ``(windows, seq_len, 1)`` and the ``y_*`` arrays have
        shape ``(windows,)``.

    Raises:
        ValueError: If the column holds fewer than ``seq_len + 1`` values,
            i.e. not even one window can be built.
    """
    dataframe = pandas.read_csv(filename, usecols=cols)

    # Reverse to order the dataset chronologically when requested
    values = dataframe.values[::-1] if reverse else dataframe.values
    data = values[:, 0]  # only the first selected column is used

    sequence_length = seq_len + 1
    if len(data) < sequence_length:
        raise ValueError(
            "need at least {} values to build a window, got {}".format(
                sequence_length, len(data)))

    # "+ 1" so that the window ending on the very last data point is kept;
    # without it the final value of the series would never be used.
    window_count = len(data) - sequence_length + 1
    result = [data[start:start + sequence_length]
              for start in range(window_count)]

    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)

    # First 90% of the windows for training, the rest for testing
    row = int(round(0.9 * result.shape[0]))
    x_train = result[:row, :-1]
    y_train = result[:row, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add a trailing feature axis of size 1: (windows, seq_len, 1)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    """Rescale every window relative to its own first value.

    Each value ``p`` of a window becomes ``p / window[0] - 1``, so the first
    entry of every returned window is ``0.0``. Returns a list of lists of
    floats, one inner list per input window.
    """
    return [
        [float(value) / float(window[0]) - 1 for value in window]
        for window in window_data
    ]

def plot_true_data(true_data):
    """Show ``true_data`` as a single labelled line on a 20x15 inch white figure."""
    figure = plt.figure(facecolor='white')
    figure.set_size_inches(20, 15)
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(true_data, label='True Data')
    axes.legend()
    plt.show()

def plot_results(predicted_data, true_data):
    """Show ``predicted_data`` drawn over ``true_data`` on a 20x15 inch white figure."""
    figure = plt.figure(facecolor='white')
    figure.set_size_inches(20, 15)
    axes = figure.add_subplot(1, 1, 1)
    # Both curves are drawn on the single axes created above
    axes.plot(true_data, label='True Data')
    axes.plot(predicted_data, label='Prediction')
    axes.legend()
    plt.show()

# Create a 1 layer LSTM network

## LAB INSTRUCTION
- Enter **50** as the value for variable **num_hidden**  - (This is the number of hidden nodes in the LSTM cell gates)
- Enter **50** as the value for variable **seq_len** - (This is the length of each input sequence, i.e. the number of time steps the LSTM cell is unrolled over)
- Enter **50** as the value for variable **batch_size** - (This is the batch size for the training)

In [None]:
# Build LSTM Network

# Hyper-parameters -- follow the lab instruction above
num_hidden = 50
seq_len = 50
batch_size = 50

# Symbolic placeholders for the network input and the regression label
data = mx.sym.Variable('data')
target = mx.sym.Variable('target')

# One LSTM cell, unrolled over seq_len steps, outputs merged into one symbol
lstm1 = mx.rnn.LSTMCell(num_hidden=num_hidden, prefix="lstm1_")
L1, L1_State = lstm1.unroll(
    length=seq_len,
    inputs=data,
    merge_outputs=True,
    layout="NTC",
)

# Single-unit fully connected layer followed by the regression output
pred = mx.sym.FullyConnected(data=L1, num_hidden=1, name="pred")
pred = mx.sym.LinearRegressionOutput(data=pred, label=target)

model = mx.mod.Module(
    symbol=pred,
    data_names=['data'],
    label_names=['target'],
    context=mx.cpu(0),
)

# Train with Sine Wave Data
You will first see a sinusoidal wave appear. Then another image will be generated showing the prediction superimposed on the original chart.

In [None]:
# Load Sine Wave Data (windows are not normalised for this dataset)

filename = "data/sinwave.csv"

X_train, y_train, X_test, y_test = load_data(
    filename, seq_len, normalise_window=False, cols=[0])

In [None]:
# Create NDArrayIter for the training and testing set

trainIter = mx.io.NDArrayIter(data=X_train, label=y_train, batch_size=batch_size,
                              shuffle=False, label_name='target')
testIter = mx.io.NDArrayIter(data=X_test, label=y_test, batch_size=batch_size,
                             shuffle=False, label_name='target')

# Lab Instruction

- Enter **1** as the value for the **num_epochs** variable in the cell below and take a look at the plot
- Then enter **10** as the value for the **num_epochs** variable, re-run the cell and compare the difference in the plots. You should see that the second plot fits the true data much more closely than the first plot

In [None]:
num_epochs = 10  # Enter number of epochs here

# Build a fresh module every time this cell runs. fit() on a module whose
# parameters are already initialised ignores the initializer and keeps
# training the existing weights, so re-running the cell with a different
# num_epochs would otherwise continue the previous run instead of starting
# a new one.
model = mx.mod.Module(symbol=pred, data_names=['data'], label_names=['target'], context=mx.cpu(0))

model.fit(train_data=trainIter, eval_data=testIter,
          initializer=mx.init.Xavier(rnd_type="gaussian", magnitude=1),
          optimizer="adam",
          optimizer_params={"learning_rate": 1E-3},
          eval_metric="mse", num_epoch=num_epochs)

# Predict on the test set, then show the true data alone and with the prediction
test_preds = model.predict(testIter)
plot_true_data(y_test)
plot_results(test_preds.asnumpy(), y_test)

# Train with S&P 500 Index Data
Expect to see, first, a history of S&P500 variations in price. Then, superimposed, the associated prediction.

In [None]:
# Load S&P500 Data (each window is normalised relative to its first value)
filename = "data/sp500.csv"

X_train, y_train, X_test, y_test = load_data(
    filename, seq_len, normalise_window=True, cols=[0])

# Create the MXNet NDArray Iterators for training and evaluation dataset
trainIter = mx.io.NDArrayIter(data=X_train, label=y_train, batch_size=batch_size,
                              shuffle=False, label_name='target')
testIter = mx.io.NDArrayIter(data=X_test, label=y_test, batch_size=batch_size,
                             shuffle=False, label_name='target')

In [None]:
num_epochs = 10

# Train this dataset on a fresh module. Re-using the module that was already
# fitted on another dataset would make fit() ignore the initializer and
# continue from those weights instead of training a separate model.
model = mx.mod.Module(symbol=pred, data_names=['data'], label_names=['target'], context=mx.cpu(0))

model.fit(train_data=trainIter, eval_data=testIter,
          initializer=mx.init.Xavier(rnd_type="gaussian", magnitude=1),
          optimizer="adam",
          optimizer_params={"learning_rate": 1E-3},
          eval_metric="mse", num_epoch=num_epochs)

In [None]:
# Predict on the S&P 500 test set, then plot the true data alone and with the prediction
test_preds = model.predict(eval_data=testIter)
plot_true_data(true_data=y_test)
plot_results(predicted_data=test_preds.asnumpy(), true_data=y_test)


# Train with AMZN data
By now you should know what to expect.

In [None]:
# Load AMZN stock price (CSV column index 4, windows normalised)
filename = 'data/amzn.csv'
X_train, y_train, X_test, y_test = load_data(
    filename, seq_len, normalise_window=True, cols=[4])

In [None]:
# Create the MXNet NDArray Iterators for training and evaluation dataset
trainIter = mx.io.NDArrayIter(data=X_train, label=y_train, batch_size=batch_size,
                              shuffle=False, label_name='target')
testIter = mx.io.NDArrayIter(data=X_test, label=y_test, batch_size=batch_size,
                             shuffle=False, label_name='target')

In [None]:
num_epochs = 10

# Train this dataset on a fresh module. Re-using the module that was already
# fitted on another dataset would make fit() ignore the initializer and
# continue from those weights instead of training a separate model.
model = mx.mod.Module(symbol=pred, data_names=['data'], label_names=['target'], context=mx.cpu(0))

model.fit(train_data=trainIter, eval_data=testIter,
          initializer=mx.init.Xavier(rnd_type="gaussian", magnitude=1),
          optimizer="adam",
          optimizer_params={"learning_rate": 1E-3},
          eval_metric="mse", num_epoch=num_epochs)

In [None]:
# Predict on the AMZN test set, then plot the true data alone and with the prediction
test_preds = model.predict(eval_data=testIter)
plot_true_data(true_data=y_test)
plot_results(predicted_data=test_preds.asnumpy(), true_data=y_test)