# Long Short Term Memory for Time Series based Forecasting Problems

In [28]:
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
import numpy as np

## Defining Hyperparameters

In [77]:
# --- Data layout -------------------------------------------------------------
BATCH_SIZE   = 7    # leading dimension of the input/target placeholders
WINDOW_SIZE  = 7    # time steps per input window (second placeholder dimension)
FEATURES     = 1    # values per time step (last placeholder dimension)
NUM_FUT_PRED = 2    # No. of future time steps to predict

# --- Model -------------------------------------------------------------------
HIDDEN_UNITS = 256  # width of every gate weight matrix and of the LSTM state

# --- Optimisation ------------------------------------------------------------
LR        = 0.001   # presumably the optimizer learning rate
EPOCHS    = 700     # presumably the number of passes over the training data
GRAD_CLIP = 7       # Clip gradients at this value if they go over it.

#TARGET_FEATURE_INDEX = 0

The inputs to the LSTM network have shape [observations, time steps, features], so we feed them (and the matching targets) into the graph through $\verb|tf.placeholder|$ tensors.

In [65]:
# Graph entry points: one batch of input windows and the values to predict for each.
inputs = tf.placeholder(
    dtype=tf.float32,
    shape=(BATCH_SIZE, WINDOW_SIZE, FEATURES),
)
targets = tf.placeholder(
    dtype=tf.float32,
    shape=(BATCH_SIZE, NUM_FUT_PRED),
)

The governing equations for the LSTM Cell are:

$c_t = f\circ c_{t-1} + i \circ g$   

$h_t = o \circ \tanh(c_t)$

where $c_t$ is the cell state and $h_t$ is the hidden state (the output of the cell) at time step $t$.

## 1. INPUT GATE

$i = \sigma(W_i \cdot [h_{t-1}, x_t] + b_i)$

In [66]:
# Input gate: one matrix for the current input x_t, one for the previous output h_{t-1}.
weights_input_gate = tf.Variable(
    tf.truncated_normal(shape=[FEATURES, HIDDEN_UNITS], stddev=0.001)
)
weights_input_hidden = tf.Variable(
    tf.truncated_normal(shape=[HIDDEN_UNITS, HIDDEN_UNITS], stddev=0.001)
)
bias_input = tf.Variable(tf.zeros(shape=[HIDDEN_UNITS]))

## 2. FORGET GATE

$f = \sigma(W_f \cdot [h_{t-1}, x_t] + b_f)$

In [67]:
# Forget gate: one matrix for the current input x_t, one for the previous output h_{t-1}.
weights_forget_gate = tf.Variable(
    tf.truncated_normal(shape=[FEATURES, HIDDEN_UNITS], stddev=0.001)
)
weights_forget_hidden = tf.Variable(
    tf.truncated_normal(shape=[HIDDEN_UNITS, HIDDEN_UNITS], stddev=0.001)
)
bias_forget = tf.Variable(tf.zeros(shape=[HIDDEN_UNITS]))

## 3. OUTPUT GATE

$o = \sigma(W_o \cdot [h_{t-1}, x_t] + b_o)$

In [68]:
# Output gate: one matrix for the current input x_t, one for the previous output h_{t-1}.
weights_output_gate = tf.Variable(
    tf.truncated_normal(shape=[FEATURES, HIDDEN_UNITS], stddev=0.001)
)
weights_output_hidden = tf.Variable(
    tf.truncated_normal(shape=[HIDDEN_UNITS, HIDDEN_UNITS], stddev=0.001)
)
bias_output = tf.Variable(tf.zeros(shape=[HIDDEN_UNITS]))

## 4. GATE GATE (Also, Memory Gate)

$g = \tanh(W_g \cdot [h_{t-1}, x_t] + b_g)$

In [69]:
# Gate gate (candidate memory): one matrix for x_t, one for the previous output h_{t-1}.
weights_gate_gate = tf.Variable(
    tf.truncated_normal(shape=[FEATURES, HIDDEN_UNITS], stddev=0.001)
)
weights_gate_hidden = tf.Variable(
    tf.truncated_normal(shape=[HIDDEN_UNITS, HIDDEN_UNITS], stddev=0.001)
)
bias_gate = tf.Variable(tf.zeros(shape=[HIDDEN_UNITS]))

### OUTPUT LAYER

In [70]:
# Linear read-out that maps the final LSTM output to NUM_FUT_PRED predicted values.
outputs_weight = tf.Variable(
    tf.truncated_normal(shape=[HIDDEN_UNITS, NUM_FUT_PRED], stddev=0.001)
)
outputs_bias = tf.Variable(tf.zeros(shape=[NUM_FUT_PRED]))

#### LSTM INTERNAL LOOP

In [71]:
def LSTMCell(inp, prev_out, prev_hidden_cell_state):
    '''Advance the LSTM by one time step using the module-level gate variables.

    Implements
        c_t = f * c_{t-1} + i * g
        h_t = o * tanh(c_t)

    Args:
        inp: input at the current time step; its last dimension must be
            FEATURES so it can be multiplied with the [FEATURES, HIDDEN_UNITS]
            input weight matrices.
        prev_out: output h_{t-1} of the previous step; its last dimension must
            be HIDDEN_UNITS to match the [HIDDEN_UNITS, HIDDEN_UNITS] recurrent
            weight matrices.
        prev_hidden_cell_state: cell state c_{t-1} of the previous step; must
            broadcast against the gate activations.

    Returns:
        (out, hidden_cell_state): the new output h_t and the new cell state c_t.
    '''
    i = tf.sigmoid(tf.matmul(inp, weights_input_gate) + tf.matmul(prev_out, weights_input_hidden) + bias_input)
    f = tf.sigmoid(tf.matmul(inp, weights_forget_gate) + tf.matmul(prev_out, weights_forget_hidden) + bias_forget)
    o = tf.sigmoid(tf.matmul(inp, weights_output_gate) + tf.matmul(prev_out, weights_output_hidden) + bias_output)
    g = tf.tanh(tf.matmul(inp, weights_gate_gate) + tf.matmul(prev_out, weights_gate_hidden) + bias_gate)

    # c_t: keep part of the old memory (forget gate) and add the gated candidate.
    hidden_cell_state = f * prev_hidden_cell_state + i * g

    # h_t must be computed from the *updated* cell state c_t. Using c_{t-1}
    # here would make the output lag one step behind and would cut the input
    # and gate gates of the final step out of the prediction entirely.
    out = o * tf.tanh(hidden_cell_state)

    return out, hidden_cell_state

In [95]:
# Smoke test: run a single cell step on the first time step of the first sequence,
# starting from all-zero states, to inspect the resulting tensor shapes.
LSTMCell(
    tf.reshape(inputs[0][0], (-1, 1)),
    np.zeros([NUM_FUT_PRED, HIDDEN_UNITS], dtype=np.float32),
    np.zeros([FEATURES, HIDDEN_UNITS], dtype=np.float32),
)

(<tf.Tensor 'mul_1178:0' shape=(2, 256) dtype=float32>,
 <tf.Tensor 'add_3592:0' shape=(2, 256) dtype=float32>)

In [90]:
def LSTM_Network_Loop(input_placeholder):
    '''Unroll the LSTM over every sequence of a batch and project the last output.

    For each of the BATCH_SIZE sequences the states are reset to zero, the cell
    is stepped WINDOW_SIZE times, and the final output is passed through the
    linear output layer (outputs_weight / outputs_bias).

    Args:
        input_placeholder: tensor indexed as [sequence][time step]; BATCH_SIZE
            and WINDOW_SIZE are read from the module-level constants.

    Returns:
        A Python list with one prediction tensor per sequence.

    NOTE(review): the initial output state has NUM_FUT_PRED rows, so every
    prediction tensor comes out as (NUM_FUT_PRED, NUM_FUT_PRED) rather than a
    single row of NUM_FUT_PRED values; the reshape to (-1, 1) also looks like it
    only lines up with the weights when FEATURES == 1 - confirm before changing
    FEATURES.
    '''
    predictions = []
    for seq_idx in range(BATCH_SIZE):
        # Fresh zero states for every sequence in the batch.
        cell_state = np.zeros([FEATURES, HIDDEN_UNITS], dtype=np.float32)
        hidden_out = np.zeros([NUM_FUT_PRED, HIDDEN_UNITS], dtype=np.float32)

        # Walk through the window one time step at a time.
        for step in range(WINDOW_SIZE):
            x_t = tf.reshape(input_placeholder[seq_idx][step], (-1, 1))
            hidden_out, cell_state = LSTMCell(x_t, hidden_out, cell_state)

        # Only the output after the last time step feeds the read-out layer.
        predictions.append(tf.matmul(hidden_out, outputs_weight) + outputs_bias)
    return predictions

In [91]:
# Build the unrolled network on the module-level `inputs` placeholder;
# the result is a list with one prediction tensor per sequence in the batch.
LSTM_output = LSTM_Network_Loop(inputs)

## Loss Function

We'll use the mean squared error (MSE) loss.

In [79]:
# IPython help lookup kept from development; not required to run the notebook.
tf.losses.mean_squared_error?

Signature:
tf.losses.mean_squared_error(
    labels,
    predictions,
    weights=1.0,
    scope=None,
    loss_collection='losses',
    reduction='weighted_sum_by_nonzero_weights',
)
Docstring:
Adds a Sum-of-Squares loss to the training procedure.

`weights` acts as a coefficient for the loss. If a scalar is provided, then
the loss is simply scaled by the given value. If `weights` is a tensor of size
[batch_size], then the total loss for each sample of the batch is rescaled
by the corresponding element in the `weights` vector.

In [106]:
def loss(target_placeholder,LSTM_output):
    '''Average the per-sequence mean squared error over the whole batch.

    Args:
        target_placeholder: tensor indexed by sequence, holding the true future
            values for each sequence.
        LSTM_output: list of per-sequence prediction tensors. Each entry is
            passed through tf.diag_part, so it must be a square matrix; its
            diagonal is compared against the targets.

    Returns:
        Scalar tensor: the mean of the per-sequence MSE values.
    '''
    per_sequence_mse = [
        tf.losses.mean_squared_error(
            tf.reshape(target_placeholder[seq_idx], (-1, 1)),
            tf.reshape(tf.diag_part(prediction), (-1, 1)),
        )
        for seq_idx, prediction in enumerate(LSTM_output)
    ]

    return tf.reduce_mean(per_sequence_mse)

In [101]:
# `loss` needs both the targets and the per-sequence predictions; calling it
# with only the targets raises a TypeError on a fresh kernel.
loss(targets, LSTM_output)

<tf.Tensor 'Mean_3:0' shape=() dtype=float32>

In [102]:
# Display the list of per-sequence prediction tensors to check their shapes.
LSTM_output

[<tf.Tensor 'add_3199:0' shape=(2, 2) dtype=float32>,
 <tf.Tensor 'add_3263:0' shape=(2, 2) dtype=float32>,
 <tf.Tensor 'add_3327:0' shape=(2, 2) dtype=float32>,
 <tf.Tensor 'add_3391:0' shape=(2, 2) dtype=float32>,
 <tf.Tensor 'add_3455:0' shape=(2, 2) dtype=float32>,
 <tf.Tensor 'add_3519:0' shape=(2, 2) dtype=float32>,
 <tf.Tensor 'add_3583:0' shape=(2, 2) dtype=float32>]

In [103]:
# Display the targets placeholder to compare its shape with the predictions.
targets

<tf.Tensor 'Placeholder_10:0' shape=(7, 2) dtype=float32>

In [104]:
# IPython help lookup kept from development; not required to run the notebook.
tf.diag_part?

Signature: tf.diag_part(input, name=None)
Docstring:
Returns the diagonal part of the tensor.

This operation returns a tensor with the `diagonal` part
of the `input`. The `diagonal` part is computed as follows:

Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a
tensor of rank `k` with dimensions `[D1,..., Dk]` where:

`diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`.

For example:

```
# 'input' is [[1, 0, 0, 0]
              [0, 2, 0, 0]
              [0, 0, 3, 0]
              [0, 0, 0, 4]]

tf.diag_part(input) ==> [1, 2, 3, 4]
```

Args:
  input: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `int64`, `complex64`, `complex128`.
    Rank k tensor where k is 2, 4, or 6.
  name: A name for the operation (optional).

Returns:
  A `Tensor`. Has the same type as `input`. The ex

### TRAINING FUNCTION

In [108]:
def train(X_train,y_train, EPOCHS ,BATCH_SIZE,WINDOW_SIZE,FEATURES,NUM_FUT_PRED, sitrep_at_epoch = 0.2):
    '''Build the training graph and fit the LSTM with Adam on mini-batches.

    X_train should be of shape (N, WINDOW_SIZE, FEATURES) and y_train of shape
    (N, NUM_FUT_PRED). The data is consumed in consecutive, non-overlapping
    slices of BATCH_SIZE rows; a trailing remainder smaller than BATCH_SIZE is
    skipped because the placeholders have a fixed batch dimension.

    Args:
        X_train: input windows, sliceable along the first axis.
        y_train: targets aligned with X_train, sliceable along the first axis.
        EPOCHS: number of passes over the training data.
        BATCH_SIZE, WINDOW_SIZE, FEATURES, NUM_FUT_PRED: placeholder dimensions.
            LSTM_Network_Loop reads the module-level BATCH_SIZE and WINDOW_SIZE,
            so the values passed here must match those constants.
        sitrep_at_epoch: fraction of EPOCHS between progress reports
            (0.2 -> a report every 20% of the run).

    Returns:
        The tf.Session holding the trained variables. The caller owns it and is
        responsible for closing it.
    '''
    inputs = tf.placeholder(tf.float32, (BATCH_SIZE, WINDOW_SIZE, FEATURES))
    targets = tf.placeholder(tf.float32, (BATCH_SIZE, NUM_FUT_PRED))

    LSTM_outputs = LSTM_Network_Loop(inputs)

    # No local variable may be called `loss`: assigning to that name anywhere in
    # this function would make it local and turn this call into an
    # UnboundLocalError.
    MSE_loss = loss(targets, LSTM_outputs)

    # `minimize` is an instance method, so the optimizer has to be constructed
    # first; LR is the learning rate from the hyperparameter cell.
    train_step = tf.train.AdamOptimizer(learning_rate=LR).minimize(MSE_loss)

    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # Integer reporting interval instead of comparing floats for equality,
    # which only ever matched a single epoch (or none at all).
    report_every = max(1, int(round(EPOCHS * sitrep_at_epoch)))

    for epoch in range(EPOCHS):
        batch_losses = []
        for start in range(0, len(X_train) - BATCH_SIZE + 1, BATCH_SIZE):
            X_batch = X_train[start:start + BATCH_SIZE]
            y_batch = y_train[start:start + BATCH_SIZE]

            batch_loss, _ = session.run(
                [MSE_loss, train_step],
                feed_dict={inputs: X_batch, targets: y_batch},
            )
            batch_losses.append(batch_loss)

        # Report the mean loss of this epoch rather than dumping every batch
        # loss recorded since the start of training.
        if batch_losses and epoch > 0 and epoch % report_every == 0:
            print("At EPOCH: {}/{}, loss : {}".format(epoch, EPOCHS, float(np.mean(batch_losses))))

    return session
            