In [1]:
import numpy as np               #for maths
import pandas as pd              #for data manipulation
import matplotlib.pyplot as plt  #for visualization
import torch.nn as nn

In [7]:
# Layer sizes; init_parameters() reads these to shape the weight matrices.
input_units, hidden_units, output_units = 100, 256, 1000

# Optimiser settings.
learning_rate = 0.005
# NOTE(review): beta1/beta2 look like Adam-style decay rates; confirm against the optimiser code.
beta1, beta2 = 0.9, 0.99

### Activation functions

In [3]:
def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    Parameters
    ----------
    x : scalar or numpy array
        Input value(s).

    Returns
    -------
    NumPy scalar or array with the same shape as ``x`` and values in [0, 1].

    Notes
    -----
    The direct form ``1 / (1 + np.exp(-x))`` overflows inside ``np.exp`` for
    large negative ``x``. Here ``log(1 + exp(-x))`` is obtained from
    ``np.logaddexp(0, -x)``, which does not overflow, and the result is then
    exponentiated.
    """
    # sigmoid(x) = exp(-log(1 + exp(-x))) = exp(-logaddexp(0, -x))
    return np.exp(-np.logaddexp(0, -x))

In [4]:
def tanh(x):
    """Element-wise hyperbolic tangent; a thin wrapper around ``np.tanh``."""
    squashed = np.tanh(x)
    return squashed

In [5]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Parameters
    ----------
    x : 2-D numpy array
        Scores; normalisation is performed along axis 1.

    Returns
    -------
    Array of the same shape as ``x`` whose rows are non-negative and sum to 1.

    Notes
    -----
    The maximum of each row is subtracted before exponentiating. Softmax is
    invariant to a per-row shift, so the result is mathematically unchanged,
    but ``np.exp`` can no longer overflow to ``inf`` and produce ``nan`` rows.
    """
    # After the shift the largest exponent in every row is exp(0) = 1.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)

    # keepdims keeps the row sums as a column so they broadcast over each row.
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

### LSTM cell from scratch

In [8]:
def init_parameters():
    """Create the randomly initialised weight matrices for the LSTM network.

    Every matrix is drawn from ``np.random.normal`` with mean 0 and standard
    deviation 0.01. Sizes are taken from the module-level ``input_units``,
    ``hidden_units`` and ``output_units``.

    Returns
    -------
    dict
        ``'fgw'``, ``'igw'``, ``'ogw'``, ``'ggw'`` : forget, input, output and
        gate-gate weights, each of shape
        ``(input_units + hidden_units, hidden_units)``.
        ``'how'`` : hidden-to-output weights of shape
        ``(hidden_units, output_units)``.
    """
    mu, sigma = 0, 0.01

    # All four gates act on the concatenated [input, hidden] vector.
    gate_shape = (input_units + hidden_units, hidden_units)

    # Draw order is fgw, igw, ogw, ggw, how, so a seeded np.random gives the
    # same values as before.
    weights = {
        key: np.random.normal(mu, sigma, gate_shape)
        for key in ('fgw', 'igw', 'ogw', 'ggw')
    }
    weights['how'] = np.random.normal(mu, sigma, (hidden_units, output_units))

    return weights

![image](LSTM.jpg)

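Equations (no bias terms; $[x_t, a_{t-1}]$ is the current input concatenated with the previous activation, and $\odot$ is the element-wise product)

* $f_a = \sigma([x_t, a_{t-1}]\, W_f)$
* $i_a = \sigma([x_t, a_{t-1}]\, W_i)$
* $g_a = \tanh([x_t, a_{t-1}]\, W_g)$
* $o_a = \sigma([x_t, a_{t-1}]\, W_o)$
* $c_t = (f_a \odot c_{t-1}) + (i_a \odot g_a)$
* $a_t = o_a \odot \tanh(c_t)$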

In [None]:
def lstm_cell(batch_dataset, prev_activation_matrix, prev_cell_matrix, parameters):
    """Run one forward step of a bias-free LSTM cell.

    Parameters
    ----------
    batch_dataset : array
        Input for the current time step.
        (assumes shape (batch, input_units) - TODO confirm against caller)
    prev_activation_matrix : array
        Activation from the previous step; concatenated with
        ``batch_dataset`` along axis 1.
    prev_cell_matrix : array
        Cell memory from the previous step.
    parameters : dict
        Must contain the gate weights under ``'fgw'``, ``'igw'``, ``'ogw'``
        and ``'ggw'``.

    Returns
    -------
    lstm_activations : dict
        Gate activations under the keys ``'fa'``, ``'ia'``, ``'oa'``, ``'ga'``.
    cell_memory_matrix : array
        Updated cell memory ``fa * prev_cell_matrix + ia * ga``.
    activation_matrix : array
        Updated activation ``oa * tanh(cell_memory_matrix)``.
    """
    w_forget, w_input, w_output, w_candidate = (
        parameters[key] for key in ('fgw', 'igw', 'ogw', 'ggw')
    )

    # Every gate sees the same [x_t, a_{t-1}] vector.
    stacked = np.concatenate((batch_dataset, prev_activation_matrix), axis=1)

    forget_act = sigmoid(np.matmul(stacked, w_forget))
    input_act = sigmoid(np.matmul(stacked, w_input))
    output_act = sigmoid(np.matmul(stacked, w_output))
    candidate_act = tanh(np.matmul(stacked, w_candidate))

    # Keep part of the old memory and add the gated candidate values.
    retained = np.multiply(forget_act, prev_cell_matrix)
    written = np.multiply(input_act, candidate_act)
    cell_memory_matrix = retained + written

    # The output gate scales the squashed memory to give the new activation.
    activation_matrix = np.multiply(output_act, tanh(cell_memory_matrix))

    lstm_activations = {
        'fa': forget_act,
        'ia': input_act,
        'oa': output_act,
        'ga': candidate_act,
    }

    return lstm_activations, cell_memory_matrix, activation_matrix


### Build LSTM model