# Implementation of a Long Short-Term Memory (LSTM) module with NumPy


<img src="lstm1.png" width="500px">

This implementation is based on materials from [[1](https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/)].

In [1]:
import numpy as np
# Seed NumPy's global RNG so the random draws in the cells below are repeatable on a fresh run.
np.random.seed(0)

# Activation functions

In [41]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + e^(-z)), applied element-wise.

    Args:
        z: scalar or NumPy array of pre-activations.

    Returns:
        Value(s) in (0, 1) with the same shape as ``z``.
    """
    decay = np.exp(-z)
    return 1. / (decay + 1)

def tanh(z):
    """Hyperbolic tangent, applied element-wise (delegates to ``np.tanh``).

    Args:
        z: scalar or NumPy array of pre-activations.

    Returns:
        Value(s) in [-1, 1] with the same shape as ``z``.
    """
    squashed = np.tanh(z)
    return squashed

def softmax(z, axis=-1):
    """Numerically stable softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    on large scores. Normalisation is done along ``axis`` only, so each row of
    a batched ``(n_inputs, n_classes)`` array becomes its own distribution.

    Args:
        z: scalar, sequence, or NumPy array of scores.
        axis: axis holding the class scores; defaults to the last axis.

    Returns:
        Array with the same shape as ``z`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    z = np.asarray(z)
    if z.ndim == 0:
        # A lone score is a one-element distribution.
        return softmax(z.reshape(1), axis=-1)[0]
    if z.size == 0:
        # Nothing to normalise; avoid reducing over an empty axis.
        return z.copy()
    # Shifting by the max makes the largest exponent exactly 0.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)

# Parameters

In [47]:
# Scratch preview: draw a (3, 1) sample from the standard normal distribution.
# NOTE: this call advances NumPy's global RNG, so it affects the values drawn in later cells.
np.random.randn(3, 1)

array([[ 3.4379513 ],
       [-0.50261336],
       [ 1.33089875]])

In [81]:
# --- Problem dimensions ---
n_x_features = 3    # number of features in each input vector
n_inputs = 1        # number of input examples
n_hidden_units = 2
n_output_units = 3

# --- Random tensors for a single time step ---
# Draw order (x_t, h_prev, c_prev) is kept so the global RNG stream is consumed identically.
x_t = np.random.randn(n_inputs, n_x_features)        # input at step t
h_prev = np.random.randn(n_inputs, n_hidden_units)   # previous hidden-layer output
c_prev = np.random.randn(n_inputs, n_hidden_units)   # previous cell state, same shape as h_prev

# Every gate consumes [h_prev, x_t] stacked along the feature axis.
concat_input = np.hstack((h_prev, x_t))

print("Shape of x_t: {} | h_prev: {} | c_prev: {}".format(x_t.shape, h_prev.shape, c_prev.shape))
print("Shape of concat_input: {}".format(concat_input.shape))


Shape of x_t: (1, 3) | h_prev: (1, 2) | c_prev: (1, 2)
Shape of concat_input: (1, 5)


# Forget Gate

![](lstm2.png)

In [82]:
# Forget gate: f_t = sigmoid([h_prev, x_t] . W_f + b_f)
weight_f = np.random.normal(0.0, 0.1, (concat_input.shape[1], n_hidden_units))
bias_f = np.ones(n_hidden_units)
print("Forget weight shape: {}".format(weight_f.shape))

# concat_input already holds h_prev and x_t side by side
z_f = concat_input.dot(weight_f) + bias_f
f_t = sigmoid(z_f)

Forget weight shape: (5, 2)


# Compute state at step t


![](lstm3.png)


In [83]:
# Weights are drawn in the same order as before (input gate, then candidate)
# so the global RNG stream is consumed identically.
weight_i = np.random.normal(0.0, 0.1, (concat_input.shape[1], n_hidden_units))
weight_ctild = np.random.normal(0.0, 0.1, (concat_input.shape[1], n_hidden_units))
bias_i = np.ones(n_hidden_units)
bias_ctild = np.ones(n_hidden_units)

# Input gate: how much of the candidate is written into the cell state
z_i = concat_input.dot(weight_i) + bias_i
i_t = sigmoid(z_i)

# Candidate cell state
z_ctild = concat_input.dot(weight_ctild) + bias_ctild
ctild_t = tanh(z_ctild)

# New cell state: keep part of the old state, add the gated candidate
c_t = f_t * c_prev + i_t * ctild_t
print("State at time t: {}".format(c_t))

State at time t: [[ 0.62436412  0.09535652]]


# Compute hidden output

![](lstm4.png)

In [84]:
# Output gate: o_t = sigmoid([h_prev, x_t] . W_o + b_o)
weight_o = np.random.normal(0.0, 0.1, (concat_input.shape[1], n_hidden_units))
bias_o = np.ones(n_hidden_units)
z_o = concat_input.dot(weight_o) + bias_o
o_t = sigmoid(z_o)

# Hidden output: the squashed cell state, filtered by the output gate
h_t = o_t * tanh(c_t)

# Compute final output using h_t (fully connected layer)


In [85]:
# Fully connected read-out: project h_t onto the output units, then softmax
weight_final = np.random.normal(0.0, 0.1, (n_hidden_units, n_output_units))
bias_output = np.ones(n_output_units)
z_final = h_t.dot(weight_final) + bias_output
output_t = softmax(z_final)


# Backpropagation through time (BPTT)

# Concatenate