# In [None]:
# Math of LSTM Networks
# https://github.com/llSourcell/LSTM_Networks/blob/master/LSTM%20Demo.ipynb
# http://colah.github.io/posts/2015-08-Understanding-LSTMs/
import numpy as np

class RecurrentNeuralNetwork:
    """Unroll a single LSTM cell over ``rl`` steps and map its hidden state to an output.

    Every per-step history array has ``rl + 1`` rows; row 0 is left as zeros
    and the recurrences are numbered 1..rl.
    """

    # input (word), expected output (next word), num of words (num of recurrences),
    # array of expected outputs, learning rate
    def __init__(self, xs, ys, rl, eo, lr):
        """Allocate the state, history buffers and the LSTM cell.

        Args:
            xs: input size.
            ys: output size (also used as the hidden/cell state size).
            rl: number of recurrences.
            eo: 2-D array of expected outputs. It is transposed and stacked
                under a zero row of length ``eo.shape[0]``, so it is presumably
                shaped (ys, rl) -- TODO confirm against the caller.
            lr: learning rate.
        """
        # initial input (first word)
        self.x = np.zeros(xs)
        # input size
        self.xs = xs
        # expected output (next word)
        self.y = np.zeros(ys)
        # output size
        self.ys = ys
        # weight matrix turning the LSTM hidden state into the output (ys x ys)
        self.w = np.random.random((ys, ys))
        # running average of squared gradients, used by RMSprop in update()
        self.G = np.zeros_like(self.w)
        # length of the recurrent network - number of recurrences
        self.rl = rl
        # learning rate
        self.lr = lr

        # HISTORY BUFFERS (one row per recurrence, row 0 stays zero)
        # inputs
        self.ia = np.zeros((rl + 1, xs))
        # cell states
        self.ca = np.zeros((rl + 1, ys))
        # outputs
        self.oa = np.zeros((rl + 1, ys))
        # hidden states
        self.ha = np.zeros((rl + 1, ys))

        # GATE VALUES
        # forget gate
        self.af = np.zeros((rl + 1, ys))
        # input gate
        self.ai = np.zeros((rl + 1, ys))
        # cell gate
        self.ac = np.zeros((rl + 1, ys))
        # output gate
        self.ao = np.zeros((rl + 1, ys))

        # array of expected output values, prefixed with a zero row so that
        # row i lines up with recurrence i.
        # (The zero row must be built first and *then* stacked with eo.T.)
        self.eo = np.vstack((np.zeros(eo.shape[0]), eo.T))
        # declare LSTM cell (input, output, amount of recurrence, learning rate)
        self.LSTM = LSTM(xs, ys, rl, lr)

    def sigmoid(self, x):
        """Logistic function: squashes ``x`` element-wise into (0, 1)."""
        return 1 / (1 + np.exp(-x))

    def dsigmoid(self, x):
        """Derivative of the logistic function evaluated at pre-activation ``x``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def forwardProp(self):
        """Run the LSTM cell for recurrences 1..rl and record every intermediate value.

        Returns:
            ``self.oa``, the (rl + 1, ys) array of outputs (row 0 is zeros).
        """
        for i in range(1, self.rl + 1):
            # Remember the input actually used at this step; backProp rebuilds
            # the cell input from ia[i].
            self.ia[i] = self.x
            # Cell input = previous hidden state followed by the current input
            self.LSTM.x = np.hstack((self.ha[i-1], self.x))
            # cell state, hidden state, forget gate, input gate, cell gate, output gate
            cs, hs, f, inp, c, o = self.LSTM.forwardProp()
            self.ca[i] = cs
            self.ha[i] = hs
            self.af[i] = f
            self.ai[i] = inp
            self.ac[i] = c
            self.ao[i] = o
            # output = sigmoid(weights . hidden state)
            self.oa[i] = self.sigmoid(np.dot(self.w, hs))
            # next input comes from the stored expected outputs
            self.x = self.eo[i-1]
        return self.oa

    def backProp(self):
        """Back-propagate through recurrences rl..1 and update all weights.

        Accumulates the gradient of the output weight matrix and of the four
        LSTM gate matrices, averages them over ``rl`` and applies them through
        ``self.update`` and ``self.LSTM.update``.

        Returns:
            The sum, over all steps, of the error propagated back to the LSTM
            cell (a progress indicator only).
        """
        totalError = 0
        # RNN level gradients
        # cell state
        dfcs = np.zeros(self.ys)
        # hidden state
        dfhs = np.zeros(self.ys)
        # weight matrix
        tu = np.zeros((self.ys, self.ys))
        # LSTM level gradients
        # forget gate
        tfu = np.zeros((self.ys, self.xs + self.ys))
        # input gate
        tiu = np.zeros((self.ys, self.xs + self.ys))
        # cell unit
        tcu = np.zeros((self.ys, self.xs + self.ys))
        # output gate
        tou = np.zeros((self.ys, self.xs + self.ys))

        # Stop at 1: row 0 is padding, and at i == 0 the index i-1 would wrap
        # around to the last recurrence.
        for i in range(self.rl, 0, -1):
            # error = calculated output - expected output
            error = self.oa[i] - self.eo[i]

            # Weight gradient: outer product of (error * output derivative)
            # with the hidden state, giving a (ys, ys) matrix like self.w.
            # NOTE(review): dsigmoid() re-applies sigmoid to an already
            # activated output; oa[i] * (1 - oa[i]) may be what was intended.
            tu += np.outer(error * self.dsigmoid(self.oa[i]), self.ha[i])

            # Propagate error back to the exit of the LSTM cell
            # 1. error * RNN weight matrix
            error = np.dot(error, self.w)

            # 2. restore the cell input for recurrence i (hidden + input)
            self.LSTM.x = np.hstack((self.ha[i-1], self.ia[i]))

            # 3. restore the cell state for recurrence i
            self.LSTM.cs = self.ca[i]

            # 4. gate gradients plus the cell/hidden state gradients that flow
            #    into recurrence i-1
            fu, iu, cu, ou, dfcs, dfhs = self.LSTM.backProp(
                error, self.ca[i-1], self.af[i], self.ai[i], self.ac[i],
                self.ao[i], dfcs, dfhs)

            # Total error (not necessary, used to measure training progress).
            # NOTE(review): this sums the signed, back-propagated error.
            totalError += np.sum(error)

            # Accumulate all gradient updates
            tfu += fu
            tiu += iu
            tcu += cu
            tou += ou

        # Update LSTM matrices with the average of the accumulated gradients
        # (forget, input, cell, output)
        self.LSTM.update(tfu/self.rl, tiu/self.rl, tcu/self.rl, tou/self.rl)
        # Update weight matrix with the average of the accumulated gradients
        self.update(tu/self.rl)
        return totalError

    def update(self, u):
        """Apply one RMSprop step to ``self.w`` using gradient ``u``."""
        self.G = 0.9 * self.G + 0.1 * u**2
        self.w -= self.lr/np.sqrt(self.G + 1e-8) * u

    def sample(self):
        """Generate ``rl`` outputs, feeding each one-hot prediction back in as the next input.

        Returns:
            ``self.oa``, the (rl + 1, ys) array of outputs (row 0 is zeros).
        """
        # start at 1 so the 0th entry of every array stays an array of 0s
        for i in range(1, self.rl + 1):
            # Cell input = previous hidden state followed by the current input
            self.LSTM.x = np.hstack((self.ha[i-1], self.x))

            # cell state, hidden state, forget gate, input gate, cell gate, output gate
            cs, hs, f, inp, c, o = self.LSTM.forwardProp()

            # Store the input as a one-hot vector of its largest entry
            maxI = np.argmax(self.x)
            self.x = np.zeros_like(self.x)
            self.x[maxI] = 1
            self.ia[i] = self.x

            self.ca[i] = cs
            self.ha[i] = hs
            self.af[i] = f
            self.ai[i] = inp
            self.ac[i] = c
            self.ao[i] = o

            # output = sigmoid(weights . hidden state)
            self.oa[i] = self.sigmoid(np.dot(self.w, hs))

            # The next input is the one-hot encoding of the strongest output
            maxI = np.argmax(self.oa[i])
            newX = np.zeros_like(self.x)
            newX[maxI] = 1
            self.x = newX
        return self.oa
    

    

# In [None]:
class LSTM:
    """A single LSTM cell with forget, input, cell and output gates.

    The caller sets ``self.x`` (previous hidden state followed by the current
    input) before ``forwardProp``, and sets both ``self.x`` and ``self.cs``
    before ``backProp``.
    """

    # LSTM cell (input size, output size, amount of recurrence, learning rate)
    def __init__(self, xs, ys, rl, lr):
        """Allocate state and gate weights.

        Args:
            xs: size of the external input.
            ys: size of the hidden/cell state and of every gate.
            rl: number of recurrences (stored, not used by the cell itself).
            lr: learning rate for the RMSprop update.
        """
        # cell input: hidden state (ys) followed by the external input (xs)
        self.x = np.zeros(xs + ys)
        # size of the cell input
        self.xs = xs + ys
        # hidden state / cell output
        self.y = np.zeros(ys)
        # output size
        self.ys = ys
        # cell state
        self.cs = np.zeros(ys)
        # length of the recurrent network
        self.rl = rl
        # learning rate
        self.lr = lr
        # gate weight matrices, drawn uniformly from [0, 1)
        self.f = np.random.random((ys, xs + ys))  # forget gate
        self.i = np.random.random((ys, xs + ys))  # input gate
        self.c = np.random.random((ys, xs + ys))  # cell gate
        self.o = np.random.random((ys, xs + ys))  # output gate
        # running averages of squared gradients, used by RMSprop in update()
        self.Gf = np.zeros_like(self.f)
        self.Gi = np.zeros_like(self.i)
        self.Gc = np.zeros_like(self.c)
        self.Go = np.zeros_like(self.o)

    def sigmoid(self, x):
        """Logistic function: squashes ``x`` element-wise into (0, 1)."""
        return 1 / (1 + np.exp(-x))

    def dsigmoid(self, x):
        """Derivative of the logistic function evaluated at pre-activation ``x``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def tangent(self, x):
        """Hyperbolic tangent: squashes ``x`` element-wise into (-1, 1)."""
        return np.tanh(x)

    def dtangent(self, x):
        """Derivative of tanh evaluated at pre-activation ``x``."""
        return 1 - np.tanh(x)**2

    def forwardProp(self):
        """Advance the cell one step using ``self.x`` and the stored cell state.

        Returns:
            Tuple ``(cs, hs, f, i, c, o)``: new cell state, new hidden state,
            and the forget, input, cell and output gate activations.
        """
        f = self.sigmoid(np.dot(self.f, self.x))
        i = self.sigmoid(np.dot(self.i, self.x))
        c = self.tangent(np.dot(self.c, self.x))
        o = self.sigmoid(np.dot(self.o, self.x))
        # Build a new array instead of updating in place: the caller may have
        # assigned self.cs a row view of its own history buffer.
        self.cs = f * self.cs + i * c
        self.y = o * self.tangent(self.cs)
        return self.cs, self.y, f, i, c, o

    def backProp(self, e, pcs, f, i, c, o, dfcs, dfhs):
        """Compute gate-weight gradients for one step.

        Expects ``self.x`` and ``self.cs`` to hold the cell input and the cell
        state of the step being differentiated.

        Args:
            e: error arriving at the hidden state from the layer above.
            pcs: cell state of the previous step.
            f, i, c, o: gate activations recorded by ``forwardProp``.
            dfcs: cell-state gradient flowing back from the following step.
            dfhs: hidden-state gradient flowing back from the following step.

        Returns:
            Tuple ``(fu, iu, cu, ou, dpcs, dphs)``: gradients of the forget,
            input, cell and output weight matrices, then the gradients with
            respect to the previous cell state and previous hidden state.
        """
        # total gradient reaching this step's hidden state
        dh = e + dfhs
        tcs = self.tangent(self.cs)

        # Gradients are taken w.r.t. each gate's pre-activation; the gate
        # values are already activated, so sigmoid' = g * (1 - g) and
        # tanh' = 1 - g**2.
        do = dh * tcs * o * (1 - o)
        dcs = dh * o * (1 - tcs**2) + dfcs
        dc = dcs * i * (1 - c**2)
        di = dcs * c * i * (1 - i)
        df = dcs * pcs * f * (1 - f)

        # weight gradient = outer(pre-activation gradient, cell input)
        fu = np.outer(df, self.x)
        iu = np.outer(di, self.x)
        cu = np.outer(dc, self.x)
        ou = np.outer(do, self.x)

        # gradient w.r.t. the previous cell state passes through the forget gate
        dpcs = dcs * f
        # gradient w.r.t. the cell input; its first ys entries belong to the
        # previous hidden state
        dx = (np.dot(df, self.f) + np.dot(di, self.i)
              + np.dot(dc, self.c) + np.dot(do, self.o))
        dphs = dx[:self.ys]
        return fu, iu, cu, ou, dpcs, dphs

    def update(self, fu, iu, cu, ou):
        """Apply one RMSprop step to the forget, input, cell and output weights."""
        self.Gf = 0.9 * self.Gf + 0.1 * fu**2
        self.Gi = 0.9 * self.Gi + 0.1 * iu**2
        self.Gc = 0.9 * self.Gc + 0.1 * cu**2
        self.Go = 0.9 * self.Go + 0.1 * ou**2
        self.f -= self.lr/np.sqrt(self.Gf + 1e-8) * fu
        self.i -= self.lr/np.sqrt(self.Gi + 1e-8) * iu
        self.c -= self.lr/np.sqrt(self.Gc + 1e-8) * cu
        self.o -= self.lr/np.sqrt(self.Go + 1e-8) * ou
     
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
       