# Text generation using LSTM network

Steps
1. Build RNN class
2. Build LSTM Cell Class
3. Data Loading Functions
4. Training time

In [1]:
import numpy as np

In [25]:
class RecurrentNeuralNetwork:
    #input (word), expected output (next word), num of words (num of recurrences), array expected outputs, learning rate
    def __init__(self,xs,ys,rl,eo,lr):
        self.x = np.zeros(xs)
        self.xs = xs
        self.y = np.zeros(ys)
        self.ys = ys
        self.w = np.random.random((ys,ys))
        self.G = np.zeros_like(self.w)
        self.rl = rl
        self.lr = lr
        
        # stroing inputs
        self.ia = np.zeros((rl+1,xs))
        self.ca = np.zeros((rl+1,ys))
        self.oa = np.zeros((rl+1,ys))
        self.ha = np.zeros((rl+1,ys))
        self.af = np.zeros((rl+1,ys))
        self.ai = np.zeros((rl+1,ys))
        self.ac = np.zeros((rl+1,ys))
        self.ao = np.zeros((rl+1,ys))
        self.eo = np.vstack((np.zeros(eo.shape[0]), eo.T))
        self.LSTM = LSTM(xs, ys, rl, lr)
        
    # activaion
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    # derivative of the sigmoid    
    def dsigmoid(self,x):
        return self.sigmoid(x) * (1 - self.sigmoid(x))
    
    def forwardProp(self):
        for i in range(1, self.rl+1):
            self.LSTM.x = np.hstack((self.ha[i-1], self.x))
            # self.cs, self.y, f, i, c, o
            cs, hs, f, inp, c, o = self.LSTM.forwardProp()
            #store computed cell state
            self.ca[i] = cs
            self.ha[i] = hs
            self.af[i] = f
            self.ai[i] = inp
            self.ac[i] = c
            self.ao[i] = o
            self.oa[i] = self.sigmoid(np.dot(self.w, hs))
            self.x = self.eo[i-1]
        return self.oa
    
    def backProp(self):
        totalError = 0
        # cell state
        dfcs = np.zeros(self.ys)
        # hidden state
        dfhs = np.zeros(self.ys)
        # weight matrix
        tu = np.zeros((self.ys,self.ys))
        # forget gate
        tfu = np.zeros((self.ys, self.xs+self.ys))
        # input gate
        tiu = np.zeros((self.ys, self.xs+self.ys))
        #cell unit
        tcu = np.zeros((self.ys, self.xs+self.ys))
        #output gate
        tou = np.zeros((self.ys, self.xs+self.ys))
        #loop backwards through recurrences
        for i in range(self.rl, -1, -1):
            #error 
            error = self.oa[i] - self.eo[i]
            tu += np.dot(np.atleast_2d(error * self.dsigmoid(self.oa[i])), np.atleast_2d(self.ha[i]).T)    
            error = np.dot(error, self.w)
            self.LSTM.x = np.hstack((self.ha[i-1], self.ia[i]))            
            self.LSTM.cs = self.ca[i]            
            fu, iu, cu, ou, dfcs, dfhs = self.LSTM.backProp(error, self.ca[i-1], self.af[i], self.ai[i], self.ac[i], self.ao[i], dfcs, dfhs)            
            totalError += np.sum(error)            
            tfu += fu            
            tiu += iu           
            tcu += cu            
            tou += ou
       
        self.LSTM.update(tfu/self.rl, tiu/self.rl, tcu/self.rl, tou/self.rl) 
        self.update(tu/self.rl)        
        return totalError
        
    def update(self,u):
        self.G = 0.9 * self.G + 0.1 * u**2
        self.w -= self.lr/np.sqrt(self.G + 1e-8) * u
        return
    def sample(self):
        for i in range(1, self.rl+1):
            #set input for LSTM cell, combination of input (previous output) and previous hidden state
            self.LSTM.x = np.hstack((self.ha[i-1], self.x))
            #run forward prop on the LSTM cell, retrieve cell state and hidden state
            cs, hs, f, inp, c, o = self.LSTM.forwardProp()
            #store input as vector
            maxI = np.argmax(self.x)
            self.x = np.zeros_like(self.x)
            self.x[maxI] = 1
            self.ia[i] = self.x 
            #store cell states
            self.ca[i] = cs            
            self.ha[i] = hs           
            self.af[i] = f           
            self.ai[i] = inp           
            self.ac[i] = c            
            self.ao[i] = o
            #calculate output by multiplying hidden state with weight matrix
            self.oa[i] = self.sigmoid(np.dot(self.w, hs))
            #compute new input
            maxI = np.argmax(self.oa[i])
            newX = np.zeros_like(self.x)
            newX[maxI] = 1
            self.x = newX
        #return all outputs    
        return self.oa
            

In [10]:
class LSTM:
    # LSTM cell (input, output, amount of recurrence, learning rate)
    def __init__ (self, xs, ys, rl, lr):        
        self.x = np.zeros(xs+ys)        
        self.xs = xs + ys       
        self.y = np.zeros(ys)        
        self.ys = ys       
        self.cs = np.zeros(ys)       
        self.rl = rl        
        self.lr = lr      
        # gates
        self.f = np.random.random((ys, xs+ys))       
        self.i = np.random.random((ys, xs+ys))        
        self.c = np.random.random((ys, xs+ys))       
        self.o = np.random.random((ys, xs+ys))       
        self.Gf = np.zeros_like(self.f)
        
        self.Gi = np.zeros_like(self.i)        
        self.Gc = np.zeros_like(self.c)        
        self.Go = np.zeros_like(self.o)
    
    #activation function 
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    #derivative of sigmoid 
    def dsigmoid(self, x):
        return self.sigmoid(x) * (1 - self.sigmoid(x))
    
    
    def tangent(self, x):
        return np.tanh(x)
    
    #derivative for computing gradients
    def dtangent(self, x):
        return 1 - np.tanh(x)**2
    
    #lets compute a series of matrix multiplications to convert our input into our output
    def forwardProp(self):
        f = self.sigmoid(np.dot(self.f, self.x))
        self.cs *= f
        i = self.sigmoid(np.dot(self.i, self.x))
        c = self.tangent(np.dot(self.c, self.x))
        self.cs += i * c
        o = self.sigmoid(np.dot(self.o, self.x))
        self.y = o * self.tangent(self.cs)
        return self.cs, self.y, f, i, c, o
    
   
    def backProp(self, e, pcs, f, i, c, o, dfcs, dfhs):        
        e = np.clip(e + dfhs, -6, 6)        
        do = self.tangent(self.cs) * e       
        ou = np.dot(np.atleast_2d(do * self.dtangent(o)).T, np.atleast_2d(self.x))       
        dcs = np.clip(e * o * self.dtangent(self.cs) + dfcs, -6, 6)       
        dc = dcs * i
        cu = np.dot(np.atleast_2d(dc * self.dtangent(c)).T, np.atleast_2d(self.x))        
        di = dcs * c       
        iu = np.dot(np.atleast_2d(di * self.dsigmoid(i)).T, np.atleast_2d(self.x))        
        df = dcs * pcs        
        fu = np.dot(np.atleast_2d(df * self.dsigmoid(f)).T, np.atleast_2d(self.x))        
        dpcs = dcs * f       
        dphs = np.dot(dc, self.c)[:self.ys] + np.dot(do, self.o)[:self.ys] + np.dot(di, self.i)[:self.ys] + np.dot(df, self.f)[:self.ys] 
        #return update gradinets for forget, input, cell, output, cell state, hidden state
        return fu, iu, cu, ou, dpcs, dphs
            
    def update(self, fu, iu, cu, ou):
        #update forget, input, cell, and output gradients
        self.Gf = 0.9 * self.Gf + 0.1 * fu**2 
        self.Gi = 0.9 * self.Gi + 0.1 * iu**2   
        self.Gc = 0.9 * self.Gc + 0.1 * cu**2   
        self.Go = 0.9 * self.Go + 0.1 * ou**2   
        
        #update our gates using our gradients
        self.f -= self.lr/np.sqrt(self.Gf + 1e-8) * fu
        self.i -= self.lr/np.sqrt(self.Gi + 1e-8) * iu
        self.c -= self.lr/np.sqrt(self.Gc + 1e-8) * cu
        self.o -= self.lr/np.sqrt(self.Go + 1e-8) * ou
        return

In [5]:
def LoadText():
    #open text and return input and output data (series of words)
    with open("./data/eminem.txt", "r") as text_file:
        data = text_file.read()
    text = list(data)
    outputSize = len(text)
    data = list(set(text))
    uniqueWords, dataSize = len(data), len(data) 
    returnData = np.zeros((uniqueWords, dataSize))
    for i in range(0, dataSize):
        returnData[i][i] = 1
    returnData = np.append(returnData, np.atleast_2d(data), axis=0)
    output = np.zeros((uniqueWords, outputSize))
    for i in range(0, outputSize):
        index = np.where(np.asarray(data) == text[i])
        output[:,i] = returnData[0:-1,index[0]].astype(float).ravel()  
    return returnData, uniqueWords, output, outputSize, data

#write the predicted output (series of words) to disk
def ExportText(output, data):
    finalOutput = np.zeros_like(output)
    prob = np.zeros_like(output[0])
    outputText = ""
    print(len(data))
    print(output.shape[0])
    for i in range(0, output.shape[0]):
        for j in range(0, output.shape[1]):
            prob[j] = output[i][j] / np.sum(output[i])
        outputText += np.random.choice(data, p=prob)    
    with open("./data/output.txt", "w") as text_file:
        text_file.write(outputText)
    return

In [27]:
#Begin program    
print("Beginning")
iterations = 5000
learningRate = 0.001
#load input output data (words)
returnData, numCategories, expectedOutput, outputSize, data = LoadText()
print("Done Reading")
#init our RNN using our hyperparams and dataset

RNN = RecurrentNeuralNetwork(numCategories, numCategories, outputSize, expectedOutput, learningRate)

#training time!
for i in range(1, iterations):
    #compute predicted next word
    RNN.forwardProp()
    #update all our weights using our error
    error = RNN.backProp()
    #once our error/loss is small enough
    print("Error on iteration ", i, ": ", error)
    if error > -100 and error < 100 or i % 100 == 0:
        #we can finally define a seed word
        seed = np.zeros_like(RNN.x)
        maxI = np.argmax(np.random.random(RNN.x.shape))
        seed[maxI] = 1
        RNN.x = seed  
        #and predict some new text!
        output = RNN.sample()
        print(output)    
        #write it all to disk
        ExportText(output, data)
        print("Done Writing")
print("Complete")


Beginning
Done Reading
Error on iteration  1 :  9075603.62346
Error on iteration  2 :  9019862.93407
Error on iteration  3 :  8978144.85976
Error on iteration  4 :  8943192.00947
Error on iteration  5 :  8912165.97083
Error on iteration  6 :  8883734.75863
Error on iteration  7 :  8857155.93685
Error on iteration  8 :  8831967.61349
Error on iteration  9 :  8807860.42884
Error on iteration  10 :  8784615.90188
Error on iteration  11 :  8762073.4507
Error on iteration  12 :  8740111.32434
Error on iteration  13 :  8718634.89436
Error on iteration  14 :  8697569.1118
Error on iteration  15 :  8676853.45301
Error on iteration  16 :  8656438.42017
Error on iteration  17 :  8636283.05012
Error on iteration  18 :  8616353.09804
Error on iteration  19 :  8596619.68539
Error on iteration  20 :  8577058.27504
Error on iteration  21 :  8557647.88169
Error on iteration  22 :  8538370.4549
Error on iteration  23 :  8519210.39062
Error on iteration  24 :  8500154.14011
Error on iteration  25 :  848



Done Writing
Error on iteration  101 :  7091871.25416
Error on iteration  102 :  7073678.93611
Error on iteration  103 :  7055486.68411
Error on iteration  104 :  7037294.45126
Error on iteration  105 :  7019102.23566
Error on iteration  106 :  7000910.03558
Error on iteration  107 :  6982717.84946
Error on iteration  108 :  6964525.6759
Error on iteration  109 :  6946333.51364
Error on iteration  110 :  6928141.36156
Error on iteration  111 :  6909949.21863
Error on iteration  112 :  6891757.08393
Error on iteration  113 :  6873564.95664
Error on iteration  114 :  6855372.83602
Error on iteration  115 :  6837180.72138
Error on iteration  116 :  6818988.61214
Error on iteration  117 :  6800796.50774
Error on iteration  118 :  6782604.4077
Error on iteration  119 :  6764412.31158
Error on iteration  120 :  6746220.21897
Error on iteration  121 :  6728028.12954


KeyboardInterrupt: 