In [31]:
import numpy as np
import copy

# Helper functions
def softmax(array):
    return np.exp(array)/ np.sum(np.exp(array)) # return an array

def sigmoid(x):
    return (1/(1+np.exp(-x)))

def sigmoid_deriv(y):
    return (y*(1-y))

def tanh(x):
    return np.tanh(x)

def tanh_deriv(y):
    return 1 - pow(np.tanh(y),2)

# RNN
class basicRNN:
    
    def __init__ (self, lenIn, lenOut, lenRec, sizeHidden, inputs_encoded, targets, learningRate):
        
        # Hyper parameters
        self.lenIn          = lenIn
        self.lenOut         = lenOut
        self.lenRec         = lenRec
        self.sizeHidden     = sizeHidden
        self.learningRate   = learningRate
        
        # input & expected output
        self.inputs_encoded = inputs_encoded;
        self.targets = targets;
        
        # parameters for inference
        self.x  = np.zeros(lenIn)  
        self.y  = np.zeros(lenOut)
        self.h  = np.zeros(sizeHidden)
        
        self.W  = np.zeros((lenOut,sizeHidden)) # for the last fully connected layer        
        self.b  = np.zeros(lenOut)
        
        
        # for training phase 
        self.xs = np.zeros((lenRec,lenIn))
        self.ys = np.zeros((lenRec,lenOut))
        self.hs = np.zeros((lenRec,sizeHidden))
        self.GW = np.zeros((lenOut,sizeHidden)) # Gradient, for W-update using RMSprop
        self.Gb = np.zeros(lenOut)
        
        # CELL class
        self.RNN_cell = RNN_cell(sizeHidden+lenIn,sizeHidden,lenRec,learningRate)
        
        ''' end of basicRNN.__init__ '''
       
    ''' This is used when mini-batch is used '''            
    def update_inputs_targets(self, inputs_encoded, targets):
        self.inputs_encoded  = inputs_encoded
        self.targets         = targets
    
    def fwd_pass(self): 
        prev_h = np.zeros_like(self.hs[0])
        for t in range(0,self.lenRec):
            # update input
            self.x    = self.inputs_encoded[t]
            self.xs[t]= self.inputs_encoded[t]
            
            self.RNN_cell.hx = np.hstack((prev_h, self.x));
           
            h = self.RNN_cell.fwd_pass()
            # bookkeeping
            self.hs[t] = h
            
            # output layer - fully connected layer
            self.ys[t] = np.dot(self.W,self.hs[t]) + self.b
            prev_h = self.hs[t]
            
        return;              
    
    def bwd_pass(self):        

        avg_loss = 0; # using cross entropy average
        h2next_grad  = np.zeros(self.sizeHidden)
        
        # output bp
        W_grad   = np.zeros((self.lenOut,self.sizeHidden))
        b_grad  = np.zeros(self.lenOut)
        hxW_grad  = np.zeros((self.sizeHidden,self.RNN_cell.lenIn));
        hb_grad   = np.zeros((self.sizeHidden));
                   
        # propagates through time and layers

        for t in reversed(range(0,self.lenRec)):
            
            prob = softmax(self.ys[t]) # prevent zero
            prob_fix  = prob + 1e-9

            # cross entropy
            err       = np.log(prob_fix[self.targets[t]])
            avg_loss += err
     
            dy = copy.deepcopy(prob)
            dy[self.targets[t]] -= 1
            
            W_grad += np.dot((np.atleast_2d(dy)).T,np.atleast_2d(self.hs[t]))
            b_grad += dy
            
            dh = np.dot(self.W.T,dy) + h2next_grad
            
            x_grad  = np.zeros(self.lenIn)
            
            if(t > 0):
                prev_h = self.hs[t-1]
            else:
                prev_h = np.zeros_like(self.hs[0])
                
            self.RNN_cell.hx = np.hstack((prev_h,self.xs[t]))
            self.RNN_cell.h  = self.hs[t]

            dhxW, dhb, h2next_grad,x_grad = \
            self.RNN_cell.bwd_pass( dh );
            
            hxW_grad  +=  dhxW
            hb_grad   +=  dhb
            
        self.RNN_cell.update(hxW_grad/self.lenRec, hb_grad/self.lenRec);
        
        self.update(W_grad/self.lenRec,b_grad/self.lenRec);
        return avg_loss/self.lenRec;
            
          
            
    def update(self, W_grad, b_grad):
        self.GW = self.GW + W_grad**2;
        self.W -= self.learningRate/np.sqrt(self.GW + 1e-8) * W_grad;
        self.Gb = self.Gb + b_grad**2;
        self.b -= self.learningRate/np.sqrt(self.Gb + 1e-8) * b_grad;

    def inference(self,x):
        # update input
        self.x = x
        self.RNN_cell.hx = np.hstack((self.h,self.x))
        self.h = self.RNN_cell.fwd_pass()

        # output layer - may replace with softmax instead
        self.y = np.dot(self.W,self.h) + self.b
        p   = softmax(self.y)
        
        
        return np.random.choice(range(self.lenIn), p=p.ravel())
  


In [32]:
class RNN_cell:
    
    def __init__ (self,lenIn,sizeHidden,lenRec,learningRate):
        self.lenIn        = lenIn
        self.sizeHidden   = sizeHidden
        self.lenRec       = lenRec
        self.learningRate = learningRate
        
        # hx == x is x and h horizontally stacked together
        self.hx = np.zeros(lenIn)
        self.h = np.zeros(sizeHidden)
        
        # Weight matrices
        self.hxW = np.random.random((sizeHidden,lenIn));

        # biases
        self.hb = np.zeros(sizeHidden);
              
        # for RMSprop only
        self.GhxW = np.random.random((sizeHidden,lenIn));
        self.Ghb = np.zeros(sizeHidden);

        
        ''' end of RNN_cell.__init__ '''
        
    def fwd_pass(self):
        self.h = tanh(np.dot(self.hxW, self.hx) + self.hb)       
        return self.h;
    
    def bwd_pass(self, dh):
        
        #dh = np.clip(dh, -6, 6);       
        # h = o*tanh(c)
        dh  = tanh_deriv(self.h) * dh
        dhb = dh
        dhxW = np.dot((np.atleast_2d(dh)).T,np.atleast_2d(self.hx)) 
        
        hx_grad = np.dot(self.hxW.T, dh)
               
        return dhxW, dhb, hx_grad[:self.sizeHidden],hx_grad[self.sizeHidden:];
    
    def update(self, hxW_grad, hb_grad):

        self.GhxW = self.GhxW + hxW_grad**2
        self.Ghb = self.Ghb + hb_grad**2
        
        self.hxW -= self.learningRate/np.sqrt(self.GhxW + 1e-8) * hxW_grad
        self.hb -= self.learningRate/np.sqrt(self.Ghb + 1e-8) * hb_grad

        

In [33]:
data = open('HP1.txt','r', encoding="utf8").read();
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(data_size,", ",vocab_size)

char_to_ix = {ch:i for i, ch in enumerate(chars)}
ix_to_char = {i:ch for i,ch in enumerate(chars)}
print(char_to_ix)
print(ix_to_char)

def encode(idx,num_entry):
    ret = np.zeros(num_entry)
    ret[idx] = 1
    return ret;

def encode_array(array,num_entry):
    xs = np.zeros((len(array),num_entry))
    for i in range(len(array)):
        xs[i][array[i]] = 1; 
    return xs;


seq_length,position = 75,0
inputs = [char_to_ix[ch] for ch in data[position:position+seq_length]]
print(data[position:position+seq_length])
print("inputs",inputs)

targets = [char_to_ix[ch] for ch in data[position+1:position+seq_length+1]] 
print(data[position+1:position+seq_length+1])
print("targets",targets)

n,position = 0,0;
epoch = 20*1000;
lenIn, lenOut, lenRec = vocab_size,vocab_size, seq_length
sizeHidden, numHiddenLayer = 100,1;
learningRate = 0.1;


R = basicRNN(lenIn, lenOut, lenRec, sizeHidden, encode_array(inputs,vocab_size),targets, learningRate)

# training
while n<epoch:
    
    if(position+seq_length+1 >= len(data) or n == 0):
        print("!!!!",len(data))
        position = 0;
        
    inputs  = [char_to_ix[ch] for ch in data[position:position+seq_length]]
    targets = [char_to_ix[ch] for ch in data[position+1:position+seq_length+1]] 

    R.update_inputs_targets(encode_array(inputs,vocab_size),targets)
    R.fwd_pass();
    
    err = R.bwd_pass();
    
    if(n%1000 == 0):
        print(n,"err:",err)
        seed = encode(n % vocab_size,vocab_size)
        print(ix_to_char[n % vocab_size])
        result = [];
        R.h  = np.zeros(sizeHidden)
        R.c  = np.zeros(sizeHidden)
        for i in range(100):
            ret = R.inference(seed)
            #print(i,":",ret)
            result.append(ret)
            seed = encode(ret,vocab_size)
        decode = ''.join([ix_to_char[ch] for ch in result] )
        print(decode)

    position += seq_length;
    n += 1;

431677 ,  79
{'J': 0, ')': 1, 'I': 2, '-': 3, ' ': 4, 'w': 5, 'p': 6, '1': 7, '8': 8, ':': 9, '~': 10, 't': 11, '\\': 12, 'F': 13, '?': 14, 'Q': 15, '\t': 16, 'O': 17, 'r': 18, 'H': 19, 'Z': 20, '!': 21, 'T': 22, 'q': 23, '6': 24, 'j': 25, 'L': 26, 'a': 27, 'y': 28, 'R': 29, 'W': 30, 'g': 31, 'G': 32, '4': 33, 'f': 34, '7': 35, '5': 36, 'P': 37, '"': 38, 'e': 39, '2': 40, ';': 41, 'u': 42, 'K': 43, '.': 44, '*': 45, '(': 46, 'N': 47, '0': 48, 'z': 49, 'M': 50, '9': 51, 'X': 52, 'd': 53, 'o': 54, 'n': 55, 'h': 56, '\n': 57, 'C': 58, 'A': 59, 'E': 60, 'V': 61, 'm': 62, 'i': 63, 'b': 64, 'v': 65, 'S': 66, 'x': 67, 'k': 68, 'B': 69, 'U': 70, '3': 71, ',': 72, 'c': 73, 's': 74, 'Y': 75, "'": 76, 'l': 77, 'D': 78}
{0: 'J', 1: ')', 2: 'I', 3: '-', 4: ' ', 5: 'w', 6: 'p', 7: '1', 8: '8', 9: ':', 10: '~', 11: 't', 12: '\\', 13: 'F', 14: '?', 15: 'Q', 16: '\t', 17: 'O', 18: 'r', 19: 'H', 20: 'Z', 21: '!', 22: 'T', 23: 'q', 24: '6', 25: 'j', 26: 'L', 27: 'a', 28: 'y', 29: 'R', 30: 'W', 31: 'g', 3

KeyboardInterrupt: 