In [11]:
import numpy as np
import copy

# Helper functions
def softmax(array):
    return np.exp(array)/ np.sum(np.exp(array)) # return an array

def sigmoid(x):
    return (1/(1+np.exp(-x)))

def sigmoid_deriv(y):
    return (y*(1-y))

def tanh(x):
    return np.tanh(x)

def tanh_deriv(y):
    return 1 - pow(np.tanh(y),2)

# RNN
class basicRNN:
    
    def __init__ (self, lenIn, lenOut, lenRec, sizeHidden, inputs_encoded, targets, learningRate):
        
        # Hyper parameters
        self.lenIn          = lenIn
        self.lenOut         = lenOut
        self.lenRec         = lenRec
        self.sizeHidden     = sizeHidden
        self.learningRate   = learningRate
        
        # input & expected output
        self.inputs_encoded = inputs_encoded;
        self.targets = targets;
        
        # parameters for inference
        self.x  = np.zeros(lenIn)  
        self.y  = np.zeros(lenOut)
        self.hls_infer = np.zeros((lenRec,sizeHidden))
        self.hrs_infer = np.zeros((lenRec,sizeHidden))
        
        self.W  = np.zeros((lenOut,sizeHidden*2)) # for the last fully connected layer
        self.b  = np.zeros(lenOut)
       
        # for training phase 
        self.xs = np.zeros((lenRec,lenIn))
        self.ys = np.zeros((lenRec,lenOut))
        self.hls = np.zeros((lenRec,sizeHidden))
        self.hrs = np.zeros((lenRec,sizeHidden))
        self.GW = np.zeros((lenOut,sizeHidden*2)) # Gradient, for W-update using RMSprop
        self.Gb = np.zeros(lenOut)
        
        # CELL class
        self.RNN_cell_L = RNN_cell(sizeHidden+lenIn,sizeHidden,lenRec,learningRate)
        self.RNN_cell_R = RNN_cell(sizeHidden+lenIn,sizeHidden,lenRec,learningRate)
        
        ''' end of basicRNN.__init__ '''
       
    ''' This is used when mini-batch is used '''            
    def update_inputs_targets(self, inputs_encoded, targets):
        self.inputs_encoded  = inputs_encoded
        self.targets         = targets
    
    def fwd_pass(self): 
        # fwd layer
        prev_h = np.zeros_like(self.hls[0])
        for t in range(0,self.lenRec):
            # update input
            self.x    = self.inputs_encoded[t]
            self.xs[t]= self.inputs_encoded[t]
            
            self.RNN_cell_L.hx = np.hstack((prev_h, self.x));
           
            hl = self.RNN_cell_L.fwd_pass()
            # bookkeeping
            self.hls[t] = hl
            prev_h = self.hls[t]
                           
        # bwd layer
        prev_h = np.zeros_like(self.hrs[0])                 
        for t in reversed(range(0,self.lenRec)):
            # update input
            self.x    = self.xs[t]
            self.RNN_cell_R.hx = np.hstack((prev_h, self.x));
           
            hr = self.RNN_cell_R.fwd_pass()
            # bookkeeping
            self.hrs[t] = hr
            prev_h = self.hrs[t] 
                           
            # output layer - fully connected layer
            self.ys[t] = np.dot(self.W,np.hstack((self.hls[t],self.hrs[t]))) + self.b            
        return;              
    
    def bwd_pass(self):        

        avg_loss = 0; # using cross entropy average
        h2next_grad  = np.zeros(self.sizeHidden)
        
        # output bp
        W_grad   = np.zeros((self.lenOut,self.sizeHidden*2))
        b_grad  = np.zeros(self.lenOut)
                                
        hlxW_grad  = np.zeros((self.sizeHidden,self.RNN_cell_L.lenIn));
        hrxW_grad  = np.zeros((self.sizeHidden,self.RNN_cell_R.lenIn));                        
        hlb_grad   = np.zeros((self.sizeHidden));
        hrb_grad   = np.zeros((self.sizeHidden)); 
                                
        # propagates through time and layers      
        dh = np.zeros((self.lenRec,self.sizeHidden*2))                

        for t in reversed(range(0,self.lenRec)):
            
            prob = softmax(self.ys[t]) # prevent zero
            prob_fix  = prob + 1e-9

            # cross entropy
            err       = np.log(prob_fix[int(self.targets[t])])
            avg_loss += err
     
            dy = copy.deepcopy(prob)
            dy[int(self.targets[t])] -= 1
            
            W_grad += np.dot((np.atleast_2d(dy)).T,np.atleast_2d(np.hstack((self.hls[t],self.hrs[t])) ))
            b_grad += dy
            
            dh[t] = np.dot(self.W.T,dy) 
                                
        for t in reversed(range(0,self.lenRec)):                 
            dhl = dh[t,:self.sizeHidden] + h2next_grad         
            x_grad  = np.zeros(self.lenIn)
            
            if(t > 0):
                prev_h = self.hls[t-1]
            else:
                prev_h = np.zeros_like(self.hls[0])
                
            self.RNN_cell_L.hx = np.hstack((prev_h,self.xs[t]))
            self.RNN_cell_L.h  = self.hls[t]

            dhlxW, dhlb, h2next_grad,x_grad = \
            self.RNN_cell_L.bwd_pass( dhl );
            
            hlxW_grad  +=  dhlxW
            hlb_grad   +=  dhlb
                                
        h2next_grad  = np.zeros(self.sizeHidden)                        
        for t in range(0,self.lenRec):                 
            dhr = dh[t,self.sizeHidden:] + h2next_grad         
            x_grad  = np.zeros(self.lenIn)
            
            if(t < self.lenRec-1):
                prev_h = self.hrs[t+1]
            else:
                prev_h = np.zeros_like(self.hrs[0])
                
            self.RNN_cell_R.hx = np.hstack((prev_h,self.xs[t]))
            self.RNN_cell_R.h  = self.hrs[t]

            dhrxW, dhrb, h2next_grad,x_grad = \
            self.RNN_cell_R.bwd_pass( dhr );
            
            hrxW_grad  +=  dhrxW
            hrb_grad   +=  dhrb
                                
        self.RNN_cell_L.update(hlxW_grad/self.lenRec, hlb_grad/self.lenRec);
        self.RNN_cell_R.update(hrxW_grad/self.lenRec, hrb_grad/self.lenRec);
        
        self.update(W_grad/self.lenRec,b_grad/self.lenRec);
        return avg_loss/self.lenRec;
            
    def update(self, W_grad, b_grad):
        self.GW = self.GW + W_grad**2;
        self.W -= self.learningRate/np.sqrt(self.GW + 1e-8) * W_grad;
        self.Gb = self.Gb + b_grad**2;
        self.b -= self.learningRate/np.sqrt(self.Gb + 1e-8) * b_grad;

    def inference(self,xs):
        # fwd layer
        prev_h = np.zeros_like(self.hls_infer[0])
        for t in range(0,self.lenRec):
            # update input
            self.x    = xs[t]
            
            self.RNN_cell_L.hx = np.hstack((prev_h, self.x));
           
            hl = self.RNN_cell_L.fwd_pass()
            # bookkeeping
            self.hls_infer[t] = hl
            prev_h = self.hls_infer[t]
                           
        # bwd layer
        prev_h = np.zeros_like(self.hrs[0])                 
        for t in reversed(range(0,self.lenRec)):
            # update input
            self.x    = xs[t]
            self.RNN_cell_R.hx = np.hstack((prev_h, self.x));
           
            hr = self.RNN_cell_R.fwd_pass()
            # bookkeeping
            self.hrs_infer[t] = hr
            prev_h = self.hrs_infer[t] 
                           
            # output layer - fully connected layer
        y = np.dot(self.W,np.hstack((self.hls_infer[0],self.hrs_infer[0]))) + self.b 
        p = softmax(y)
             
        return np.random.choice(range(self.lenOut), p=p.ravel())
  


In [12]:
class RNN_cell:
    
    def __init__ (self,lenIn,sizeHidden,lenRec,learningRate):
        self.lenIn        = lenIn
        self.sizeHidden   = sizeHidden
        self.lenRec       = lenRec
        self.learningRate = learningRate
        
        # hx == x is x and h horizontally stacked together
        self.hx = np.zeros(lenIn)
        self.h = np.zeros(sizeHidden)
        
        # Weight matrices
        self.hxW = np.random.random((sizeHidden,lenIn));
        
        # biases
        self.hb = np.zeros(sizeHidden);

        # for RMSprop only
        self.GhxW = np.random.random((sizeHidden,lenIn));
        self.Ghb = np.zeros(sizeHidden);
        
        ''' end of RNN_cell.__init__ '''
        
    def fwd_pass(self):
        self.h = tanh(np.dot(self.hxW, self.hx) + self.hb)       
        return self.h;

    def bwd_pass(self, dh):
        
        dh = np.clip(dh, -6, 6);       
        # h = o*tanh(c)
        dh  = tanh_deriv(self.h) * dh
        dhb = dh
        dhxW = np.dot((np.atleast_2d(dh)).T,np.atleast_2d(self.hx)) 
        
        hx_grad = np.dot(self.hxW.T, dh)
               
        return dhxW, dhb, hx_grad[:self.sizeHidden],hx_grad[self.sizeHidden:];
    
    def update(self, hxW_grad, hb_grad):

        # adagrad
        self.GhxW = self.GhxW + hxW_grad**2
        self.Ghb  = self.Ghb  + hb_grad**2
        
        self.hxW -= self.learningRate/np.sqrt(self.GhxW + 1e-8) * hxW_grad
        self.hb  -= self.learningRate/np.sqrt(self.Ghb + 1e-8) * hb_grad
        

In [10]:
data = open('HP1.txt','r', encoding="utf8").read();
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(data_size,", ",vocab_size)

char_to_ix = {ch:i for i, ch in enumerate(chars)}
ix_to_char = {i:ch for i,ch in enumerate(chars)}
print(char_to_ix)
print(ix_to_char)

def encode(idx,num_entry):
    ret = np.zeros(num_entry)
    ret[idx] = 1
    return ret;

def encode_array(array,num_entry):
    xs = np.zeros((len(array),num_entry))
    for i in range(len(array)):
        xs[i][array[i]] = 1; 
    return xs;


seq_length,position = 100,0
inputs = [char_to_ix[ch] for ch in data[position:position+seq_length]]
print(data[position:position+seq_length])
print("inputs",inputs)

targets = [char_to_ix[ch] for ch in data[position+1:position+seq_length+1]] 
print(data[position+1:position+seq_length+1])
print("targets",targets)

n,position = 0,0;
epoch = 30*1000;
lenIn, lenOut, lenRec = vocab_size,vocab_size, seq_length
sizeHidden, numHiddenLayer = 100,1;
learningRate = 0.1;


R = basicRNN(lenIn, lenOut, lenRec, sizeHidden, encode_array(inputs,vocab_size),targets, learningRate)

# training
while n<epoch:
    
    if(position+seq_length+1 >= len(data) or n == 0):
        print("!!!!",len(data))
        position = 0;
        
    inputs  = [char_to_ix[ch] for ch in data[position:position+seq_length]]
    targets = [char_to_ix[ch] for ch in data[position+1:position+seq_length+1]] 

    R.update_inputs_targets(encode_array(inputs,vocab_size),targets)
    R.fwd_pass();
    
    err = R.bwd_pass();
    
    if(n%500 == 0):
        print(n,"err:",err)
        infer_in  = [char_to_ix[ch] for ch in data[position:position+seq_length]]
        infer_in_enc = encode_array(infer_in,vocab_size)
        result = [];

        for i in range(200):
            ret = R.inference(infer_in_enc)
            #print(i,":",ret)
            result.append(ret)
            infer_in.append(ret)
            infer_in_enc = encode_array(infer_in[i+1:],vocab_size)
        decode = ''.join([ix_to_char[ch] for ch in result] )
        print(decode+'\n')

    position += seq_length;
    n += 1;

431677 ,  79
{':': 0, 'c': 1, 'K': 2, '~': 3, 'P': 4, 'y': 5, '\n': 6, 'f': 7, 'b': 8, 'B': 9, 'G': 10, '0': 11, '9': 12, '6': 13, '*': 14, 'n': 15, '7': 16, '8': 17, 'h': 18, '3': 19, 'C': 20, ' ': 21, 'l': 22, 'J': 23, 'Q': 24, 'U': 25, 'X': 26, '\\': 27, '-': 28, 'T': 29, '\t': 30, 'k': 31, 'v': 32, '?': 33, 'm': 34, ';': 35, '4': 36, 'E': 37, 'q': 38, "'": 39, 'Z': 40, 'W': 41, 'O': 42, 'i': 43, 'g': 44, 'M': 45, 'L': 46, 'a': 47, 't': 48, '!': 49, 'I': 50, 'H': 51, 'F': 52, 'S': 53, 'd': 54, 'o': 55, 'j': 56, 'r': 57, 's': 58, 'x': 59, 'w': 60, ')': 61, ',': 62, 'p': 63, '(': 64, 'N': 65, 'R': 66, '5': 67, 'V': 68, 'A': 69, '"': 70, 'z': 71, 'D': 72, 'u': 73, 'e': 74, '1': 75, '2': 76, '.': 77, 'Y': 78}
{0: ':', 1: 'c', 2: 'K', 3: '~', 4: 'P', 5: 'y', 6: '\n', 7: 'f', 8: 'b', 9: 'B', 10: 'G', 11: '0', 12: '9', 13: '6', 14: '*', 15: 'n', 16: '7', 17: '8', 18: 'h', 19: '3', 20: 'C', 21: ' ', 22: 'l', 23: 'J', 24: 'Q', 25: 'U', 26: 'X', 27: '\\', 28: '-', 29: 'T', 30: '\t', 31: 'k', 

KeyboardInterrupt: 

In [10]:
dimW = R.W.shape[0]*R.W.shape[1]
dimb = R.b.shape[0]
dimHL= R.RNN_cell.hlxW.shape[0] *R.RNN_cell.hlxW.shape[1] 
dimHR= R.RNN_cell.hrxW.shape[0] *R.RNN_cell.hrxW.shape[1] 
dimHLB=R.RNN_cell.hlb.shape[0]
dimHRB=R.RNN_cell.hrb.shape[0]

print(dimHL)

17900


In [11]:
np.savetxt("./100R_W.CSV",R.W.reshape(1,dimW),delimiter=',')
np.savetxt("./100R_b.CSV",R.b.reshape(1,dimb),delimiter=',')
np.savetxt("./100HL.CSV",R.RNN_cell.hlxW.reshape(1,dimHL),delimiter=',')
np.savetxt("./100HR.CSV",R.RNN_cell.hrxW.reshape(1,dimHR),delimiter=',')
np.savetxt("./100HLB.CSV",R.RNN_cell.hlb.reshape(1,dimHLB),delimiter=',')
np.savetxt("./100HRB.CSV",R.RNN_cell.hrb.reshape(1,dimHRB),delimiter=',')


In [6]:
pwd

'C:\\Users\\peter\\Anaconda3\\study'