In [7]:
import numpy as np
import copy

# Helper functions
def softmax(array):
    return np.exp(array)/ np.sum(np.exp(array)) # return an array

def sigmoid(x):
    return (1/(1+np.exp(-x)))

def sigmoid_deriv(y):
    return (y*(1-y))

def tanh(x):
    return np.tanh(x)

def tanh_deriv(y):
    return 1 - pow(np.tanh(y),2)

# RNN
class basicRNN:
    
    def __init__ (self, lenIn, lenOut, lenRec, sizeHidden, inputs_encoded, targets, learningRate):
        
        # Hyper parameters
        self.lenIn          = lenIn
        self.lenOut         = lenOut
        self.lenRec         = lenRec
        self.sizeHidden     = sizeHidden
        self.learningRate   = learningRate
        
        # input & expected output
        self.inputs_encoded = inputs_encoded;
        self.targets = targets;
        
        # parameters for inference
        self.x  = np.zeros(lenIn)  
        self.y  = np.zeros(lenOut)
        self.hls_infer = np.zeros((lenRec,sizeHidden))
        self.hrs_infer = np.zeros((lenRec,sizeHidden))
        
        self.W  = np.zeros((lenOut,sizeHidden*2)) # for the last fully connected layer
        self.b  = np.zeros(lenOut)
       
        # for training phase 
        self.xs = np.zeros((lenRec,lenIn))
        self.ys = np.zeros((lenRec,lenOut))
        self.hls = np.zeros((lenRec,sizeHidden))
        self.hrs = np.zeros((lenRec,sizeHidden))
        self.GW = np.zeros((lenOut,sizeHidden*2)) # Gradient, for W-update using RMSprop
        self.Gb = np.zeros(lenOut)
        
        # CELL class
        self.RNN_cell = RNN_cell(sizeHidden+lenIn,sizeHidden,lenRec,learningRate)
        
        ''' end of basicRNN.__init__ '''
       
    ''' This is used when mini-batch is used '''            
    def update_inputs_targets(self, inputs_encoded, targets):
        self.inputs_encoded  = inputs_encoded
        self.targets         = targets
    
    def fwd_pass(self): 
        # fwd layer
        prev_h = np.zeros_like(self.hls[0])
        for t in range(0,self.lenRec):
            # update input
            self.x    = self.inputs_encoded[t]
            self.xs[t]= self.inputs_encoded[t]
            
            self.RNN_cell.hlx = np.hstack((prev_h, self.x));
           
            hl = self.RNN_cell.fwd_pass_L()
            # bookkeeping
            self.hls[t] = hl
            prev_h = self.hls[t]
                           
        # bwd layer
        prev_h = np.zeros_like(self.hrs[0])                 
        for t in reversed(range(0,self.lenRec)):
            # update input
            self.x    = self.xs[t]
            self.RNN_cell.hrx = np.hstack((prev_h, self.x));
           
            hr = self.RNN_cell.fwd_pass_R()
            # bookkeeping
            self.hrs[t] = hr
            prev_h = self.hrs[t] 
                           
            # output layer - fully connected layer
            self.ys[t] = np.dot(self.W,np.hstack((self.hls[t],self.hrs[t]))) + self.b            
        return;              
    
    def bwd_pass(self):        

        avg_loss = 0; # using cross entropy average
        h2next_grad  = np.zeros(self.sizeHidden)
        
        # output bp
        W_grad   = np.zeros((self.lenOut,self.sizeHidden*2))
        b_grad  = np.zeros(self.lenOut)
                                
        hlxW_grad  = np.zeros((self.sizeHidden,self.RNN_cell.lenIn));
        hrxW_grad  = np.zeros((self.sizeHidden,self.RNN_cell.lenIn));                        
        hlb_grad   = np.zeros((self.sizeHidden));
        hrb_grad   = np.zeros((self.sizeHidden)); 
                                
        # propagates through time and layers      
        dh = np.zeros((lenRec,sizeHidden*2))                

        for t in reversed(range(0,self.lenRec)):
            
            prob = softmax(self.ys[t]) # prevent zero
            prob_fix  = prob + 1e-9

            # cross entropy
            err       = np.log(prob_fix[self.targets[t]])
            avg_loss += err
     
            dy = copy.deepcopy(prob)
            dy[self.targets[t]] -= 1
            
            W_grad += np.dot((np.atleast_2d(dy)).T,np.atleast_2d(np.hstack((self.hls[t],self.hrs[t])) ))
            b_grad += dy
            
            dh[t] = np.dot(self.W.T,dy) 
                                
        for t in reversed(range(0,self.lenRec)):                 
            dhl = dh[t,:self.sizeHidden] + h2next_grad         
            x_grad  = np.zeros(self.lenIn)
            
            if(t > 0):
                prev_h = self.hls[t-1]
            else:
                prev_h = np.zeros_like(self.hls[0])
                
            self.RNN_cell.hlx = np.hstack((prev_h,self.xs[t]))
            self.RNN_cell.hl  = self.hls[t]

            dhlxW, dhlb, h2next_grad,x_grad = \
            self.RNN_cell.bwd_pass_L( dhl );
            
            hlxW_grad  +=  dhlxW
            hlb_grad   +=  dhlb
                                
        h2next_grad  = np.zeros(self.sizeHidden)                        
        for t in range(0,self.lenRec):                 
            dhr = dh[t,self.sizeHidden:] + h2next_grad         
            x_grad  = np.zeros(self.lenIn)
            
            if(t < self.lenRec-1):
                prev_h = self.hrs[t+1]
            else:
                prev_h = np.zeros_like(self.hrs[0])
                
            self.RNN_cell.hrx = np.hstack((prev_h,self.xs[t]))
            self.RNN_cell.hr  = self.hrs[t]

            dhrxW, dhrb, h2next_grad,x_grad = \
            self.RNN_cell.bwd_pass_R( dhr );
            
            hrxW_grad  +=  dhrxW
            hrb_grad   +=  dhrb
                                
        self.RNN_cell.update(hlxW_grad/self.lenRec, hlb_grad/self.lenRec,hrxW_grad/self.lenRec, hrb_grad/self.lenRec);
        
        self.update(W_grad/self.lenRec,b_grad/self.lenRec);
        return avg_loss/self.lenRec;
            
    def update(self, W_grad, b_grad):
        self.GW = self.GW + W_grad**2;
        self.W -= self.learningRate/np.sqrt(self.GW + 1e-8) * W_grad;
        self.Gb = self.Gb + b_grad**2;
        self.b -= self.learningRate/np.sqrt(self.Gb + 1e-8) * b_grad;

    def inference(self,xs):
        # fwd layer
        prev_h = np.zeros_like(self.hls_infer[0])
        for t in range(0,self.lenRec):
            # update input
            self.x    = xs[t]
            
            self.RNN_cell.hlx = np.hstack((prev_h, self.x));
           
            hl = self.RNN_cell.fwd_pass_L()
            # bookkeeping
            self.hls_infer[t] = hl
            prev_h = self.hls_infer[t]
                           
        # bwd layer
        prev_h = np.zeros_like(self.hrs[0])                 
        for t in reversed(range(0,self.lenRec)):
            # update input
            self.x    = xs[t]
            self.RNN_cell.hrx = np.hstack((prev_h, self.x));
           
            hr = self.RNN_cell.fwd_pass_R()
            # bookkeeping
            self.hrs_infer[t] = hr
            prev_h = self.hrs_infer[t] 
                           
            # output layer - fully connected layer
        y = np.dot(self.W,np.hstack((self.hls_infer[0],self.hrs_infer[0]))) + self.b 
        p = softmax(y)
             
        return np.random.choice(range(self.lenIn), p=p.ravel())
  


In [8]:
class RNN_cell:
    
    def __init__ (self,lenIn,sizeHidden,lenRec,learningRate):
        self.lenIn        = lenIn
        self.sizeHidden   = sizeHidden
        self.lenRec       = lenRec
        self.learningRate = learningRate
        
        # hx == x is x and h horizontally stacked together
        self.hlx = np.zeros(lenIn)
        self.hrx = np.zeros(lenIn)
        self.hl = np.zeros(sizeHidden)
        self.hr = np.zeros(sizeHidden)
        
        # Weight matrices
        self.hlxW = np.random.random((sizeHidden,lenIn));
        self.hrxW = np.random.random((sizeHidden,lenIn));
        
        # biases
        self.hlb = np.zeros(sizeHidden);
        self.hrb = np.zeros(sizeHidden);
        
        # for RMSprop only
        self.GhlxW = np.random.random((sizeHidden,lenIn));
        self.GhrxW = np.random.random((sizeHidden,lenIn));
        self.Ghlb = np.zeros(sizeHidden);
        self.Ghrb = np.zeros(sizeHidden);
        
        ''' end of RNN_cell.__init__ '''
        
    def fwd_pass_L(self):
        self.hl = tanh(np.dot(self.hlxW, self.hlx) + self.hlb)       
        return self.hl;

    def fwd_pass_R(self):
        self.hr = tanh(np.dot(self.hrxW, self.hrx) + self.hrb)       
        return self.hr;

    def bwd_pass_L(self, dhl):
        
        dhl = np.clip(dhl, -6, 6);       
        # h = o*tanh(c)
        dhl  = tanh_deriv(self.hl) * dhl
        dhlb = dhl
        dhlxW = np.dot((np.atleast_2d(dhl)).T,np.atleast_2d(self.hlx)) 
        
        hlx_grad = np.dot(self.hlxW.T, dhl)
               
        return dhlxW, dhlb, hlx_grad[:self.sizeHidden],hlx_grad[self.sizeHidden:];

    def bwd_pass_R(self, dhr):
        
        dhr = np.clip(dhr, -6, 6);       
        # h = o*tanh(c)
        dhr  = tanh_deriv(self.hr) * dhr
        dhrb = dhr
        dhrxW = np.dot((np.atleast_2d(dhr)).T,np.atleast_2d(self.hrx)) 
        
        hrx_grad = np.dot(self.hrxW.T, dhr)
               
        return dhrxW, dhrb, hrx_grad[:self.sizeHidden],hrx_grad[self.sizeHidden:];
    
    def update(self, hlxW_grad, hlb_grad, hrxW_grad, hrb_grad):

        # adagrad
        self.GhlxW = self.GhlxW + hlxW_grad**2
        self.Ghlb = self.Ghlb + hlb_grad**2
        self.GhrxW = self.GhrxW + hrxW_grad**2
        self.Ghrb = self.Ghrb + hrb_grad**2
        
        self.hlxW -= self.learningRate/np.sqrt(self.GhlxW + 1e-8) * hlxW_grad
        self.hlb -= self.learningRate/np.sqrt(self.Ghlb + 1e-8) * hlb_grad
        self.hrxW -= self.learningRate/np.sqrt(self.GhrxW + 1e-8) * hrxW_grad
        self.hrb -= self.learningRate/np.sqrt(self.Ghrb + 1e-8) * hrb_grad
        

In [9]:
data = open('HP1.txt','r', encoding="utf8").read();
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(data_size,", ",vocab_size)

char_to_ix = {ch:i for i, ch in enumerate(chars)}
ix_to_char = {i:ch for i,ch in enumerate(chars)}
print(char_to_ix)
print(ix_to_char)

def encode(idx,num_entry):
    ret = np.zeros(num_entry)
    ret[idx] = 1
    return ret;

def encode_array(array,num_entry):
    xs = np.zeros((len(array),num_entry))
    for i in range(len(array)):
        xs[i][array[i]] = 1; 
    return xs;


seq_length,position = 100,0
inputs = [char_to_ix[ch] for ch in data[position:position+seq_length]]
print(data[position:position+seq_length])
print("inputs",inputs)

targets = [char_to_ix[ch] for ch in data[position+1:position+seq_length+1]] 
print(data[position+1:position+seq_length+1])
print("targets",targets)

n,position = 0,0;
epoch = 30*1000;
lenIn, lenOut, lenRec = vocab_size,vocab_size, seq_length
sizeHidden, numHiddenLayer = 100,1;
learningRate = 0.1;


R = basicRNN(lenIn, lenOut, lenRec, sizeHidden, encode_array(inputs,vocab_size),targets, learningRate)

# training
while n<epoch:
    
    if(position+seq_length+1 >= len(data) or n == 0):
        print("!!!!",len(data))
        position = 0;
        
    inputs  = [char_to_ix[ch] for ch in data[position:position+seq_length]]
    targets = [char_to_ix[ch] for ch in data[position+1:position+seq_length+1]] 

    R.update_inputs_targets(encode_array(inputs,vocab_size),targets)
    R.fwd_pass();
    
    err = R.bwd_pass();
    
    if(n%500 == 0):
        print(n,"err:",err)
        infer_in  = [char_to_ix[ch] for ch in data[position:position+seq_length]]
        infer_in_enc = encode_array(infer_in,vocab_size)
        result = [];

        for i in range(200):
            ret = R.inference(infer_in_enc)
            #print(i,":",ret)
            result.append(ret)
            infer_in.append(ret)
            infer_in_enc = encode_array(infer_in[i+1:],vocab_size)
        decode = ''.join([ix_to_char[ch] for ch in result] )
        print(decode+'\n')

    position += seq_length;
    n += 1;

431677 ,  79
{'S': 0, '9': 1, '\\': 2, 'H': 3, '.': 4, ',': 5, 'z': 6, 'A': 7, '4': 8, '?': 9, '"': 10, '3': 11, '5': 12, 'c': 13, 'h': 14, 'W': 15, 'Y': 16, 'I': 17, 'u': 18, 'i': 19, 'a': 20, ';': 21, 'X': 22, 'n': 23, 'r': 24, 'l': 25, 'Q': 26, '0': 27, "'": 28, 's': 29, '!': 30, '~': 31, '8': 32, 'T': 33, 'D': 34, ' ': 35, 'v': 36, 'q': 37, 'Z': 38, 'C': 39, '-': 40, 'R': 41, 'k': 42, 'F': 43, 'b': 44, 'B': 45, 'j': 46, 'J': 47, 'y': 48, 'e': 49, 'L': 50, '6': 51, 'G': 52, 'U': 53, '1': 54, 't': 55, '2': 56, 'g': 57, '\t': 58, 'P': 59, 'o': 60, 'E': 61, 'O': 62, 'N': 63, '\n': 64, 'V': 65, 'd': 66, 'M': 67, 'K': 68, '(': 69, ':': 70, '7': 71, 'w': 72, '*': 73, 'm': 74, 'f': 75, 'x': 76, 'p': 77, ')': 78}
{0: 'S', 1: '9', 2: '\\', 3: 'H', 4: '.', 5: ',', 6: 'z', 7: 'A', 8: '4', 9: '?', 10: '"', 11: '3', 12: '5', 13: 'c', 14: 'h', 15: 'W', 16: 'Y', 17: 'I', 18: 'u', 19: 'i', 20: 'a', 21: ';', 22: 'X', 23: 'n', 24: 'r', 25: 'l', 26: 'Q', 27: '0', 28: "'", 29: 's', 30: '!', 31: '~', 32

12500 err: -0.48170132018874
and struprled towand a damu qall. The had tostrigire bed use whe mome't she had sandid, phe wlant hand steccrled Rowand w damI qall. bhe had tosteimir. yed.use whe cimect the had sandid, ghe wlant han

!!!! 431677
13000 err: -0.5869899487643402
e rson, Harry!
ur. hursvey stopHed dead. pear Floided hin. qe looked paskay the whisherers ad oI heeursonc Harryc ur. wuroley stoppem br ". pear cloided hin. ke louked p scab the whisherershudLof heeu

13500 err: -0.47773713549064234
t ye the wact yhat "arrd had f.aboe. Uncly Vernonaround the nect uroI me,iLd. tyter apUinity om dout he the pact That parrd had E.aboe! Uncle bernonayound the ndct uroI Mefiud. thter aqoinitt ok bius 

14000 err: -0.5388548830299232
ed t. Grippookd yoaulg selenhungred and Thirteen sok, phease, and,gkn Ie go nore slowly "
"
ne shevld t. Grippookd yoall. solethunErnd and Thifteen soG, wheYsed andogi- pe do more slowly A
" ne sheald

14500 err: -0.5105433082940317
 Har.y thoughu There must 

In [10]:
dimW = R.W.shape[0]*R.W.shape[1]
dimb = R.b.shape[0]
dimHL= R.RNN_cell.hlxW.shape[0] *R.RNN_cell.hlxW.shape[1] 
dimHR= R.RNN_cell.hrxW.shape[0] *R.RNN_cell.hrxW.shape[1] 
dimHLB=R.RNN_cell.hlb.shape[0]
dimHRB=R.RNN_cell.hrb.shape[0]

print(dimHL)

17900


In [11]:
np.savetxt("./100R_W.CSV",R.W.reshape(1,dimW),delimiter=',')
np.savetxt("./100R_b.CSV",R.b.reshape(1,dimb),delimiter=',')
np.savetxt("./100HL.CSV",R.RNN_cell.hlxW.reshape(1,dimHL),delimiter=',')
np.savetxt("./100HR.CSV",R.RNN_cell.hrxW.reshape(1,dimHR),delimiter=',')
np.savetxt("./100HLB.CSV",R.RNN_cell.hlb.reshape(1,dimHLB),delimiter=',')
np.savetxt("./100HRB.CSV",R.RNN_cell.hrb.reshape(1,dimHRB),delimiter=',')


In [6]:
pwd

'C:\\Users\\peter\\Anaconda3\\study'