In [1]:
import numpy as np


# To read the training data and make a vocabulary and dictiornary to index the chars
class DataReader:
    def __init__(self, path, seq_length, progress_len=5, max_trace_len = 200):
        #uncomment below , if you dont want to use any file for text reading and comment next 2 lines
        
        self.fp = open(path, "r")
        self.data = self.fp.read()
        #find unique chars
        chars = list(set(self.data))
        #create dictionary mapping for each char
        self.char_to_ix = {ch:i for (i,ch) in enumerate(chars)}
        self.ix_to_char = {i:ch for (i,ch) in enumerate(chars)}
        #total data
        self.data_size = len(self.data)
        print(chars)
        #num of unique chars
        self.vocab_size = len(chars)
        self.pointer = 0
        self.seq_length = seq_length
        self.progress_len = progress_len
        self.max_trace_len = max_trace_len

    def next_batch(self):
        input_start = self.pointer
        input_end = self.pointer + self.seq_length
        inputs = [self.char_to_ix[ch] for ch in self.data[input_start:input_end]]
        targets = [self.char_to_ix[ch] for ch in self.data[input_start+1:input_end+1]]
        self.pointer += self.progress_len
        if self.pointer + self.seq_length + 1 >= self.data_size:
            # reset pointer, start new epoch
            self.pointer = -1
        trace_start = 0
        if self.pointer > self.max_trace_len:
            trace_start = self.pointer - self.max_trace_len
        prev_input = [self.char_to_ix[ch] for ch in self.data[trace_start:input_start]]
        return prev_input, inputs, targets

    def new_epoch(self):
        return self.pointer == -1
    def start_epoch(self):
        self.pointer = 0

    def close(self):
        self.fp.close()

In [2]:
  
class RNN:
    def __init__(self, hidden_size, vocab_size, seq_length, learning_rate):
        # hyper parameters
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.learning_rate = learning_rate
        # model parameters
        self.U = np.random.uniform(-np.sqrt(1./vocab_size), np.sqrt(1./vocab_size), (hidden_size, vocab_size))
        self.V = np.random.uniform(-np.sqrt(1./hidden_size), np.sqrt(1./hidden_size), (vocab_size, hidden_size))
        self.W = np.random.uniform(-np.sqrt(1./hidden_size), np.sqrt(1./hidden_size), (hidden_size, hidden_size))
        self.b = np.zeros((hidden_size, 1)) # bias for hidden layer
        self.c = np.zeros((vocab_size, 1)) # bias for output
        
        # memory vars for adagrad, 
        #ignore if you implement another approach
        self.mU = np.zeros_like(self.U)
        self.mW = np.zeros_like(self.W)
        self.mV = np.zeros_like(self.V)
        self.mb = np.zeros_like(self.b)
        self.mc = np.zeros_like(self.c)

    def softmax(self, x):
        p = np.exp(x- np.max(x))
        return p / np.sum(p)
        
    def forward(self, start, inputs):
        xs, hs, os, ycap = {}, {}, {}, {}

        oh_start = np.zeros((self.vocab_size, 1))
        hs_start = np.zeros((self.hidden_size, 1))
        for c in start:
            oh_start[c] = 1
            hs_start = np.tanh(np.dot(self.U, oh_start) + np.dot(self.W, hs_start)+self.b)
            oh_start[c] = 0
        
            
        hs[-1] = hs_start

        
        for t in range(len(inputs)):
            xs[t] = np.zeros((self.vocab_size,1))
            xs[t][inputs[t]] = 1 # one hot encoding , 1-of-k
            hs[t] = np.tanh(np.dot(self.U,xs[t]) + np.dot(self.W,hs[t-1]) + self.b) # hidden state
            os[t] = np.dot(self.V,hs[t]) + self.c # unnormalised log probs for next char
            ycap[t] = self.softmax(os[t]) # probs for next char
        return xs, hs, ycap
        
    def backward(self, xs, hs, ps, targets):
            # backward pass: compute gradients going backwards
            dU, dW, dV = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V)
            db, dc = np.zeros_like(self.b), np.zeros_like(self.c)
            dhnext = np.zeros_like(hs[0])
            for t in reversed(range(self.seq_length)):
                dy = np.copy(ps[t])
                #through softmax
                dy[targets[t]] -= 1 # backprop into y
                #calculate dV, dc
                dV += np.dot(dy, hs[t].T)
                dc += dc
                #dh includes gradient from two sides, next cell and current output
                dh = np.dot(self.V.T, dy) + dhnext # backprop into h
                # backprop through tanh non-linearity 
                dhrec = (1 - hs[t] * hs[t]) * dh  #dhrec is the term used in many equations
                db += dhrec
                #calculate dU and dW
                dU += np.dot(dhrec, xs[t].T)
                dW += np.dot(dhrec, hs[t-1].T)
                #pass the gradient from next cell to the next iteration.
                dhnext = np.dot(self.W.T, dhrec)
            # clip to mitigate exploding gradients
            for dparam in [dU, dW, dV, db, dc]:
                np.clip(dparam, -5, 5, out=dparam) 
            return dU, dW, dV, db, dc
    
    def loss(self, ps, targets):
        """loss for a sequence"""
        # calculate cross-entrpy loss
        return sum(-np.log(ps[t][targets[t],0]) for t in range(self.seq_length))
        
    
    def update_model(self, dU, dW, dV, db, dc):
        # parameter update with adagrad
        for param, dparam, mem in zip([self.U, self.W, self.V, self.b, self.c],
                                  [dU, dW, dV, db, dc],
                                  [self.mU, self.mW, self.mV, self.mb, self.mc]):
            mem += dparam*dparam
            param += -self.learning_rate*dparam/np.sqrt(mem+1e-8) # adagrad update
                
                
    def sample(self, h, seed_ix, n):
            """
            sample a sequence of integers from the model
            h is memory state, seed_ix is seed letter from the first time step
            """
            x = np.zeros((self.vocab_size, 1))
            x[seed_ix] = 1
            ixes = []
            for t in range(n):
                h = np.tanh(np.dot(self.U, x) + np.dot(self.W, h) + self.b)
                y = np.dot(self.V, h) + self.c
                p = np.exp(y)/np.sum(np.exp(y))
                ix = np.random.choice(range(self.vocab_size), p = p.ravel())
                x = np.zeros((self.vocab_size,1))
                x[ix] = 1
                ixes.append(ix)
            return ixes

    def train(self, data_reader, threshold = 0.01, max_epoch = 5, func_string = ""):
        epoches = 0
        loss = threshold + 1

        batch_count = 0
        while (loss > threshold and max_epoch > epoches):
            data_reader.start_epoch()
            while not data_reader.new_epoch():
                start, inputs, targets = data_reader.next_batch()
                batch_count += 1
                
                xs, hs, ps = self.forward(start, inputs)
                dU, dW, dV, db, dc = self.backward(xs, hs, ps, targets)
                self.update_model(dU, dW, dV, db, dc)
                    
                
                exec(func_string)
                #     print(f'batch {batch_count} completed')
                #     predicted_string = ''.join(data_reader.ix_to_char[num_val] for num_val in ps)
                #     print(f'\tpredicted result: {predicted_string}\n')
                # predicted = self.predict(data_reader, 'Ham ', 50)
                # print(f'Epoch {epoches} prediction (Ham):\\n==========\\n {predicted}\\n==========')
                # xs, hs, ps = self.forward(start, inputs)
                # dU, dW, dV, db, dc = self.backward(xs, hs, ps, targets)
                # self.update_model(dU, dW, dV, db, dc)
                
            epoches += 1            
            loss = self.loss(ps, targets)
            print(f"Finished {epoches} epoches, Loss: {loss}")

    def predict(self, data_reader, start, n):

        #initialize input vector
        x = np.zeros((self.vocab_size, 1))
        chars = [ch for ch in start]
        ixes = []        
        h = np.zeros((self.hidden_size,1))

        for i in range(len(chars) - 1):
            ix = data_reader.char_to_ix[chars[i]]
            x[ix] = 1
            h = np.tanh(np.dot(self.U, x) + np.dot(self.W, h) + self.b)
            ixes.append(ix)
            x[ix] = 0

        ix = data_reader.char_to_ix[chars[-1]]
        x[ix] = 1
        ixes.append(ix)
        # predict next n chars
        for t in range(n):
            h = np.tanh(np.dot(self.U, x) + np.dot(self.W, h) + self.b)
            y = np.dot(self.V, h) + self.c
            p = np.exp(y)/np.sum(np.exp(y))
            x[ix] = 0
            ix = np.random.choice(range(self.vocab_size), p = p.ravel())
            x[ix] = 1
            ixes.append(ix)
        txt = ''.join(data_reader.ix_to_char[i] for i in ixes)
        return txt

In [3]:

seq_length = 50
#read text from the "input.txt" file
data_reader = DataReader("input.txt", seq_length, progress_len=80, max_trace_len = 500)
rnn = RNN(hidden_size=350, vocab_size=data_reader.vocab_size,seq_length=seq_length,learning_rate=1e-1)

['i', 'G', ',', 'r', 'l', 'e', 'V', '"', '&', 'Q', 'y', '[', ':', 'b', 'P', 'I', 'd', 'p', 'a', ';', 'E', 'm', '(', 'R', 'S', 'O', 's', ')', 'Y', 'W', 'v', '?', 'c', 'q', 'x', '1', 'A', 'j', 'n', 'o', 'U', 'J', 'D', 'f', 'T', 'K', 'F', "'", 't', ']', 'H', 'w', 'B', ' ', 'N', 'g', 'h', 'z', 'L', 'C', '-', 'M', 'k', '!', '\n', 'u', '.']


In [4]:
func_string = \
"""
if batch_count % 500 == 0:
    print(f'Epoch {epoches}, Batch {batch_count}, Previous Loss {self.loss(ps, targets)}: ')
    # print(f'Starting string for last batch: {"".join(data_reader.ix_to_char[i] for i in start)}')
    # print(f'Input string for last batch: {"".join(data_reader.ix_to_char[i] for i in inputs)}')
    predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]
    
    for num, phrase in enumerate(predict_phrases):
        result = self.predict(data_reader, phrase, 100)
        print(f'Prediction {num+1} ({phrase}): \\n{result}\\n')
    print("*****************************************************************")
"""

predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]

for num, phrase in enumerate(predict_phrases):
    result = rnn.predict(data_reader, phrase, 100)
    print(f'Prediction {num+1} ({phrase}): \\n{result}\\n')
print("*****************************************************************")
# """
# predicted = self.predict(data_reader, 'Ham ', 50)
# print(f'Epoch {epoches} prediction (Ham):\\n==========\\n {predicted}\\n==========')
# """
rnn.train(data_reader, func_string = func_string)


Prediction 1 (Hamlet): \nHamletL-UkRH&V
-:eVQgAFWD)JCYGdRjg YrDnS(:1n;cuYor,-W d?gKSi.,qabJ"SV'x&rx; TfE:GiA&yV Ig:NWrIs]c;"BpCLnKN\n
Prediction 2 (To be): \nTo ber JH.cM,Rx,N][?Q&bN]KCNNbbf))tBf1
!BHcE;FwzsKPWH?.ivbu(aGWKdMlFs r1xsS.kBwBf DukrULoFQ.O,QEsOfsjPl
(\n
Prediction 3 (poison): \npoisonA1pV;?RdifKfbVz-afeQBTfk (P'xt!dW?&'jh)dfUi1[ .f[jm(KJ[?IVsKCvMdai(M].dkmvxnN,I xtRnLfBfE.)T&WfJmzj:\n
Prediction 4 (King): \nKingSvTmwHTwpTjxkp:-lid H-CbLWL-p(dmscP.?&ay :xHb[?&:rfrars(mS(tjVIWfWtYNMPoHxBcRhewbB(ePUJn)mP nKG1'a1H\n
Prediction 5 (Queen): \nQueene&IlIz?pfGhcCgeIMjP.fb'DqpFcJude]vLJSR';fFW;eSJ["kixzkJ?RogPLtb?)Wz[j1CKQ?wiVOls1zNF.'NULrW;mdyV]UCn\n
*****************************************************************
Epoch 0, Batch 500, Previous Loss 261.14874694768736: 
Prediction 1 (Hamlet): 
Hamletkaorra o, khra,koooooI
ooaroo'r,
yor a
aoBrlko ga ooyoiosobrlran,sayy 
rv,lyo
 a,
,oavurao, oor hau,

Prediction 2 (To be): 
To beyooorakg
   or
otuoroo,ehk,lo

go
,ooakn tr,s 
sao yoiou 

In [5]:
predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]

for num, phrase in enumerate(predict_phrases):
    result = rnn.predict(data_reader, phrase, 100)
    print(f'Prediction {num+1} ({phrase}): \n====================\n{result}\n====================\n')

Prediction 1 (Hamlet): 
HamletP the beSke tiichofoy oele s  df  pent  ?   cSc  pisis het saniuchh    he h  phi''e aca I   ir has;h

Prediction 2 (To be): 
To be Af samhiac  thhaycaId hase whaf .or Foovhar lat  bacheg   te phhiske  iyf .  halt  he  Kk   fe sFhh

Prediction 3 (poison): 
poison ara .id hoeb

   a   Fdaf. ienhleoncad 'e toa s ans che;hon A thit  wy    Yiu   .
   chode  Hn tefa

Prediction 4 (King): 
King . uWilat
 ag s    h  tin   ; inWhhige c;anErer  wit   F phy dhi d     wi  holhhdche mr bnt daf did 

Prediction 5 (Queen): 
Queend 
Narwoov raEsaykhid     d mrfota hhe ler ch Ft     bs   uem hhhoinkemow.pshhhhuy
o, ? whh boninmll



In [7]:
func_string = \
"""
if batch_count % 500 == 0:
    print(f'Epoch {epoches}, Batch {batch_count}, Previous Loss {self.loss(ps, targets)}: ')
    # print(f'Starting string for last batch: {"".join(data_reader.ix_to_char[i] for i in start)}')
    # print(f'Input string for last batch: {"".join(data_reader.ix_to_char[i] for i in inputs)}')
    predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]
    
    for num, phrase in enumerate(predict_phrases):
        result = self.predict(data_reader, phrase, 100)
        print(f'Prediction {num+1} ({phrase}): \\n{result}\\n')
    print("*****************************************************************")
"""

predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]

for num, phrase in enumerate(predict_phrases):
    result = rnn.predict(data_reader, phrase, 100)
    print(f'Prediction {num+1} ({phrase}): \\n{result}\\n')
print("*****************************************************************")
# """
# predicted = self.predict(data_reader, 'Ham ', 50)
# print(f'Epoch {epoches} prediction (Ham):\\n==========\\n {predicted}\\n==========')
# """
rnn.train(data_reader, func_string = func_string, max_epoch=500)


Prediction 1 (Hamlet): \nHamletaumhe br f  ho   he   n teeunc seant f;thig po [    met      cech. chold Fomit  p te t 
     Bouat c\n
Prediction 2 (To be): \nTo be
 Ot
       wl F
ics

   Rigc.as ;fa mi ch ;shhif  h dontoee     conthut bdua   hs   , ;  daw fldde \n
Prediction 3 (poison): \npoisonchhh the;har   e tulaoil e lhif fea g; ;  c. wh oamais  or 
 lhdabe   B S theoc?w T. d   Sar l , i F\n
Prediction 4 (King): \nKing  A     d     
    hhr kh; pe  de   hllhhesxa rikhha.onnrtbeswuh  glac hauas c hes a      pd ichhipa\n
Prediction 5 (Queen): \nQueend thorw. moargha shr  h. heiradhhis cphot rod d i we lf 
   hm bua's chaam    t it c  sesHay  ;hahho\n
*****************************************************************
Epoch 0, Batch 500, Previous Loss 138.07872982929183: 
Prediction 1 (Hamlet): 
Hamlete seoroerol.  mear,, th  eeiarh   yoreraki arhfhquut yWis weolorh ris  whas
yire?  d Hefor   fora to

Prediction 2 (To be): 
To bele   ridl,,  Hoy cy rthrocorkaBod Tamnwher sod ses       

In [None]:
func_string = \
"""
if batch_count % 500 == 0:
    print(f'Epoch {epoches}, Batch {batch_count}, Previous Loss {self.loss(ps, targets)}: ')
    # print(f'Starting string for last batch: {"".join(data_reader.ix_to_char[i] for i in start)}')
    # print(f'Input string for last batch: {"".join(data_reader.ix_to_char[i] for i in inputs)}')
    predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]
    
    for num, phrase in enumerate(predict_phrases):
        result = self.predict(data_reader, phrase, 100)
        print(f'Prediction {num+1} ({phrase}): \\n{result}\\n')
    print("*****************************************************************")
"""

predict_phrases = ["Hamlet", "To be", "poison", "King", "Queen"]

for num, phrase in enumerate(predict_phrases):
    result = rnn.predict(data_reader, phrase, 100)
    print(f'Prediction {num+1} ({phrase}): \\n{result}\\n')
print("*****************************************************************")
# """
# predicted = self.predict(data_reader, 'Ham ', 50)
# print(f'Epoch {epoches} prediction (Ham):\\n==========\\n {predicted}\\n==========')
# """
rnn.learning_rate /= 10
rnn.train(data_reader, func_string = func_string, max_epoch=500)


Prediction 1 (Hamlet): \nHamlet ils, he drand.
  Haws ank us her boff lord me maald thang in wiusess not, him wand gor; A, prom.   \n
Prediction 2 (To be): \nTo bed.
  '    Ose gant prere stamys a (Fy, of his daeld maebl. ave, cale fur
 O und. Add ster Nhatte dDe\n
Prediction 3 (poison): \npoison one mout eansDee,
    The shas fupct!s mn fos a shatd logt; in with,
    Buf fief thebr briinthfiid\n
Prediction 4 (King): \nKingais sto's and hpen a jor bute haag.
    Wht.
    On onom jukes pe hat a ket hamres,
    Youes, heal,\n
Prediction 5 (Queen): \nQueendgam. Bucgimn cont thasse unzirr thabwis ginithsat uach esh Giospe ger ham. ies sorr me in Hagk suti\n
*****************************************************************
Epoch 0, Batch 500, Previous Loss 88.1733177434875: 
Prediction 1 (Hamlet): 
Hamleth, nots. Wworllilivr mave st, yout pare, her a
    To gn thingount't, yore.

  Hoos
 

  shouggaingu

Prediction 2 (To be): 
To bedry maante hiomaceunsit a
    I mand I thare thas.
    Bure