In [None]:
import numpy as np
class Data:
    """Character-level reader that serves consecutive training windows.

    The whole file is read into memory once. Every distinct character is
    assigned an integer index, and ``next_batch`` walks through the text in
    steps of ``seq_length`` characters.

    Attributes:
        data: Full text of the file.
        char_to_index / index_to_char: Vocabulary lookup tables.
        data_size: Number of characters in ``data``.
        vocab_size: Number of distinct characters.
        tracker: Offset in ``data`` where the next batch starts.
        seq_length: Number of characters per batch.
        fp: The (already closed) file object, kept so ``close_file`` still
            works for existing callers.
    """

    def __init__(self, path, seq_length):
        """Read ``path`` and build the vocabulary.

        Args:
            path: Path of the text file to read.
            seq_length: Number of characters returned per batch.
        """
        # Read eagerly and release the OS handle immediately instead of
        # keeping the file open for the lifetime of the object.
        with open(path, 'r') as fp:
            self.data = fp.read()
        self.fp = fp
        # Sort so the index assigned to each character is the same on every
        # run; plain set iteration order for strings varies between runs.
        character = sorted(set(self.data))
        self.char_to_index = {ch: i for i, ch in enumerate(character)}
        self.index_to_char = dict(enumerate(character))
        self.data_size = len(self.data)
        self.vocab_size = len(character)
        self.tracker = 0
        self.seq_length = seq_length

    def next_batch(self):
        """Return the next ``(inputs, targets)`` pair of index lists.

        ``targets`` is ``inputs`` shifted one character to the right. After
        the batch is taken the tracker advances by ``seq_length`` and wraps
        to 0 when another full batch (plus its shifted target) no longer
        fits in the text.
        """
        input_start = self.tracker
        input_end = self.tracker + self.seq_length
        targets = [self.char_to_index[ch]
                   for ch in self.data[input_start + 1:input_end + 1]]
        inputs = [self.char_to_index[ch]
                  for ch in self.data[input_start:input_end]]
        # When the text is no longer than seq_length the shifted slice is one
        # character short; trim inputs so both lists stay aligned.
        inputs = inputs[:len(targets)]
        self.tracker += self.seq_length
        if self.tracker + self.seq_length >= self.data_size:
            self.tracker = 0

        return inputs, targets

    def reset_tracker(self):
        """Return True when the tracker is at the start of the text.

        Despite its name this method only reports the state; it does not
        modify the tracker.
        """
        return self.tracker == 0

    def close_file(self):
        """Close the underlying file object (harmless if already closed)."""
        self.fp.close()

class RNN:
    """Single-layer character-level recurrent network trained with Adagrad.

    Inputs are one-hot column vectors of size ``vocab_size``; the hidden
    state is a ``(hidden_size, 1)`` column vector passed through ``tanh``.

    Attributes:
        W_ih, W_hh, W_ho: Input-to-hidden, hidden-to-hidden and
            hidden-to-output weight matrices.
        h, o: Hidden and output bias vectors (they are added to the
            pre-activations and updated by ``update_model``; the names are
            kept for compatibility).
        mW_ih, mW_hh, mW_ho, mbh, moh: Adagrad accumulators of squared
            gradients, one per parameter.
    """

    def __init__(self, hidden_size, vocab_size, seq_length, learning_rate):
        """Create the parameters and the Adagrad accumulators.

        Args:
            hidden_size: Number of hidden units.
            vocab_size: Number of distinct input/output symbols.
            seq_length: Nominal number of time steps per training batch.
            learning_rate: Step size used by ``update_model``.
        """
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.learning_rate = learning_rate

        # Symmetric uniform ranges scaled by 1/sqrt(fan_in) of each matrix.
        in_bound = np.sqrt(1. / vocab_size)
        hid_bound = np.sqrt(1. / hidden_size)
        self.W_ih = np.random.uniform(-in_bound, in_bound, (hidden_size, vocab_size))
        self.W_hh = np.random.uniform(-hid_bound, hid_bound, (hidden_size, hidden_size))
        self.W_ho = np.random.uniform(-hid_bound, hid_bound, (vocab_size, hidden_size))

        self.h = np.zeros((hidden_size, 1))
        self.o = np.zeros((vocab_size, 1))

        self.mW_ih = np.zeros_like(self.W_ih)
        self.mW_hh = np.zeros_like(self.W_hh)
        self.mW_ho = np.zeros_like(self.W_ho)
        self.mbh = np.zeros_like(self.h)
        self.moh = np.zeros_like(self.o)

    def soft_max(self, x):
        """Return the softmax of ``x``, shifted by its max to avoid overflow."""
        p = np.exp(x - np.max(x))
        return p / np.sum(p)

    def _one_hot(self, ix):
        """Return a ``(vocab_size, 1)`` one-hot column vector for index ``ix``."""
        x = np.zeros((self.vocab_size, 1))
        x[ix] = 1
        return x

    def _step(self, x, h):
        """Advance the hidden state by one time step for input vector ``x``."""
        return np.tanh(np.dot(self.W_ih, x) + np.dot(self.W_hh, h) + self.h)

    def _generate(self, h, x, n):
        """Sample ``n`` indices, starting from state ``h`` and input ``x``."""
        ixes = []
        for _ in range(n):
            h = self._step(x, h)
            p = self.soft_max(np.dot(self.W_ho, h) + self.o)
            ix = np.random.choice(range(self.vocab_size), p=p.ravel())
            x = self._one_hot(ix)
            ixes.append(ix)
        return ixes

    def forward(self, inputs, hprev):
        """Run the network over ``inputs`` starting from state ``hprev``.

        Args:
            inputs: Sequence of symbol indices.
            hprev: Initial hidden state, shape ``(hidden_size, 1)``.

        Returns:
            ``(xs, hs, ps)`` dicts keyed by time step: one-hot inputs, hidden
            states (``hs[-1]`` is a copy of ``hprev``) and output
            probabilities.
        """
        xs, hs, ps = {}, {}, {}
        hs[-1] = np.copy(hprev)
        for t, ix in enumerate(inputs):
            xs[t] = self._one_hot(ix)
            hs[t] = self._step(xs[t], hs[t - 1])
            ps[t] = self.soft_max(np.dot(self.W_ho, hs[t]) + self.o)

        return xs, hs, ps

    def backward(self, xs, hs, ps, targets):
        """Back-propagate the cross-entropy loss through time.

        Iterates over ``len(targets)`` steps, so a batch shorter than
        ``seq_length`` is handled.

        Returns:
            ``(dW_ih, dW_hh, dW_ho, dbh, dbo)``, each clipped element-wise to
            ``[-5, 5]``.
        """
        dW_ih = np.zeros_like(self.W_ih)
        dW_hh = np.zeros_like(self.W_hh)
        dW_ho = np.zeros_like(self.W_ho)
        dbh, dbo = np.zeros_like(self.h), np.zeros_like(self.o)
        dhnext = np.zeros_like(self.h)
        for i in reversed(range(len(targets))):
            # Gradient of softmax + cross-entropy w.r.t. the output scores.
            dy = np.copy(ps[i])
            dy[targets[i]] -= 1
            dW_ho += np.dot(dy, hs[i].T)
            dbo += dy
            dh = np.dot(self.W_ho.T, dy) + dhnext
            # Back through tanh: d/dz tanh(z) = 1 - tanh(z)^2.
            dhraw = (1 - hs[i] * hs[i]) * dh
            dbh += dhraw
            dW_ih += np.dot(dhraw, xs[i].T)
            dW_hh += np.dot(dhraw, hs[i - 1].T)
            dhnext = np.dot(self.W_hh.T, dhraw)

        # Clip in place to limit exploding gradients.
        for dparameters in [dW_ih, dW_hh, dW_ho, dbh, dbo]:
            np.clip(dparameters, -5, 5, out=dparameters)

        return dW_ih, dW_hh, dW_ho, dbh, dbo

    def loss(self, ps, targets):
        """Return the summed negative log-likelihood of ``targets`` under ``ps``."""
        return sum(-np.log(ps[i][targets[i], 0]) for i in range(len(targets)))

    def update_model(self, dW_ih, dW_hh, dW_ho, dbh, dbo):
        """Apply one Adagrad update to every parameter, in place."""
        for param, dparam, mem in zip([self.W_ih, self.W_hh, self.W_ho, self.h, self.o],
                                      [dW_ih, dW_hh, dW_ho, dbh, dbo],
                                      [self.mW_ih, self.mW_hh, self.mW_ho, self.mbh, self.moh]):
            mem += dparam * dparam
            param += -self.learning_rate * dparam / np.sqrt(mem + 1e-8)

    def sample(self, h, seed_ix, n):
        """Sample ``n`` symbol indices from the model.

        Args:
            h: Starting hidden state, shape ``(hidden_size, 1)``.
            seed_ix: Index of the first input symbol.
            n: Number of indices to draw.

        Returns:
            List of ``n`` sampled indices (the seed itself is not included).
        """
        return self._generate(h, self._one_hot(seed_ix), n)

    def train(self, data_reader, max_iters=None):
        """Train on batches from ``data_reader``.

        The loop stops when the exponentially smoothed loss drops to 0.01 or
        below, or after ``max_iters`` iterations when that is given. Every
        500 iterations a 200-character sample is printed.

        Args:
            data_reader: Object providing ``vocab_size``, ``index_to_char``,
                ``reset_tracker()`` and ``next_batch()``.
            max_iters: Optional cap on the number of iterations; ``None``
                (default) keeps the original run-until-threshold behaviour.

        Returns:
            The final smoothed loss.
        """
        iter_num = 0
        threshold = 0.01
        smooth_loss = -np.log(1.0 / data_reader.vocab_size) * self.seq_length
        # Always defined, even if the reader is not at the start of the text.
        hprev = np.zeros((self.hidden_size, 1))
        while smooth_loss > threshold and (max_iters is None or iter_num < max_iters):
            if data_reader.reset_tracker():
                # New pass over the text: start from a blank hidden state.
                hprev = np.zeros((self.hidden_size, 1))

            inputs, targets = data_reader.next_batch()
            xs, hs, ps = self.forward(inputs, hprev)
            dW_ih, dW_hh, dW_ho, dbh, dbo = self.backward(xs, hs, ps, targets)
            loss = self.loss(ps, targets)
            self.update_model(dW_ih, dW_hh, dW_ho, dbh, dbo)
            smooth_loss = smooth_loss * 0.999 + loss * 0.001
            # Carry the last hidden state actually computed for this batch.
            hprev = hs[len(inputs) - 1]
            if iter_num % 500 == 0:
                sample_ix = self.sample(hprev, inputs[0], 200)

                sample_text = ''.join(data_reader.index_to_char[ix] for ix in sample_ix)
                print(f'{" " * 500}')
                print(f'Iteration: {iter_num}, Loss: {smooth_loss:.4f} ')
                print(f'Sample: {sample_text}')

            iter_num += 1

        return smooth_loss

    def predict(self, data_reader, start, n):
        """Generate text that continues the prompt ``start``.

        The prompt characters are fed through the network in order to build
        up the hidden state, then ``n`` further characters are sampled.

        Args:
            data_reader: Object providing ``char_to_index`` and
                ``index_to_char``.
            start: Prompt string; may be empty.
            n: Number of characters to generate after the prompt.

        Returns:
            The prompt followed by the ``n`` generated characters.

        Raises:
            KeyError: If ``start`` contains a character missing from
                ``data_reader.char_to_index``.
        """
        ixes = [data_reader.char_to_index[ch] for ch in start]

        h = np.zeros((self.hidden_size, 1))
        # Warm up on every prompt character except the last, which becomes
        # the first input of the sampling loop.
        for ix in ixes[:-1]:
            h = self._step(self._one_hot(ix), h)

        if ixes:
            x = self._one_hot(ixes[-1])
        else:
            # No prompt: begin from an all-zero input vector.
            x = np.zeros((self.vocab_size, 1))

        ixes = ixes + self._generate(h, x, n)

        txt = ''.join(data_reader.index_to_char[i] for i in ixes)
        return txt



# Driver: build the character reader for saks.txt and train an RNN on it.
seq_length = 25
data_reader = Data("saks.txt", seq_length)
try:
    rnn = RNN(hidden_size=100, vocab_size=data_reader.vocab_size, seq_length=seq_length, learning_rate=1e-1)
    rnn.train(data_reader)
finally:
    # Release the reader's file handle even if training is interrupted
    # (e.g. by KeyboardInterrupt) or raises.
    data_reader.close_file()
            

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
Iteration: 0, Loss: 104.3597 
Sample: Bhn k'Rl.Nnwsn ppntn!G!dcuzopzprpHMASlR&tr
oon
rXodHx;$tonfsnpo yWE'$Wsprn
:pnwB,k!JJQnn rii:CoP-lW.yQVorzn
:
3cWM.NRn:wnBfGnvSqVNKtenrfnrp oHQgJkOC
en pnff&N.Qv'sepwwpJIBvAUEazf mip:r:wq-cKUJonp annr
                                                                                                                                                                                                                                                                    