<a href="https://colab.research.google.com/github/annamaartensson/dd2424project/blob/issue%2F2b/rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

In [33]:
class AdaGrad:

  def __init__(self, eta, grads_acc):
    self.eta = eta
    self.grads_acc = grads_acc

  def step(self, field, grads):
    tf.clip_by_value(getattr(grads, field), clip_value_min = -5, clip_value_max = 5)
    getattr(self.grads_acc, field).assign(getattr(self.grads_acc, field) + tf.math.square(getattr(grads, field)))
    return -self.eta*tf.math.divide(getattr(grads, field), tf.math.sqrt(getattr(self.grads_acc, field)+1e-8))

In [34]:
class RNN:

  class Grads:

    def __init__(self, m, K):
      self.b = tf.Variable(tf.zeros_initializer()(shape = (m)))
      self.c = tf.Variable(tf.zeros_initializer()(shape = (K)))
      self.U = tf.Variable(tf.zeros_initializer()(shape = (m, K)))
      self.W = tf.Variable(tf.zeros_initializer()(shape = (m, m)))
      self.V = tf.Variable(tf.zeros_initializer()(shape = (K, m)))

    def compute(self):
      pass

  def __init__(self, data, seq_length, m, K, sig):
    self.data = data
    self.seq_length = seq_length
    self.m = m
    self.K = K
    self.b = tf.Variable(tf.zeros_initializer()(shape = (m)))
    self.c = tf.Variable(tf.zeros_initializer()(shape = (K)))
    self.U = tf.Variable(tf.random_normal_initializer(mean=0.0, stddev=sig)(shape = (m, K)))
    self.W = tf.Variable(tf.random_normal_initializer(mean=0.0, stddev=sig)(shape = (m, m)))
    self.V = tf.Variable(tf.random_normal_initializer(mean=0.0, stddev=sig)(shape = (K, m)))
    self.hprev = tf.Variable(tf.zeros_initializer()(shape = (m)))

  def forwardPass(self, X, Y):
      P = tf.Variable(tf.zeros_initializer()(shape = (self.K, self.seq_length)))
      H = tf.Variable(tf.zeros_initializer()(shape = (self.m, self.seq_length+1)))
      A = tf.Variable(tf.zeros_initializer()(shape = (self.m, self.seq_length)))
      H[:,0].assign(self.h0)
      for t in range(self.seq_length):
        A[:,t].assign(tf.tensordot(self.W, H[:,t], 1) + tf.tensordot(self.U, X[:,t], 1) + self.b)
        H[:,t+1].assign(tf.math.tanh(A[:,t]))
        P[:,t].assign(tf.raw_ops.Softmax(logits = (tf.tensordot(self.V, H[:,t+1], 1) + self.c)))
      return P, H, A

  def backwardPass(self, X, Y, P, H, A):
    grads = RNN.Grads(self.m, self.K)

    G = tf.transpose(Y-P) * -1

    #dL/dc
    grads.c.assign(tf.reduce_sum(G, axis=0))

    #dL/dV
    for i in range(self.seq_length):
      grads.V.assign(grads.V+tf.tensordot(G[i,:],H[:,i],0))

    #dL/dh and dL/da, in grad_H and grad_A each row represents a timestep
    grad_H = tf.Variable(tf.zeros_initializer()(shape = (self.seq_length, self.m)))
    grad_A = tf.Variable(tf.zeros_initializer()(shape = (self.seq_length, self.m)))
    grad_H[-1,:].assign(tf.tensordot(tf.reshape(G[-1,:], (1,-1)),self.V,1))
    for i in range(self.seq_length):
      grad_A[-i,:].assign(tf.tensordot(tf.reshape(grad_H[-i,:],(1,-1)),tf.linalg.diag(tf.ones(self.m)-tf.math.square(tf.math.tanh(A[:,-i]))),1))
      grad_H[-i-1,:].assign(tf.tensordot(tf.reshape(G[-i-1,:], (1,-1)),self.V,1)+tf.tensordot(tf.reshape(grad_A[-i,:], (1,-1)),self.W,1))
    grad_A[0,:].assign(tf.tensordot(tf.reshape(grad_H[0,:],(1,-1)),tf.linalg.diag(tf.ones(self.m)-tf.math.square(tf.math.tanh(A[:,0]))),1))

    #dL/db
    grads.b.assign(tf.reduce_sum(grad_A, axis=0))

    #dL/dW
    for i in range(self.seq_length):
      grads.W.assign(grads.W+tf.tensordot(grad_A[i,:],H[:,i],0))

    #dL/dU
    for i in range(self.seq_length):
      grads.U.assign(grads.U+tf.tensordot(grad_A[i,:],X[:,i],0))

    return grads

  def loss(self, Y, P):
    L = 0
    for t in range(self.seq_length):
        L -= tf.tensordot(Y[:,t], tf.math.log(P[:,t]), 1)
    return L

  def train(self, eta, steps):
    e = 0
    grads_acc = RNN.Grads(self.m, self.K)
    optimizer = AdaGrad(eta, grads_acc)
    for iter in range(steps):
      X = self.data[:,e:e+self.seq_length]
      Y = self.data[:,e+1:e+self.seq_length+2]
      P, H, A = self.forwardPass(X)
      L = self.loss(Y, P)
      print(f"iter = {iter}, loss = {L}")
      grads = self.backwardPass(X, Y, P, H, A)
      for field in vars(grads):
        getattr(self, field).assign(getattr(self, field)+optimizer.step(field, grads))
      e = e+1
      if e > len(self.data)-self.seq_length-2:
        e = 0
        self.hprev.assign(tf.zeros((m)))
      else:
        self.hprev.assign(H[:,self.seq_length])

In [14]:
#path_to_file = tf.keras.utils.get_file('prideandprejudice.txt', 'https://www.gutenberg.org/cache/epub/42671/pg42671.txt')
#text = list(open(path_to_file, 'rb').read().decode(encoding='utf-8'))
text = list(open('goblet_book.txt', 'rb').read().decode(encoding='utf-8'))


In [20]:
vocabulary = sorted(set(text))
char_to_ind = tf.keras.layers.StringLookup(vocabulary = list(vocabulary), mask_token = None)
ind_to_char = tf.keras.layers.StringLookup(vocabulary = char_to_ind.get_vocabulary(), invert = True, mask_token = None)

In [29]:
seq_length = 25
m = 100
K = len(vocabulary)
sig = 0.01
data = tf.transpose(tf.one_hot(char_to_ind(text), K))

In [35]:
rnn = RNN(data, seq_length, m, K, sig)
rnn.train(0.1, 10)
#X = data[:,0:seq_length]
#Y = data[:,1:seq_length+1]
#P, H, A = rnn.forwardPass(X)
#print(rnn.loss(Y, P))

iter = 0, loss = 109.54740142822266
iter = 1, loss = 109.54781341552734
iter = 2, loss = 109.54824829101562
iter = 3, loss = 109.54658508300781
iter = 4, loss = 109.54887390136719
iter = 5, loss = 109.5482177734375
iter = 6, loss = 109.54767608642578
iter = 7, loss = 109.54552459716797
iter = 8, loss = 109.54651641845703
iter = 9, loss = 109.54473114013672


In [18]:
x = 1e-8
print(type(x))

<class 'float'>
