In [52]:
import numpy as np

In [35]:
class RNN:
    def __init__(self, hidden_size, vocab_size):
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.Wxh = np.random.randn(vocab_size, self.hidden_size) * 0.01
        self.Whh = np.random.randn(self.hidden_size, self.hidden_size) * 0.01
        self.Why = np.random.randn(self.hidden_size, vocab_size) * 0.01
        self.bh = np.zeros((self.hidden_size, 1))
        self.by = np.zeros((vocab_size, 1))

    def forward(self, inputs, h_prev):
        h_next = np.tanh(np.dot(self.Wxh.T, inputs) + np.dot(self.Whh, h_prev) + self.bh) #hidden_size x 1
        y = np.dot(self.Why.T, h_next) + self.by # vocab_size x 1
        probs = np.exp(y) / np.sum(np.exp(y))
        return probs, h_next


    def train(self, inputs, targets, epochs, lr):
      inputs_one_hot = [self.one_hot_encode(c) for c in inputs]
      targets_one_hot = [self.one_hot_encode(c) for c in targets]

      loss_history = []

      for epoch in range(epochs):
        h_prev = np.zeros((self.hidden_size, 1))
        loss = 0

        for t in range(len(inputs)):
          x = inputs_one_hot[t]
          target = targets_one_hot[t]

          p, h_prev = self.forward(x, h_prev)

          target_idx = np.where(target == 1)[0]

          loss += -np.log(p[target_idx])

          dL_dy = p #vocab_size x 1
          dL_dy[target_idx] -= 1


          dL_dWhy = np.dot(h_prev, dL_dy.T) #hidden_size x vocab_size
          dL_dby = np.copy(dL_dy) #vocab_size x 1

          dL_dh = np.dot(dL_dWhy, dL_dby) #hidden_size x 1
          dL_dh_raw = (1 - h_prev * h_prev) * dL_dh

          dL_dWxh = np.dot(x, dL_dh_raw.T) #vocab x hidden

          dL_dWhh = np.dot(h_prev, dL_dh.T)
          dL_dbh = dL_dh_raw

          self.Wxh -= lr * dL_dWxh
          self.Whh -= lr * dL_dWhh
          self.Why -= lr * dL_dWhy
          self.bh -= lr * np.sum(dL_dbh, axis=1, keepdims=True)
          self.by -= lr * np.sum(dL_dby, axis=1, keepdims=True)

        loss_history.append(loss/len(inputs))
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss_history[-1]}")
      return loss_history

    def sample(self, seed, length):
      x = self.one_hot_encode(seed)
      h_prev = np.zeros((self.hidden_size, 1))
      output = seed

      for _ in range(length):
        p, h_prev = self.forward(x, h_prev)

        idx = np.random.choice(range(self.vocab_size), p = p.ravel())
        char = self.index_to_char[idx]
        output += char
        x = self.one_hot_encode(char)
      return output

    def one_hot_encode(self, char):
      vector = np.zeros((self.vocab_size, 1))
      vector[self.char_to_index[char]] = 1

      return vector

In [51]:
input_chars = ['h', 'e', 'l', 'p']
target_chars = ['e', 'l', 'p', '!']

chars = list(set(input_chars + target_chars))
char_to_index = {ch: i for i, ch in enumerate(chars)}
index_to_char = {i: ch for i, ch in enumerate(chars)}

hidden_size = 100
vocab_size = len(chars)
learning_rate = 0.1
num_epochs = 100

rnn = RNN(hidden_size, vocab_size)
rnn.char_to_index = char_to_index
rnn.index_to_char = index_to_char
loss_history = rnn.train(input_chars, target_chars, num_epochs, learning_rate)

seed = 'h'
generated_sequence = rnn.sample(seed, length=4)
print(f"Generated Sequence: {generated_sequence}")

Epoch 1/100, Loss: [[1.63955575]]
Epoch 2/100, Loss: [[1.62120632]]
Epoch 3/100, Loss: [[1.60552031]]
Epoch 4/100, Loss: [[1.59198321]]
Epoch 5/100, Loss: [[1.58020636]]
Epoch 6/100, Loss: [[1.56988768]]
Epoch 7/100, Loss: [[1.56078797]]
Epoch 8/100, Loss: [[1.55271535]]
Epoch 9/100, Loss: [[1.54551431]]
Epoch 10/100, Loss: [[1.53905769]]
Epoch 11/100, Loss: [[1.53324072]]
Epoch 12/100, Loss: [[1.52797648]]
Epoch 13/100, Loss: [[1.52319239]]
Epoch 14/100, Loss: [[1.51882748]]
Epoch 15/100, Loss: [[1.51483026]]
Epoch 16/100, Loss: [[1.51115699]]
Epoch 17/100, Loss: [[1.50777037]]
Epoch 18/100, Loss: [[1.50463842]]
Epoch 19/100, Loss: [[1.50173358]]
Epoch 20/100, Loss: [[1.49903205]]
Epoch 21/100, Loss: [[1.49651315]]
Epoch 22/100, Loss: [[1.49415886]]
Epoch 23/100, Loss: [[1.49195342]]
Epoch 24/100, Loss: [[1.48988299]]
Epoch 25/100, Loss: [[1.48793538]]
Epoch 26/100, Loss: [[1.48609981]]
Epoch 27/100, Loss: [[1.48436675]]
Epoch 28/100, Loss: [[1.48272768]]
Epoch 29/100, Loss: [[1.48117