<a href="https://colab.research.google.com/github/jordancoil/colab_notebooks/blob/main/RNN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

The basic idea of an RNN is that the hidden activations at the current time step depend on:
- the current input
- the hidden activations from the previous time step

Things to keep in mind:
- for the first time step, what are the previous activations?
  - h_0 is just zeros
- how does "Backpropagation Through Time" work?
  - (Werbos 1974, Rumelhart et al. 1986, Werbos 1990)

History
- Simple RNNs originally from Elman, 1990 (Elman Networks)

In [64]:
# numerically stable sigmoid: https://stackoverflow.com/questions/51976461/optimal-way-of-defining-a-numerically-stable-sigmoid-function-for-a-list-in-pyth
def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)), without overflow.

    ``np.where`` evaluates both of its value arguments for every element, so
    selecting between ``1 / (1 + exp(-x))`` and ``exp(x) / (1 + exp(x))`` still
    computes ``exp`` of large positive numbers and emits overflow / invalid
    warnings. Both branches are therefore expressed through ``exp(-|x|)``,
    which always lies in (0, 1].

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        numpy.ndarray with the same shape as ``x`` (0-d for a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # in (0, 1]; can underflow to 0 but never overflow
    # x >= 0: 1 / (1 + e^-x)        with e^-x == decay
    # x <  0: e^x / (1 + e^x)       with e^x  == decay
    return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

class RNN():
  """Minimal Elman-style recurrent network (forward pass only).

  Uses the row-vector convention:

      h_t = activation_fn(x_t @ W + h_{t-1} @ U)
      y_t = softmax(h_t @ V)

  where the state before the first step is all zeros.

  Attributes:
    W: (input_size, hidden_size) input-to-hidden weights.
    U: (hidden_size, hidden_size) hidden-to-hidden weights.
    V: (hidden_size, output_size) hidden-to-output weights.
    ht: (seq_len, hidden_size) hidden states written by the last forward().
    yt: (seq_len, output_size) output distributions written by the last forward().
    activation_fn: element-wise non-linearity applied to the hidden pre-activation.
  """

  def __init__(self, input_size, seq_len, hidden_size, output_size):
    """Allocate weights and per-step buffers.

    Args:
      input_size: length of each input vector x_t.
      seq_len: maximum number of time steps forward() can process.
      hidden_size: number of hidden units.
      output_size: number of output classes.
    """
    # x_t to hidden_t: Xavier-style uniform init in [-1/sqrt(n), 1/sqrt(n)].
    # The samples must be uniform for the bound to hold; rescaling standard
    # normal samples with the uniform formula leaves them unbounded.
    w_limit = 1.0 / np.sqrt(input_size)
    self.W = np.random.uniform(-w_limit, w_limit, (input_size, hidden_size))

    # hidden_t-1 to hidden_t
    u_limit = 1.0 / np.sqrt(hidden_size)
    self.U = np.random.uniform(-u_limit, u_limit, (hidden_size, hidden_size))

    # hidden_t to y_t (standard normal, unscaled)
    self.V = np.random.randn(hidden_size, output_size)

    # containers for the activations of every time step
    self.ht = np.zeros((seq_len, hidden_size))
    self.yt = np.zeros((seq_len, output_size))

    self.activation_fn = sigmoid


  def forward(self, x):
    """Run the network over one input sequence.

    Args:
      x: array-like of shape (T, input_size) with T no larger than the
        seq_len given at construction.

    Returns:
      View of the first T rows of self.yt; row t is the softmax output for
      time step t.

    Raises:
      ValueError: if the sequence has more steps than the buffers can hold.
    """
    x = np.asarray(x)
    steps = len(x)
    capacity = self.ht.shape[0]
    if steps > capacity:
      raise ValueError(
          "sequence length %d exceeds seq_len %d" % (steps, capacity))

    # Drop results of a previous (possibly longer) sequence.
    self.ht[:] = 0.0
    self.yt[:] = 0.0

    # h_0 is just zeros; kept separate so that x[0] is consumed as well.
    h_prev = np.zeros(self.ht.shape[1])
    for t in range(steps):
      Uh_tsub1 = h_prev @ self.U
      Wx_t = x[t] @ self.W
      self.ht[t] = self.activation_fn(Uh_tsub1 + Wx_t)
      self.yt[t] = self.softmax(self.ht[t] @ self.V)
      h_prev = self.ht[t]

    return self.yt[:steps]


  def backward(self):
    """Backpropagation through time -- not implemented yet; returns None."""
    # TODO
    return

  def softmax(self, x):
    """Softmax over the last axis of x.

    The per-row maximum is subtracted before exponentiating so that large
    logits cannot overflow; this does not change the result.

    Args:
      x: array-like of logits.

    Returns:
      numpy.ndarray of the same shape whose last axis sums to 1.
    """
    x = np.asarray(x, dtype=float)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

In [65]:
import nltk
# Fetch the `treebank` corpus package into the local nltk data directory;
# nltk reports "already up-to-date" when the package is already present.
nltk.download('treebank')

[nltk_data] Downloading package treebank to /root/nltk_data...
[nltk_data]   Package treebank is already up-to-date!


True

In [66]:
# see: https://www.nltk.org/howto/corpus.html
from nltk.corpus import treebank

In [67]:
import itertools
from collections import defaultdict
from nltk.tokenize import TreebankWordTokenizer

# Bound `tokenize` method of a single TreebankWordTokenizer instance;
# create_vocab applies it to each space-joined sentence.
tokenizer = TreebankWordTokenizer().tokenize

def create_vocab(sents, tokenize=None):
  """Build token <-> id lookup tables from a collection of sentences.

  Each sentence is joined with single spaces and passed through `tokenize`;
  every distinct token receives the next free integer id, in order of first
  appearance, starting at 0.

  Args:
    sents: iterable of sentences, each an iterable of strings.
    tokenize: optional callable mapping a string to an iterable of tokens.
      Defaults to the module-level `tokenizer`.

  Returns:
    (token2id, id2token): two plain dicts that are inverses of each other.
    Plain dicts are returned on purpose: with a defaultdict, looking up an
    unseen token would silently allocate a new id and leave id2token stale.
  """
  if tokenize is None:
    tokenize = tokenizer

  token2id = {}
  for sent in sents:
    for token in tokenize(" ".join(sent)):
      # len(token2id) is evaluated before insertion, i.e. the next free id.
      token2id.setdefault(token, len(token2id))

  id2token = {idx: token for token, idx in token2id.items()}

  return token2id, id2token

# Build the vocabulary lookup tables from every sentence in the treebank corpus.
token2id, id2token = create_vocab(treebank.sents())

In [54]:
# Sanity check: the two lookup tables are inverses, so their sizes should match.
len(token2id), len(id2token)

(12409, 12409)

In [55]:
# Longest sentence in the corpus, counted in treebank tokens; passed to the RNN
# as seq_len to size its per-step buffers. `default=0` keeps the previous
# result for an empty corpus.
max_seq_len = max(map(len, treebank.sents()), default=0)

max_seq_len

271

In [None]:
!pip install sentence_transformers

In [62]:
from sentence_transformers import SentenceTransformer

# Load the pretrained sentence-embedding model by name; model.encode() is used
# below to turn text into a vector.
# NOTE(review): the 384 shown as this cell's output is presumably the embedding width -- confirm.
model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')

384

In [63]:
# Vocabulary size; passed to the RNN below as its output dimension.
len(id2token)

12409

In [70]:
# Embed one sample sentence only to discover the embedding width, which is
# used as the RNN input size. The variable was previously assigned as
# `embbedding` but read as `embedding`, which raises NameError on a fresh kernel.
embedding = model.encode(" ".join(treebank.sents()[0]))
embedding_dim = embedding.shape[0]

HIDDEN_SIZE = 16  # number of hidden units

rnn = RNN(embedding_dim, max_seq_len, HIDDEN_SIZE, len(id2token))