<a href="https://colab.research.google.com/github/4sunshine/dl-unn.github.io/blob/master/practice/rnn_lecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, SimpleRNN, Dense, TimeDistributed, Softmax, GRU
from tensorflow.keras.models import Sequential

In [2]:
# MAIN GOAL OF THIS TASK IS TO GET A STABLE PREDICTION OF 'HELLO'
# FROM SINGLE SYMBOL 'H' MODEL INPUT
# ANY YOUR IDEAS ARE WELCOME

tf.random.set_seed(42)

ALL_TEXTS = ['HELLO']

vocab = sorted({char for word in ALL_TEXTS for char in word})
print(vocab)

id2char = {i: s for i, s in enumerate(vocab)}
char2id = {s: i for i, s in enumerate(vocab)}

def onehot(char):
  # NP.ARRAY [VOCAB_SIZE] ex. [0, 0, 0, 1]
  return np.array([1 if char2id[char]==i else 0 for i in range(len(vocab))])
  
def text2hots(text):
  # NP.ARRAY [NUM LETTERS, VOCAB_SIZE] ex. [[1, 0, 0, 0], [0, 1, 0, 0]]
  return np.array([onehot(char) for char in text], dtype=np.float64)

['E', 'H', 'L', 'O']


In [3]:
def build_model(vocab_size, HIDDEN_SIZE=3):
  # YOU CAN PLAY WITH HIDDEN_SIZE
  # OR USE LSTM instead of SimpleRNN
  # OR USE different ACTIVATIONS, play with BIAS
  # USE EMBEDDING LAYER BEFORE RNN
  # PUT THE SECOND RNN HERE

  model = Sequential()

  # RNN_INPUT: [BATCH_SIZE=1, NUM_LETTERS (ARBITRARY), VOCABULARY SIZE]
  model.add(SimpleRNN(HIDDEN_SIZE, input_shape=(None, vocab_size),
                      return_sequences=True,
                      use_bias=True,
                      activation='tanh'))
  # FULLY CONNECTED LAYER
  # RETURNS LOGITS TENSOR [BATCH_SIZE=1, NUM_LETTERS, VOCABULARY_SIZE]
  model.add(Dense(vocab_size))
  model.compile()
  model.summary()
  return model

def generate_text(model, start_symbol):
  text = start_symbol
  hot = text2hots(start_symbol)

  # CONVERT TO SHAPE [1 (BATCH_SIZE), NUM_LETTERS (TIMESTEPS),
  # X_VECTOR_SIZE (VOCAB_SIZE)]
  hot = np.expand_dims(hot, 0)

  model.reset_states()
  for _ in range(4):
    prediction = model(hot)

    # REMOVE BATCH DIMENSION
    prediction = tf.squeeze(prediction, 0)

    # ID OF LETTER IN VOCABULARY
    predicted_id = tf.math.argmax(prediction, axis=-1).numpy()[0]

    predicted_symbol = id2char[predicted_id]
    text += predicted_symbol

    # ONE-HOT ENCODE LAST PREDICTED SYMBOL
    hot = text2hots(predicted_symbol)

    # CONVERT TO SHAPE [1 (BATCH_SIZE), NUM_LETTERS (TIMESTEPS),
    # X_VECTOR_SIZE (VOCAB_SIZE)]
    hot = np.expand_dims(hot, 0)
  return text

# YOU CAN CREATE YOUR OWN TRAINING PIPELINE SPECIAL FOR THIS TASK
# e.g. PREDICT TARGET LETTER IN FOR LOOP
@tf.function
def train_step(model, opt, inp, target):
    with tf.GradientTape() as tape:
        predictions = model(inp)

        # CALCULATE PREDICTED SEQUENCE LOSS
        loss = tf.reduce_mean(
            tf.keras.losses.categorical_crossentropy(target,
                                                     predictions,
                                                     from_logits=True),
                              axis=-1)

    # CALCULATE GRADIENTS FROM LOSS FUNCTION        
    grads = tape.gradient(loss, model.trainable_variables)
    # APPLY GRADIENTS TO MODEL
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

In [4]:
# OPTIMIZER
# YOU CAN CHANGE OPTIMIZER AND PLAY WITH ITS LEARNING RATE
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)

# OUR DATASET
text_input = 'HELL'
text_output = 'ELLO'

# YOU CAN MODIFY DATASET WITH SHIFTED SEQUENCES LIKE
# input 'ELLO', target 'LLOH'
# input 'LLOH', target 'LOHE' etc.

model = build_model(len(vocab))

for epoch in range(50):
  # CURRENTLY EPOCH CONSISTS ONLY OF 1 TRAINING EXAMPLE

  # RESET INITIAL HIDDEN STATE
  model.reset_states()
  
  # X AND Y TO ONE-HOT WITH SHAPE [1, NUM_LETTERS, LEN_VOCAB]
  x = np.expand_dims(text2hots(text_input), 0)

  y = np.expand_dims(text2hots(text_output), 0)

  # TRAIN MODEL WITH ONE STEP
  loss = train_step(model, optimizer, x, y)

  # VALIDATE MODEL
  if epoch % 2 == 0:
    print(f'epoch: {epoch}, loss: {loss.numpy()}')
    text = generate_text(model, 'H')
    print(text)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, None, 3)           24        
_________________________________________________________________
dense (Dense)                (None, None, 4)           16        
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________
loss: [1.8906832]
HLEEE
loss: [1.4843457]
HLELE
loss: [1.2807729]
HLLLL
loss: [1.0895083]
HLLLL
loss: [0.9175042]
HLOLO
loss: [0.79906666]
HLOLO
loss: [0.69465196]
HLOLO
loss: [0.6000497]
HLLLL
loss: [0.51886374]
HLLLL
loss: [0.45230272]
HELLL
loss: [0.39407352]
HELLL
loss: [0.3351027]
HELLL
loss: [0.27923965]
HELLL
loss: [0.23096496]
HELLL
loss: [0.19076142]
HELLL
loss: [0.15795903]
HELLL
loss: [0.13147989]
HELLL
loss: [0.11020262]
HELLL
loss: [0.09313775]
HELOE
loss: [0.07945786]
HELOE
loss: [0.06847647]
HELOE
loss