# Project: RNN (Recurrent Neural Network) on TensorFlow

In this project, ...
This material is a re-write/repeat of material found in https://www.tensorflow.org/tutorials/sequences/text_generation

## Part 1: Import TensorFlow and Setup

In [3]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf

# TensorFlow's eager execution is an imperative programming environment that evaluates operations immediately, without building graphs: 
# operations return concrete values instead of constructing a computational graph to run later.
tf.enable_eager_execution()
import os
import time
import numpy as np
import matplotlib.pyplot as plt

## Part 2: Load and Inspect the DataSet

In [38]:
# dowenload the Shakespeare dataset to the cache_dir ~/.keras
path_to_file = tf.keras.utils.get_file('shakespeare.txt','https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
print(path_to_file)

# read the dataset in Binary mode (rb)
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
print("The length of text is {} characters".format(len(text)))
# see the first 200 charachters 
print(text[:200])

# understand the unique characters in the text
# get unique charachters in the text using set() and sort them in a list
ch =sorted(set(text))
print("There are {} unique charachters".format(len(ch)))
# type(ch)


/Users/Amir/.keras/datasets/shakespeare.txt
The length of text is 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you
There are 65 unique charachters


list

## Part 3: Vectorize the Dataset

In [46]:
# Vectorize the text by mapping strings to a numerical representation

# using enumerate loop over ch, retrieve both the index and the value of each item, and make a dictionary of it
ch2idx = {u:i for i, u in enumerate(ch)}
print(ch2idx)

# transfrom text to integers
text_as_int = np.array([ch2idx[c] for c in text])
# inspect 
print ('{} mapped to -> {}'.format(repr(text[:13]), text_as_int[:13]))

{'\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, "'": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}
'First Citizen' mapped to -> [18 47 56 57 58  1 15 47 58 47 64 43 52]


## Part 3: Create Training Dataset

In [98]:
# use tf to split data into manageable sequences 
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# make an array of the unique charachters 
idx2ch = np.array(ch)

# print examples of char_dataset; use idx2ch to find equivalent characters of the integers in char_dataset
for i in char_dataset.take(10):
    print(idx2ch[i])

# the maximum length sentence (chunks) we want for a single input in characters
seq_length = 100
# tip: //: divide with integral result (discard remainder)
examples_per_epoch = len(text)//seq_length
print(examples_per_epoch)

# use batch method to convert these individual characters to sequences/chunks of of the desired size (100)
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# print examples of sequences to get an impression 
for i in sequences.take(1):
  print(idx2ch[i.numpy()])
  print(repr(''.join(idx2ch[i.numpy()])))

F
i
r
s
t
 
C
i
t
i
11153
['F' 'i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f'
 'o' 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y'
 ' ' 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' '
 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a'
 'k' ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C'
 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u' ' ']
'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [198]:
# define a function to shift our sequences/chunks to create target_text. For example, say seq_length is 4 and our text is "Hello". 
# The input sequence would be "Hell", and the target sequence "ello"
def split_input_target(piece):
    input_text = piece[:-1]
    target_text = piece[1:]
    return input_text, target_text

# we map our 100 long sequences to the function ro create shifted sequences 
sequences_s = sequences.map(split_input_target)

# decode and print an example of output to get an impression 
for input_example, target_example in sequences_s.take(2):
    print(repr(''.join(idx2ch[input_example.numpy()])))
    print(repr(''.join(idx2ch[target_example.numpy()]))) 


print('')
print(
    "Each index of these vectors are processed as one time step. \
For the input at time step 0, the model receives the index for F and \
trys to predict the index for i as the next character. At the next timestep, \
it does the same thing but the RNN considers the previous step context in addition to \
the current input character.")

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '
're all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'

Each index of these vectors are processed as one time step. For the input at time step 0, the model receives the index for F and trys to predict the index for i as the next character. At the next timestep, it does the same thing but the RNN considers the previous step context in addition to the current input character.


In [199]:
# shuffle the data and pack it into training batches

# Batch size
batch_size = 64
steps_per_epoch = examples_per_epoch//batch_size
print(steps_per_epoch)

# TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. 
# instead,it maintains a buffer in which it shuffles elements)
BUFFER_SIZE = 10000

# batch() will make bathes of batch_size from sequences_s dataset
# shuffle() will allocate a buffer of size of batch_size for picking random entries from sequences_s
sequences_s = sequences_s.shuffle(batch_size).batch(batch_size , drop_remainder=True)
sequences_s

174


<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

## Part 3: Build the RNN Model
Use tf.keras.Sequential to define the model. For this  example three layers are used to define our model:

- tf.keras.layers.Embedding: The input layer. A trainable lookup table that will map the numbers of each character to a vector with embedding_dim dimensions;
- tf.keras.layers.GRU: A type of RNN with size units=rnn_units 
- tf.keras.layers.Dense: The output layer, with vocab_size outputs.

In [200]:
# Set the model parameters 
# Length of the vocabulary in chars
vocab_size = len(ch)
# The embedding dimension
embedding_dim = 256
# Number of RNN units
rnn_units = 1024

In [201]:
# Use CuDNNGRU if running on GPU.
if tf.test.is_gpu_available():
  rnn = tf.keras.layers.CuDNNGRU
else:
  import functools
  rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [202]:
# Define the RNN model

# Keras is a high-level API to build and train deep learning models
# tf.keras.Sequential: https://www.tensorflow.org/api_docs/python/tf/keras/models/Sequential#class_sequentialdef build_model(vocab_size, embedding_dim, rnn_units, batch_size):
# Word Embedding is collective term for models that learned to map a set of words or phrases in a vocabulary to vectors of numerical values.
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([tf.keras.layers.Embedding(vocab_size, embedding_dim,batch_input_shape=[batch_size, None]),
                                 rnn(rnn_units,return_sequences=True,recurrent_initializer='glorot_uniform',stateful=True),
                                 tf.keras.layers.Dense(vocab_size)])
    return model


In [203]:
# from PIL import Image                                                                                
# img = Image.open('text_generation_training.png')
# print("# For each character the model looks up the embedding, runs the GRU one timestep with the embedding as input, and applies the dense layer to generate logits predicting the log-liklihood of the next character:")
# img.show()

In [208]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)

In [209]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (64, None, 256)           16640     
_________________________________________________________________
gru_8 (GRU)                  (64, None, 1024)          3935232   
_________________________________________________________________
dense_8 (Dense)              (64, None, 65)            66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


In [211]:
# Try the "untrained" model

# check the shape of the output
for input_example, target_example in sequences_s.take(1):
    target_example_prediction = model(input_example)
    print(target_example_prediction.shape,"# (batch_size, sequence_length, vocab_size)")

# try it for the first example in the batch    
sampled_indices = tf.random.categorical(target_example_prediction[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


## Part 4: 