## This piece of code is used to enable using tf.random.categorical

In [0]:
!pip install -q tf-nightly

In [0]:
import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import os
import time

## First, we read the text data and then creat its corresponding vocabulary containig its unique characters. After that two mappings are introduced to convert each character to its index in the dictionary and vice versa.
## Last line has the responsibility to convert the whole text to a its index form, i.e. each char is represented by a number in text_as_int.

In [0]:
text = open('Book.txt').read()
vocab = sorted(set(text))
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

## Here we define 40 character window for each sentence and number of examples per epoch based on that

In [0]:
# The maximum length sentence we want for a single input in characters
seq_length = 40
examples_per_epoch = len(text)//seq_length

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

## The batch method lets us easily convert these individual characters to sequences of the desired size.

In [0]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

## For each sequence, I duplicate and shift it to form the input and target text by using the map to apply a function to each batch:

In [0]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

## Shuffling and packing data into batches

In [0]:
# Batch size 
BATCH_SIZE = 64
steps_per_epoch = examples_per_epoch//BATCH_SIZE

BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

## Defining the model using Keras and testing whether there is GPU available (since I am using Google Colab)
## I use GRU with sigmoid activation function.

In [0]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
rnn_units = 128

if tf.test.is_gpu_available():
  rnn = tf.keras.layers.CuDNNGRU
else:
  import functools
  rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

## tf.keras.layers.Dense: The output layer, with vocab_size outputs.

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, 
                              batch_input_shape=[batch_size, None]),
    rnn(rnn_units,
        return_sequences=True, 
        recurrent_initializer='glorot_uniform',
        stateful=True),
    tf.keras.layers.Dense(vocab_size)
  ])
  return model

## For each character the model looks up the embedding, runs the GRU one timestep with the embedding as input, and applies the dense layer to generate logits predicting the log-liklihood of the next character

In [10]:
model = build_model(
  vocab_size = len(vocab), 
  embedding_dim=embedding_dim, 
  rnn_units=rnn_units, 
  batch_size=BATCH_SIZE)

Instructions for updating:
Colocations handled automatically by placer.


In [11]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           19712     
_________________________________________________________________
gru (GRU)                    (64, None, 128)           147840    
_________________________________________________________________
dense (Dense)                (64, None, 77)            9933      
Total params: 177,485
Trainable params: 177,485
Non-trainable params: 0
_________________________________________________________________


# Training
We use the previous RNN state, and the input of this time step to predict the class of the next character.

We also define the loss function.

In [0]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

## Configuring the training procedure using the Model.compile method

In [0]:
learning_rate = 0.01
model.compile(optimizer = tf.train.RMSPropOptimizer(learning_rate), loss = loss)

## Saving Checkpoints during training.

In [0]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [0]:
EPOCHS=20

In [16]:
history = model.fit(dataset.repeat(), epochs=EPOCHS, steps_per_epoch=steps_per_epoch, callbacks=[checkpoint_callback])

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Generating Text

## Restoring the latest checkpoint

### Tensorflow note: 
Because of the way the RNN state is passed from timestep to timestep, the model only accepts a fixed batch size once built.

To run the model with a different batch_size, we need to rebuild the model and restore the weights from the checkpoint.

In [17]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_20'

In [0]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

In [23]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            19712     
_________________________________________________________________
gru_2 (GRU)                  (1, None, 128)            147840    
_________________________________________________________________
dense_2 (Dense)              (1, None, 77)             9933      
Total params: 177,485
Trainable params: 177,485
Non-trainable params: 0
_________________________________________________________________


# The prediction loop

## The following code block generates the text:

### It Starts by choosing a start string, initializing the RNN state and setting the number of characters to generate.

### Get the prediction distribution of the next character using the start string and the RNN state.

### Then, use a multinomial distribution to calculate the index of the predicted character. Use this predicted character as our next input to the model.

### The RNN state returned by the model is fed back into the model so that it now has more context, instead than only one word. After predicting the next word, the modified RNN states are again fed back into the model, which is how it learns as it gets more context from the previously predicted words.

In [0]:
def generate_text(model, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 400

  # You can change the start string to experiment
  start_string = 'It is a truth univer'

  # Converting our start string to numbers (vectorizing) 
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
  text_generated = []

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 1.0

  # Here batch size == 1
  model.reset_states()
  for i in range(num_generate):
      predictions = model(input_eval)
      # remove the batch dimension
      predictions = tf.squeeze(predictions, 0)

      # using a multinomial distribution to predict the word returned by the model
      predictions = predictions / temperature
      predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()
      
      # We pass the predicted word as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)
      
      text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [25]:
print(generate_text(model, start_string="Why, my dear, you mu"))

Instructions for updating:
Use tf.random.categorical instead.
It is a truth univernes of his aspection, well, the same own hon he felt
I could hape-ply reparing him into the leasure.”

Elizabeth of there was. It mintinged. The who length; which and though that
his partia here most an is unnewisture, you will an, eless. We could nature
And the prodeigabld undest,
as she were having to thinkinable monumple, or a compart it
was the now prode cir
this
cothers prompt impromant of Mr
