In [22]:
import tensorflow as tf
from tensorflow.keras import datasets, preprocessing, models, layers, optimizers, losses
import matplotlib.pyplot as plt
import numpy as np
import time
import os

In [23]:
# Read the Star Wars Episode VI script as UTF-8 text.
# The context manager closes the file handle as soon as the bytes are read;
# binary mode plus an explicit decode keeps the line endings exactly as stored.
with open('data/star_wars_episode_vi.txt', 'rb') as script_file:
    text = script_file.read().decode(encoding='utf8')
print(text[:250])

SHUTTLE CAPTAIN: Command station, this is ST 321. Code Clearance Blue. We're starting our approach. Deactivate the security shield.DEATH STAR CONTROLLER: The security deflector shield will be deactivated when we have confirmation of your code transmi


In [24]:
# Vocabulary: every distinct character of the script, in sorted order
vocab = sorted(set(text))

# Lookup tables: character -> integer id, and integer id -> character
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# Encode the whole script as an array of integer ids and preview the first 250
text_as_int = np.array(list(map(char2idx.__getitem__, text)))
print(text_as_int[:250])

[38 27 40 39 39 31 24  1 22 20 35 39 20 28 33 17  1 22 60 58 58 46 59 49
  1 64 65 46 65 54 60 59  6  1 65 53 54 64  1 54 64  1 38 39  1 13 12 11
  8  1 22 60 49 50  1 22 57 50 46 63 46 59 48 50  1 21 57 66 50  8  1 42
 50  5 63 50  1 64 65 46 63 65 54 59 52  1 60 66 63  1 46 61 61 63 60 46
 48 53  8  1 23 50 46 48 65 54 67 46 65 50  1 65 53 50  1 64 50 48 66 63
 54 65 70  1 64 53 54 50 57 49  8 23 24 20 39 27  1 38 39 20 37  1 22 34
 33 39 37 34 31 31 24 37 17  1 39 53 50  1 64 50 48 66 63 54 65 70  1 49
 50 51 57 50 48 65 60 63  1 64 53 54 50 57 49  1 68 54 57 57  1 47 50  1
 49 50 46 48 65 54 67 46 65 50 49  1 68 53 50 59  1 68 50  1 53 46 67 50
  1 48 60 59 51 54 63 58 46 65 54 60 59  1 60 51  1 70 60 66 63  1 48 60
 49 50  1 65 63 46 59 64 58 54]


In [25]:
# Set maximum sequence length
seq_len = 100
# Each training example consumes seq_len + 1 characters (the inputs plus the
# one-step-shifted targets), so the number of examples is the character count
# divided by that chunk size, not the raw character count.
examples_per_epoch = len(text) // (seq_len + 1)

# Create training examples and targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Group the stream of character ids into chunks of seq_len + 1 ids;
# drop_remainder=True discards the final chunk if it is shorter than that.
sequences = char_dataset.batch(seq_len+1, drop_remainder=True)
for item in sequences.take(5):
    # Convert the tensor to a NumPy array before using it as an index into idx2char
    print(repr(''.join(idx2char[item.numpy()])))

"SHUTTLE CAPTAIN: Command station, this is ST 321. Code Clearance Blue. We're starting our approach. D"
'eactivate the security shield.DEATH STAR CONTROLLER: The security deflector shield will be deactivate'
'd when we have confirmation of your code transmission. Stand by... You are clear to proceed.\nSHUTTLE '
"CAPTAIN: We're starting our approach.\nOFFICER: Inform the commander that Lord Vader's shuttle has arr"
"ived.\nOPERATOR: Yes, sir.\nJERJERROD: Lord Vader, this is an unexpected pleasure.  We're honored by yo"


In [26]:
# Every chunk yields an (input, target) pair that is offset by one position
def split_input_target(chunk):
    """Split a chunk into its input and target halves.

    Returns a 2-tuple: ``chunk`` without its last element (the input) and
    ``chunk`` without its first element (the target).
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair
dataset = sequences.map(split_input_target)

# Decode the first pair back to text to check the one-character shift
for input_example, target_example in dataset.take(1):
    input_repr = repr(''.join(idx2char[input_example]))
    target_repr = repr(''.join(idx2char[target_example]))
    print("Input example: {}".format(input_repr))
    print("Target example: {}".format(target_repr))

Input example: "SHUTTLE CAPTAIN: Command station, this is ST 321. Code Clearance Blue. We're starting our approach. "
Target example: "HUTTLE CAPTAIN: Command station, this is ST 321. Code Clearance Blue. We're starting our approach. D"


In [27]:
# Shuffle the (input, target) pairs and group them into fixed-size training batches
BATCH_SIZE = 64
BUFFER_SIZE = 10000

shuffled_pairs = dataset.shuffle(BUFFER_SIZE)
# drop_remainder=True discards a final batch smaller than BATCH_SIZE
dataset = shuffled_pairs.batch(BATCH_SIZE, drop_remainder=True)
dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [28]:
# Model hyperparameters
vocab_size = len(vocab)
embedding_dimension = 256
rnn_units = 1024


def build_model(vocab_size, embedding_dimension, rnn_units, batch_size):
    """Assemble the character model: Embedding -> stateful GRU -> Dense.

    Args:
        vocab_size: size of the embedding table and of the Dense output layer.
        embedding_dimension: width of the embedding vectors.
        rnn_units: number of GRU units.
        batch_size: batch size given to the Embedding layer via
            ``batch_input_shape=[batch_size, None]``.

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` model. The Dense layer is
        created without an activation argument.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dimension,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.models.Sequential([embedding, recurrent, projection])

# Instantiate the training model with the training batch size and show its layers
model = build_model(
    vocab_size=vocab_size,
    embedding_dimension=embedding_dimension,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (64, None, 256)           18432     
_________________________________________________________________
gru_4 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_4 (Dense)              (64, None, 72)            73800     
Total params: 4,030,536
Trainable params: 4,030,536
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Train model
def loss(labels, logits):
    """Sparse categorical cross-entropy between ``labels`` and ``logits``.

    ``from_logits=True`` is passed, so ``logits`` are treated as raw,
    unnormalised scores rather than probabilities.
    """
    per_step_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    return per_step_loss

model.compile(loss=loss, optimizer='adam')

# Checkpoint callback: saves weights only, under
# ./training_checkpoints/ckpt_<epoch number>
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

In [30]:
# Train for 50 epochs with the checkpoint callback attached
history = model.fit(
    dataset,
    epochs=50,
    callbacks=[checkpoint_callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [31]:
# Rebuild model with batch size = 1 and restore the most recent training checkpoint
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    # latest_checkpoint() returns None when the directory holds no checkpoint.
    # Fail here, before `model` is replaced, instead of passing None to load_weights().
    raise FileNotFoundError(
        "No checkpoint found in {!r}; run the training cell first.".format(checkpoint_dir))

model = build_model(vocab_size, embedding_dimension, rnn_units, batch_size=1)
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (1, None, 256)            18432     
_________________________________________________________________
gru_5 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_5 (Dense)              (1, None, 72)             73800     
Total params: 4,030,536
Trainable params: 4,030,536
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Generate text from model predictions
def generate_text(model, start_string, num_generate=1000, temperature=0.5):
    """Sample characters from ``model`` one at a time, seeded with ``start_string``.

    Args:
        model: model that is called on a tensor of character ids with a leading
            batch dimension of 1 and that provides ``reset_states()``.
        start_string: non-empty seed text; every character must be a key of
            the module-level ``char2idx`` mapping.
        num_generate: number of characters to sample (default 1000).
        temperature: positive value the logits are divided by before sampling
            (default 0.5). Smaller values sharpen the distribution, larger
            values flatten it.

    Returns:
        ``start_string`` followed by the sampled characters.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not > 0.
        KeyError: if ``start_string`` contains a character missing from ``char2idx``.
    """
    # Validate up front: an empty seed gives the model nothing to predict from,
    # and a non-positive temperature makes the logit scaling meaningless.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be greater than 0")

    # Report every unknown character at once instead of failing on the first one
    unknown_chars = sorted(set(start_string) - set(char2idx))
    if unknown_chars:
        raise KeyError(
            "start_string contains characters outside the vocabulary: {!r}".format(unknown_chars))

    # Vectorize start string and add the batch dimension
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak into this one
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits by the temperature, then sample from the resulting
        # categorical distribution; [-1, 0] picks the sample for the last time step
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Pass the predicted character as the next input to the model;
        # the stateful GRU keeps the previous hidden state between calls
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# Ask for a seed string, then print the seed followed by the generated continuation
input_string = input("Input your text: ")
generated_script = generate_text(model, start_string=input_string)
print(generated_script)

Input your text: OFFICER
OFFICER: You want the mound the dorks are you will ne to know the fire come on the conitht my father.
VADER: It is will come of the shield sere.
HAN: What's gonna net it it they don't the oll the Emperor will nee go stack as the to seep that thing.
THREEPIO: I am the sait in the pleet.
VADER: Your mestroy here awer angere and to the matien fire of the ranged har.
THREEPIO: Oh, yes, my Master Luke!  We're now the Emperor has a reade at the farce and this.
HAN: We ale you all the stickn's be a proined be not to all refurate ald I have the fliends wan you and the dest.
HAN: We're gon a get a Jedi's come on the macker stack of the parress and thing. That shield he wirn you will you are your father has for a gith they den't you a Jedi's all time. I can te the going to the reach power are a gring a preath the pare of the dighter sourte.
THREEPIO: Oh, my you're of the eas.
BEN: You're got in the surce is staiding a fill bittle with your dething with a Jedi will sime. 