In [1]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np
from tensorflow import keras

In [2]:
# Dataset: fetch the Shakespeare corpus; get_file returns the local path of the downloaded file
path = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [3]:
# Read the contents of the file

# Open in binary mode and decode explicitly as UTF-8 (read-then-decode keeps py2 compat);
# the context manager closes the file handle as soon as the bytes have been read.
with open(path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

print('Length of text: {} characters'.format(len(text))) # length of text is the number of characters in it

Length of text: 1115394 characters


In [4]:
# Take a look at the first 250 characters of the text
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



Encoding

In [5]:
# unique characters of the corpus in sorted order; a character's position is its index
vocab = sorted(set(text))
# lookup tables: character -> index (dict) and index -> character (array)
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

def text_to_int(text):
    """Encode a string as a NumPy array of vocabulary indices.

    Each character is looked up in the module-level ``char2idx`` mapping, so a
    character that is not in the vocabulary raises ``KeyError``.
    """
    indices = []
    for ch in text:
        indices.append(char2idx[ch])
    return np.array(indices)

# encode the whole corpus once; later cells slice this array and build the dataset from it
text_as_int = text_to_int(text)

In [6]:
# look at how the first 13 characters of the text are encoded
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [7]:
# converting numeric value into text
def int_to_text(ints):
    """Decode a sequence of vocabulary indices back into a string.

    Args:
        ints: integer indices into the module-level ``idx2char`` array, given
            either as a NumPy array or as an object exposing ``.numpy()``
            (e.g. an eager TensorFlow tensor), which is converted first.

    Returns:
        The characters ``idx2char[ints]`` joined into a single ``str``.
    """
    try:
        ints = ints.numpy()
    except AttributeError:
        # no .numpy() method (already a NumPy array): use the value as given.
        # Only AttributeError is ignored so unrelated failures are not hidden.
        pass
    return ''.join(idx2char[ints])

# round-trip check: decoding the first 13 encoded ids should print the start of the text
print(int_to_text(text_as_int[:13]))

First Citizen


Creating Training Examples

In [8]:
seq_length = 100  # length of a sequence for a training example
examples_per_epoch = len(text)//(seq_length+1)  # number of whole (seq_length+1)-character chunks in the text

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)   # dataset over the encoded text, one character id per element

In [9]:
# group the character ids into chunks of seq_length+1; drop_remainder=True discards a final shorter chunk
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [10]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so target[i] is the element that
    follows input[i].
    """
    return chunk[:-1], chunk[1:]

# turn every chunk into an (input, target) pair
dataset = sequences.map(split_input_target)

In [11]:
# show the first two (input, target) pairs decoded back to text
for x, y in dataset.take(2):
    print('\n\nExample\n')
    for heading, encoded in (('INPUT', x), ('\nOUTPUT', y)):
        print(heading)
        print(int_to_text(encoded))



Example

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


Example

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


Training Batches

In [12]:
# number of sequences per training batch
BATCH_SIZE = 64
VOCAB_SIZE = len(vocab) # vocab size is the number of unique characters
# size of the embedding vector learned for each character
EMBEDDING_DIM = 256
# number of units in the LSTM layer
RNN_UNITS = 1024

# size of the buffer used when shuffling the dataset
BUFFER_SIZE = 10000

# shuffle the (input, target) pairs and group them into full batches;
# drop_remainder=True discards a final batch smaller than BATCH_SIZE
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder = True)

Building the Model

In [13]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> LSTM -> Dense character model.

    Args:
        vocab_size: number of distinct characters; used as the embedding input
            size and as the width of the final Dense layer.
        embedding_dim: size of the embedding vector for each character.
        rnn_units: number of units in the LSTM layer.
        batch_size: fixed batch size of the input; the sequence length is left
            unspecified (None).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model whose Dense output has no
        activation, i.e. one raw score per vocabulary entry at each position.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
    # stateful=True keeps the LSTM state between calls until reset_states() is called
    recurrent = tf.keras.layers.LSTM(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform')
    scores = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, scores])
# build the training model (fixed batches of BATCH_SIZE sequences) and show its layers
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


Creating a loss function

In [14]:
# run the not-yet-trained model on one batch to check the shape of its output
for input_example_batch, target_example_batch in data.take(1):
    example_batch_predictions = model(input_example_batch) # model predictions for the first batch of training data
    print(example_batch_predictions.shape, '#(batch_size, sequence_length, vocab_size)') # output shape

(64, 100, 65) #(batch_size, sequence_length, vocab_size)


In [15]:
# the prediction holds 64 entries, one for each sequence in the batch
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 2.6835676e-04 -2.4043592e-03  3.1238701e-04 ...  3.3250034e-03
    1.4485762e-03 -1.8889762e-03]
  [-9.5811859e-04 -6.5937079e-03  2.9485396e-03 ... -3.5521188e-03
    1.3828911e-03 -3.6397122e-04]
  [-6.4356113e-04 -4.7039390e-03  6.9149891e-03 ... -2.3314385e-03
    1.0031664e-03 -1.2065803e-03]
  ...
  [-1.6279254e-02  9.0237241e-05  6.3311821e-03 ... -9.6180793e-03
   -1.4927818e-03  3.5123213e-03]
  [-9.8446803e-03  2.0734710e-04 -2.8090738e-04 ... -7.2063450e-03
   -2.5813864e-03  2.3302324e-03]
  [-7.4439961e-03  2.8094433e-03  7.7496096e-04 ... -1.0850264e-02
   -2.3158535e-03 -4.9336534e-03]]

 [[ 3.2465232e-03 -8.5429556e-04  4.2326362e-03 ...  1.3357238e-03
    5.5756699e-04  5.5873831e-04]
  [ 2.7232147e-03 -6.6268737e-03  6.4409934e-03 ... -1.9845096e-03
   -4.5434074e-03 -8.8995695e-03]
  [ 1.1726246e-03 -1.7753309e-03  3.1910250e-03 ... -1.8854102e-03
   -4.9156626e-03 -6.1342865e-03]
  ...
  [-1.3764877e-02  6.2270788e-03  5.8079173e-04 ... -5.3487555e

In [16]:
# examine the prediction for the first sequence of the batch: one row per time step
pred = example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[ 2.6835676e-04 -2.4043592e-03  3.1238701e-04 ...  3.3250034e-03
   1.4485762e-03 -1.8889762e-03]
 [-9.5811859e-04 -6.5937079e-03  2.9485396e-03 ... -3.5521188e-03
   1.3828911e-03 -3.6397122e-04]
 [-6.4356113e-04 -4.7039390e-03  6.9149891e-03 ... -2.3314385e-03
   1.0031664e-03 -1.2065803e-03]
 ...
 [-1.6279254e-02  9.0237241e-05  6.3311821e-03 ... -9.6180793e-03
  -1.4927818e-03  3.5123213e-03]
 [-9.8446803e-03  2.0734710e-04 -2.8090738e-04 ... -7.2063450e-03
  -2.5813864e-03  2.3302324e-03]
 [-7.4439961e-03  2.8094433e-03  7.7496096e-04 ... -1.0850264e-02
  -2.3158535e-03 -4.9336534e-03]], shape=(100, 65), dtype=float32)


In [17]:
# prediction at the first time step
time_pred = pred[0]
print(len(time_pred))
print(time_pred)

# 65 values, one per vocabulary character: these are raw logits (the final Dense
# layer has no softmax), not probabilities, scoring each character as the next one

65
tf.Tensor(
[ 2.6835676e-04 -2.4043592e-03  3.1238701e-04  5.9055719e-03
  2.7221891e-03  2.1151276e-03 -1.1146354e-03 -6.2866933e-03
  3.0501164e-03 -9.3313772e-04 -3.1414132e-03  7.4931548e-04
 -6.9796210e-03  3.7527829e-03  1.6871274e-03 -2.6258470e-03
 -2.1284088e-03  5.6024562e-03  4.0740683e-03  3.9769299e-03
  1.1814991e-03 -3.7159924e-03 -1.3651038e-03  2.3035039e-03
  1.4176541e-03 -9.9157915e-05 -3.1870725e-03 -7.5181015e-04
  5.3967666e-03 -5.1384382e-03 -1.5944138e-03  3.4170595e-03
  2.2597297e-03  5.3067096e-03  5.7703359e-03 -3.1878176e-04
  2.7990635e-03 -4.7373609e-03 -5.8932975e-04 -3.1357536e-03
  4.8179915e-03  3.5158456e-03 -3.5370394e-04  1.3917428e-03
  3.7823298e-03 -2.9317804e-03 -3.7185908e-03 -1.8853560e-04
  3.2118117e-03 -9.8737655e-07 -5.0133364e-03 -5.0359429e-04
  1.3695679e-03  2.6862170e-03  2.5242046e-03 -1.1091516e-04
  3.1385622e-03  1.1578819e-04 -3.1051694e-03  1.5061484e-03
  2.9909345e-03 -1.3696358e-03  3.3250034e-03  1.4485762e-03
 -1.888976

In [18]:
# to determine the predicted characters we need to sample from the output distribution
# (tf.random.categorical treats each row of `pred` as logits)

sampled_indices = tf.random.categorical(pred, num_samples=1)

# reshape the samples into a flat array of ids and convert the ids to characters

sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
predicted_chars = int_to_text(sampled_indices)
predicted_chars  # what the model predicted for training sequence 1

"BysN wDF,du\nU?zM3cPYMPI?mTCPGZ.ofNJYGrY,WM?!YkAFY&afK'!djkilunFddcGiaS,wkXqZkcM';bEYlOTBvNwuGRSoimV3"

In [19]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    ``from_logits=True`` because the model's final Dense layer applies no
    softmax, so ``logits`` are unnormalized scores.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels, y_pred=logits, from_logits=True)

Compiling the Model

In [20]:
# train with the Adam optimizer and the `loss` function defined above
model.compile(optimizer='adam', loss=loss)

Creating Checkpoints

In [21]:
# directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'

# checkpoint file prefix; '{epoch}' is a placeholder that the ModelCheckpoint callback fills in
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')

# callback passed to model.fit; save_weights_only=True stores only the weights, not the full model
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
               filepath=checkpoint_prefix,
               save_weights_only=True)

Training

In [22]:
# train for 2 epochs; the checkpoint callback saves the model weights under checkpoint_dir
history = model.fit(data, epochs=2, callbacks=[checkpoint_callback])

Epoch 1/2
Epoch 2/2


Loading the Model

In [23]:
# rebuild the same architecture with batch_size=1 so a single sequence can be fed in
# (the training model was built for batches of BATCH_SIZE); this rebinds `model`
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1 )

In [24]:
# once the model is finished training, we can find the latest checkpoint that stores the model weights
# NOTE(review): tf.train.latest_checkpoint returns None when checkpoint_dir holds no checkpoint,
# so the training cell must have been run first
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# build for input of shape (1, None): a batch of one sequence of unspecified length
model.build(tf.TensorShape([1, None]))

Generate Text

In [28]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
    """Generate text by repeatedly sampling the next character from the model.

    Args:
        model: model exposing ``reset_states()`` that maps a ``(1, sequence)``
            batch of character ids to per-position logits over the vocabulary
            (the ``batch_size=1`` model restored from the checkpoint).
        start_string: non-empty seed text; every character must be a key of
            the module-level ``char2idx`` mapping.
        num_generate: number of characters to generate after the seed.
        temperature: positive divisor applied to the logits before sampling.
            Low temperature results in more predictable text, higher
            temperature in more surprising text.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_string`` contains a character missing from ``char2idx``.
    """
    if not start_string:
        # an empty seed gives the model a zero-length sequence, which leaves
        # nothing to sample from; fail early with a clear message instead
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # convert the start string to ids (vectorizing) and add a batch dimension
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    text_generated = []  # sampled characters, joined into a string at the end

    # clear any recurrent state left over from an earlier call
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # scale the logits by the temperature, then sample from the resulting
        # categorical distribution; only the sample for the last position is kept
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # feed only the sampled character back in; earlier context is carried
        # by the model's recurrent state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])
    return start_string + ''.join(text_generated)

In [30]:
# prompt for a seed string and print it followed by the generated continuation
inp = input("Type a starting string:")
print(generate_text(model, inp))

Type a starting string:p
plouth,
You deeder' theo, thing you this sobred'T': at I among:
Of pruwe to you. in, whenchence atsey.
All I am on hepperclant lotinus to balk
And neach Pemesperte, with doving and 'cass of hesterps.

IUSELDLUUS:
Or cault not in.
And thy son dow my loed, bettee's one cothen
Edward
Ey so, I beture but that cir these by anvorber,
Nell hereir, det tor, And to dree thy butthal beenures,
And it would ngive to marref of hows! if my will kishie,
Wheerfore you and with it tive hig? Mind unde.

BoPT:
Manry, you lord, comen as that duts, it to but rears
my lirsting tomet to forlem and malopracumell!
Hat suringever mack the briviy to Unon-baltet,
He carn to he book monedy so, sig,
And good too yourt?

LOYE:
Noo, leave iffant Mithern youre ol kisters'd one
Mare'cleam bile ann mery pasine
Of sin, you bo
