# Generating Shakespearen Text Using a Character RNN

In [1]:
import tensorflow as tf

shakespeare_url = 'https://homl.info/shakespeare'
filepath = tf.keras.utils.get_file('shakespeare.txt', shakespeare_url)
with open(filepath) as f:
  shakespeare_text = f.read()

Downloading data from https://homl.info/shakespeare
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
print(shakespeare_text[:80])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.


In [3]:
''.join(sorted(set(shakespeare_text.lower())))

"\n !$&',-.3:;?abcdefghijklmnopqrstuvwxyz"

In [4]:
text_vec_layer=tf.keras.layers.TextVectorization(split="character",
                                                 standardize="lower")

text_vec_layer.adapt(shakespeare_text)
encoded=text_vec_layer([shakespeare_text][0])

In [5]:
text_vec_layer.get_vocabulary()

['',
 '[UNK]',
 np.str_(' '),
 np.str_('e'),
 np.str_('t'),
 np.str_('o'),
 np.str_('a'),
 np.str_('i'),
 np.str_('h'),
 np.str_('s'),
 np.str_('r'),
 np.str_('n'),
 np.str_('\n'),
 np.str_('l'),
 np.str_('d'),
 np.str_('u'),
 np.str_('m'),
 np.str_('y'),
 np.str_('w'),
 np.str_(','),
 np.str_('c'),
 np.str_('f'),
 np.str_('g'),
 np.str_('b'),
 np.str_('p'),
 np.str_(':'),
 np.str_('k'),
 np.str_('v'),
 np.str_('.'),
 np.str_("'"),
 np.str_(';'),
 np.str_('?'),
 np.str_('!'),
 np.str_('-'),
 np.str_('j'),
 np.str_('q'),
 np.str_('x'),
 np.str_('z'),
 np.str_('3'),
 np.str_('&'),
 np.str_('$')]

In [6]:
encoded -= 2  # drop tokens 0 (pad) and 1 (unknown), which we will not use
n_tokens = text_vec_layer.vocabulary_size() - 2 # number of distinct chars without unk and padding
dataset_size = len(encoded)
dataset_size

1115394

In [7]:
def to_dataset(sequence,lenght,seed=None,shuffle=False , batch_size=32):
    ds=tf.data.Dataset.from_tensor_slices(sequence)
    ds=ds.window(lenght+1,shift=1,drop_remainder=True)
    ds=ds.flat_map(lambda window_ds:window_ds.batch(lenght+1))
    if shuffle:
        ds=ds.shuffle(100_000,seed=seed)
    ds=ds.batch(batch_size)
    return ds.map(lambda window:(window[:,:-1],window[:,1:])).prefetch(1)

In [8]:
lenght=100
tf.random.set_seed(42)

train_set=to_dataset(encoded[:1_000_000],lenght=lenght,shuffle=True,seed=42)
valid_set=to_dataset(encoded[:1_000_000:1_060_000],lenght=lenght)
test_set=to_dataset(encoded[1_060_000:],lenght=lenght)

In [9]:
tf.random.set_seed(42)
vocab_size = text_vec_layer.vocabulary_size()

model=tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size,output_dim=16),
    tf.keras.layers.GRU(128, return_sequences=True),
    tf.keras.layers.Dense(vocab_size,activation="softmax")
])

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Nadam(),
    metrics=["accuracy"]
)

model_ckpt=tf.keras.callbacks.ModelCheckpoint(
    "my_shakespeare_model.keras",monitor="val_accuracy",save_best_only=True
)

history=model.fit(train_set,
                  validation_data=valid_set,
                  epochs=1,
                  callbacks=[model_ckpt])

  31247/Unknown [1m370s[0m 11ms/step - accuracy: 0.5440 - loss: 1.5100



[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m371s[0m 11ms/step - accuracy: 0.5440 - loss: 1.5100


  self._save_model(epoch=epoch, batch=None, logs=logs)


In [10]:
shakespeare_model=tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Lambda(lambda X: X-2),
    model
])

In [11]:
y_proba=shakespeare_model.predict(tf.constant(["To be or not to b"]))[0,-1]
y_pred=tf.argmax(y_proba)
text_vec_layer.get_vocabulary()[y_pred+2]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step


np.str_('e')

# Generating Fake Shakespearean text

In [12]:
log_probas=tf.math.log([[0.5,0.3,0.2]])
tf.random.set_seed(42)
tf.random.categorical(log_probas,num_samples=8)

<tf.Tensor: shape=(1, 8), dtype=int64, numpy=array([[0, 0, 1, 2, 1, 0, 0, 0]])>

In [13]:
def next_char(text,temperature=1):
    text=tf.constant([text])
    y_proba=shakespeare_model.predict(text)[0,-1:]
    rescaled_logits=tf.math.log(y_proba)/temperature
    char_id=tf.random.categorical(rescaled_logits,num_samples=1)[0,0]
    return text_vec_layer.get_vocabulary()[char_id+2]

In [14]:
def extent_text(text,chars=50,temperature=1):
    for  _ in range(chars):
        text+=next_char(text,temperature)
    return text

In [15]:
extent_text("to be or not to",temperature=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31

'to be or not to death,\ndreams: i say, very plead plaw him not the'

In [16]:
extent_text("to be or not to",chars=50,temperature=0.01)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29

'to be or not to death,\nand the duke is the duke is the duke is th'