## Generate "fake English" text from an RNN

In [1]:
import datetime

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing
# import pickle
from tensorflow.keras.models import Model
# from IPython.display import clear_output
from tensorflow.python.keras.layers import Input,Conv1D, LeakyReLU,PReLU, MaxPool1D, CuDNNLSTM, Bidirectional, TimeDistributed, Dense, Reshape, Dropout, BatchNormalization


# Ask TensorFlow to grow GPU memory on demand rather than claiming it all at start-up.
# Looping over the device list applies the setting to every visible GPU and simply
# does nothing (instead of raising IndexError on `gpu[0]`) when no GPU is detected.
gpu = tf.config.experimental.list_physical_devices('GPU')
for device in gpu:
    tf.config.experimental.set_memory_growth(device, True)

import numpy as np
# import os
# import time

In [2]:
# Fetch the Tiny Shakespeare corpus; `get_file` returns the local path of the download.
shakespeare_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
# Decode explicitly as UTF-8 so the text does not depend on the platform's default
# locale encoding (e.g. cp1252 on Windows).
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

In [3]:
print(shakespeare_text[:148])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?



In [4]:
"".join(sorted(set(shakespeare_text)))

"\n !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

In [5]:
# Character-level tokenizer. `lower=False` keeps upper- and lower-case letters
# as distinct tokens instead of folding everything to lower case.
tokenizer = keras.preprocessing.text.Tokenizer(
    lower=False,
    char_level=True,
)
tokenizer.fit_on_texts(shakespeare_text)

In [6]:
tokenizer.texts_to_sequences(["First"])

[[50, 10, 8, 7, 3]]

In [7]:
tokenizer.sequences_to_texts([[50, 10, 8, 7, 3]])

['F i r s t']

In [8]:
# Number of distinct characters: one `word_index` entry per character seen while fitting.
max_id = len(tokenizer.word_index)
# Total number of characters in the corpus. The tokenizer was fitted on a single
# string, so it counted each character as one "document".
dataset_size = tokenizer.document_count

In [11]:
# Encode the whole corpus as one sequence of character ids, shifted down by one
# so that the ids start at 0.
encoded_corpus = tokenizer.texts_to_sequences([shakespeare_text])
[encoded] = np.array(encoded_corpus) - 1

# Keep the first 90% of the characters for training.
train_size = (dataset_size * 90) // 100
train_ids = encoded[:train_size]
dataset = tf.data.Dataset.from_tensor_slices(train_ids)

In [12]:
n_steps = 100
# One extra character per window: the target is the input shifted one character ahead.
window_length = n_steps + 1
# Slide a window over the corpus one character at a time, dropping the
# incomplete windows at the end.
dataset = dataset.window(
    size=window_length,
    shift=1,
    drop_remainder=True,
)

In [13]:
def _window_to_tensor(window):
    """Collapse one window (itself a dataset) into a single batch of `window_length` ids."""
    return window.batch(window_length)


# Flatten the dataset of window-datasets into a dataset of fixed-length tensors.
dataset = dataset.flat_map(_window_to_tensor)

In [14]:
np.random.seed(42)
tf.random.set_seed(42)

In [15]:
def _split_inputs_targets(windows):
    """Split a batch of windows into (all but the last char, all but the first char)."""
    return windows[:, :-1], windows[:, 1:]


batch_size = 32
# Shuffle the windows, group them into batches, then derive inputs and shifted targets.
dataset = (
    dataset
    .shuffle(10000)
    .batch(batch_size)
    .map(_split_inputs_targets)
)

In [16]:
def _one_hot_inputs(X_batch, Y_batch):
    """One-hot encode the input ids (depth `max_id`); targets stay as integer ids."""
    return tf.one_hot(X_batch, depth=max_id), Y_batch


dataset = dataset.map(_one_hot_inputs)

In [17]:
dataset = dataset.prefetch(1)

In [18]:
for X_batch, Y_batch in dataset.take(1):
    print(X_batch.shape, Y_batch.shape)

(32, 100, 65) (32, 100)


In [None]:
# Character-level language model: two stacked 128-unit LSTM layers (each followed by
# batch normalisation and 20% dropout) and a per-time-step softmax over the vocabulary.
model = tf.keras.models.Sequential()

# First recurrent stage; accepts sequences of any length with `max_id` features per step.
model.add(CuDNNLSTM(128, return_sequences=True, input_shape=[None, max_id]))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Second recurrent stage.
model.add(CuDNNLSTM(128, return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# One probability distribution over the `max_id` characters at every time step.
model.add(TimeDistributed(Dense(max_id, activation="softmax")))

# Targets are integer character ids, hence the sparse variant of cross-entropy.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
ckpt = tf.train.Checkpoint(model=model)

# Fit on a single batch for one epoch.
# NOTE(review): presumably a warm-up so the model's variables exist before a
# checkpoint is restored — confirm.
history = model.fit(dataset.take(1), epochs=1)

In [35]:
# path = 'D:\Char_RNN\checkpoints/train\model_2/20210928-225654/'
# ckpt.read(path).assert_consumed()
# print("Restored")

Restored


In [39]:
history = model.fit(dataset, epochs=1)



In [40]:
# Write the checkpoint under a fresh, timestamped sub-directory so earlier
# checkpoints are not overwritten.
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
path = "./checkpoints/train/model_2/" + timestamp + '/'
ckpt.write(path)
print("checkpoint saved !")

checkpoint saved !


In [42]:
print(history.history)

{'loss': [1.2103033065795898]}


In [None]:
# Plot the training loss recorded by the most recent `model.fit` call.
# `history.history` is a dict mapping each metric name to a list with one value
# per epoch, so the "loss" series must be selected explicitly before plotting.
loss_per_epoch = history.history["loss"]
epoch_numbers = range(1, len(loss_per_epoch) + 1)

fig, ax = plt.subplots(1, 1, figsize=(14, 7), facecolor='silver')
ax.set_facecolor('white')
# Markers keep a single-epoch run visible (a one-point line would draw nothing).
ax.plot(epoch_numbers, loss_per_epoch, marker="o", label="Training loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss", size=20)
ax.set_title("Training loss vs. epochs")
ax.legend();

## Model to Generate Text


In [44]:
def preprocess(texts):
    """One-hot encode a list of strings for the model.

    Each string is converted to character ids with the fitted `tokenizer`, the ids
    are shifted down by one, and the result is one-hot encoded with depth `max_id`.

    Args:
        texts: List of strings to encode.

    Returns:
        The one-hot encoded tensor produced by `tf.one_hot`.
    """
    token_ids = tokenizer.texts_to_sequences(texts)
    zero_based_ids = np.array(token_ids) - 1
    return tf.one_hot(zero_based_ids, max_id)

In [47]:
X_new = preprocess(["Hell"])
# Most likely character id at every position of the input sequence.
predicted_proba = model(X_new)
Y_pred = np.argmax(predicted_proba, axis=-1)
# Shift the ids back up by one before decoding them to characters.
predicted_texts = tokenizer.sequences_to_texts(Y_pred + 1)
predicted_texts[0][-1]  # 1st sentence, last char

' '

In [46]:
tf.random.set_seed(42)

tf.random.categorical([[np.log(0.5), np.log(0.4), np.log(0.1)]], num_samples=40).numpy()

array([[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
        2, 0, 0, 1, 1, 1, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0]],
      dtype=int64)

In [48]:
def next_char(text, temperature=1):
    """Pick the character that follows `text` according to the model.

    The model's predicted distribution for the last time step is rescaled by
    `temperature` and a single character is sampled from it.

    Args:
        text: Non-empty seed string.
        temperature: Non-negative sampling temperature. Values below 1 sharpen
            the distribution, values above 1 flatten it, and exactly 0 selects
            the most probable character deterministically (greedy decoding)
            instead of dividing by zero.

    Returns:
        The chosen character as a string.

    Raises:
        ValueError: If `temperature` is negative or `text` is empty.
    """
    # Validate before touching the model so bad arguments fail fast with a clear message.
    if temperature < 0:
        raise ValueError("temperature must be non-negative, got %r" % (temperature,))
    if not text:
        raise ValueError("text must be a non-empty string")

    X_new = preprocess([text])
    # Keep only the prediction for the last time step, as a (1, vocabulary) slice.
    y_proba = model(X_new)[0, -1:, :]
    if temperature == 0:
        # Limit of temperature -> 0: always take the most probable character.
        char_id = tf.argmax(y_proba, axis=-1)[:, tf.newaxis] + 1
    else:
        rescaled_logits = tf.math.log(y_proba) / temperature
        char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    # The +1 above undoes the -1 shift applied in `preprocess` before decoding.
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [49]:
tf.random.set_seed(42)

next_char("How are ", temperature=1)

'y'

In [50]:
def complete_text(text, n_chars=100, temperature=1):
    """Extend `text` by `n_chars` characters generated one at a time.

    Args:
        text: Seed string to continue.
        n_chars: Number of characters to append.
        temperature: Sampling temperature forwarded to `next_char`.

    Returns:
        The seed string followed by the generated characters.
    """
    generated = text
    for _step in range(n_chars):
        # Feed back everything produced so far so each new character is
        # conditioned on the full text, including earlier generated characters.
        generated = generated + next_char(generated, temperature)
    return generated

## Output

In [51]:
tf.random.set_seed(42)

print(complete_text("World", temperature=0.2))

World, and will have me the maid of my wood;
That so, sir, as I may contrive the  words?

HORTENSIO:
Sir,


In [52]:
print(complete_text("I", temperature=1))

Ibe, I be so, Tranio, that comes you hear
Your abest did wither will encounter,
Unless me hence she i


In [57]:
print(complete_text("Man", temperature=0.001))

Man as faults strange to her for me at the scold:
That I reading that my deeds shall prove.

GREMIO:
An


In [60]:
print(complete_text("Man", temperature=0.5))

Man and more more offent me,
And you must be game and the man,
That but she shall be so far well be gre


In [66]:
print(complete_text("Man", temperature=0.7))

Mander grace, I am all till be have;
What means to come to mave of seef mese
To knock you well stend th


In [67]:
print(complete_text("Man", temperature=0.6))

Man of Padua: shall I not have this devil,
I'll tell you will have me and hang me so dow.

KATHARINA:
I


In [68]:
print(complete_text("Man", temperature=0.5))

Man and beaute with a stand of the head
To hell me, whom he begin once, I shall not
be revise him and h


In [70]:
print(complete_text("Her", temperature=0.6))

Her good and of Bianca,
For she is not how fellows in your love to her.

BIANCA:
Here's no knock you, s


In [75]:
print(complete_text("Mother", temperature=0.4))

Mother father father did be so good and sweet:
I would I be becomes from her than your sister
Kate to men 


In [79]:
print(complete_text("Country is great. We are all the citizens of ", temperature=0.6))

Country is great. We are all the citizens of well
And reason more the his note.

GREMIO:
And that his batthes of a suitors,
And be so graceless b
