In [46]:
import tensorflow as tf
from bs4 import BeautifulSoup
import requests

import os
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

In [47]:
# Text document of Jane Eyre, by Charlotte Brontë (Project Gutenberg plain-text edition)
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get("https://www.gutenberg.org/files/1260/1260-0.txt", timeout=30)
# Fail loudly on an HTTP error instead of silently training on an error page.
response.raise_for_status()
soup_doc = BeautifulSoup(response.content, "html.parser")
raw_string_data = soup_doc.text
# The vocabulary is every distinct character in the text, in sorted order.
vocabulary = sorted(set(raw_string_data))

# Creating maps from character values to numerical representations
# string2index: character -> position in `vocabulary`
# index2string: array whose i-th entry is the character with index i
string2index = {s:i for i, s in enumerate(vocabulary)}
index2string = np.array(vocabulary)

def vectorize_string(raw_string):
    """Convert a string into a 1-D integer array of vocabulary indices.

    Every character is looked up in the module-level ``string2index`` map.

    Args:
        raw_string: Iterable of characters (normally a ``str``).

    Returns:
        ``np.ndarray`` holding one integer index per character. The dtype is
        pinned to an integer type so that an empty input yields an empty
        *integer* array (NumPy would otherwise default to float64, which is
        not usable for indexing).

    Raises:
        KeyError: If a character is not present in ``string2index``.
    """
    return np.array([string2index[s] for s in raw_string], dtype=int)

def find_string_from_vector(vectorized_data):
    """Decode a sequence of vocabulary indices back into text.

    Args:
        vectorized_data: Iterable of integer indices into the module-level
            ``index2string`` array.

    Returns:
        The string formed by concatenating the character for each index.
    """
    characters = map(index2string.__getitem__, vectorized_data)
    return "".join(characters)


# Encode the whole downloaded text once as vocabulary indices; the training
# cell passes this array to get_batch to draw random training windows.
vectorized_data = vectorize_string(raw_string_data)

def get_batch(vectorized_data, batch_size, seq_length):
    """Sample a random batch of (input, target) windows for next-character prediction.

    Each input row is ``seq_length`` consecutive elements of ``vectorized_data``
    starting at a random position; the matching target row is the same window
    shifted one element to the right.

    Args:
        vectorized_data: 1-D array-like of encoded data.
        batch_size: Number of windows to draw (sampled with replacement via
            ``np.random.choice``, so the global NumPy seed controls the result).
        seq_length: Length of every window.

    Returns:
        Tuple ``(x_batch, y_batch)``, each of shape ``(batch_size, seq_length)``.

    Raises:
        ValueError: If the data holds fewer than ``seq_length + 1`` elements,
            i.e. not even one input/target pair fits.
    """
    vectorized_data = np.asarray(vectorized_data)

    # A window starting at idx reads elements idx .. idx + seq_length (the
    # last one only for the target), so valid starts are 0 .. n - seq_length - 1.
    # np.random.choice(k, ...) draws from [0, k), hence k = n - seq_length.
    num_valid_starts = vectorized_data.shape[0] - seq_length
    if num_valid_starts < 1:
        raise ValueError(
            "Need at least seq_length + 1 = {} elements to build a batch, got {}".format(
                seq_length + 1, vectorized_data.shape[0]
            )
        )

    starting_indices_of_samples = np.random.choice(num_valid_starts, batch_size)

    # Broadcasting (batch_size, 1) + (seq_length,) yields every window's indices
    # at once, so the results already have shape (batch_size, seq_length).
    window_indices = starting_indices_of_samples[:, None] + np.arange(seq_length)
    x_batch = vectorized_data[window_indices]
    # The target sequences are the x_batch sequences shifted over once to the right
    y_batch = vectorized_data[window_indices + 1]
    return (x_batch, y_batch)
  


In [48]:
def LSTM(rnn_units):
    """Create the recurrent layer used by the character model.

    The non-default settings below are carried over from configurations seen
    in earlier examples of this kind of model rather than derived from
    TensorFlow's default LSTM setup.

    Args:
        rnn_units: Number of units in the LSTM layer.

    Returns:
        A ``tf.keras.layers.LSTM`` layer configured to return the full output
        sequence and to be stateful.
    """
    layer_options = dict(
        return_sequences=True,
        recurrent_initializer='glorot_uniform',
        recurrent_activation='sigmoid',
        stateful=True,
    )
    return tf.keras.layers.LSTM(rnn_units, **layer_options)

def develop_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the embedding -> LSTM -> dense character model.

    Args:
        vocab_size: Number of distinct characters; used as the embedding input
            size and as the width of the final dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the LSTM layer.
        batch_size: Fixed batch dimension baked into the model's input shape
            (the sequence dimension is left as ``None``).

    Returns:
        A ``tf.keras.Sequential`` model whose dense layer has one output per
        vocabulary entry and no activation specified.
    """
    embedding_layer = tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent_layer = LSTM(rnn_units=rnn_units)
    output_layer = tf.keras.layers.Dense(units=vocab_size)
    return tf.keras.Sequential([embedding_layer, recurrent_layer, output_layer])

In [49]:
# Build a throw-away model just to inspect layer shapes and parameter counts.
vocab_size = len(vocabulary)
embedding_dim = 2**8  # was a duplicated chained assignment (`embedding_dim = embedding_dim = ...`)
batch_size = 2**5
rnn_units = 2**10
model = develop_model(vocab_size, embedding_dim, rnn_units, batch_size)
model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     (32, None, 256)           26112     
_________________________________________________________________
lstm_11 (LSTM)               (32, None, 1024)          5246976   
_________________________________________________________________
dense_11 (Dense)             (32, None, 102)           104550    
Total params: 5,377,638
Trainable params: 5,377,638
Non-trainable params: 0
_________________________________________________________________


In [50]:
def calc_cross_entropy_loss(target_labels, prediction_logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    Args:
        target_labels: Integer class labels (the target character indices).
        prediction_logits: Unnormalised model outputs; ``from_logits=True``
            tells Keras to apply the softmax itself.

    Returns:
        The loss tensor exactly as returned by
        ``tf.keras.losses.sparse_categorical_crossentropy``; no further
        reduction is applied here.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=target_labels,
        y_pred=prediction_logits,
        from_logits=True,
    )

In [51]:
# Directory that holds the training checkpoints, and the path prefix every
# checkpoint is written under.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")
# exist_ok=True makes this cell safe to re-run and avoids the check-then-create
# race of `if not exists: mkdir`; it still raises if the path exists but is not
# a directory, so a successful call guarantees the directory is there.
os.makedirs(checkpoint_dir, exist_ok=True)

In [91]:
### Hyperparameters ###
# These assignments replace the values used for the earlier model.summary() preview.

# For training optimization
# NOTE: despite the name, `epochs` is the number of iterations of the training
# loop, and each iteration trains on one randomly sampled batch — it is a count
# of gradient steps, not of full passes over the text.
epochs = 2000
batch_size = 2**4
sequence_length = 100
learning_rate = 1e-3

# For the model itself
# vocab_size is the number of distinct characters found in the downloaded text.
vocab_size = len(vocabulary)
embedding_dim = 2**8
rnn_units = 2**9

In [None]:
# Build model
model = develop_model(vocab_size, embedding_dim, rnn_units, batch_size)
# Resume from the most recent checkpoint when one exists.
# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint yet (e.g. the very first run on a fresh clone), and passing None
# to load_weights fails — so only load when there is something to load.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is not None:
    model.load_weights(latest_checkpoint)
# Define optimizer for training
optimizer = tf.keras.optimizers.Adam(learning_rate)

# One optimisation step: forward pass, loss, backward pass, weight update
def take_training_step(model, optimizer, loss_function, x_batch, y_batch):
    """Run a single gradient update on one batch and return its loss.

    Args:
        model: Callable Keras model exposing ``trainable_variables``.
        optimizer: Optimizer exposing ``apply_gradients``.
        loss_function: Callable taking ``(targets, logits)`` and returning a loss tensor.
        x_batch: Model inputs for this step.
        y_batch: Targets handed to ``loss_function``.

    Returns:
        The loss tensor produced by ``loss_function`` for this batch (returned
        as-is, without any reduction).
    """
    # Record the forward pass so the tape can differentiate through it.
    with tf.GradientTape() as tape:
        logits = model(x_batch)
        loss = loss_function(y_batch, logits)

    weights = model.trainable_variables
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return loss

# Train the model!
# One scalar loss value per training step, collected for the plot below.
loss_history = []

# Each iteration draws a fresh random batch and takes one gradient step, so
# `epochs` counts steps rather than full passes over the text.
# NOTE(review): the LSTM layer is created with stateful=True and nothing here
# resets its state between these unrelated random batches — confirm that
# carrying state across batches is intended.
for epoch in tqdm(range(epochs)):
    x_batch, y_batch = get_batch(vectorized_data, batch_size, seq_length = sequence_length)
    batch_loss = take_training_step(model, optimizer, calc_cross_entropy_loss, x_batch, y_batch)
    # Collapse the returned loss tensor into a single number for logging.
    loss_history.append(batch_loss.numpy().mean())
    # Save every 100 steps (including step 0); every save uses the same
    # prefix, so each one overwrites the previous checkpoint.
    if epoch % 100 == 0:     
        model.save_weights(checkpoint_prefix)

# Final save so the weights from the last step are on disk as well.
model.save_weights(checkpoint_prefix)
plt.plot(loss_history)


 71%|███████   | 1414/2000 [38:54<12:40,  1.30s/it] 

In [86]:
from glob import glob
# List the files the training cell wrote. The pattern is built from
# checkpoint_dir (rather than repeating the literal path) so this listing
# cannot drift out of sync with where checkpoints are actually saved.
glob(os.path.join(checkpoint_dir, "*"))

['./training_checkpoints/checkpoint',
 './training_checkpoints/my_ckpt.index',
 './training_checkpoints/my_ckpt.data-00000-of-00001']

In [87]:
# Rebuild the same architecture with a batch size of 1 for text generation,
# then restore the trained weights into it.
model = develop_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
# tf.train.latest_checkpoint returns None when no checkpoint exists; stop with
# a clear message rather than passing None on to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        f"No checkpoint found in {checkpoint_dir!r}; run the training cell first."
    )
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))

model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_14 (Embedding)     (1, None, 256)            26112     
_________________________________________________________________
lstm_14 (LSTM)               (1, None, 512)            1574912   
_________________________________________________________________
dense_14 (Dense)             (1, None, 102)            52326     
Total params: 1,653,350
Trainable params: 1,653,350
Non-trainable params: 0
_________________________________________________________________


In [88]:
def generate_string_data(model, start_character, length = 100):
    """Generate text by repeatedly sampling the next character from the model.

    Args:
        model: Model built with a batch size of 1; its recurrent state is
            reset before generation starts.
        start_character: Seed text. A single character works exactly as
            before; a longer string is also accepted, in which case the whole
            seed is fed to the model first and sampling continues from its
            last position.
        length: Number of characters to generate after the seed.

    Returns:
        The seed followed by the ``length`` generated characters.

    Raises:
        ValueError: If ``start_character`` is empty.
        KeyError: If the seed contains a character missing from ``string2index``.
    """
    if len(start_character) == 0:
        raise ValueError("start_character must contain at least one character")

    # Encode every seed character; a one-character seed gives the same
    # single-element list the model was fed previously.
    seed_indices = [string2index[character] for character in start_character]
    # Add the batch dimension: shape (1, len(seed)).
    beginning_input = tf.expand_dims(seed_indices, 0)

    # Start from a clean recurrent state so earlier calls do not leak in.
    model.reset_states()

    generated_char_list = []
    for i in tqdm(range(length)):
        predictions = model(beginning_input)

        # Drop the batch dimension, then sample from the logits; [-1, 0] takes
        # the sample drawn for the last position of the input.
        predictions = tf.squeeze(predictions, 0)
        predicted_id = tf.random.categorical(predictions, num_samples = 1)[-1,0].numpy()
        generated_char_list.append(index2string[predicted_id])

        # The sampled character becomes the next input.
        beginning_input = tf.expand_dims([predicted_id], 0)
    return start_character + "".join(generated_char_list)

# Generate 2000 characters with the batch-size-1 model, seeded with a single space.
print(generate_string_data(model = model, start_character = " ", length = 2000))

100%|██████████| 2000/2000 [00:15<00:00, 132.95it/s]

 the flowers greater, Jane Eyre.”

“And did I gave you out of,—my John was to attempt, to show off, you endearch to sternles rayless. It could say, “Jane Eyre, for course with all
with the end.”

“Histo mine; who recivid the only, essence lay perhed?”

“When, constant to beto the swell.”

“Your guintage will be survensible amid, one, you enounce the present
crmanting the parson; never besides, and agree is daily,”
asked, he was but England; but I advanced to the boxes that as
one people examining any child—it seldomy.

“Bending back seen me to be mine you’ve raving, and she knew you will direct up when course your face.

“I hope thaw that he answered to leave secure and love leavings: less to be use
now of folds of your mind. You will be a letter band of;
not the ancided love, liberty to she? But that with your aid; and Burns acknowledged
resumed.

“Yes—as I looks to the master’s unsideration. Afterwards he bexchadiously been rollid.

But tho works: where then, c


