In [None]:
import numpy as np

## Data Preprocessing

In [None]:
def load_data(path='shakespeare-2.txt'):
  """Read the whole training corpus into memory as a single string.

  Args:
    path: Location of the UTF-8 encoded text file. Defaults to the corpus
      this notebook was written for, so a bare `load_data()` call behaves
      exactly as before.

  Returns:
    The complete file contents as one `str`.

  Raises:
    OSError: If the file cannot be opened (e.g. it does not exist).
    UnicodeDecodeError: If the file is not valid UTF-8.
  """
  with open(path, mode='r', encoding='utf-8') as f:
    return f.read()

In [None]:
# Read the corpus and derive the character-level vocabulary from it.
data = load_data()
distinct_chars = sorted(set(data))  # sorted, so index assignment is the same on every run

# Two lookup tables that are inverses of each other:
# character -> integer index, and integer index -> character.
char_to_idx = {ch: pos for pos, ch in enumerate(distinct_chars)}
idx_to_char = dict(enumerate(distinct_chars))

In [None]:
# Sizes shared by the rest of the notebook.
N_seq = 50  # number of characters in every input window fed to the network
N_data, N_vocab = len(data), len(distinct_chars)  # corpus length, vocabulary size
print(N_data, N_vocab)

99993 62


In [None]:
# Slide a window of N_seq characters over the corpus one position at a time.
# Each window (as vocabulary indices) is an input sample; the character that
# immediately follows the window is the target the model learns to predict.
window_starts = range(N_data - N_seq)
x_train = [[char_to_idx[ch] for ch in data[start:start + N_seq]] for start in window_starts]
y_train = [char_to_idx[data[start + N_seq]] for start in window_starts]

# Every input window must have exactly one target.
m = len(x_train)
assert len(y_train) == m, "Length mismatch error"

In [None]:
from keras.utils import to_categorical

# OHE the input data in one vectorised call instead of a Python loop over all
# m samples. to_categorical appends the class axis to the shape it is given, so
# the (m, N_seq) index matrix becomes (m, N_seq, N_vocab). This also avoids
# holding a list of m small arrays plus a second full copy made by np.array.
# NOTE: x_train is overwritten with its encoded form, so this cell must not be
# re-run without first re-running the cell that builds the index lists.
x_train = to_categorical(np.asarray(x_train), num_classes=N_vocab)

# OHE the output values
y_train = to_categorical(y_train, num_classes=N_vocab)

# Make the [samples, timesteps, features] layout explicit (and fail loudly if
# the encoded array does not have the expected number of elements).
x_train = x_train.reshape(m, N_seq, N_vocab)

## The LSTM Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

def build_model():
  """Assemble the (uncompiled) character-level network.

  The architecture is three stacked 512-unit LSTM layers followed by a
  softmax `Dense` layer. The per-sample input shape is taken from the
  module-level `x_train` and the number of output classes from the
  module-level `y_train`, so both must exist before this is called.

  Returns:
    An uncompiled `Sequential` model.
  """
  window_shape = x_train[0].shape
  n_classes = y_train.shape[1]
  return Sequential([
      # The first two LSTMs emit their full output sequence so the next LSTM
      # receives one vector per timestep; the last one emits a single vector.
      LSTM(512, input_shape=window_shape, return_sequences=True),
      LSTM(512, return_sequences=True),
      LSTM(512),
      Dense(n_classes, activation='softmax'),
  ])

In [None]:
# Instantiate the network, then attach the optimiser and the loss. Categorical
# cross-entropy pairs with the one-hot targets and the softmax output layer.
model = build_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
)

In [None]:
from keras.callbacks import ModelCheckpoint

# Callbacks:
PATH_SAVE = "shakespearean_generator_2.h5"
# `monitor` and `mode` only decide what gets written when `save_best_only=True`;
# without it the file is overwritten after every epoch regardless of the loss.
# Enabling it makes the checkpoint hold the lowest-training-loss model seen so far.
checkpoint = ModelCheckpoint(PATH_SAVE, monitor='loss', mode='min', save_best_only=True)
cb_list = [checkpoint]

# Fitting
history = model.fit(x_train, y_train, epochs=30, batch_size=128, callbacks=cb_list)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Generating Text

In [None]:
def generate(seed, ohed_seed, N_chars):
  """Sample `N_chars` new characters from the module-level `model`.

  Starting from the seed window, the model's predicted distribution for the
  next character is sampled, the sampled character is appended to the output,
  and the window is slid forward by one position so that the new character
  becomes its last timestep.

  Args:
    seed: Sequence of vocabulary indices for the seed text. Not modified.
    ohed_seed: The same seed, one-hot encoded, as a sequence of equal-length
      1-D rows (i.e. convertible to a 2-D `(timesteps, vocab)` array).
      Not modified.
    N_chars: Number of characters to generate.

  Returns:
    A new list containing the seed indices followed by the `N_chars` sampled
    indices (as Python ints).

  Raises:
    ValueError: If `ohed_seed` cannot be interpreted as a 2-D array.
  """
  # Work on a private (timesteps, vocab) array so the caller's seed is untouched.
  window = np.array(ohed_seed)
  if window.ndim != 2:
    raise ValueError("ohed_seed must be a 2-D (timesteps, vocab) one-hot sequence.")
  n_vocab = window.shape[1]

  generated_sentence = list(seed)
  for _ in range(N_chars):
    # verbose=0: otherwise a progress bar may be printed for every single character.
    probabilities = model.predict(window[np.newaxis, ...], verbose=0)

    # The softmax output is typically float32 and may not sum to exactly 1;
    # renormalise in float64 so np.random.choice's sum-to-one check cannot fail.
    p = np.asarray(probabilities, dtype=np.float64).ravel()
    p /= p.sum()
    idx = int(np.random.choice(n_vocab, p=p))
    generated_sentence.append(idx)

    # Select the next sequence: drop the oldest timestep and write the one-hot
    # row of the sampled character into the freed last position.
    window = np.roll(window, -1, axis=0)
    window[-1] = 0
    window[-1, idx] = 1
  return generated_sentence

In [None]:
initial_word = "YOUR AWESOME CHARACTER:"

# Every seed character needs an entry in the vocabulary, otherwise it cannot be
# mapped to an index when the seed is encoded.
chars_input = set(initial_word)
chars_valid = set(distinct_chars)
invalid_chars = chars_input - chars_valid
if invalid_chars:
  # ValueError (not SyntaxError): the code is fine, the supplied value is not.
  raise ValueError("Input word contains invalid characters.")

# Truncate larger words: keep only the LAST N_seq characters, i.e. the context
# closest to where generation starts. (Slicing with [N_seq:] would instead
# discard the first N_seq characters and leave a string of the wrong length.)
if len(initial_word) > N_seq:
  initial_word = initial_word[len(initial_word) - N_seq:]

# Pad small words with spaces (on the left) up to exactly N_seq characters.
# N_pad is kept so the padding can be stripped from the generated output later.
N_pad = max(N_seq - len(initial_word), 0)
initial_word = ' '*N_pad + initial_word

print("The initial word is : {}".format(initial_word))

The initial word is :                            YOUR AWESOME CHARACTER:


In [None]:
# Encode the (padded) seed text twice: as vocabulary indices, and as a list of
# one-hot rows - the two forms that generate() expects.
seed = [char_to_idx[ch] for ch in initial_word]
ohed_seed = [row for row in to_categorical(seed, num_classes=N_vocab)]

In [None]:
# Sample 500 characters after the seed, then drop the leading pad positions
# (if any) so the result starts at the first real seed character.
seed_and_sample = generate(seed, ohed_seed, 500)
generated_sentence = seed_and_sample[N_pad:]

In [None]:
# Map every index back to its character and stitch the text together.
generated_sentence = ''.join(idx_to_char[token] for token in generated_sentence)
print(generated_sentence)

YOUR AWESOME CHARACTER:
But there are I repont I show anged and too: I bring them.

SEBASTIAN:
Wine, my bear them! I Dun!

TITUS ANDR:
Then let this first I good lord; where it not his sender
goolinar and putther that themselves in she should
Of this joylly down from the world.

OTiz:
It is some serve, I would not wonder queen.

First Servingman:
By Lord HasiN, if vire I should ever
Do golding sholl be done. This bumd to die, and inforsure no leisure
Bamses he should with with a knight, purpereces,
There are I see the


In [None]:
# Write the model as it stands after training to its own file; this is a
# different filename from the checkpoint path (PATH_SAVE) used during fitting.
model.save('shakespeare_final.h5')