In [30]:
import keras 
import numpy as np
import pdb
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Flatten, TimeDistributed
from keras.callbacks import LambdaCallback, ModelCheckpoint
from os import listdir
from os.path import isfile, join

# Size of the one-hot vocabulary: one class per character code 0-255.
num_chars = 256
# Number of timesteps (characters) in every window fed to the network.
seq_len = 100
# Number of windows drawn per generated batch.
batch_size = 64
# File (.h5) the model is loaded from when available and checkpointed to.
model_save_path = 'serialized_models/shakespeare_gen_2layer_large.h5'

def bliteral_to_categorical(b_string):
    """One-hot encode a text or byte string, one row per character.

    Args:
        b_string: A ``str`` or a ``bytes``/``bytearray`` object. Every
            character code must be smaller than ``num_chars``.

    Returns:
        The array built by ``keras.utils.to_categorical`` with
        ``num_classes=num_chars``: one row per character, holding a 1 in
        the column equal to that character's code.
    """
    # Iterating a bytes object in Python 3 yields ints, on which ord()
    # raises TypeError; accept those as-is and only call ord() on characters.
    int_rep = [c if isinstance(c, int) else ord(c) for c in b_string]
    return keras.utils.to_categorical(int_rep, num_classes=num_chars)

def load_data(data_path):
    """Read every file in ``data_path`` and build next-character pairs.

    The contents of all regular files directly inside ``data_path`` are
    concatenated in sorted filename order. The inputs are that text without
    its last character and the targets are the text without its first
    character, so ``targets[i]`` is the character following ``inputs[i]``.

    Args:
        data_path: Directory holding the text files (sub-directories are
            skipped).

    Returns:
        Tuple ``(oh_input_chars, oh_targets)``: the one-hot encodings of
        the inputs and of the targets, as produced by
        ``bliteral_to_categorical``.
    """
    # listdir() returns entries in arbitrary order; sorting makes the
    # concatenated corpus identical on every run and platform.
    shakespeare_files = sorted(
        f for f in listdir(data_path) if isfile(join(data_path, f))
    )

    # NOTE(review): files are decoded with the platform default encoding;
    # consider pinning one once the encoding of the corpus is confirmed.
    file_texts = []
    for f in shakespeare_files:
        with open(join(data_path, f), 'r') as fh:
            file_texts.append(fh.read())
    input_text = ''.join(file_texts)

    # Shift by one character: the target at each position is the next input.
    targets = input_text[1:]
    input_text = input_text[:-1]

    oh_input_chars = bliteral_to_categorical(input_text)
    oh_targets = bliteral_to_categorical(targets)
    return oh_input_chars, oh_targets

def string_to_model_input(string):
    """Encode ``string`` as a batch containing a single sample.

    The one-hot encoding of ``string`` is returned with one extra leading
    axis of length 1, i.e. with shape ``(1,) + encoding.shape``.
    """
    one_hot = bliteral_to_categorical(string)
    batched_shape = (1,) + one_hot.shape
    return one_hot.reshape(batched_shape)

def model_pred_to_string(model_pred):
    """Decode rows of per-class scores into a string.

    For each row of ``model_pred`` the index of the largest value is taken
    as a character code and converted with ``chr``; the characters are
    concatenated in row order.
    """
    decoded = []
    for char_code in np.argmax(model_pred, axis=1):
        decoded.append(chr(char_code))
    return ''.join(decoded)

def generate_string(model, str_seed):
    """
    Predict a single character at a time by passing
    predicted characters back as inputs to the network.

    The seed is repeated and trimmed to fill a window of exactly
    ``seq_len`` characters. The seed characters are kept as the start of
    the window; every later position is overwritten, left to right, with
    the character the model predicts from the positions before it.

    Args:
        model: Object whose ``predict`` accepts the array built by
            ``string_to_model_input`` and returns per-position class scores.
        str_seed: Text the generated string starts with.

    Returns:
        The generated string of ``seq_len`` characters, or ``''`` when
        ``str_seed`` is empty.
    """
    # Trim the repeated seed so the window always has seq_len steps, even
    # when the seed is longer than one character.
    seed_str = (str_seed * seq_len)[:seq_len]
    curr_model_input = string_to_model_input(seed_str)

    # Index of the last seed character inside the window; generation starts
    # right after it so the whole seed is preserved.
    last_seed_index = min(len(str_seed), len(seed_str)) - 1

    # Stop one step early: the prediction made at the final position has no
    # slot left to be written into, so computing it would be wasted work.
    for i in range(max(last_seed_index, 0), len(seed_str) - 1):
        curr_index_pred = model.predict(curr_model_input)[0, i]
        model_next_input = np.zeros(num_chars)
        model_next_input[np.argmax(curr_index_pred)] = 1
        curr_model_input[0, i + 1] = model_next_input

    return model_pred_to_string(curr_model_input[0])

In [None]:
# Build the one-hot (input, next-character target) arrays for the training
# and validation splits; each call reads every file in the given directory.
train_x_seq, train_y_seq = load_data('data/train')
val_x_seq, val_y_seq = load_data('data/validation')

In [None]:
def on_epoch_end(epoch, logs):
    """Epoch-end hook that prints a freshly generated text sample.

    A character code is drawn uniformly from ``range(num_chars)`` and used
    as the seed for ``generate_string`` on the module-level ``model``.
    ``logs`` is accepted but not used.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)
    seed_code = np.random.randint(num_chars)
    sample = generate_string(model, chr(seed_code))
    print('Generated string: {}'.format(sample))
        
def generate_sequences(inputs, targets, seq_len, batch_size):
    """Endlessly yield random batches of aligned fixed-length windows.

    Args:
        inputs: Sequence sliced along its first axis to form input windows.
        targets: Sequence sliced with the same indices as ``inputs``.
        seq_len: Length of every window.
        batch_size: Number of windows per batch.

    Yields:
        Tuple ``(X_batch, Y_batch)`` of numpy arrays, each stacking
        ``batch_size`` windows of ``seq_len`` consecutive items. Window
        starts are drawn independently and uniformly, with replacement.

    Raises:
        ValueError: When ``inputs`` holds fewer than ``seq_len`` items.
            Raised when the first batch is requested.
    """
    # A window may start at any index from 0 up to and including
    # len(inputs) - seq_len, hence the +1 on the number of choices.
    num_starts = len(inputs) - seq_len + 1
    if num_starts < 1:
        raise ValueError(
            'Need at least seq_len={} items to build a window, got {}'.format(
                seq_len, len(inputs)))

    while True:
        seq_starts = np.random.choice(num_starts, batch_size)
        X_batch = [inputs[s:s + seq_len] for s in seq_starts]
        Y_batch = [targets[s:s + seq_len] for s in seq_starts]
        yield np.array(X_batch), np.array(Y_batch)
    
# Reuse the model stored at model_save_path when it can be loaded;
# otherwise build and compile a fresh network.
# NOTE(review): any OSError from load_model leads to a brand-new model,
# which the checkpoint callback below then writes to the same path --
# confirm this is intended for failures other than a missing file.
try:
    model = keras.models.load_model(model_save_path)
    print('Using saved model')
except OSError:
    # Two stacked 500-unit LSTMs that emit an output at every timestep,
    # followed by a per-timestep softmax over the num_chars classes.
    model = Sequential([
        LSTM(500, input_shape=(seq_len, num_chars), return_sequences=True),
        LSTM(500, return_sequences=True),
        TimeDistributed(Dense(num_chars, activation='softmax'))
    ])

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

model.summary() 

# Batches are sampled at random by generate_sequences, so an "epoch" here
# is a fixed number of batches rather than one full pass over the data.
steps_per_epoch = 1000
val_steps = 100

# Endless generators of random (input, target) window batches.
train_generator = generate_sequences(train_x_seq, train_y_seq, seq_len=seq_len, batch_size=batch_size)
val_generator = generate_sequences(val_x_seq, val_y_seq, seq_len=seq_len, batch_size=batch_size)

# Print a generated sample at the end of every epoch and checkpoint the
# model to model_save_path.
# NOTE(review): presumably the checkpoint is written after each epoch with
# these default arguments -- confirm against the installed Keras version.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
checkpointer = ModelCheckpoint(filepath=model_save_path, verbose=1)
model.fit_generator(train_generator, validation_data=val_generator, validation_steps=val_steps, steps_per_epoch=steps_per_epoch, epochs=50, callbacks=[print_callback, checkpointer])
   


In [None]:
import sys

def pred_next_string(model, start_string):
    """Slide the text window forward by one predicted character.

    The model is run on ``start_string``; the highest-scoring class at the
    final position is converted to a character. The result is
    ``start_string`` without its first character, followed by that
    predicted character, so the length is unchanged.
    """
    batch = string_to_model_input(start_string)
    final_step_scores = model.predict(batch)[0, len(start_string) - 1]
    predicted = chr(np.argmax(final_step_scores))
    return start_string[1:] + predicted
    
# Bootstrap an initial window of text from the single seed character 'h'.
str_seed = generate_string(model, 'h')
# Total number of characters to emit, counting the initial window.
str_len = 10000

print(str_seed)

# Repeatedly predict the next character from the current window and write
# it to stdout immediately, so the text appears as it is generated.
prev_pred = str_seed
for i in range(str_len - len(str_seed)):
    prev_pred = pred_next_string(model, prev_pred)
    # pred_next_string appends the new character at the end of the window.
    next_char = prev_pred[-1]
    sys.stdout.write(next_char)
    sys.stdout.flush()

he state of heaven,
And the dear godshade hands with ducats here,
As paltered in the sun's nest flou
rish.

BENVOLIO
Then shall we have a play to study for me
I'll knock the people in a sea or no.

SHYLOCK
I am content.

PORTIA, [as Balthazar]
I humbly do desire your Grace of mine.

PRINCESS
I think no less. Dost thou understand the sure
Of the foul sins that he is subtle with his face?

KING HENRY
Ay, as the boy hath hit him out of death,
And therefore he hath giv'n it for his sake.
But when he faints with sorrow still, as summer bred
All faces this became of the firm that killed
Some craven to the state of all things through.

[Enter Parolles.]


PAROLLES, [to Bassanio]
I pray you, give me leave. I'll to the walls,
Though none but I do feast it by the steed,
And therefore take this changing in the close.
Where be the champion of the Duke of York,
I stay here for a man.

CASSIUS  What news?

MESSENGER
The news is mercy, let it be the man.

[Enter Parolles.]


PAROLLES, [to Bassanio]
I