In [1]:
import sys
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Parameters & Settings

In [7]:
# Text corpus to use. Should be the same as the one the model was trained on.
text_path = "dataset/wows-script.txt"
# Checkpoint file whose weights are loaded into the model.
weights_path = "checkpoints/weights-imporvement-11-2.4104.hdf5"
# Number of characters to start off with from the corpus.
seed_length = 100
# Number of characters to generate.
generate_length = 1000

### Load dataset (text) and clean/format it

In [9]:
# Read the whole corpus into memory and lowercase it.
with open(text_path, "r") as f:
    raw_text = f.read().lower()

# Vocabulary: every distinct character of the lowercased text, in sorted
# order, plus lookup tables in both directions (char -> code, code -> char).
characters = sorted(set(raw_text))
char_to_int = {symbol: code for code, symbol in enumerate(characters)}
int_to_char = dict(enumerate(characters))

n_vocab = len(characters)
n_chars = len(raw_text)

print("Text has {} characters".format(n_chars))
print("Vocabulary has {} unique characters".format(n_vocab))

Text has 128743 characters
Vocabulary has 52 unique characters


In [12]:
# Window length fed to the network. Read from the `seed_length` setting
# instead of a second hard-coded 100 so the two values cannot drift apart.
seq_length = seed_length

# Encode the corpus once; every window below is then a plain list slice
# rather than a fresh round of per-character dictionary lookups.
encoded_text = [char_to_int[char] for char in raw_text]
if len(encoded_text) <= seq_length:
    raise ValueError(
        "Corpus has {} characters; need more than seq_length={} to build a pattern".format(
            len(encoded_text), seq_length
        )
    )

data_x = []  # input windows: lists of `seq_length` character codes
data_y = []  # targets: code of the character that follows each window
for i in range(len(encoded_text) - seq_length):
    data_x.append(encoded_text[i:i + seq_length])  # slicing yields a new list per window
    data_y.append(encoded_text[i + seq_length])
n_patterns = len(data_x)
print("Total patterns {}".format(n_patterns))

X = np.reshape(data_x, (n_patterns, seq_length, 1))  # reshape to [samples, time steps, features]
X = X / float(n_vocab)  # normalize values (n_vocab == len(char_to_int))
# Pin the one-hot width to the vocabulary size. Without num_classes it is
# inferred from the largest target code, which would come out too narrow if
# the highest-coded character never occurs as a target.
y = np_utils.to_categorical(data_y, num_classes=n_vocab)

Total patterns 128643


### Define the same network architecture

and then load the weights from training

In [13]:
# Rebuild the network. The layer stack must be the same architecture as the
# one the checkpoint was trained on.
n_timesteps, n_features = X.shape[1], X.shape[2]
n_outputs = y.shape[1]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(n_outputs, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
# Restore the trained parameters from the checkpoint file.
model.load_weights(weights_path)

## Generate Text

In [21]:
# Pick a random seed window from the corpus.
# np.random.randint excludes its upper bound, so passing len(data_x) makes
# every window, including the last one, eligible.
start = np.random.randint(0, len(data_x))
# Take a copy: `pattern` is modified during generation, and sharing the list
# object with data_x would silently alter the stored window.
pattern = list(data_x[start])
print("Seed: ")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed: 
" 't let him do that.
- okay.
- 'cause that would make it real.
- right.
no. what do you do?
you get a "


In [24]:
# Generate characters
# Each step feeds the current window to the model, emits the most probable
# next character (greedy argmax), then slides the window forward by one.
for _ in range(generate_length):
    x = np.reshape(pattern, (1, len(pattern), 1))  # [samples, time steps, features]
    x = x / float(n_vocab)  # same scaling as applied to X
    prediction = model.predict(x, verbose=0)
    index = int(np.argmax(prediction))  # plain int keeps `pattern` a list of Python ints
    sys.stdout.write(int_to_char[index])
    # Build a new list instead of append-then-slice: `pattern` may still be
    # the very list object stored in data_x, and appending in place would
    # leave that stored window one element too long.
    pattern = pattern[1:] + [index]
sys.stdout.flush()
print("\n--------\nFinished.")

to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet to toeet t