# Prep Data and Model

In [1]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using TensorFlow backend.


Load the text (read as UTF-8), remove the Project Gutenberg header and footer, convert it to lowercase, and replace newlines with spaces

In [2]:
filename = "texts/variable_man.txt"
# Collect only the lines that sit between the Project Gutenberg
# "*** START OF" and "*** END OF" marker lines (markers themselves excluded).
body_lines = []
inside_body = False
with open(filename, encoding='utf-8') as f:
    for line in f:
        if "*** START OF" in line:
            inside_body = True
            continue
        if "*** END OF" in line:
            inside_body = False
        if inside_body:
            body_lines.append(line)
# Normalise: lowercase everything and flatten line breaks into spaces.
raw_text = "".join(body_lines).lower().replace("\n", " ")

Create mapping of unique chars to integers, and a reverse mapping

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_text))
# Forward (character -> index) and reverse (index -> character) lookups.
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))
# Summarize the loaded data.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  147302
Total Vocab:  51


Prepare the dataset of input to output pairs encoded as integers

In [4]:
seq_length = 100
# Encode the corpus once so each character is looked up a single time,
# rather than once for every window that contains it.
encoded_text = [char_to_int[char] for char in raw_text]
dataX = []
dataY = []
# Slide a window of seq_length characters across the text one step at a
# time: the window is the input, the character right after it is the target.
for i in range(n_chars - seq_length):
    # Slicing yields a fresh list per pattern, so patterns never share storage.
    dataX.append(encoded_text[i:i + seq_length])
    dataY.append(encoded_text[i + seq_length])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  147202


Reshape X to be [samples, time steps, features], normalize it, and one-hot encode the output variable

In [5]:
# Shape the integer sequences as [samples, time steps, features],
# with a single feature (the character index) per time step.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Scale the character indices by the vocabulary size.
X = X / float(n_vocab)
# One-hot encode the targets. The width is pinned to n_vocab: left to infer
# it, to_categorical would use max(dataY) + 1, which is narrower than the
# vocabulary if the highest-indexed character never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)
print("X shape:", X.shape, "\ny shape:", y.shape)

X shape: (147202, 100, 1) 
y shape: (147202, 51)


Define the LSTM model

In [6]:
# Two stacked 256-unit LSTM layers, each followed by 20% dropout, feeding a
# softmax layer with one output per column of y. The first LSTM returns the
# full sequence so that the second LSTM receives one input per time step.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

# Train model

In [None]:
# Checkpoint callback: write the weights to a file named after the epoch and
# loss, but only when the training loss improves on the best value so far.
filepath = "models/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

Load the previous weights if not starting from scratch

In [None]:
# Toggle: True resumes training from previously saved weights; set it to
# False to train from the freshly initialised model instead.
resume_from_weights = True
if resume_from_weights:
    filename = "models/model_weights.hdf5"
    model.load_weights(filename)

# NOTE(review): training compiles with 'sgd' while the prediction cell
# compiles with 'adam' - confirm that 'sgd' is the intended optimizer here.
model.compile(loss='categorical_crossentropy', optimizer='sgd')

In [None]:
# Train for 23 epochs in mini-batches of 64 samples; the checkpoint callback
# runs during training and saves weights as configured above.
model.fit(
    X,
    y,
    epochs=23,
    batch_size=64,
    callbacks=callbacks_list,
)

# Do prediction

In [7]:
# Restore the saved network weights from disk before generating text.
filename = "models/model_weights.hdf5"
model.load_weights(filepath=filename)
# Compile the model with the same loss as in training.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [8]:
# Pick a random seed pattern. randint's upper bound is exclusive, so passing
# len(dataX) makes every pattern, including the last one, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the window below is extended with predicted characters and
# must not alter the training pattern stored in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"\n\n")
# Generate 1000 characters, one at a time.
for i in range(1000):
    # Shape and scale the current window the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the most probable next character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character, append the new one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" able man!"     ii   thomas cole was sharpening a knife with his whetstone when the tornado hit.  the "


 soldiers had been destroyed. the council members sett of the corridor.  the srb machines were standing and rerardhed siouly. he had been working to his feet. a blanked gad been surning and fell over the screen of the soals had began to sake the council buildings, the shme torldd around him. the sob machines were standing and rertied. he strogg his way to his feet. "what happened the control turret. the machines crensed the council and sherikov said for areath. he gad been working to see the tra machines. seening shat had been the side of the soals had begnwed the council members seatsety the sareen of the seaurity police aroken the shape of the council members seached it certeored and rerarched for areath. he saw the strface corrid hear, said nothing.  "what she war again to to the council to see that is ouer to the war. you can tee the control turret."  "what do you want?"