In [19]:
import glob
import os
import sys

import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.utils import np_utils

In [5]:
# Character-level corpus loading, following
# https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

filename = './corpus/cmudict/cmudict.dict'

# Read the whole corpus and lower-case it. The context manager closes the
# file handle as soon as the read finishes instead of leaving it to the
# garbage collector.
with open(filename) as corpus_file:
    text = corpus_file.read().lower()

# Vocabulary: every distinct character, mapped to its rank in sorted order.
chars = sorted(set(text))
char_to_int = {c: i for i, c in enumerate(chars)}

n_chars = len(text)
vocab_size = len(chars)

print('Total Characters', n_chars)
print('Vocab Size', vocab_size)


Total Characters 3615903
Vocab Size 39


In [8]:
# prepare dataset of input to output pairs
seq_length = 100

# Encode the corpus exactly once. The sliding windows below overlap almost
# entirely, so looking every character up again per window would repeat the
# same dictionary lookup roughly seq_length times per character.
encoded_text = [char_to_int[char] for char in text]

# dataX[i] holds the codes of text[i:i + seq_length]; dataY[i] is the code of
# the character that immediately follows that window. Each slice is its own
# list, so the windows do not share storage.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
dataY = encoded_text[seq_length:n_chars]

n_patterns = len(dataX)
print('Total Patterns', n_patterns)


Total Patterns 3615803


In [9]:
import numpy as np

# Convert the list of integer windows to an array and lay it out as
# (n_patterns, seq_length, 1): one window per row, one code per step.
X = np.asarray(dataX).reshape(n_patterns, seq_length, 1)

In [10]:
from keras.utils import np_utils

# Scale the integer character codes (0 .. vocab_size - 1) into [0, 1).
# NOTE: X is rebound from its own previous value, so re-running this cell
# without first re-running the reshape cell scales the data a second time.
X = X / float(vocab_size)

# One-hot encode the targets. num_classes is pinned to the vocabulary size so
# the width of y always matches the character table, rather than being
# inferred from the largest code that happens to occur in dataY.
y = np_utils.to_categorical(dataY, num_classes=vocab_size)

In [12]:
# Define LSTM
# One 256-unit LSTM layer, 20% dropout, then a softmax with one output per
# column of y. The input shape is taken from the last two axes of X.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[-1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [14]:
# define the checkpoint
# The file name template embeds the epoch number and the training loss; a
# checkpoint is only written when the monitored loss reaches a new minimum.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for two epochs in batches of 200 windows; the callbacks in
# callbacks_list run during training.
model.fit(
    X,
    y,
    batch_size=200,
    epochs=2,
    callbacks=callbacks_list,
)

Epoch 1/2

Epoch 00001: loss improved from inf to 1.52333, saving model to weights-improvement-01-1.5233.hdf5
Epoch 2/2

In [17]:
# load weights
# Preferred checkpoint. Its name embeds the loss of one particular training
# run, so a fresh run will generally write a differently named file.
load_filepath = 'weights-improvement-02-1.0141.hdf5'
if not os.path.exists(load_filepath):
    # Fall back to the most recently written checkpoint in the working
    # directory that follows the same naming scheme.
    candidates = glob.glob('weights-improvement-*.hdf5')
    if not candidates:
        raise FileNotFoundError(
            "No checkpoint found: expected '%s' or any file matching "
            "'weights-improvement-*.hdf5'" % load_filepath
        )
    load_filepath = max(candidates, key=os.path.getmtime)
model.load_weights(load_filepath)
print("Created model and loaded weights from file")

Created model and loaded weights from file


In [20]:
# Generate text

# Inverse of char_to_int: integer code -> character.
int_to_char = {i: c for i, c in enumerate(chars)}

# Pick a random seed window. np.random.randint excludes its upper bound, so
# passing len(dataX) makes every window, including the last one, eligible
# (and also works when there is exactly one window).
start = np.random.randint(0, len(dataX))
# Copy the window: the loop below extends `pattern`, and doing that on the
# list stored inside dataX would leave that entry one element longer than
# every other window.
pattern = list(dataX[start])
print('Seed:', ''.join([int_to_char[value] for value in pattern]))

# generate characters
for i in range(1000):
    # Shape and scale the current window the same way as the training inputs.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(vocab_size)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always emit the highest-scoring next character.
    index = int(np.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest code and append the new one.
    pattern = pattern[1:] + [index]

print('Done')
    

Seed: ih1 n s k iy2
iwo ay1 w ow0
iwosaki ay2 w ah0 s aa1 k iy0
ixion ih0 k s ay1 ah0 n
iyer ay1 er0
izagu
 eh1 v eh0 s
ivee eh1 v iy0
isel eh1 t ah0 l
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 n
isen ih1 s ah0 