In [15]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import keras
import sys
# https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

In [2]:
filename = './corpus/gandhi/gandhi.txt'

text = open(filename).read().lower()

chars = sorted(list(set(text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

# n_chars = len(text)
n_chars = int(len(text) / 200)
vocab_size = len(chars)

print('Total Characters', n_chars)
print('Vocab Size', vocab_size)

Total Characters 241906
Vocab Size 126


In [3]:
# prepare dataset of input to output pairs
import pickle
from IPython.display import display, clear_output

save = True

if save:
    seq_length = 50
    dataX = []
    dataY = []

    iterations = n_chars - seq_length
    
    print(iterations)
    
    for i in range(0, iterations, 1):
        display_text = 'of ' + str(iterations) + ' {:.4f}% done'.format(float(i) / float(iterations) * 100)
        clear_output(wait=True)
        display(display_text)
        
        seq_in = text[i:i + seq_length]
        seq_out = text[i + seq_length]
    #     seq_out = ' ' if seq_out == ' ' else '*'
        dataX.append([char_to_int[char] for char in seq_in])
        dataY.append(char_to_int[seq_out])

    n_patterns = len(dataX)
    print('Total Patterns', n_patterns)
    pickle.dump((dataX, dataY), open('Xy.dat', 'wb'))
else:
    (dataX, dataY) = pickle.load(open('Xy.dat', 'rb'))


'of 241856 99.7800% done'

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [5]:
import numpy as np
X = np.reshape(dataX, (n_patterns, seq_length, 1))

In [6]:
from keras.utils import np_utils
X = X / float(vocab_size)
y = np_utils.to_categorical(dataY)

In [43]:
# Define LSTM
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

load_weights(model)

Created model and loaded weights from file


In [45]:
# define some callbacks
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
generate = PeakOnGeneration()
tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None)
callbacks_list = [checkpoint, generate]

In [None]:
model.fit(X, y, epochs=20, batch_size=100, callbacks=callbacks_list)

Epoch 1/20

Epoch 00001: loss improved from inf to 2.84651, saving model to weights-improvement-01-2.8465.hdf5
Seed: your knowledge of the vegetarian literature will e
o toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe to
Epoch 2/20

Epoch 00002: loss improved from 2.84651 to 2.78042, saving model to weights-improvement-02-2.7804.hdf5
Seed: worthy brother after
money would be in my returnin
 toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe
Epoch 3/20

Epoch 00003: loss improved from 2.78042 to 2.74283, saving model to weights-improvement-03-2.7428.hdf5
Seed: indeed, some
fashion as a goddess have discarded s
o the toete to the toete to the toete to the toete to the toete to the toete to the to

In [41]:
def load_weights(model, filepath='weights-improvement-10-2.8777.hdf5'):
    model.load_weights(filepath)
    print("Created model and loaded weights from file")

In [37]:
def generate_text(n):
    
    int_to_char = dict((i, c) for i, c in enumerate(chars))

    # Pick a random seed
    start = np.random.randint(0, len(dataX) - 1)
    pattern = dataX[start]
    print('Seed:', ''.join([int_to_char[value] for value in pattern]))
    
    for i in range(n):
        x = np.reshape(pattern, (1, len(pattern), 1))
        x = x / float(vocab_size)
        prediction = model.predict(x, verbose=0)
        index = np.argmax(prediction)
        result = int_to_char[index]
        seq_in = [int_to_char[value] for value in pattern]
        sys.stdout.write(result)
        pattern.append(index)
        pattern = pattern[1:len(pattern)]
    
    print()

class PeakOnGeneration(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        return
 
    def on_train_end(self, logs={}):
        return
 
    def on_epoch_begin(self, epoch, logs={}):
        return
 
    def on_epoch_end(self, epoch, logs={}):
        generate_text(200)
        return
 
    def on_batch_begin(self, batch, logs={}):
        return
 
    def on_batch_end(self, batch, logs={}):
        return