In [1]:
# Small LSTM RNN

In [3]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [4]:
filename = "wonderland.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()

In [5]:
chars = sorted(list(set(raw_text)))
char_to_int = dict((c,i) for i,c in enumerate(chars))

In [6]:
chars

['\n',
 ' ',
 '!',
 '"',
 "'",
 '(',
 ')',
 '*',
 ',',
 '-',
 '.',
 '0',
 '3',
 ':',
 ';',
 '?',
 '[',
 ']',
 '_',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [7]:
n_chars = len(raw_text)
n_vocab = len(chars)

print("Total Characters: ", n_chars)
print("Total Vocab: ",n_vocab)

Total Characters:  144409
Total Vocab:  45


In [8]:
seq_length = 100
dataX = []
dataY = []
for i in range(0,n_chars-seq_length,1):
    seq_in = raw_text[i:i+seq_length]
    seq_out = raw_text[i+seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])

In [9]:
n_patterns = len(dataX)

In [10]:
print("Total Patterns: ", n_patterns)

Total Patterns:  144309


In [11]:
X = numpy.reshape(dataX, (n_patterns,seq_length,1))
X = X/float(n_vocab)
y = np_utils.to_categorical(dataY)

In [13]:
model = Sequential()
model.add(LSTM(256,input_shape = (X.shape[1],X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1],activation="softmax"))
model.compile(loss="categorical_crossentropy",optimizer="adam")

In [16]:
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath,monitor = "loss",verbose=1,save_best_only = True,mode = "min")
callbacks_list = [checkpoint]

In [18]:
model.fit(X,y,epochs=1,batch_size=128,callbacks=callbacks_list)

Epoch 1/1

Epoch 00001: loss improved from 2.66031 to 2.55607, saving model to weights-improvement-01-2.5561.hdf5


<keras.callbacks.callbacks.History at 0x7f56c1e576d8>

In [19]:
filename = "weights-improvement-03-2.6603.hdf5"
model.load_weights(filename)
model.compile(loss = "categorical_crossentropy",optimizer = "adam")

In [20]:
int_to_char = dict((i,c) for i,c in enumerate(chars))

In [21]:
start = numpy.random.randint(0,len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"","".join([int_to_char[value] for value in pattern]),"\"")

Seed:
" been to the seaside once in
her life, and had come to the general conclusion, that wherever you go
t "


In [23]:
import sys
for i in range(1000):
    x = numpy.reshape(pattern,(1,len(pattern),1))
    x = x/float(n_vocab)
    prediction = model.predict(x,verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
print("\nDone")

he toet to the toet to the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the toet the t

In [24]:
# Larger LSTM NN

In [25]:
model = Sequential()
model.add(LSTM(256,input_shape = (X.shape[1],X.shape[2]),return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1],activation="softmax"))
model.compile(loss="categorical_crossentropy",optimizer="adam")

In [26]:
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"

In [27]:
checkpoint = ModelCheckpoint(filepath,monitor = "loss",verbose=1,save_best_only=True,mode = "min")

In [28]:
callbacks_list =[checkpoint]

In [29]:
model.fit(X,y,epochs=1,batch_size=64,callbacks=callbacks_list)

Epoch 1/1

Epoch 00001: loss improved from inf to 2.78733, saving model to weights-improvement-01-2.7873-bigger.hdf5


<keras.callbacks.callbacks.History at 0x7f56c87ad278>

In [32]:
filename = "weights-improvement-01-2.7873-bigger.hdf5"
model.load_weights(filename)
model.compile(loss = "categorical_crossentropy",optimizer="adam")

In [33]:
start = numpy.random.randint(0,len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"",''.join([int_to_char[value] for value in pattern]),"\"")

Seed:
" tes. alice thought to
herself, 'i don't see how he can even finish, if he doesn't begin.' but
she wa "


In [35]:
for i in range(1000):
    x = numpy.reshape(pattern,(1,len(pattern),1))
    x = x/float(n_vocab)
    prediction = model.predict(x,verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
print("\nDone.")

r a sooe the mooee the tooe the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the mooee the toee the