In [35]:
import numpy
import sys
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [2]:
# Load the corpus text and convert it to lowercase.
filename = "AIW.txt"
# Read through a context manager so the file handle is closed as soon as the
# text has been read, instead of staying open until garbage collection.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used -- confirm the file's actual encoding before pinning one.
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [33]:
# Build lookup tables between every distinct character of raw_text and an
# integer id; ids follow the sorted order of the characters.
chars = sorted(set(raw_text))
char_to_int = {symbol: idx for idx, symbol in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [4]:
# Corpus length in characters and number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)
for label, count in (("Total Characters: ", n_chars), ("Total Vocab: ", n_vocab)):
    print(label, count)

Total Characters:  163815
Total Vocab:  60


In [5]:
# Slide a window of seq_length characters over the text one step at a time.
# Each entry of dataX is the window encoded as integer ids; the matching entry
# of dataY is the id of the character that immediately follows that window.
seq_length = 100
window_starts = range(n_chars - seq_length)
dataX = [
    [char_to_int[symbol] for symbol in raw_text[begin:begin + seq_length]]
    for begin in window_starts
]
dataY = [char_to_int[raw_text[begin + seq_length]] for begin in window_starts]

In [7]:
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize the integer ids by the vocabulary size
X = X / float(n_vocab)
# One-hot encode the output variable. The class count is passed explicitly:
# without it to_categorical infers the width from max(dataY) + 1, which would
# come out narrower than the vocabulary if the highest-id character never
# occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

163715

In [None]:
# Single-layer LSTM classifier over the character windows.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='relu'))
model.add(Dropout(0.2))
# Size the softmax from the one-hot targets rather than a hard-coded 60 so the
# output width always matches y.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary is a method: it must be called to print the layer table.
model.summary()

In [30]:
# Train for one epoch over all patterns, 128 patterns per batch.
model.fit(x=X, y=y, batch_size=128, epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x1dc38e29a90>

In [36]:
# Pick a random training window as the seed. randint's upper bound is
# exclusive, so len(dataX) (not len(dataX) - 1) makes every pattern eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so generation never mutates the training row in dataX.
pattern = list(dataX[start])
print( "Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Generate 1000 characters, always taking the most probable next character.
for i in range(1000):
    # Shape and scale the window exactly like the training input.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest id and append the predicted one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" of trials, “there was some attempts
at applause, which was immediately suppressed by the officers of "
 the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th the th

In [44]:
# Two stacked LSTM layers; the first returns the full sequence so the second
# LSTM receives one vector per time step.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dense(y.shape[1], activation='relu'))
model.add(Dropout(0.2))
# Size the softmax from the one-hot targets rather than a hard-coded 60 so the
# output width always matches y.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_20 (LSTM)               (None, 100, 256)          264192    
_________________________________________________________________
dropout_18 (Dropout)         (None, 100, 256)          0         
_________________________________________________________________
lstm_21 (LSTM)               (None, 256)               525312    
_________________________________________________________________
dense_21 (Dense)             (None, 60)                15420     
_________________________________________________________________
dropout_19 (Dropout)         (None, 60)                0         
_________________________________________________________________
dense_22 (Dense)             (None, 60)                3660      
Total params: 808,584
Trainable params: 808,584
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Train for one epoch over all patterns, 128 patterns per batch.
model.fit(x=X, y=y, batch_size=128, epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x1dc1f4476a0>

In [43]:
# Smaller single-layer LSTM trained with a large batch size.
model = Sequential()
model.add(LSTM(128, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
# The output layer must be softmax: categorical_crossentropy is computed
# against a probability distribution over the classes, which an unbounded
# relu output is not.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X, y, epochs=1, batch_size=1000)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_19 (LSTM)               (None, 128)               66560     
_________________________________________________________________
dropout_17 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 60)                7740      
Total params: 74,300
Trainable params: 74,300
Non-trainable params: 0
_________________________________________________________________
Epoch 1/1


<keras.callbacks.History at 0x1dc45e744a8>