In [1]:
# https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import sys

Using TensorFlow backend.


In [2]:
# Read the whole corpus and lowercase it. The context manager closes the file
# handle as soon as the read finishes (or fails) instead of leaving it open.
with open('Parser_Output.txt') as corpus_file:
    raw_text = corpus_file.read().lower()

In [3]:
# Vocabulary: every distinct character in the corpus, in sorted order, with a
# lookup table from each character to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}

In [4]:
# Corpus length in characters and number of distinct characters (vocabulary size).
n_chars, n_vocab = len(raw_text), len(chars)
print(n_chars, n_vocab)

87401 51


In [5]:
# Build the training set by sliding a fixed-width window over the text one
# character at a time: each window of seq_length characters (100, an arbitrary
# choice) is an input, and the character right after the window is its target.

seq_length = 100
window_starts = range(n_chars - seq_length)
# Inputs: the integer codes of the characters inside each window.
dataX = [
    [char_to_int[ch] for ch in raw_text[begin:begin + seq_length]]
    for begin in window_starts
]
# Targets: the integer code of the character that follows each window.
dataY = [char_to_int[raw_text[begin + seq_length]] for begin in window_starts]
n_patterns = len(dataX)
print(n_patterns)

87301


In [6]:
# Prepare the arrays the network consumes:
# 1. reshape the list of input sequences to [samples, time steps, features]
# 2. rescale the integer codes by the vocabulary size so they fall in [0, 1)
# 3. one-hot encode the targets so each character gets its own probability slot

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# One-hot encode the output variable. The class count is pinned to the
# vocabulary size: left unspecified, to_categorical sizes the encoding from the
# largest label present in dataY, which would be too narrow if the
# highest-indexed character never occurs as a target (e.g. it only appears
# within the first seq_length characters of the text).
y = np_utils.to_categorical(dataY, num_classes=n_vocab)


In [7]:
# Network: two stacked 256-unit LSTM layers, each followed by 20% dropout,
# feeding a softmax layer with one output per one-hot target column.
time_steps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(256, input_shape=(time_steps, n_features), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [8]:
# Checkpointing: write the weights to disk whenever the training loss reaches
# a new minimum; the epoch number and loss value are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [9]:
# Train for 50 epochs in batches of 100 patterns, with the checkpoint callback
# attached so improving weights are saved along the way.
model.fit(
    X,
    y,
    batch_size=100,
    epochs=50,
    callbacks=callbacks_list,
)

Epoch 1/50
 4600/87301 [>.............................] - ETA: 21:00 - loss: 3.1495

KeyboardInterrupt: 

In [10]:
# generate the text
# the exciting part!

In [36]:
# Restore weights from a saved checkpoint file (its name follows the
# "weights-improvement-<epoch>-<loss>.hdf5" pattern used when checkpointing),
# then compile the model again with the same loss and optimizer.
filename = "weights-improvement-20-1.1968.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [37]:
# Inverse lookup (integer code -> character) so predictions can be printed as text.
int_to_char = dict(enumerate(chars))

In [41]:
# Pick a random seed sequence. randint's upper bound is exclusive, so passing
# len(dataX) keeps every pattern, including the last one, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy of the seed: the loop appends to `pattern`, and aliasing
# dataX[start] would leave that training row one element too long.
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(500):
    # Shape the current window as a single sample and scale it the same way
    # the training inputs were scaled.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the most probable next character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: add the predicted character, drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print ("\nDone.")

Seed:
" would be a good teacher.

and so, our treatise will comprise all of our knowledge, as a book

decora "
ene on tee 
andaav on him thr walh the tooer nf moe and th in toil iote aitadt ano tithits.



    	
  
io pu thioty uo the mrher  to call ti lee liru.



    	
  
i aa the surrd guard on hhr hamd  you hl gete an andadt.

i am the suart of byor daoge  and you'll be lllt weuh the wword 



   
      


   
 



  

        

        


                                
         
                                                                                                                        
Done.
