In [None]:
# Reference: https://machinelearningmastery.com/develop-word-based-neural-language-models-python-keras/

In [1]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
	"""Generate up to ``n_words`` words that follow ``seed_text``.

	Each step feeds only the most recent word to the model and appends the
	highest-scoring next word to the result.

	Args:
		model: fitted model whose ``predict`` returns one row of per-class
			scores for each input row.
		tokenizer: fitted tokenizer providing ``texts_to_sequences`` and
			``word_index`` (word -> integer index).
		seed_text: text to start from; only its last in-vocabulary word is
			used as the first model input.
		n_words: maximum number of words to generate.

	Returns:
		``seed_text`` followed by the generated words, separated by single
		spaces. Generation stops early when the current text contains no
		known word or the predicted index maps to no word.
	"""
	# invert the vocabulary once instead of scanning it on every step
	index_to_word = {index: word for word, index in tokenizer.word_index.items()}
	in_text, result = seed_text, seed_text
	# generate a fixed number of words
	for _ in range(n_words):
		# encode the text as integer
		encoded = tokenizer.texts_to_sequences([in_text])[0]
		if len(encoded) == 0:
			# no known word to condition on, so nothing can be predicted
			break
		# condition on the last word only, passed as an explicit (1, 1) batch;
		# passing every token would be treated as several separate samples
		encoded = array([[encoded[-1]]])
		# predict a word in the vocabulary; argmax over predict() is used
		# because Sequential.predict_classes is gone from newer Keras releases
		yhat = int(model.predict(encoded, verbose=0).argmax(axis=-1)[0])
		# map predicted word index to word
		out_word = index_to_word.get(yhat, '')
		if not out_word:
			# predicted an index that has no word attached
			break
		# append to input
		in_text, result = out_word, result + ' ' + out_word
	return result

# source text
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """
# integer encode text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]
# determine the vocabulary size
# (+1 because the tokenizer's word indices start at 1; index 0 is reserved)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# create word -> word sequences: each pair is (current word, next word)
sequences = [encoded[i-1:i+1] for i in range(1, len(encoded))]
print('Total Sequences: %d' % len(sequences))
# split into X and y elements: first column is the input word, second the target
sequences = array(sequences)
X, y = sequences[:,0],sequences[:,1]
# one hot encode outputs
y = to_categorical(y, num_classes=vocab_size)
# define model: one input word -> 10-d embedding -> LSTM -> softmax over the vocabulary
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(X, y, epochs=500, verbose=2)
# evaluate: generate six words starting from 'Jack'
print(generate_seq(model, tokenizer, 'Jack', 6))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Vocabulary Size: 22
Total Sequences: 24
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_1 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
 - 1s - loss: 3.0909 - acc: 0.1250
Epoch 2/500
 - 0s - loss: 3.0902 - acc: 0.2083
Epoch 3/500
 - 0s - loss: 3.0894 - acc: 0.1667
Epoch 4/500
 - 0s - loss: 3.0886 - acc: 0.1667
Epoch 5/500
 - 0s - loss: 3.0878 - acc: 0.1667
Epoch 6/500
 - 0s - loss: 3.0870 - acc: 0.1667
Epoch 7/500
 - 0s - loss: 3.0862 - acc: 0.2083
Epoch 8/500
 - 0s - loss: 3.

Epoch 155/500
 - 0s - loss: 2.1323 - acc: 0.4167
Epoch 156/500
 - 0s - loss: 2.1185 - acc: 0.4167
Epoch 157/500
 - 0s - loss: 2.1047 - acc: 0.4167
Epoch 158/500
 - 0s - loss: 2.0909 - acc: 0.4167
Epoch 159/500
 - 0s - loss: 2.0771 - acc: 0.4167
Epoch 160/500
 - 0s - loss: 2.0633 - acc: 0.4167
Epoch 161/500
 - 0s - loss: 2.0495 - acc: 0.4583
Epoch 162/500
 - 0s - loss: 2.0357 - acc: 0.4583
Epoch 163/500
 - 0s - loss: 2.0220 - acc: 0.4583
Epoch 164/500
 - 0s - loss: 2.0083 - acc: 0.4583
Epoch 165/500
 - 0s - loss: 1.9946 - acc: 0.4583
Epoch 166/500
 - 0s - loss: 1.9808 - acc: 0.4583
Epoch 167/500
 - 0s - loss: 1.9672 - acc: 0.4583
Epoch 168/500
 - 0s - loss: 1.9535 - acc: 0.4583
Epoch 169/500
 - 0s - loss: 1.9399 - acc: 0.4583
Epoch 170/500
 - 0s - loss: 1.9263 - acc: 0.4583
Epoch 171/500
 - 0s - loss: 1.9127 - acc: 0.4583
Epoch 172/500
 - 0s - loss: 1.8992 - acc: 0.4583
Epoch 173/500
 - 0s - loss: 1.8857 - acc: 0.4583
Epoch 174/500
 - 0s - loss: 1.8722 - acc: 0.4583
Epoch 175/500
 - 0s 

Epoch 323/500
 - 0s - loss: 0.4776 - acc: 0.8750
Epoch 324/500
 - 0s - loss: 0.4735 - acc: 0.8750
Epoch 325/500
 - 0s - loss: 0.4695 - acc: 0.8750
Epoch 326/500
 - 0s - loss: 0.4656 - acc: 0.8750
Epoch 327/500
 - 0s - loss: 0.4617 - acc: 0.8750
Epoch 328/500
 - 0s - loss: 0.4578 - acc: 0.8750
Epoch 329/500
 - 0s - loss: 0.4541 - acc: 0.8750
Epoch 330/500
 - 0s - loss: 0.4504 - acc: 0.8750
Epoch 331/500
 - 0s - loss: 0.4467 - acc: 0.8750
Epoch 332/500
 - 0s - loss: 0.4431 - acc: 0.8750
Epoch 333/500
 - 0s - loss: 0.4396 - acc: 0.8750
Epoch 334/500
 - 0s - loss: 0.4361 - acc: 0.8750
Epoch 335/500
 - 0s - loss: 0.4326 - acc: 0.8750
Epoch 336/500
 - 0s - loss: 0.4292 - acc: 0.8750
Epoch 337/500
 - 0s - loss: 0.4259 - acc: 0.8750
Epoch 338/500
 - 0s - loss: 0.4226 - acc: 0.8750
Epoch 339/500
 - 0s - loss: 0.4194 - acc: 0.8750
Epoch 340/500
 - 0s - loss: 0.4163 - acc: 0.8750
Epoch 341/500
 - 0s - loss: 0.4132 - acc: 0.8750
Epoch 342/500
 - 0s - loss: 0.4101 - acc: 0.8750
Epoch 343/500
 - 0s 

 - 0s - loss: 0.2385 - acc: 0.8750
Epoch 491/500
 - 0s - loss: 0.2381 - acc: 0.8750
Epoch 492/500
 - 0s - loss: 0.2378 - acc: 0.8750
Epoch 493/500
 - 0s - loss: 0.2375 - acc: 0.8750
Epoch 494/500
 - 0s - loss: 0.2372 - acc: 0.8750
Epoch 495/500
 - 0s - loss: 0.2368 - acc: 0.8750
Epoch 496/500
 - 0s - loss: 0.2365 - acc: 0.8750
Epoch 497/500
 - 0s - loss: 0.2362 - acc: 0.8750
Epoch 498/500
 - 0s - loss: 0.2359 - acc: 0.8750
Epoch 499/500
 - 0s - loss: 0.2356 - acc: 0.8750
Epoch 500/500
 - 0s - loss: 0.2353 - acc: 0.8750
Jack and jill came tumbling after up


In [2]:
# generate ten words following the seed word with the trained model
print(generate_seq(model, tokenizer, seed_text='Jack', n_words=10))

Jack and jill came tumbling after up the hill to fetch


In [3]:
# generate twenty words following the seed word with the trained model
print(generate_seq(model, tokenizer, seed_text='Jack', n_words=20))

Jack and jill came tumbling after up the hill to fetch a pail of water jack and jill came tumbling after
