In [92]:
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.sequence import pad_sequences
import numpy as np


In [5]:
# Nursery-rhyme corpus used to train both language models.
# The adjacent literals below are concatenated into ONE string that contains
# no newline characters, i.e. the whole rhyme is a single line of text.
data = (
    "Jack and Jill went up the hill "
    "To fetch a pail of water "
    "Jack fell down and broke his crown "
    "And Jill came tumbling after. Jack and Jill"
)

In [6]:
# Fit the tokenizer on the corpus, then map the corpus to a list of word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
# texts_to_sequences returns one list per input text; exactly one text is passed.
(encoded,) = tokenizer.texts_to_sequences([data])

In [7]:
# Vocabulary size = number of distinct words + 1: Keras' Tokenizer reserves
# index 0 (it is the padding value used further down), so word indices start at 1.
vocab_size = 1 + len(tokenizer.word_index)
print('Vocabulary Size: %d' % vocab_size)

Vocabulary Size: 22


In [21]:
# Build overlapping [current word, next word] training pairs from the encoded corpus.
sequences = [list(pair) for pair in zip(encoded, encoded[1:])]
print('Total Sequences: %d' % len(sequences))

Total Sequences: 27


In [34]:
# Every training sequence has exactly two columns, so the split is trivial:
# column 0 is the input word (X) and column 1 is the word to predict (y).

sequences = np.asarray(sequences)
X = sequences[:, 0]
y = sequences[:, 1]


In [44]:
# Keras provides the to_categorical() function, which converts integer class
# labels to a one-hot encoding; the number of classes is the vocabulary size.
# NOTE: y is overwritten with its own encoding, so re-running this cell on its
# own would feed the already one-hot encoded array back into to_categorical().

# one hot encode outputs
y = to_categorical(y, num_classes=vocab_size)

The model uses a learned word embedding in the input layer. This has one real-valued vector for each word in the vocabulary, where each word vector has a specified length. In this case we will use a 10-dimensional projection. The input sequence contains a single word, therefore the input_length=1.

The model has a single hidden LSTM layer with 50 units. This is far more than is needed. The output layer is comprised of one neuron for each word in the vocabulary and uses a softmax activation function to ensure the output is normalized to look like a probability.

# Model 1: One-Word-In, One-Word-Out Sequences

In [55]:

# One-word-in, one-word-out model: embedding -> LSTM -> softmax over the vocabulary.
model = Sequential()
# 10-dimensional learned vector per word; each sample is a single word (input_length=1)
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(LSTM(50))
# one output neuron per vocabulary entry, normalised to probabilities by softmax
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the cell output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_2 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_1 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None


Next, we can compile and fit the network on the encoded text data. Technically, we are modeling a multi-class classification problem (predict the word in the vocabulary), therefore using the categorical cross entropy loss function. We use the efficient Adam implementation of gradient descent and track accuracy at the end of each epoch. The model is fit for 500 training epochs, again, perhaps more than is needed.

The network configuration was not tuned for this and later experiments; an over-prescribed configuration was chosen to ensure that we could focus on the framing of the language model.

In [56]:
# Configure training: categorical cross entropy loss, Adam optimizer,
# accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train on the (input word, next word) pairs for 500 epochs, logging each epoch.
model.fit(
    X,
    y,
    epochs=500,
    verbose=2,
)

W0717 12:46:43.731997 140735587824512 deprecation_wrapper.py:119] From /Users/data/Documents/Envs/env_smart_desktop/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0717 12:46:43.825818 140735587824512 deprecation_wrapper.py:119] From /Users/data/Documents/Envs/env_smart_desktop/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

W0717 12:46:44.029007 140735587824512 deprecation.py:323] From /Users/data/Documents/Envs/env_smart_desktop/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0717 12:46:45.147596 140735587824512 deprecation_wrapper.py:119] From /Users/data/Do

Epoch 1/500
 - 1s - loss: 3.0910 - acc: 0.1481
Epoch 2/500
 - 0s - loss: 3.0900 - acc: 0.1111
Epoch 3/500
 - 0s - loss: 3.0890 - acc: 0.1852
Epoch 4/500
 - 0s - loss: 3.0880 - acc: 0.1852
Epoch 5/500
 - 0s - loss: 3.0870 - acc: 0.1852
Epoch 6/500
 - 0s - loss: 3.0860 - acc: 0.1852
Epoch 7/500
 - 0s - loss: 3.0850 - acc: 0.2593
Epoch 8/500
 - 0s - loss: 3.0840 - acc: 0.2593
Epoch 9/500
 - 0s - loss: 3.0830 - acc: 0.2593
Epoch 10/500
 - 0s - loss: 3.0819 - acc: 0.2593
Epoch 11/500
 - 0s - loss: 3.0809 - acc: 0.2593
Epoch 12/500
 - 0s - loss: 3.0798 - acc: 0.2593
Epoch 13/500
 - 0s - loss: 3.0787 - acc: 0.2593
Epoch 14/500
 - 0s - loss: 3.0776 - acc: 0.2593
Epoch 15/500
 - 0s - loss: 3.0765 - acc: 0.2593
Epoch 16/500
 - 0s - loss: 3.0754 - acc: 0.2593
Epoch 17/500
 - 0s - loss: 3.0742 - acc: 0.2593
Epoch 18/500
 - 0s - loss: 3.0730 - acc: 0.2593
Epoch 19/500
 - 0s - loss: 3.0718 - acc: 0.2593
Epoch 20/500
 - 0s - loss: 3.0706 - acc: 0.2593
Epoch 21/500
 - 0s - loss: 3.0694 - acc: 0.2593
E

Epoch 171/500
 - 0s - loss: 1.8712 - acc: 0.4444
Epoch 172/500
 - 0s - loss: 1.8585 - acc: 0.4815
Epoch 173/500
 - 0s - loss: 1.8458 - acc: 0.4815
Epoch 174/500
 - 0s - loss: 1.8331 - acc: 0.4815
Epoch 175/500
 - 0s - loss: 1.8204 - acc: 0.4815
Epoch 176/500
 - 0s - loss: 1.8078 - acc: 0.4815
Epoch 177/500
 - 0s - loss: 1.7952 - acc: 0.4815
Epoch 178/500
 - 0s - loss: 1.7826 - acc: 0.4815
Epoch 179/500
 - 0s - loss: 1.7701 - acc: 0.4815
Epoch 180/500
 - 0s - loss: 1.7576 - acc: 0.4815
Epoch 181/500
 - 0s - loss: 1.7452 - acc: 0.4815
Epoch 182/500
 - 0s - loss: 1.7328 - acc: 0.4815
Epoch 183/500
 - 0s - loss: 1.7204 - acc: 0.4815
Epoch 184/500
 - 0s - loss: 1.7080 - acc: 0.5185
Epoch 185/500
 - 0s - loss: 1.6957 - acc: 0.5185
Epoch 186/500
 - 0s - loss: 1.6834 - acc: 0.5556
Epoch 187/500
 - 0s - loss: 1.6712 - acc: 0.5556
Epoch 188/500
 - 0s - loss: 1.6589 - acc: 0.5926
Epoch 189/500
 - 0s - loss: 1.6467 - acc: 0.5926
Epoch 190/500
 - 0s - loss: 1.6346 - acc: 0.5926
Epoch 191/500
 - 0s 

 - 0s - loss: 0.4614 - acc: 0.8889
Epoch 339/500
 - 0s - loss: 0.4580 - acc: 0.8889
Epoch 340/500
 - 0s - loss: 0.4546 - acc: 0.8889
Epoch 341/500
 - 0s - loss: 0.4514 - acc: 0.8889
Epoch 342/500
 - 0s - loss: 0.4481 - acc: 0.8889
Epoch 343/500
 - 0s - loss: 0.4449 - acc: 0.8889
Epoch 344/500
 - 0s - loss: 0.4418 - acc: 0.8889
Epoch 345/500
 - 0s - loss: 0.4387 - acc: 0.8889
Epoch 346/500
 - 0s - loss: 0.4356 - acc: 0.8889
Epoch 347/500
 - 0s - loss: 0.4326 - acc: 0.8889
Epoch 348/500
 - 0s - loss: 0.4296 - acc: 0.8889
Epoch 349/500
 - 0s - loss: 0.4267 - acc: 0.8889
Epoch 350/500
 - 0s - loss: 0.4238 - acc: 0.8889
Epoch 351/500
 - 0s - loss: 0.4210 - acc: 0.8889
Epoch 352/500
 - 0s - loss: 0.4182 - acc: 0.8889
Epoch 353/500
 - 0s - loss: 0.4154 - acc: 0.8889
Epoch 354/500
 - 0s - loss: 0.4127 - acc: 0.8889
Epoch 355/500
 - 0s - loss: 0.4100 - acc: 0.8889
Epoch 356/500
 - 0s - loss: 0.4074 - acc: 0.8889
Epoch 357/500
 - 0s - loss: 0.4048 - acc: 0.8889
Epoch 358/500
 - 0s - loss: 0.4022

<keras.callbacks.History at 0x12c918860>

# Evaluate

In [63]:
# Evaluate: predict the word that follows a single seed word.
in_text = 'Jack'
print(in_text)
# Use a dedicated name: assigning to `encoded` here would overwrite the encoded
# corpus that the sequence-building cell reads, so re-running that cell after
# this one would silently build sequences from the seed word instead.
seed_encoded = tokenizer.texts_to_sequences([in_text])[0]
# one row per sample, one word (timestep) per row
seed_encoded = np.asarray(seed_encoded).reshape(-1, 1)

# argmax over the softmax output is the predicted word index; this replaces
# Sequential.predict_classes(), which newer Keras/TensorFlow releases removed.
yhat = np.argmax(model.predict(seed_encoded, verbose=0), axis=-1)

# Reverse lookup (index -> word) instead of scanning word_index for a match.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}
predicted_word = index_to_word.get(int(yhat[0]))
if predicted_word is not None:
    print(predicted_word)

Jack
and


In [69]:
# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
	"""Generate text with the one-word-in, one-word-out model.

	Starting from the last known word of ``seed_text``, repeatedly asks the
	model for the most likely next word and feeds that word back in.

	Args:
		model: object whose ``predict(x, verbose=0)`` returns one row of
			per-word scores for each input row.
		tokenizer: object providing ``texts_to_sequences`` and ``word_index``.
		seed_text: text to start from; only its last word that the tokenizer
			knows is used as model input, since the model takes one word.
		n_words: maximum number of words to append.

	Returns:
		``seed_text`` followed by the generated words, separated by single
		spaces. Generation stops early when the current text contains no
		known word or when the predicted index maps to no word.
	"""
	# build the index -> word lookup once instead of scanning word_index per word
	index_to_word = {index: word for word, index in tokenizer.word_index.items()}
	in_text, result = seed_text, seed_text
	# generate a fixed number of words
	for _ in range(n_words):
		# encode the text as integer
		encoded = tokenizer.texts_to_sequences([in_text])[0]
		if not encoded:
			# nothing the model can condition on (unknown or empty text)
			break
		# the model consumes exactly one word per sample: keep only the last
		# one, shaped as (1 sample, 1 timestep)
		encoded = np.asarray([[encoded[-1]]])
		# argmax over the predicted scores is the predicted word index
		# (equivalent to the predict_classes() helper removed from newer Keras)
		scores = model.predict(encoded, verbose=0)
		yhat = int(np.argmax(scores, axis=-1)[0])
		# map predicted word index to word
		out_word = index_to_word.get(yhat)
		if out_word is None:
			# e.g. index 0, which is not assigned to any word
			break
		# append to input
		in_text, result = out_word, result + ' ' + out_word
	return result

In [72]:
# Generate the six words the one-word model predicts after the seed 'Jack'.
generate_seq(model, tokenizer, 'Jack', 6)

'Jack and jill went up the hill'

# Model 2: Line-by-Line Sequence

In [74]:
# Another approach is to split up the source text line-by-line, then break each line down into a series of words 
# that build up.

First, we can create the sequences of integers, line-by-line by using the Tokenizer already fit on the source text.

In [90]:
# Create line-based sequences: every prefix of a line with at least two words
# (bigram, trigram, fourgram, etc.).
# NOTE: `data` is defined without any newline character, so the split below
# yields a single "line" holding the whole rhyme.
sequences = []
for line in data.split('\n'):
    encoded = tokenizer.texts_to_sequences([line])[0]
    sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))
print('Total Sequences: %d' % len(sequences))


Total Sequences: 27


In [89]:
# Inspect the growing word-index prefixes built in the previous cell.
sequences

[[2, 1],
 [2, 1, 3],
 [2, 1, 3, 4],
 [2, 1, 3, 4, 5],
 [2, 1, 3, 4, 5, 6],
 [2, 1, 3, 4, 5, 6, 7],
 [2, 1, 3, 4, 5, 6, 7, 8],
 [2, 1, 3, 4, 5, 6, 7, 8, 9],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15, 1],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15, 1, 16],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15, 1, 16, 17],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15, 1, 16, 17, 18],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15, 1, 16, 17, 18, 1],
 [2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 14, 15, 1, 16, 17, 18, 1, 3],
 [2,
  1,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  2,
  14,
  15,
  1,
  16,
  17,
  18,

In [104]:
# Left-pad every sequence with zeros up to the length of the longest one.
max_length = max(map(len, sequences))
sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')
print('Max Sequence Length: %d' % max_length)


Max Sequence Length: 28


In [94]:
# pad_sequences fills the vector representation of each sentence with 0s.
# Number of zeros added = (length of the longest sentence) - (length of this sentence)

sequences

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  1],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  1,  3],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  2,  1,  3,  4],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  2,  1,  3,  4,  5],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  2,  1,  3,  4,  5,  6],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  2,  1,  3,  4,  5,  6,  7],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  2,  1,  3,  4,  5,  6,  7,  8],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  2,  

In [96]:
# The last column of each padded row is the word to predict; everything
# before it is the model input.
sequences = np.asarray(sequences)
X = sequences[:, :-1]
# One-hot encode the target word indices over the whole vocabulary.
y = to_categorical(sequences[:, -1], num_classes=vocab_size)

In [98]:
# Inspect the padded model inputs (every column of `sequences` except the last).
X

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  1],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  2,  1,  3],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  2,  1,  3,  4],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  2,  1,  3,  4,  5],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  2,  1,  3,  4,  5,  6],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  2,  1,  3,  4,  5,  6,  7],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  2,  1,  3,  4,  5,  6,  7,  8],


In [97]:
# Inspect the one-hot encoded targets (one row per sequence, vocab_size columns).
y

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,

In [99]:
# define model: embedding -> LSTM -> softmax over the vocabulary
model = Sequential()
# every sample is a zero-padded prefix of max_length-1 word indices
model.add(Embedding(vocab_size, 10, input_length=max_length-1))
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the cell output.
model.summary()
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(X, y, epochs=500, verbose=2)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 27, 10)            220       
_________________________________________________________________
lstm_3 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_2 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
 - 1s - loss: 3.0915 - acc: 0.0000e+00
Epoch 2/500
 - 0s - loss: 3.0896 - acc: 0.0370
Epoch 3/500
 - 0s - loss: 3.0878 - acc: 0.1481
Epoch 4/500
 - 0s - loss: 3.0861 - acc: 0.1481
Epoch 5/500
 - 0s - loss: 3.0842 - acc: 0.1481
Epoch 6/500
 - 0s - loss: 3.0823 - acc: 0.1481
Epoch 7/500
 - 0s - loss: 3.0804 - acc: 0.1481
Epoch 8/500
 - 0s - loss: 3.0783 - acc: 0.1481
Epoch 9/500
 - 0s

Epoch 156/500
 - 0s - loss: 1.5814 - acc: 0.4074
Epoch 157/500
 - 0s - loss: 1.5518 - acc: 0.5926
Epoch 158/500
 - 0s - loss: 1.5354 - acc: 0.5556
Epoch 159/500
 - 0s - loss: 1.5226 - acc: 0.5926
Epoch 160/500
 - 0s - loss: 1.5142 - acc: 0.5926
Epoch 161/500
 - 0s - loss: 1.5174 - acc: 0.5926
Epoch 162/500
 - 0s - loss: 1.5316 - acc: 0.5185
Epoch 163/500
 - 0s - loss: 1.6323 - acc: 0.4074
Epoch 164/500
 - 0s - loss: 1.5758 - acc: 0.4444
Epoch 165/500
 - 0s - loss: 1.5039 - acc: 0.6296
Epoch 166/500
 - 0s - loss: 1.5594 - acc: 0.4074
Epoch 167/500
 - 0s - loss: 1.4953 - acc: 0.6296
Epoch 168/500
 - 0s - loss: 1.5431 - acc: 0.4444
Epoch 169/500
 - 0s - loss: 1.5210 - acc: 0.4444
Epoch 170/500
 - 0s - loss: 1.4815 - acc: 0.5556
Epoch 171/500
 - 0s - loss: 1.4693 - acc: 0.5926
Epoch 172/500
 - 0s - loss: 1.4785 - acc: 0.5556
Epoch 173/500
 - 0s - loss: 1.4762 - acc: 0.5556
Epoch 174/500
 - 0s - loss: 1.4598 - acc: 0.5926
Epoch 175/500
 - 0s - loss: 1.4342 - acc: 0.5926
Epoch 176/500
 - 0s 

 - 0s - loss: 0.6349 - acc: 1.0000
Epoch 324/500
 - 0s - loss: 0.6315 - acc: 1.0000
Epoch 325/500
 - 0s - loss: 0.6282 - acc: 1.0000
Epoch 326/500
 - 0s - loss: 0.6248 - acc: 1.0000
Epoch 327/500
 - 0s - loss: 0.6215 - acc: 1.0000
Epoch 328/500
 - 0s - loss: 0.6182 - acc: 1.0000
Epoch 329/500
 - 0s - loss: 0.6149 - acc: 1.0000
Epoch 330/500
 - 0s - loss: 0.6116 - acc: 1.0000
Epoch 331/500
 - 0s - loss: 0.6084 - acc: 1.0000
Epoch 332/500
 - 0s - loss: 0.6051 - acc: 1.0000
Epoch 333/500
 - 0s - loss: 0.6019 - acc: 1.0000
Epoch 334/500
 - 0s - loss: 0.5987 - acc: 1.0000
Epoch 335/500
 - 0s - loss: 0.5955 - acc: 1.0000
Epoch 336/500
 - 0s - loss: 0.5923 - acc: 1.0000
Epoch 337/500
 - 0s - loss: 0.5891 - acc: 1.0000
Epoch 338/500
 - 0s - loss: 0.5860 - acc: 1.0000
Epoch 339/500
 - 0s - loss: 0.5828 - acc: 1.0000
Epoch 340/500
 - 0s - loss: 0.5797 - acc: 1.0000
Epoch 341/500
 - 0s - loss: 0.5766 - acc: 1.0000
Epoch 342/500
 - 0s - loss: 0.5734 - acc: 1.0000
Epoch 343/500
 - 0s - loss: 0.5703

Epoch 491/500
 - 0s - loss: 0.2652 - acc: 1.0000
Epoch 492/500
 - 0s - loss: 0.2637 - acc: 1.0000
Epoch 493/500
 - 0s - loss: 0.2623 - acc: 1.0000
Epoch 494/500
 - 0s - loss: 0.2610 - acc: 1.0000
Epoch 495/500
 - 0s - loss: 0.2597 - acc: 1.0000
Epoch 496/500
 - 0s - loss: 0.2583 - acc: 1.0000
Epoch 497/500
 - 0s - loss: 0.2571 - acc: 1.0000
Epoch 498/500
 - 0s - loss: 0.2559 - acc: 1.0000
Epoch 499/500
 - 0s - loss: 0.2544 - acc: 1.0000
Epoch 500/500
 - 0s - loss: 0.2533 - acc: 1.0000


<keras.callbacks.History at 0x12dda2198>

In [105]:
# generate a sequence from a language model
def generate_seq(model, tokenizer, max_length, seed_text, n_words):
	"""Generate text with the prefix-based (padded sequence) language model.

	The whole text generated so far is encoded, pre-padded to ``max_length``
	and fed to the model; the most likely next word is appended and the
	process repeats.

	Args:
		model: object whose ``predict(x, verbose=0)`` returns one row of
			per-word scores for each input row.
		tokenizer: object providing ``texts_to_sequences`` and ``word_index``.
		max_length: length passed to ``pad_sequences`` as ``maxlen``; it must
			equal the input length the model was built with.
		seed_text: text to start from.
		n_words: maximum number of words to append.

	Returns:
		``seed_text`` followed by the generated words, separated by single
		spaces. Generation stops early when the predicted index maps to no
		word, because the input would no longer change after that.
	"""
	# build the index -> word lookup once instead of scanning word_index per word
	index_to_word = {index: word for word, index in tokenizer.word_index.items()}
	in_text = seed_text
	# generate a fixed number of words
	for _ in range(n_words):
		# encode the text as integer
		encoded = tokenizer.texts_to_sequences([in_text])[0]
		# pre-pad sequences to a fixed length
		encoded = pad_sequences([encoded], maxlen=max_length, padding='pre')
		# argmax over the predicted scores is the predicted word index
		# (equivalent to the predict_classes() helper removed from newer Keras)
		scores = model.predict(encoded, verbose=0)
		yhat = int(np.argmax(scores, axis=-1)[0])
		# map predicted word index to word
		out_word = index_to_word.get(yhat)
		if out_word is None:
			# e.g. index 0 (the padding value), which is not assigned to any word
			break
		# append to input
		in_text += ' ' + out_word
	return in_text

In [110]:
# max_length-1 is the model's input length: the Embedding layer was built with
# input_length=max_length-1 because the last column of each row is the target.
generate_seq(model, tokenizer, max_length-1, 'Jack', 15 )

'Jack and jill went up the hill to fetch a pail of water jack fell down'

In [111]:
# limitations: how do we know that the sentence has finished?