In [0]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

In [0]:
def generate_seq(model, tokenizer, seed_text, n_words):
    """Generate text by repeatedly predicting the next word from the previous one.

    Args:
        model: Trained model whose ``predict`` accepts an integer array of
            shape ``(1, 1)`` (one sample, one time step) and returns one row
            of scores per sample, indexed by vocabulary id.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences`` and
            ``word_index``.
        seed_text: Text to start from. Only its last in-vocabulary word is
            used as context, so multi-word seeds are accepted.
        n_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early when the current text contains no
        in-vocabulary word or the predicted index maps to no word.
    """
    # Invert the vocabulary once instead of scanning it on every step.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    in_text, result = seed_text, seed_text
    # generate at most a fixed number of words
    for _ in range(n_words):
        # encode the text as integers
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        if not encoded:
            # nothing the model knows about: stop rather than predict on empty input
            break
        # the model sees one word at a time: batch of one sample, one time step
        model_input = array([encoded[-1:]])
        # predict() + argmax replaces Sequential.predict_classes, which newer
        # Keras releases no longer provide
        scores = array(model.predict(model_input, verbose=0))
        yhat = int(scores.argmax(axis=-1)[0])
        # map predicted word index to word
        out_word = index_to_word.get(yhat)
        if out_word is None:
            # predicted an index with no word attached (e.g. 0)
            break
        # the predicted word becomes the next input
        in_text, result = out_word, result + ' ' + out_word
    return result


In [0]:
# Source text: the four-line nursery rhyme used as the entire training corpus.
# The literal mixes "\n" escapes with real line breaks and leading spaces; it is
# passed to the tokenizer as one single document in a later cell.
data = """ Jack and Jill went up the hill\n
        To fetch a pail of water\n
        Jack fell down and broke his crown\n
        And Jill came tumbling after\n """

In [0]:
# Build the word -> integer vocabulary from the corpus, then encode the corpus
# itself. A single document goes in, so exactly one sequence comes back.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
(encoded,) = tokenizer.texts_to_sequences([data])

In [0]:
# Vocabulary size: one slot per known word, plus one because word indices
# start at 1 and index 0 is not assigned to any word.
vocab_size = 1 + len(tokenizer.word_index)
print('Vocabulary Size: %d' % vocab_size)

Vocabulary Size: 22


In [0]:
# Pair every word with the word that follows it: [input word, target word].
sequences = [encoded[i - 1:i + 1] for i in range(1, len(encoded))]
print('Total Sequences: %d' % len(sequences))


Total Sequences: 24


In [0]:
# Show the [input word, target word] index pairs built in the previous cell.
print(sequences)

[[2, 1], [1, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12], [12, 13], [13, 2], [2, 14], [14, 15], [15, 1], [1, 16], [16, 17], [17, 18], [18, 1], [1, 3], [3, 19], [19, 20], [20, 21]]


In [0]:
# Turn the pairs into a 2-column array: column 0 is the input word index,
# column 1 is the target word index.
sequences = array(sequences)
X = sequences[:, 0]
# Targets are one-hot encoded over the whole vocabulary for the softmax output.
y = to_categorical(sequences[:, 1], num_classes=vocab_size)


In [0]:
# Define the model: a 10-dimensional embedding of a single input word, a
# 50-unit LSTM, and a softmax over the vocabulary for the next word.
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" to the output.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_3 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_3 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None


In [0]:
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [0]:
# fit network
model.fit(X, y, epochs=500, verbose=2)


Epoch 1/500
 - 2s - loss: 3.0907 - acc: 0.2083
Epoch 2/500
 - 0s - loss: 3.0898 - acc: 0.1667
Epoch 3/500
 - 0s - loss: 3.0890 - acc: 0.1250
Epoch 4/500
 - 0s - loss: 3.0882 - acc: 0.1250
Epoch 5/500
 - 0s - loss: 3.0874 - acc: 0.1250
Epoch 6/500
 - 0s - loss: 3.0865 - acc: 0.1250
Epoch 7/500
 - 0s - loss: 3.0857 - acc: 0.1250
Epoch 8/500
 - 0s - loss: 3.0848 - acc: 0.1250
Epoch 9/500
 - 0s - loss: 3.0840 - acc: 0.1250
Epoch 10/500
 - 0s - loss: 3.0831 - acc: 0.1250
Epoch 11/500
 - 0s - loss: 3.0822 - acc: 0.1250
Epoch 12/500
 - 0s - loss: 3.0813 - acc: 0.1250
Epoch 13/500
 - 0s - loss: 3.0804 - acc: 0.1250
Epoch 14/500
 - 0s - loss: 3.0795 - acc: 0.1250
Epoch 15/500
 - 0s - loss: 3.0786 - acc: 0.1250
Epoch 16/500
 - 0s - loss: 3.0776 - acc: 0.1250
Epoch 17/500
 - 0s - loss: 3.0766 - acc: 0.1250
Epoch 18/500
 - 0s - loss: 3.0756 - acc: 0.1250
Epoch 19/500
 - 0s - loss: 3.0746 - acc: 0.1250
Epoch 20/500
 - 0s - loss: 3.0736 - acc: 0.1250
Epoch 21/500
 - 0s - loss: 3.0725 - acc: 0.1250
E

Epoch 171/500
 - 0s - loss: 1.9078 - acc: 0.7500
Epoch 172/500
 - 0s - loss: 1.8935 - acc: 0.7500
Epoch 173/500
 - 0s - loss: 1.8792 - acc: 0.7917
Epoch 174/500
 - 0s - loss: 1.8649 - acc: 0.8750
Epoch 175/500
 - 0s - loss: 1.8506 - acc: 0.8750
Epoch 176/500
 - 0s - loss: 1.8364 - acc: 0.8750
Epoch 177/500
 - 0s - loss: 1.8221 - acc: 0.8750
Epoch 178/500
 - 0s - loss: 1.8078 - acc: 0.8750
Epoch 179/500
 - 0s - loss: 1.7936 - acc: 0.8750
Epoch 180/500
 - 0s - loss: 1.7794 - acc: 0.8750
Epoch 181/500
 - 0s - loss: 1.7652 - acc: 0.8750
Epoch 182/500
 - 0s - loss: 1.7510 - acc: 0.8750
Epoch 183/500
 - 0s - loss: 1.7369 - acc: 0.8750
Epoch 184/500
 - 0s - loss: 1.7228 - acc: 0.8750
Epoch 185/500
 - 0s - loss: 1.7087 - acc: 0.8750
Epoch 186/500
 - 0s - loss: 1.6947 - acc: 0.8750
Epoch 187/500
 - 0s - loss: 1.6806 - acc: 0.8750
Epoch 188/500
 - 0s - loss: 1.6667 - acc: 0.8750
Epoch 189/500
 - 0s - loss: 1.6527 - acc: 0.8750
Epoch 190/500
 - 0s - loss: 1.6388 - acc: 0.8750
Epoch 191/500
 - 0s 

Epoch 339/500
 - 0s - loss: 0.3956 - acc: 0.8750
Epoch 340/500
 - 0s - loss: 0.3927 - acc: 0.8750
Epoch 341/500
 - 0s - loss: 0.3899 - acc: 0.8750
Epoch 342/500
 - 0s - loss: 0.3872 - acc: 0.8750
Epoch 343/500
 - 0s - loss: 0.3844 - acc: 0.8750
Epoch 344/500
 - 0s - loss: 0.3818 - acc: 0.8750
Epoch 345/500
 - 0s - loss: 0.3792 - acc: 0.8750
Epoch 346/500
 - 0s - loss: 0.3766 - acc: 0.8750
Epoch 347/500
 - 0s - loss: 0.3741 - acc: 0.8750
Epoch 348/500
 - 0s - loss: 0.3716 - acc: 0.8750
Epoch 349/500
 - 0s - loss: 0.3691 - acc: 0.8750
Epoch 350/500
 - 0s - loss: 0.3667 - acc: 0.8750
Epoch 351/500
 - 0s - loss: 0.3644 - acc: 0.8750
Epoch 352/500
 - 0s - loss: 0.3621 - acc: 0.8750
Epoch 353/500
 - 0s - loss: 0.3598 - acc: 0.8750
Epoch 354/500
 - 0s - loss: 0.3575 - acc: 0.8750
Epoch 355/500
 - 0s - loss: 0.3553 - acc: 0.8750
Epoch 356/500
 - 0s - loss: 0.3532 - acc: 0.8750
Epoch 357/500
 - 0s - loss: 0.3511 - acc: 0.8750
Epoch 358/500
 - 0s - loss: 0.3490 - acc: 0.8750
Epoch 359/500
 - 0s 

<keras.callbacks.History at 0x63effda240>

In [0]:
# evaluate
print(generate_seq(model, tokenizer, 'Jack', 11))


Jack and jill came tumbling after of water jack and jill came
