## First we import the necessary modules

In [2]:
from __future__ import print_function
from keras.layers import Dense, Activation
from keras.layers.recurrent import SimpleRNN
from keras.models import Sequential
from keras.utils.vis_utils import plot_model
import numpy as np

In [3]:
## We read our input text from a local copy of Alice in Wonderland (originally from the Project Gutenberg website)
# The file is read as bytes. Each line is trimmed, lower-cased and decoded as
# ASCII with undecodable bytes dropped; empty lines are skipped and the
# remaining lines are joined with single spaces into one long string.
lines = []
# The context manager closes the file even if reading or decoding raises.
with open("Datasets/alice_in_wonderland.txt", 'rb') as fin:
    for line in fin:
        line = line.strip().lower()
        line = line.decode("ascii", "ignore")
        if len(line) == 0:
            continue
        lines.append(line)
text = " ".join(lines)

In [6]:
## We build the character vocabulary and the lookup tables between characters and indices
# Sorting the distinct characters gives every character the same index on every
# run. Iterating a bare set of strings can change order between interpreter
# sessions (string hash randomization), which would silently change the one-hot
# layout of X / y and break reuse of a model across kernel restarts.
chars = sorted(set(text))
nb_chars = len(chars)
# char2index: character -> column in the one-hot encoding.
char2index = {c: i for i, c in enumerate(chars)}
# index2char: the inverse mapping, used to turn predictions back into text.
index2char = {i: c for c, i in char2index.items()}

In [18]:
# Slide a window of SEQLEN characters over the text, advancing STEP characters
# at a time. Each window is one input; the character right after it is its label.
SEQLEN = 10
STEP = 1
window_starts = range(0, len(text) - SEQLEN, STEP)
input_chars = [text[start:start + SEQLEN] for start in window_starts]
label_chars = [text[start + SEQLEN] for start in window_starts]

## The next step is to vectorize these input and label texts

In [21]:
# One-hot encode the inputs and labels.
#   X[i, j, k] is True when character j of input window i has index k.
#   y[i, k]    is True when the label character of window i has index k.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(input_chars), SEQLEN, nb_chars), dtype=bool)
y = np.zeros((len(input_chars), nb_chars), dtype=bool)
for i, input_char in enumerate(input_chars):
    for j, ch in enumerate(input_char):
        X[i, j, char2index[ch]] = True
    y[i, char2index[label_chars[i]]] = True

## We build our model

In [26]:
# Hyperparameters for the network and for the train/generate loop.
HIDDEN_SIZE = 128
BATCH_SIZE = 128
NUM_ITERATIONS = 25
NUM_EPOCHS_PER_ITERATION = 1
NUM_PREDS_PER_EPOCH = 100

# A single SimpleRNN layer reads a (SEQLEN, nb_chars) one-hot window and emits
# only its final output (return_sequences=False); a Dense layer followed by a
# softmax turns that into a distribution over the nb_chars characters.
recurrent_layer = SimpleRNN(
    HIDDEN_SIZE,
    return_sequences=False,
    input_shape=(SEQLEN, nb_chars),
    unroll=True,
)
model = Sequential([
    recurrent_layer,
    Dense(nb_chars),
    Activation("softmax"),
])
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

In [27]:
# Train for NUM_ITERATIONS rounds. After each round, pick a random seed window
# from the training inputs and generate NUM_PREDS_PER_EPOCH characters from it,
# always taking the most probable next character (argmax of the prediction).
for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
    test_idx = np.random.randint(len(input_chars))
    test_chars = input_chars[test_idx]
    print("Generating from seed: %s" % (test_chars))
    print(test_chars, end="")
    # `_` / `pos` keep the generation counter and the position inside the
    # window distinct (the original reused `i` for both loops).
    for _ in range(NUM_PREDS_PER_EPOCH):
        # One-hot encode the current SEQLEN-character window as a batch of one.
        Xtest = np.zeros((1, SEQLEN, nb_chars))
        for pos, ch in enumerate(test_chars):
            Xtest[0, pos, char2index[ch]] = 1
        pred = model.predict(Xtest, verbose=0)[0]
        ypred = index2char[np.argmax(pred)]
        print(ypred, end="")
        # move forward with test_chars + ypred
        test_chars = test_chars[1:] + ypred
    # Terminate this iteration's generated text so it does not run into the
    # next iteration's banner (the original printed the newline only once,
    # after all iterations had finished).
    print()

Iteration #: 0
Epoch 1/1
Generating from seed: arch hare 
Iteration #: 1
Epoch 1/1
Generating from seed: ork is dis
Iteration #: 2
Epoch 1/1
Generating from seed: ad come ba
Iteration #: 3
Epoch 1/1
Generating from seed: ble. it do
Iteration #: 4
Epoch 1/1
Generating from seed: and some u
Iteration #: 5
Epoch 1/1
Generating from seed: ad that do
Iteration #: 6
Epoch 1/1
Generating from seed: ! i wish y
Iteration #: 7
Epoch 1/1
Generating from seed:  turtle to
Iteration #: 8
Epoch 1/1
Generating from seed: : before s
Iteration #: 9
Epoch 1/1
Generating from seed: he set to 
Iteration #: 10
Epoch 1/1
Generating from seed: and the wo
Iteration #: 11
Epoch 1/1
Generating from seed: f making a
Iteration #: 12
Epoch 1/1
Generating from seed: t. it was 
Iteration #: 13
Epoch 1/1
Generating from seed: said alice
Iteration #: 14
Epoch 1/1
Generating from seed:  me see--h
Iteration #: 15
Epoch 1/1
Generating from seed: and just a
Iteration #: 16
Epoch 1/1
Generating from seed: urning int
Iterati