In [1]:
# import necessary modules
from __future__ import print_function
from keras.layers import Dense, Activation
from keras.layers.recurrent import LSTM
from keras.models import Sequential 
from keras.utils.vis_utils import plot_model
import numpy as np

Using TensorFlow backend.


In [2]:
# read input text and cleanup content
fin = open("../Learn-enough-python-and-machine-learning-to-dangerous/data/code.txt", "rb")
lines = []
for line in fin:
    line = line.strip().lower()
    line = line.decode("ascii","ignore")
    if len(line) == 0:
        continue
    line += '\n'
    lines.append(line)
fin.close()
text = " ".join(lines)

In [3]:
# build vocabulary
chars = set([c for c in text])
nb_chars = len(chars)
print(nb_chars)
print(chars)
char2index = dict((c, i) for i, c in enumerate(chars))
index2char = dict((i, c) for i, c in enumerate(chars))

70
{'(', 'o', '-', 'i', '7', '|', ']', ':', ',', '\\', 'l', '%', '@', 'v', '3', '!', '/', '^', 'e', 'w', 'q', 'r', 'a', '*', '6', '[', '2', 'u', 'z', '?', '4', '~', '<', 'b', '"', 'd', 't', '.', 'p', ' ', 'y', '0', 'c', 'g', 'h', '1', 'x', '\n', '$', '&', '5', '+', '_', '>', '{', 's', '9', '#', '8', 'k', 'f', 'j', ';', ')', '`', 'm', "'", 'n', '=', '}'}


In [4]:
# create the input and label texts  
SEQLEN = 10
STEP = 1

input_chars = []
label_chars = []
for i in range(0, len(text) - SEQLEN, STEP):
    input_chars.append(text[i:i + SEQLEN])
    label_chars.append(text[i + SEQLEN])

In [5]:
# vectorize these input and label texts
X = np.zeros((len(input_chars), SEQLEN, nb_chars), dtype=np.bool)
y = np.zeros((len(input_chars), nb_chars), dtype=np.bool)
for i, input_char in enumerate(input_chars):
    for j, ch in enumerate(input_char):
        X[i, j, char2index[ch]] = 1
    y[i, char2index[label_chars[i]]] = 1

In [6]:
# build our model
HIDDEN_SIZE = 256
BATCH_SIZE = 128
NUM_ITERATIONS = 25
NUM_EPOCHS_PER_ITERATION = 2
NUM_PREDS_PER_EPOCH = 500

model = Sequential()
model.add(LSTM(HIDDEN_SIZE, return_sequences=False, input_shape=(SEQLEN, nb_chars), unroll=True))
model.add(Dense(nb_chars))
model.add(Activation("softmax"))
model.summary()
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 256)               334848    
_________________________________________________________________
dense_1 (Dense)              (None, 70)                17990     
_________________________________________________________________
activation_1 (Activation)    (None, 70)                0         
Total params: 352,838
Trainable params: 352,838
Non-trainable params: 0
_________________________________________________________________


In [7]:
for iteration in range(NUM_ITERATIONS):
    print("\n")
    print("="*50)
    print("Iteration #: %d" % (iteration))
    model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
    
    test_idx = np.random.randint(len(input_chars))
    test_chars = input_chars[test_idx]
    print("Generating from seed: %s" % (test_chars))
    print(test_chars, end="")
    for i in range (NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN, nb_chars))
        for i, ch in enumerate(test_chars):
            Xtest[0, i, char2index[ch]] = 1
        pred = model.predict(Xtest, verbose=0)[0]
        ypred = index2char[np.argmax(pred)]
        print(ypred, end="")
        test_chars = test_chars[1:] + ypred
print()

Iteration #: 0
Epoch 1/2
Epoch 2/2
Iteration #: 1
Epoch 1/2
Epoch 2/2
Iteration #: 2
Epoch 1/2
Epoch 2/2
Iteration #: 3
Epoch 1/2

KeyboardInterrupt: 