In [1]:
%pylab inline
import os; os.environ['KERAS_BACKEND'] = 'tensorflow'
import numpy as np

Populating the interactive namespace from numpy and matplotlib


In [2]:
class CharTransformer:
    """Converts characters to and from one-hot vectors over a fixed vocabulary.

    The vocabulary is the sorted set of distinct symbols seen by
    ``build_vocab``; a symbol's position in ``self.vocab`` is the index of
    the hot entry in its encoding. ``build_vocab`` must be called (or
    ``vocab`` / ``vocab_size`` assigned) before encoding or decoding.
    """

    def __init__(self):
        pass

    def build_vocab(self, text):
        """Set ``self.vocab`` (sorted distinct symbols of ``text``) and ``self.vocab_size``."""
        self.vocab = sorted(set(text))
        self.vocab_size = len(self.vocab)

    def encode_one_hot(self, char):
        """Return a boolean vector of length ``vocab_size`` with a single True at ``char``'s index.

        Raises:
            ValueError: if ``char`` is not in the vocabulary.
        """
        k = self.vocab.index(char)
        # np.bool_ is the canonical NumPy boolean scalar type; the np.bool8
        # alias used previously was deprecated and later removed from NumPy.
        arr = np.zeros(self.vocab_size, dtype=np.bool_)
        arr[k] = True
        return arr

    def decode_one_hot(self, arr):
        """Return the vocabulary symbol at the position of the largest entry of ``arr``."""
        idx = np.argmax(arr)
        return self.vocab[idx]

    def batch_encode_one_hot(self, text):
        """One-hot encode every symbol of ``text``.

        Returns:
            Boolean array of shape ``(len(text), vocab_size)``; row ``i`` is
            the encoding of ``text[i]``. An empty ``text`` yields shape
            ``(0, vocab_size)``.

        Raises:
            ValueError: if any symbol is not in the vocabulary.
        """
        # Build the symbol -> index table once per call instead of doing a
        # linear list.index() scan for every symbol of the text.
        lookup = {symbol: i for i, symbol in enumerate(self.vocab)}
        try:
            hot_columns = [lookup[symbol] for symbol in text]
        except KeyError as err:
            # Keep the exception type list.index() raised for unknown symbols.
            raise ValueError("%r is not in the vocabulary" % (err.args[0],))
        hot_columns = np.array(hot_columns, dtype=np.intp)
        encoded = np.zeros((len(hot_columns), self.vocab_size), dtype=np.bool_)
        # One fancy-indexed assignment sets the hot entry of every row.
        encoded[np.arange(len(hot_columns)), hot_columns] = True
        return encoded

In [3]:
def rnn_sliding_window_indices(seq_len, time_steps, batch_size, epochs=1):
    """Yield batches of index tuples for sliding windows over a sequence.

    Each yielded batch is a list of ``batch_size`` tuples
    ``(x_start, x_stop, y_start, y_stop)``. Within a batch the windows start
    at consecutive offsets ``i, i+1, ..., i+batch_size-1``; the ``y`` span is
    the ``x`` span shifted right by one. The batch offset ``i`` advances by
    one per batch, from ``0`` to ``seq_len - time_steps - batch_size - 2``
    inclusive, which makes one pass over the sequence.

    Args:
        seq_len: length of the sequence being indexed.
        time_steps: length of every window.
        batch_size: number of windows per batch.
        epochs: number of complete passes to generate. ``None`` is treated
            as a single pass. Values <= 0 generate nothing.

    Yields:
        list of ``(x_start, x_stop, y_start, y_stop)`` tuples.

    Raises:
        AssertionError: on first iteration, if the sequence is too short.
    """
    assert seq_len >= time_steps + batch_size + 2, "sequence must be larger than time_steps+batch_size+2"
    if epochs is None:
        epochs = 1

    # Offsets 0..(seq_len - time_steps - batch_size - 2) inclusive.
    batches_per_epoch = seq_len - time_steps - batch_size - 1

    # Count passes explicitly so `epochs` means exactly that many passes
    # (the earlier wrap-around check produced one pass too many).
    remaining = epochs
    while remaining > 0:
        for i in range(batches_per_epoch):
            yield [(x_start, x_start + time_steps, x_start + 1, x_start + time_steps + 1)
                   for x_start in range(i, i + batch_size)]
        remaining -= 1

def rnn_data_gen(seq, time_steps=None, batch_size=None, epochs=None, last_label_only=True):
    """Yield ``(x, y)`` array pairs cut from ``seq`` with sliding windows.

    The window positions come from ``rnn_sliding_window_indices``, called
    with ``seq.shape[0]`` as the sequence length and ``time_steps``,
    ``batch_size`` and ``epochs`` forwarded unchanged.

    Args:
        seq: indexable sequence exposing ``.shape[0]`` and slicing.
        time_steps: window length, forwarded to the index generator.
        batch_size: windows per batch, forwarded to the index generator.
        epochs: forwarded to the index generator.
        last_label_only: when true, each target is the single element that
            immediately follows its input window; otherwise each target is
            the input window shifted right by one position.

    Yields:
        tuple ``(np.array(inputs), np.array(targets))`` for one batch.
    """
    index_batches = rnn_sliding_window_indices(seq.shape[0],
                                               time_steps=time_steps,
                                               batch_size=batch_size,
                                               epochs=epochs)
    for spans in index_batches:
        inputs = [seq[x_lo:x_hi] for x_lo, x_hi, _, _ in spans]
        if last_label_only:
            # Last element of the shifted window == element right after the input window.
            targets = [seq[y_hi - 1] for _, _, _, y_hi in spans]
        else:
            targets = [seq[y_lo:y_hi] for _, _, y_lo, y_hi in spans]
        yield (np.array(inputs), np.array(targets))

In [15]:
# Shakespeare
import urllib
from contextlib import closing

# Where the corpus is downloaded from, and how many leading characters of it are kept.
SHAKESPEARE_URL = "https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt"
MAX_CHARS = 200000

# closing() releases the HTTP response deterministically, even if read() raises.
with closing(urllib.urlopen(SHAKESPEARE_URL)) as response:
    TEXT = response.read().lower()[:MAX_CHARS]

# Build the character vocabulary from the corpus and one-hot encode the whole text.
charxform = CharTransformer()
charxform.build_vocab(TEXT)
x_one_hot = charxform.batch_encode_one_hot(TEXT)

In [16]:
# Hyper-parameters shared by the training and sampling cells.
# Number of windows per batch; also the fixed batch dimension the model is built with.
BATCH_SIZE = 8
# Number of characters in each input window.
TIME_STEPS = 20
# Width of one one-hot vector: one slot per vocabulary symbol.
INPUT_SIZE = charxform.vocab_size
# The model outputs a distribution over the same vocabulary it reads.
OUTPUT_SIZE = INPUT_SIZE
# Passed as samples_per_epoch to fit_generator. Same expression as the
# wrap-around threshold used by rnn_sliding_window_indices.
NB_SAMPLES = len(TEXT) - TIME_STEPS - BATCH_SIZE - 2

In [26]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Batch generator over the one-hot corpus. last_label_only keeps its default
# (True), so each target is the single character following its input window.
gen = rnn_data_gen(x_one_hot, time_steps=TIME_STEPS, batch_size=BATCH_SIZE, epochs=2000)
opt = Adam(lr=.001)
# Both callbacks watch the training loss (no validation data is passed to fit_generator).
# Stop once the loss has not improved for 3 consecutive epochs.
early_stop_cb = EarlyStopping(monitor='loss', patience=3, verbose=0, mode='auto')
# Write "model.ckpt" only when the monitored loss improves.
model_ckpt_cb = ModelCheckpoint("model.ckpt", monitor='loss', verbose=0, save_best_only=True, mode='auto')


# Single-layer LSTM -> dropout -> softmax over the vocabulary.
model = Sequential()
# batch_input_shape fixes the batch dimension to BATCH_SIZE; sample_chars
# builds its prediction input with that same leading dimension.
# return_sequences=False: one output vector per window, not one per time step.
model.add(LSTM(128, batch_input_shape=(BATCH_SIZE, TIME_STEPS, INPUT_SIZE), return_sequences=False))
model.add(Dropout(.4))
model.add(Dense(OUTPUT_SIZE, activation="softmax"))
model.compile(opt, loss='categorical_crossentropy', metrics=['accuracy'])
# NOTE(review): the recorded output below ends at "Epoch 15/20" - presumably
# early stopping triggered; confirm against the training log.
model.fit_generator(gen, samples_per_epoch=NB_SAMPLES , nb_epoch=20, verbose=1, callbacks=[model_ckpt_cb, early_stop_cb])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


<keras.callbacks.History at 0x7fb37195bcd0>

In [20]:
def sample(probs, temperature):
    """Draw one index from ``probs`` after temperature rescaling.

    The distribution is re-weighted as ``p_i ** (1 / temperature)`` and
    renormalised, then a single multinomial draw selects the index.

    Args:
        probs: 1-D array-like of probabilities.
        temperature: non-zero scaling factor; values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        The sampled index (NumPy integer).

    Raises:
        ValueError: if ``temperature`` is zero.
    """
    if temperature == 0:
        raise ValueError("temperature must be non-zero")
    # float64 keeps the renormalised probabilities within the tolerance
    # np.random.multinomial enforces on their sum.
    preds = np.asarray(probs).astype('float64')
    # log(0) = -inf is intended here (it maps back to probability 0), so
    # silence the divide-by-zero warning for that case only.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest exponent is 0. Without this, small temperatures
    # make every exp() underflow to 0 and the normalisation becomes 0/0 = NaN.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def sample_chars(seed, model, max_length=1000, temp=.5):
    """Extend ``seed`` one character at a time with ``model`` and print the result.

    On every step the last ``TIME_STEPS`` characters are one-hot encoded,
    passed to ``model.predict_proba`` and the next character is drawn with
    ``sample`` at temperature ``temp``. Generation stops once the text holds
    ``max_length`` characters; a seed already that long is printed unchanged.

    Args:
        seed: starting text; needs at least ``TIME_STEPS`` characters, all
            of them in ``charxform``'s vocabulary, when generation runs.
        model: object exposing ``predict_proba(x, verbose=0)``.
        max_length: total length of the printed text, seed included.
        temp: sampling temperature forwarded to ``sample``.

    Raises:
        ValueError: if generation is needed but the seed is shorter than
            ``TIME_STEPS``.
    """
    text = seed
    if len(text) < max_length and len(text) < TIME_STEPS:
        # Fail with a clear message instead of a NumPy broadcast error below.
        raise ValueError("seed must contain at least %d characters, got %d"
                         % (TIME_STEPS, len(text)))
    # The model input has a fixed leading dimension of BATCH_SIZE, so only
    # row 0 carries real data and the other rows stay zero. Allocate the
    # buffer once and overwrite row 0 on every step.
    x = np.zeros((BATCH_SIZE, TIME_STEPS, INPUT_SIZE), dtype=np.float32)
    while len(text) < max_length:
        x[0] = charxform.batch_encode_one_hot(text[-TIME_STEPS:])
        probs = model.predict_proba(x, verbose=0)[0]
        index = sample(probs, temp)
        text += charxform.vocab[index]
    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3.
    print(text)
    

# model.load_weights("model.ckpt")


In [23]:
# Seed generation with a random excerpt of the corpus. The excerpt is exactly
# TIME_STEPS characters long because sample_chars feeds the model windows of
# that length; using the constant keeps this cell correct if TIME_STEPS changes.
start = np.random.randint(0, len(TEXT) - TIME_STEPS - 2)
end = start + TIME_STEPS
sample_chars(TEXT[start:end], model, temp=.2)

h' imprisoned absencresere and the sind of tour the thour the strare thoughtr sore the sared and that mare of wind that the sumere the be and in the to do so to that a do there that of your the with with whing though the of to come be for in to thath to to that i stord be that that math to that be that the sume to that that with the selet that i sond sond the love of that be be the worth of which to that be in thour of thath to so so more o the bure that so so mare the of the sores and on sore to the the streth sure the be the stours and love that i some be thou be the with that that be that in of the for that thoughtred be thour that        55
r the nd be shour so fore of the to be of that so sime the of the tort your the sor to the stord of to shat that that be that to that here,
  the sure sund to the the with i sore to this at the stort  the be of that that store sond of be though the of ind the stind of to that the of the beand the of thate then be the share to stor bearter with s