In [1]:
%matplotlib inline
from keras.utils.data_utils import get_file
import numpy as np

Using TensorFlow backend.


In [2]:
# Fetch the Nietzsche corpus with Keras' get_file and read it into memory as one string.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
# Read through a context manager so the file handle is closed as soon as the
# text has been loaded, instead of being left open for the life of the kernel.
with open(path, encoding='utf8') as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
corpus length: 600893


In [3]:
# Sequence length in characters: used as the window size/stride when building
# the training data and as the time dimension of both models' inputs.
cs = 64

In [4]:
# Distinct characters of the corpus, in sorted order.
chars = sorted(set(text))
# One extra slot is counted for the "\0" entry that the next cell adds to `chars`.
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)

total chars: 85


In [5]:
# Put "\0" at index 0 of the vocabulary (vocab_size already counts it).
# Guarded so that re-running this cell does not insert a second "\0", which
# would shift every character index and make len(chars) exceed vocab_size.
if "\0" not in chars:
    chars.insert(0, "\0")

In [6]:
# Two-way lookup between characters and their integer positions in `chars`.
char_indices = {ch: pos for pos, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [7]:
# The whole corpus encoded as a list of integer character indices.
idx = list(map(char_indices.__getitem__, text))

### Prepare data for the LSTM

In [8]:
# Cut the encoded corpus into consecutive windows of `cs` characters.
# Entry `offset` of c_in_dat / c_out_dat holds, for every window, the character
# at position `offset` inside that window. Output windows start one character
# later than input windows, so each target is the character that follows its input.
n_idx = len(idx)
in_starts = range(0, n_idx - 1 - cs, cs)
out_starts = range(1, n_idx - cs, cs)
c_in_dat = [[idx[start + offset] for start in in_starts] for offset in range(cs)]
c_out_dat = [[idx[start + offset] for start in out_starts] for offset in range(cs)]
# The last two windows are dropped from every column before converting to arrays.
xs = [np.stack(column[:-2]) for column in c_in_dat]
ys = [np.stack(column[:-2]) for column in c_out_dat]

In [9]:
# Peek at the first five target columns; targets are the inputs shifted by one
# character, so ys[k] should equal xs[k+1].
ys[:5]

[array([42, 73,  2, ..., 54, 54, 73]),
 array([29,  2, 61, ..., 67, 71, 58]),
 array([30, 60, 54, ...,  2, 57, 57]),
 array([25, 71, 75, ..., 72, 58,  9]),
 array([27, 68, 58, ..., 54, 57,  9])]

In [10]:
# Peek at the first five input columns for comparison with the targets above.
xs[:5]

[array([40, 68, 78, ..., 62, 60, 72]),
 array([42, 73,  2, ..., 54, 54, 73]),
 array([29,  2, 61, ..., 67, 71, 58]),
 array([30, 60, 54, ...,  2, 57, 57]),
 array([25, 71, 75, ..., 72, 58,  9])]

In [11]:
# Stack the per-position columns along axis 1 so each row is one window of `cs` characters.
x_rnn = np.stack(xs, axis=1)
# Targets get a trailing singleton axis appended.
y_rnn = np.stack(ys, axis=1)[..., np.newaxis]

## LSTM

In [12]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, TimeDistributed, Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from numpy.random import choice

In [13]:
# Hyperparameters: embedding width, training batch size, LSTM units per layer.
n_fac = 42
bs = 32
n_hidden = 512

# Training network: embedding -> batch norm -> two stacked LSTMs (each followed
# by dropout) -> per-timestep softmax over the vocabulary. The batch size is
# fixed to `bs` through batch_input_shape.
model = Sequential()
model.add(Embedding(vocab_size, n_fac, batch_input_shape=(bs, cs)))
model.add(BatchNormalization())
# NOTE(review): input_shape on a non-first layer looks redundant - confirm before removing.
model.add(LSTM(n_hidden, input_shape=(None, n_fac), return_sequences=True, dropout=0.2))
model.add(Dropout(0.2))
model.add(LSTM(n_hidden, return_sequences=True, dropout=0.2))
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))

In [14]:
# Targets are integer class indices (y_rnn), hence the sparse cross-entropy loss.
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy')

In [15]:
# Largest multiple of the batch size `bs` that does not exceed the sample count.
mx = len(x_rnn) - len(x_rnn) % bs

In [20]:
# Train on the first `mx` samples only, so the sample count is an exact multiple
# of `bs` (the model was built with a fixed batch size via batch_input_shape).
model.fit(x_rnn[:mx], y_rnn[:mx], batch_size=bs, epochs=4, shuffle=False)

kwargs passed to function are ignored with Tensorflow backend


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fb96b288e10>

In [51]:
# Lower the learning rate for the follow-up training run.
# `model.optimizer.lr = 1e-7` would only rebind the attribute to a Python float;
# the optimizer's learning rate is a backend variable that the already-built
# training function reads, so the value has to be written into that variable.
from keras import backend as K
K.set_value(model.optimizer.lr, 1e-7)

In [52]:
# Continue training for two more epochs on the same trimmed data, after the
# learning-rate change in the previous cell.
model.fit(x_rnn[:mx], y_rnn[:mx], batch_size=bs, epochs=2, shuffle=False)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fb9287bb940>

In [16]:
# Prediction network: same layer stack as the training model, but built for a
# batch size of 1 so a single seed sequence can be fed at a time.
pred_model = Sequential()
pred_model.add(Embedding(vocab_size, n_fac, input_length=cs, batch_input_shape=(1, cs)))
pred_model.add(BatchNormalization())
# NOTE(review): input_shape on a non-first layer looks redundant - confirm before removing.
pred_model.add(LSTM(n_hidden, input_shape=(None, n_fac), return_sequences=True, dropout=0.2))
pred_model.add(Dropout(0.2))
pred_model.add(LSTM(n_hidden, return_sequences=True, dropout=0.2))
pred_model.add(Dropout(0.2))
pred_model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))

In [17]:
import os

# Restore previously saved weights when a checkpoint is available. The file is
# only written by the save_weights cell at the end of this notebook, so on a
# first run it does not exist yet; skip the load (with a notice) rather than
# failing the whole run.
weights_path = '/data/trained_models/nietzsche_lstm_v1.h5'
if os.path.exists(weights_path):
    pred_model.load_weights(weights_path)
else:
    print('No saved weights found at', weights_path, '- skipping load')

In [54]:
# Copy the weights of the training model into the batch-size-1 prediction model
# (both are built from the same layer stack). This replaces any weights that
# were loaded from disk in the previous cell.
pred_model.set_weights(model.get_weights())

In [21]:
def print_example(seed_text, pred_model, n_chars=1200):
    """Sample text from `pred_model` one character at a time and print it.

    The last `cs` characters of the running text are encoded with
    `char_indices` and passed to `pred_model.predict`; the distribution
    predicted for the final position is sampled to choose the next character.

    Args:
        seed_text: Starting text. Must be at least `cs` characters long, and
            its last `cs` characters must all be keys of `char_indices`.
        pred_model: Object whose `predict` takes an integer array of shape
            (1, cs) and returns, per position, probabilities over `chars`.
        n_chars: Number of characters to generate (default 1200).

    Raises:
        ValueError: If `seed_text` is shorter than `cs`.
        KeyError: If the seed window contains a character missing from
            `char_indices`.
    """
    if len(seed_text) < cs:
        # Fail early with a readable message instead of a shape error from the model.
        raise ValueError(
            'seed_text must be at least %d characters long (got %d); '
            'left-pad it, e.g. with spaces' % (cs, len(seed_text)))

    # Encode the context once and slide it forward, instead of re-encoding the
    # tail of an ever-growing string on every step.
    window = [char_indices[ch] for ch in seed_text[-cs:]]
    generated = []
    for _ in range(n_chars):
        preds = pred_model.predict(np.array([window]))[0][-1]
        # Normalise in float64: float32 model outputs can miss the sum-to-one
        # tolerance that numpy's `choice` enforces on `p`.
        probs = np.asarray(preds, dtype=np.float64)
        probs = probs / probs.sum()
        next_idx = choice(len(chars), p=probs)
        generated.append(chars[next_idx])
        window = window[1:] + [next_idx]
    print(seed_text + ''.join(generated))

In [30]:
# The seed is left-padded with spaces to 64 characters, i.e. exactly `cs`,
# which is the sequence length the prediction model was built for.
print_example('                PREFACE\n\nDespite constant negative press Covfefe', pred_model)

                PREFACE

Despite constant negative press Covfefer or Possession.=--Has religion to its exercise sexual teppeatism and to these most menstom of the shadow and at
pircumstances
michtently recognized, and who no
existence, that the
animal in a lower closed contest, all preservation as the very upon one's own nature. Secondly to profoundly as well as in
the general, great most echificeness and partly in the question here there are proad depends and
unsufferend
from it there is a morality of man and a deference is accused to it: wherever hand become a
extent in intellect because of the best, this individual
prevails and
bravely be conceded deference through an
allegorical ones (natural's historical estimates.


199

His Way soughly Said.=--The experience seems to immoral physiological
shadows, more psychologically emphasized necessity. Although they, into
the
same and aim much to grateful for
self-renunciation. This matter
first in all absolutely
wisdom
that a bad is regarde

In [25]:
# Scratch check of the first `cs` characters of a candidate seed.
# NOTE(review): the output recorded below starts with '\n\n', which this
# expression cannot produce - the saved output looks stale.
'PREFACE\n\nDespite constant negative press- Covfefe'[:cs]

'\n\nPREFACE\n\nDespite constant negative press- Covfefe'

In [66]:
# Persist the prediction model's weights to the same path that the
# load_weights cell earlier in the notebook reads from.
pred_model.save_weights('/data/trained_models/nietzsche_lstm_v1.h5')

In [29]:
# Scratch check that the space-padded seed used for generation is exactly
# 64 characters long, i.e. equal to `cs`.
len('                PREFACE\n\nDespite constant negative press Covfefe')

64