In [9]:
import os, sys
# Restrict CUDA to the device with index 1. The assignment is placed before
# `import keras` below (presumably so the backend sees it when it
# initializes its GPU context -- TODO confirm for the backend in use).
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import keras
import numpy as np
import random

In [2]:
# Download the Nietzsche corpus (get_file returns the local path of the file).
path = keras.utils.get_file('nietzsche.txt', 
                            origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# Read inside a context manager so the file handle is closed deterministically,
# and pin the encoding so the result does not depend on the platform default.
# The text is lower-cased to shrink the character vocabulary.
with open(path, 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


In [3]:
# Each training sample is a window of `maxlen` characters; consecutive windows
# start `step` characters apart.
maxlen = 60
step = 3
sentences = []   # input windows taken from the corpus
next_chars = []  # the character that immediately follows each window (the target)

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i:i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sentences', len(sentences))
# Sorted list of the distinct characters; a character's position is its class index.
chars = sorted(set(text))
print('Number of unique characters', len(chars))
char_indices = {char: i for i, char in enumerate(chars)}

print('Vectorizing...')

# One-hot encode inputs as (samples, maxlen, vocab) and targets as (samples, vocab).
# The builtin `bool` is used as the dtype: `np.bool` was only a deprecated alias
# for it and has been removed from recent NumPy releases.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sentences 200278
Number of unique characters 57
Vectorizing...


In [4]:
# Model definition: a single 128-unit LSTM reads the one-hot window of shape
# (maxlen, len(chars)); a softmax Dense layer outputs one probability per character.
model = keras.Sequential([
    keras.layers.LSTM(128, input_shape=(maxlen, len(chars))),
    keras.layers.Dense(len(chars), activation=keras.activations.softmax),
])

In [5]:
# Targets are one-hot vectors, so categorical cross-entropy is the loss and
# categorical accuracy is the tracked metric.
# NOTE(review): `lr` is kept as written; newer Keras releases spell this
# argument `learning_rate` -- confirm against the installed version.
rmsprop = keras.optimizers.RMSprop(lr=0.01)
model.compile(
    optimizer=rmsprop,
    loss=keras.losses.categorical_crossentropy,
    metrics=[keras.metrics.categorical_accuracy],
)

In [6]:
# Checkpoint file name template; the `{epoch:02d}` placeholder is filled in per epoch.
fname = '/content/dohai90/workspace/keras/checkpoints/text_gen_ckpt_{epoch:02d}.h5'

# Create the checkpoint directory up front so a fresh machine does not fail
# on the first save after a full training epoch has already been spent.
os.makedirs(os.path.dirname(fname), exist_ok=True)

# NOTE(review): early stopping watches validation accuracy while checkpointing
# keeps the best *training* loss; the two criteria are intentionally left as-is
# here, but consider monitoring 'val_loss' for the checkpoint as well.
callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy',
                                                patience=5),
                  keras.callbacks.ModelCheckpoint(filepath=fname,
                                                 monitor='loss',
                                                 save_best_only=True, 
                                                 mode='min')]

# The last 20% of the samples are held out for validation.
history = model.fit(x, y, epochs=60, batch_size=128, validation_split=0.2, callbacks=callbacks_list)

Train on 160222 samples, validate on 40056 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60


In [6]:
# Restore the weights saved at epoch 29 by the checkpoint callback.
checkpoint_path = '/content/dohai90/workspace/keras/checkpoints/text_gen_ckpt_29.h5'
model.load_weights(checkpoint_path)

In [8]:
# function to sample the next character given the model's predictions
def sample(preds, temperature=1.0):
    """Draw one class index from a temperature-rescaled probability vector.

    Args:
        preds: 1-D array-like of class probabilities (e.g. a softmax output).
        temperature: Reweighting factor. Values below 1 sharpen the
            distribution, values above 1 flatten it. A temperature of exactly
            0 selects the most probable class deterministically.

    Returns:
        The sampled class index (a NumPy integer).
    """
    preds = np.asarray(preds, dtype=np.float64)
    if temperature == 0:
        # Limit of the rescaled distribution as temperature -> 0 is greedy
        # decoding; handling it here avoids a division by zero.
        return np.argmax(preds)

    # log(0) legitimately yields -inf for impossible classes (they stay at
    # probability 0 after exp), so silence the divide-by-zero warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating: the result is unchanged
    # mathematically, but it prevents every term underflowing to 0 (and the
    # normalization becoming 0/0) at small temperatures.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # Single multinomial trial; the position of the 1 is the chosen class.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [13]:
# Text generation: pick a random corpus window as the seed, then extend it
# by 400 sampled characters at each of several temperatures.
start_index = random.randint(0, len(text) - maxlen - 1)
seed_text = text[start_index:start_index + maxlen]
print('Generating with seed:', seed_text)

for temperature in [0.2, 0.5, 1.0, 1.2]:
    print('\nTemperature:', temperature)
    generated_text = seed_text
    sys.stdout.write(generated_text)
    for i in range(400):
        # One-hot encode the current window with a single indexed assignment.
        sampled = np.zeros((1, maxlen, len(chars)))
        positions = np.arange(len(generated_text))
        char_ids = [char_indices[char] for char in generated_text]
        sampled[0, positions, char_ids] = 1.

        # Predict the next-character distribution and draw from it.
        preds = model.predict(sampled)[0]
        next_index = sample(preds, temperature)
        next_char = chars[next_index]

        # Slide the window: drop the oldest character, append the new one.
        generated_text = generated_text[1:] + next_char
        sys.stdout.write(next_char)

Generating with seed: f. this is far from being the general human opinion. it is
n

Temperature: 0.2
f. this is far from being the general human opinion. it is
not the strong the spirit and spirity of the spirit and subtle of the sign and the spirit and subtle of the similar the morality and seems to the strong conscience of the spirit to the contrary to the continuant that the sense of the spirit to the spirit and conscience of the spirit and subtle of the sentence and still be stronger to the strengthing of the sentence and subtle of the spirit and sens
Temperature: 0.5
f. this is far from being the general human opinion. it is
not the greatest of the words of the hard, it is always of the philosophers. the passion of the tendence of the more perhaps of the super--do not at the conceal is to make the contrary has been are so much as "such a sense with
the god of the world even that means, the greatest and surentary really any one wish and say, as the soul, the clous gangervation, the

  after removing the cwd from sys.path.


pher suffering only sil and weckes a
-int a all-so thought, prigiless, by life-frem which--this plato since, within semico its the fact live-croibles of life. their over life: it
must not eurority.g-outy onour conslato its hyped, at lisang for saint ntousina feet whom the spirit, enduescarderdorwly,
what learne, nor peollely mad himself ar