In [1]:
import keras
import numpy as np

# downloading the text file
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt


In [2]:
#vectorizing sequences of data using one-hot-encoding and prepare array of corresponding targets

# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200278
Unique characters: 57
Vectorization...


In [3]:
# defining the network, a single LSTM followed by a dense for next character prediction

from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

W0716 08:52:25.585372 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:58: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0716 08:52:25.590938 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:442: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0716 08:52:25.595276 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:3543: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0716 08:52:25.689426 140601544877824 deprecation.py:506] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:1188: call

In [4]:
# compiling the model using categorical_crossentropy (since targets are one hot encoded)

optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

W0716 08:52:28.969547 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/optimizers.py:711: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0716 08:52:28.976996 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:2759: The name tf.log is deprecated. Please use tf.math.log instead.



In [5]:
# defining a function to sample new text using reweighted distribution

def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [6]:
# defining the text-generation loop

import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

W0716 08:52:33.832783 140601544877824 deprecation.py:323] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/tensorflow_core/python/ops/math_grad.py:1251: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


epoch 1


W0716 08:52:34.340679 140601544877824 deprecation.py:506] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:625: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 08:52:34.353957 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:899: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Epoch 1/1
--- Generating with seed: "gloomy aspect, like a cloud over-charged with interrogative "
------ temperature: 0.2
gloomy aspect, like a cloud over-charged with interrogative is the dees of the soul the senses and some to be be more to the self-conded to be the has a senses and a some the sense of the cares and some the self-and the same of the allowity of the self-conded and the senses and destent of the senses and destroness of the self--the self--the senses and always of the senses and some the same of the to the self--and the belief the self--the senses and a some 
------ temperature: 0.5
to the self--and the belief the self--the senses and a some the soul the defered and so pression of say, for the self-conceution
of has every be not be freed allophy, is for the spirit in the fact of the instincted and hand read the action of the soul and to be the sensess of the worded and something the condution of the senses: the formed and agore of the soul of the intrusts of his does 

an in these most the sar that
agreeable to blensin-
is persapp into the hglatt actives of terns, it
oldersounding doithings of circrefundances desiany, are believe i hit our conccivine the -treed, because to german not their "moder momerant cimulavagry--and
moral conjection
vatues fri, men-id
epoch 5
Epoch 1/1
--- Generating with seed: "ith in the most cultivated
circles, so where nature is conce"
------ temperature: 0.2
ith in the most cultivated
circles, so where nature is concealle the same all the same all the same the same and and the present the same the same the same and and and and and in the experience of the same all the same and interpretation of the same man and concealed the same and and in the same the condition of the same all the same the present, and the same the explession of the same all the same and and the condition of the same the same and all the sam
------ temperature: 0.5
e and and the condition of the same the same and all the same agreeable concealed the conc

  """


tion strict to stroke and foriely--we canter feeling upon intemptrens to certaincy had differe at this nour emen to it seet with should it be demard.i only a precisely,
------ temperature: 1.2
emen to it seet with should it be demard.i only a precisely, cquelulad jeason tacwing, dung oy hinsil wact hap to quarated, has vigew--that ahon his very findiam , weidnough of many makesit (wanton; from this class ot, between knotter to good he modent for to no non, he besuevery ondorate in men who sec almost will colousleten, in on the suftent opposited
life begon, invalual and othatics and over this wholenest, an crince spirit, a  say as he
puce, humant
epoch 7
Epoch 1/1
--- Generating with seed: "oking from above, grows
up within us gradually and in the sa"
------ temperature: 0.2
oking from above, grows
up within us gradually and in the same and the same of the suffering of the strong to the same to the conditions and the same and the same and the same and the same who have has a threaths of

W0716 09:04:59.523061 140601544877824 deprecation_wrapper.py:118] From /home/joel/anaconda3/envs/tensorflow-gpu-new/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/api/_v1/estimator/__init__.py:10: The name tf.estimator.inputs is deprecated. Please use tf.compat.v1.estimator.inputs instead.



KeyboardInterrupt: 