In [4]:
import keras
import numpy as np

path = keras.utils.get_file(
    'gita.txt',
    origin='https://www.gutenberg.org/files/2388/2388.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 143514


In [5]:
# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 47818
Unique characters: 59
Vectorization...


In [0]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [0]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [0]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [9]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
Epoch 1/1
--- Generating with seed: "ls
  which could not otherwise befall? the birth
  of living"
------ temperature: 0.2
ls
  which could not otherwise befall? the birth
  of livingiiiiiiiiii iiiiiiiiiiiiiiiiii iiiiiiiiiiiiiiiiiiiiiiiniiiiiiiiiiiiiiiiiiiiiii iiiiini  iiiiiiiiiiiiiiiii iii iiiiii iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii iiiiiiii iiiiiiiiiiiiiiiiiiniiii iiiiiiiiiiiiiiiiiiiii iiiiiiiiiiiiiiiiiiiiiiiiiiiiii  ii iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii iiiiiiiiiiiiiiiiiiiiiiiiii iiiiiiiiiiiiiiiiii 
------ temperature: 0.5
iiiiiiiiiiiii iiiiiiiiiiiiiiiiiiiiiiiiii iiiiiiiiiiiiiiiiii iiiiiiiliiiiii iiii iiiii iiioiiiiii liiiiiiiiii niii  iini ii iiinniiniiiiii nii   iiii niii  iiiiiinniiniiin iiiaii i  iiii ii,int i i  iiitoiiii iinoiiniiiiiiii iiii  iimo iiiiiwiiiiiiiihi iiii iiniiao iii n  niii iai iiitniiiiei h toi iii iii  iii liii iniii ini ri  i iiiiiiiiimiiki iii iiiii li ii ii iiiiiii ii n ii  

  This is separate from the ipykernel package so we can avoid doing imports until


                                                          h                                                                 h                                                                                                   m                                                                                                                                                      
------ temperature: 0.5
                                                            shii ah  h    s   
             h   h h    h ih          h i       r  e mi        m emh  m   i       a   ar       h hi   h  i mh  h   m a rh i     te h          h h h   imh   r      e     h    m m ri  hm hs h i   ahh     r hd   d         m dmih h
  hh      mh  rm h   r p     v      h   h sh h       a     iar   a  h      m     h   i    i    i  s    h r    m r      m t        i e    m     hmh  ,r a 
------ temperature: 1.0
  i  s    h r    m r      m t        i e    m     hmh  ,r a lie ireisd el m eeiurhhseheumog  mh m nyisrrad her,o i.