<a href="https://colab.research.google.com/github/YangxuanWu/Python/blob/master/Python/YangxuanWu_generation_with_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
keras.__version__

Using TensorFlow backend.


'2.3.1'

In [None]:
#如何使用 使用LSTM生成文本

In [2]:
#使用LSTM生成文本
import keras
import numpy as np

path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893


In [3]:
# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200278
Unique characters: 57
Vectorization...


In [6]:
#我们的网络是一个单一的LSTM层，然后是一个密集的分类器和所有可能的字符softmax。
#但是让我们注意到递归神经网络不是序列数据生成的唯一方法;一维convnets在这方面也被证明非常成功。
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [7]:
#目标是单热编码，我们将使用categorical_crossentropy作为损失来训练模型:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [8]:
#训练语言模型并从中取样
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
Epoch 1/1
--- Generating with seed: " there have
also been human herds (family alliances, communi"
------ temperature: 0.2
 there have
also been human herds (family alliances, community of the sentiment of the sentiment of the resting of the sentity of the german and the sentiment of the man a something and that the sentiment to the sentiment of the restand and the served that the german and the sentiment of the sentity of the resting to against that the sentiment and which is the sention of the sense of the sentive and the fain of the man a man and a something of the sentical
------ temperature: 0.5
nd the fain of the man a man and a something of the sentical sentures of the germans of the the the even of than in the sers, by the
exister enterty of man increasing of the restination of the confection, and the can persative on the raice on the dearand are as a wolld and sone a sones is all always this morality of the server,
are one of the fact of the relation to ressing and her

In [None]:
#低温会导致极其重复和可预测的文本，但是局部结构是非常真实的:特别是，所有单词(一个单词是字符的局部模式)都是真正的英语单词；
#温度越高，生成的文本就越有趣、令人惊讶，甚至更有创意;
#这里的高低温还需要时间理解一下。