In [1]:
import tensorflow.keras
import numpy as np

In [2]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.compat.v1 import ConfigProto, Session
import tensorflow.keras
config = ConfigProto(allow_soft_placement=True)

config.gpu_options.per_process_gpu_memory_fraction = 0.6
set_session(Session(config=config))



## 下载数据并导入

In [3]:
path = tensorflow.keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt'
)
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


## 数据one-hot编码化

In [4]:
maxlen = 60     #单个序列字符长度
steps = 3       #每个序列相差的字符长度
sentences = []  #用来存放序列的list
next_chars = [] #监督数据
for i in range(0, len(text)-maxlen, steps):
    sentences.append(text[i: i+maxlen])
    next_chars.append(text[i+maxlen])

print('Numbers of sentences:', len(sentences))

chars = sorted(list(set(text)))    #制作为重复的chars元组
print('Unique characters:', len(chars))

char_indices = dict((char, chars.index(char)) for char in chars)    #将chars制作成映射字典 即为每一个字符提供一个int值
print('Vectorization......')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)   #制作存放训练数据的数组， (samples, maxlen, len(chars)) 条数 最大长度 编码长度 ----one-hot编码下
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)           #制作监督数据
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Numbers of sentences: 200278
Unique characters: 57
Vectorization......


## 搭建LSTM网络
### 其中， 网络由一个LSTM层和一个Dense全连接层，采用softmax激活

In [5]:
input_tensor = tensorflow.keras.Input(shape=(60, 57, ))
output_tensor = tensorflow.keras.layers.LSTM(128, return_sequences=False)(input_tensor)
output_vector = tensorflow.keras.layers.Dense(57, activation='softmax')(output_tensor)
model = tensorflow.keras.models.Model(input_tensor, output_vector)
model.compile(
    loss='categorical_crossentropy',
    optimizer=tensorflow.keras.optimizers.RMSprop(lr=1e-2),
    metrics=['acc']
)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 60, 57)]          0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               95232     
_________________________________________________________________
dense (Dense)                (None, 57)                7353      
Total params: 102,585
Trainable params: 102,585
Non-trainable params: 0
_________________________________________________________________


## 创建一个采样函数

In [6]:
def sample_with_temperature(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    preds = np.exp(preds)
    preds = preds / np.sum(preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

## 循环生成文本

In [7]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen - 1)             #随机选取一个int值
    generated_text = text[start_index: start_index + maxlen]            #获得生成种子文本
    if epoch > 40:                                  
        print('\n--- Generating with seed:"' + generated_text + '"')
        for temperature in [0.2, 0.5, 1.0, 1.2]:
            print('\n--- Temperature:', temperature)
            sys.stdout.write(generated_text)

            for i in range(400):
                sampled = np.zeros((1, maxlen, len(chars)))             #生成序列
                for t, char in enumerate(generated_text):
                    sampled[0, t, char_indices[char]] = 1.              #生成输入数据

                preds = model.predict(sampled, verbose=0)[0]            #预测下一个字符 
                next_index = sample_with_temperature(preds, temperature)    #下一个字符的索引
                next_char = chars[next_index]                           #从字典中获得下一个字符

                generated_text += next_char                             #将下一个字符加入生成文本中重新导入到网络中继续生成 
                generated_text = generated_text[1:]

                sys.stdout.write(next_char)

ks himself which is
--- Temperature: 1.0
extent the german souls and before he seeks himself which is are sopeative least of things the significaning (ruddered a dustificity dective.

.=pane act upon that and that perhaps
better the boldly is believs. here all reproach. the philosophers of a fellowsan, with the facts
to child old man meanseso
stull cillour of the act to the first exception, a veheness in goal, he is terribul in
nomaling there a stephaconess
strange
profuct an irigenouse",
edove a
--- Temperature: 1.2
there a stephaconess
strange
profuct an irigenouse",
edove any
from
all cy thrustilves
even of same eached thes will does throws agfoe than feptal or in
itures,
turn in error of the obwitovily blunds to dutiestly following.


1e=ven closses of their most more emxtior
and the excession of nort exulps agaeti whom
willfuntinge" that this any philosopher, we mourness on in view itself in the momenta. the life, and
of drema
rejudied time, much is ; it--and thesepoch 51

--- Gen