In [1]:
import numpy as np
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras import layers
import tensorflow as tf
import logging

# Set TensorFlow's logger threshold to ERROR so that lower-severity
# messages are not emitted in the notebook output.
tf.get_logger().setLevel(logging.ERROR)

In [2]:
# Settings shared by the training and text-generation cells below.
EPOCHS = 32  # passed as `epochs` to model.fit
BATCH_SIZE = 256  # passed as `batch_size` to model.fit
INPUT_FILE_NAME = 'data/frankenstein.txt'  # path of the training corpus
WINDOW_LENGTH = 40  # number of characters in each training example
WINDOW_STEP = 3  # stride between the start positions of consecutive examples
BEAM_SIZE = 8  # candidates expanded per beam and beams kept after pruning
NUM_LETTERS = 11  # number of characters the beam search predicts
MAX_LENGTH = 50  # NOTE(review): not referenced in the cells shown here

In [5]:
# Read the whole corpus. The context manager closes the file even if the
# read raises, and the path comes from the INPUT_FILE_NAME setting instead
# of a second hard-coded copy of it.
with open(INPUT_FILE_NAME, 'r', encoding='utf-8') as file:
    text = file.read()

# Make lowercase, turn newlines into spaces, then collapse every run of
# spaces down to one. A single replace pass only halves a run (four spaces
# would become two), so repeat until no double space is left.
text = text.lower().replace('\n', ' ')
while '  ' in text:
    text = text.replace('  ', ' ')

# Encode characters as indices. Sorting makes the mapping deterministic:
# the iteration order of a set of strings can differ between interpreter
# sessions (string hash randomization), which would silently change the
# character <-> index encoding every time the kernel is restarted.
unique_chars = sorted(set(text))
char_to_index = {ch: index for index, ch in enumerate(unique_chars)}
index_to_char = {index: ch for index, ch in enumerate(unique_chars)}
encoding_width = len(char_to_index)

In [6]:
# Slide a WINDOW_LENGTH-character window over the text in steps of
# WINDOW_STEP; each window is one input fragment and the character right
# after it is the prediction target.
fragments = []
targets = []
for start in range(0, len(text) - WINDOW_LENGTH, WINDOW_STEP):
    stop = start + WINDOW_LENGTH
    fragments.append(text[start:stop])
    targets.append(text[stop])

# One-hot encode the training data:
#   X[example, position, char_index] = 1 for every character of a fragment
#   y[example, char_index] = 1 for the fragment's target character
num_examples = len(fragments)
X = np.zeros((num_examples, WINDOW_LENGTH, encoding_width))
y = np.zeros((num_examples, encoding_width))
for row, (fragment, target_char) in enumerate(zip(fragments, targets)):
    for position, ch in enumerate(fragment):
        X[row, position, char_to_index[ch]] = 1
    y[row, char_to_index[target_char]] = 1

In [7]:
# Build the model: two stacked LSTM layers followed by a softmax layer with
# one output per character in the vocabulary. The first LSTM returns its
# full output sequence so the second LSTM can consume it. Training happens
# in a later cell; this cell only builds, compiles and summarizes.
model = Sequential([
    layers.LSTM(128,
                input_shape=(None, encoding_width),
                return_sequences=True,
                dropout=0.2,
                recurrent_dropout=0.2),
    layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    layers.Dense(encoding_width, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 128)         100352    
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense (Dense)                (None, 67)                8643      
Total params: 240,579
Trainable params: 240,579
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train on the one-hot encoded examples; validation_split=0.05 sets aside a
# fraction of the data for the val_loss reported each epoch. The object
# returned by fit() is kept in `history`.
history = model.fit(
    X,
    y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.05,
    shuffle=True,
    verbose=2,
)

Epoch 1/32
542/542 - 466s - loss: 2.1566 - val_loss: 2.3594
Epoch 2/32
542/542 - 471s - loss: 2.0943 - val_loss: 2.3359
Epoch 3/32
542/542 - 468s - loss: 2.0470 - val_loss: 2.2963
Epoch 4/32
542/542 - 474s - loss: 2.0026 - val_loss: 2.2711
Epoch 5/32
542/542 - 485s - loss: 1.9797 - val_loss: 2.3440
Epoch 6/32
542/542 - 485s - loss: 2.0286 - val_loss: 2.5014
Epoch 7/32
542/542 - 484s - loss: 2.0758 - val_loss: 2.3263
Epoch 8/32
542/542 - 487s - loss: 2.0578 - val_loss: 2.4648
Epoch 9/32
542/542 - 485s - loss: 2.2448 - val_loss: 2.4029
Epoch 10/32


KeyboardInterrupt: 

In [None]:
# Create initial single beam represented by triplet
# (log-probability, string, list of one-hot encoded characters).
letters = 'the body '
one_hots = []
for char in letters:
    x = np.zeros(encoding_width)
    x[char_to_index[char]] = 1
    one_hots.append(x)
beams = [(np.log(1.0), letters, one_hots)]

# A beam cannot branch into more candidates than there are characters.
candidates_per_beam = min(BEAM_SIZE, encoding_width)

# Predict NUM_LETTERS into the future.
for i in range(NUM_LETTERS):
    # Every beam has the same length, so the one-hot sequences stack into a
    # single rectangular minibatch that is predicted in one call.
    minibatch = np.array([triple[2] for triple in beams])
    y_predict = model.predict(minibatch, verbose=0)
    new_beams = []
    for triple, softmax_vec in zip(beams, y_predict):
        # Pick the most probable next characters for this beam. A stable
        # sort on the negated probabilities yields descending order with
        # ties broken by the lowest index, i.e. the same ranking as taking
        # argmax repeatedly, but without overwriting the predictions and
        # without ever selecting the same index twice.
        top_indices = np.argsort(-softmax_vec, kind='stable')[:candidates_per_beam]
        for char_index in top_indices:
            char_prob = softmax_vec[char_index]
            if not char_prob > 0:
                # Candidates are in descending order, so everything from
                # here on is zero (or NaN); log() of it would be -inf/NaN.
                break
            new_prob = triple[0] + np.log(char_prob)
            new_letters = triple[1] + index_to_char[char_index]
            x = np.zeros(encoding_width)
            x[char_index] = 1
            new_one_hots = triple[2] + [x]
            new_beams.append((new_prob, new_letters, new_one_hots))
    if not new_beams:
        # No usable prediction for any beam (e.g. the model produced NaNs);
        # keep the beams found so far instead of predicting on an empty batch.
        break
    # Prune tree to only keep BEAM_SIZE most probable beams.
    new_beams.sort(key=lambda tup: tup[0], reverse=True)
    beams = new_beams[0:BEAM_SIZE]
for item in beams:
    print(item[1])
