多分类 softmax

In [8]:
import tensorflow as tf
import tensorflow_datasets as tfds
import os

# 三个版本的翻译分别来自于:
#
# William Cowper — text
#
# Edward, Earl of Derby — text
#
# Samuel Butler — text

URL = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
FILE_NAMES = ['cowper.txt', 'derby.txt', 'butler.txt']

for name in FILE_NAMES:
    text_dir = tf.keras.utils.get_file(name, origin=URL + name)

parent_dir = os.path.dirname(text_dir)

parent_dir

'/Users/jacky/.keras/datasets'

#### 将 txt 加载到 dataset 中

In [9]:
def labeler(example, index):
    return example, tf.cast(index, tf.int64)


datasets = []

# 每一行对应一个label(作者)
for i, name in enumerate(FILE_NAMES):
    lines_dataset = tf.data.TextLineDataset(os.path.join(parent_dir, name))
    labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i))
    datasets.append(labeled_dataset)

In [10]:
BUFFER_SIZE = 50000
BATCH_SIZE = 64
TAKE_SIZE = 5000

In [11]:
# 合并 3 份文本
merged_dataset = datasets[0]

for ds in datasets:
    merged_dataset = merged_dataset.concatenate(ds)

merged_dataset = merged_dataset.shuffle(BUFFER_SIZE, reshuffle_each_iteration=False)

In [12]:
# 把文本转化成数字 (即建立词汇表)

vocabulary_set = set()
# 弃用的 API
tokenizer = tfds.deprecated.text.Tokenizer()

for line, _ in merged_dataset:
    tokens = tokenizer.tokenize(line.numpy())
    vocabulary_set.update(tokens)

encoder = tfds.deprecated.text.TokenTextEncoder(vocabulary_set)


def encode(line, label):
    encoded_text = encoder.encode(line.numpy())
    return encoded_text, label


def encode_map_fn(text, label):
    # py_func doesn't set the shape of the returned tensors.
    encoded_text, label = tf.py_function(encode,
                                         inp=[text, label],
                                         Tout=(tf.int64, tf.int64))

    # `tf.data.Datasets` work best if all components have a shape set
    #  so set the shapes manually:
    encoded_text.set_shape([None])
    label.set_shape([])

    return encoded_text, label


all_encoded_data = merged_dataset.map(encode_map_fn)

In [13]:
train_data = all_encoded_data.skip(TAKE_SIZE).shuffle(BUFFER_SIZE)
train_data = train_data.padded_batch(BATCH_SIZE)

test_data = all_encoded_data.take(TAKE_SIZE)
test_data = test_data.padded_batch(BATCH_SIZE)

vocab_size = len(vocabulary_set) + 1

In [14]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

# 一个或多个紧密连接的层
# 编辑 `for` 行的列表去检测层的大小
for units in [64, 64]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))

# 输出层。第一个参数是标签个数。
model.add(tf.keras.layers.Dense(3, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [15]:
model.fit(train_data, epochs=3, validation_data=test_data)

Epoch 1/3


2021-08-29 12:50:50.826219: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-08-29 12:50:55.561438: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-08-29 12:50:55.573872: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-08-29 12:50:56.120610: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-08-29 12:50:56.140799: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


    997/Unknown - 55s 49ms/step - loss: 0.4602 - accuracy: 0.7985

2021-08-29 12:51:45.665553: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-08-29 12:51:46.045085: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-08-29 12:51:46.053334: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x176efc310>

In [16]:
model.evaluate(test_data)



[0.4237177073955536, 0.8380000591278076]