#### regularization - 二分类问题

In [None]:
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import regularizers

import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots

from IPython import display

import matplotlib.pyplot as plt
import numpy as np
import pathlib
import shutil
import tempfile

In [None]:
# TensorBoard logs go to "tensorboard_logs" inside a freshly created temp
# directory; rmtree (errors ignored) guarantees the target starts out absent.
logdir = pathlib.Path(tempfile.mkdtemp(), "tensorboard_logs")
shutil.rmtree(logdir, ignore_errors=True)

不用在意数据集，重点在处理过拟合和欠拟合上。

The Higgs Dataset.

It contains 11 000 000 examples, each with 28 features, and a binary class label.

In [None]:
# The Higgs Dataset.
# It contains 11 000 000 examples, each with 28 features, and a binary class label.

FEATURES = 28

# Location of the uncompressed HIGGS.csv. The path is resolved against the
# current user's home directory instead of one specific account, so the
# notebook is not tied to a single machine; edit DATA_DIR if the file lives
# somewhere else.
DATA_DIR = pathlib.Path.home() / "Downloads"
HIGGS_CSV = DATA_DIR / "HIGGS.csv"

# FEATURES + 1 columns per row (pack_row treats column 0 as the label), each
# declared with a float default value.
ds = tf.data.experimental.CsvDataset(str(HIGGS_CSV), [float()] * (FEATURES + 1))

In [None]:
def pack_row(*row):
    """Split the columns of a CSV record batch into ``(features, label)``.

    :param row: the parsed columns, passed positionally; column 0 is the label
        and every remaining column is one feature
    :return: tuple ``(features, label)`` where ``features`` is the feature
        columns stacked along axis 1 and ``label`` is column 0 unchanged
    """
    label, feature_columns = row[0], row[1:]
    # Stacking on axis 1 puts the features side by side for each example, so
    # this is meant to be mapped over batched columns (see the caller).
    return tf.stack(feature_columns, axis=1), label


In [None]:
# batch(10000) groups 10,000 CSV rows together so pack_row runs once per
# group instead of once per row; unbatch() then splits the result back into
# individual (features, label) records.
packed_ds = (
    ds
    .batch(10000)
    .map(pack_row)
    .unbatch()
)

In [None]:
# Sanity check on the first 1000 packed examples: print one feature vector
# and draw a histogram over every feature value in that batch.
for features, label in packed_ds.batch(1000).take(1):
    first_example = features[0]
    print(first_example)
    all_values = features.numpy().ravel()
    plt.hist(all_values, bins=101)

In [None]:
# Dataset sizing. Only a small slice of the data is used for these experiments.
N_VALIDATION = 1_000   # examples held out for validation
N_TRAIN = 10_000       # examples used for training
BUFFER_SIZE = 10_000   # shuffle buffer size
BATCH_SIZE = 500
# Batches needed to see the whole training set once: 10000 / 500 = 20.
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE

In [None]:
# The first N_VALIDATION records form the validation set; the N_TRAIN records
# after them form the training set. Both are cached after being selected.
validate_ds = (
    packed_ds
    .take(N_VALIDATION)
    .cache()
)
train_ds = (
    packed_ds
    .skip(N_VALIDATION)
    .take(N_TRAIN)
    .cache()
)

In [None]:
# NOTE: both names are rebound to their batched versions, so running this
# cell a second time without re-running the split cell would batch twice.
# Validation data is only batched; training data is shuffled, repeated and
# then batched (fit() is given steps_per_epoch to delimit epochs).
validate_ds = validate_ds.batch(BATCH_SIZE)
train_ds = (
    train_ds
    .shuffle(BUFFER_SIZE)
    .repeat()
    .batch(BATCH_SIZE)
)

如果在训练过程中逐渐降低学习率，许多模型的训练效果会更好。可以使用 `tf.keras.optimizers.schedules` 随时间降低学习率。

设置一个 `schedules.InverseTimeDecay`，使学习率按双曲线方式衰减：到第 1000 个 epoch 时降为初始学习率的 1/2，到第 2000 个 epoch 时降为 1/3，依此类推。

In [None]:
# model param: optimizer (with scheduled learning rate)
# InverseTimeDecay computes
#     lr(step) = initial_learning_rate / (1 + decay_rate * step / decay_steps)
# so with decay_rate=1 the rate is 1/2 of the initial value after decay_steps
# steps (1000 epochs), 1/3 after twice that, 1/4 after three times, and so on.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1000 * STEPS_PER_EPOCH,
    decay_rate=1,
)


def get_optimizer():
    """Return a new Adam optimizer whose learning rate follows ``lr_schedule``."""
    return tf.keras.optimizers.Adam(learning_rate=lr_schedule)


# plot learning rate
# Evaluate the schedule at evenly spaced steps in [0, 100000] and plot it
# against the epoch number (step / STEPS_PER_EPOCH).
step = np.linspace(0, 100000)
lr = lr_schedule(step)

plt.figure(figsize=(8, 6))
plt.plot(step / STEPS_PER_EPOCH, lr)
# Anchor the y-axis at zero while keeping the automatically chosen upper limit.
plt.ylim(0, max(plt.ylim()))
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.show()


In [None]:
# model param: callbacks
def get_callbacks(name):
    """
    Build the list of callbacks passed to ``model.fit``.

    :param name: tensorboard log file name (joined onto ``logdir``)
    :return: list containing, in order, an EpochDots, an EarlyStopping and a
        TensorBoard callback
    """
    # NOTE(review): EpochDots comes from tensorflow_docs; presumably a compact
    # progress printer - confirm against the tfdocs source.
    progress_dots = tfdocs.modeling.EpochDots()
    # Stop once the validation cross-entropy metric has gone 200 epochs
    # without improving.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_binary_crossentropy',
        patience=200,
    )
    tensorboard = tf.keras.callbacks.TensorBoard(logdir / name)
    return [progress_dots, early_stopping, tensorboard]

In [None]:
# compile and fit
def compile_and_fit(model, name, optimizer=None, max_epochs=10000):
    """
    Compile ``model`` for binary classification on raw logits and train it.

    :param model: Keras model whose last layer outputs logits (no sigmoid)
    :param name: run name, forwarded to ``get_callbacks`` for the TensorBoard log path
    :param optimizer: optimizer to compile with; ``get_optimizer()`` is used when None
    :param max_epochs: upper bound on the number of epochs passed to ``fit``
    :return: the History object returned by ``model.fit``
    """
    if optimizer is None:
        optimizer = get_optimizer()
    model.compile(
        optimizer=optimizer,
        # from_logits=True: the loss applies the sigmoid itself, so the model
        # does not need a sigmoid activation on its output layer.
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=[
            # Use the metric class (not the loss class) for the tracked
            # cross-entropy; the name is what EarlyStopping monitors as
            # 'val_binary_crossentropy'.
            tf.keras.metrics.BinaryCrossentropy(from_logits=True, name='binary_crossentropy'),
            # The model emits logits, so the decision boundary is 0
            # (sigmoid(0) == 0.5). The plain 'accuracy' string would threshold
            # the raw logits at 0.5 instead and misreport accuracy.
            tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0),
        ],
    )
    model.summary()
    history = model.fit(train_ds,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=max_epochs,
                        validation_data=validate_ds,
                        callbacks=get_callbacks(name),
                        verbose=0)
    return history


In [None]:
# tiny model
# One hidden layer of 16 ELU units followed by a single output unit with no
# activation, so the model outputs a raw logit. ('elu' is a built-in Keras
# activation name.)
tiny_model = tf.keras.Sequential([
    layers.Dense(units=16, activation='elu', input_shape=(FEATURES,)),
    layers.Dense(units=1),
])

In [None]:
# Training histories of the differently sized models, keyed by model name.
size_histories = dict()

In [None]:
# Train the tiny model and store its history under the key "Tiny".
size_histories["Tiny"] = compile_and_fit(tiny_model, name="sizes/Tiny")

In [None]:
# Plot the binary cross-entropy curves of every recorded history and zoom
# the y-axis to the 0.5-0.7 range.
plotter = tfdocs.plots.HistoryPlotter(
    metric='binary_crossentropy',
    smoothing_std=10,
)
plotter.plot(size_histories)
plt.ylim(0.5, 0.7)

此处省略了很多模型(由小型到大型)...

下面是一个使用L2正则化的大型模型。


#### L2 model

In [None]:
# Start the regularization comparison with the Tiny model's history as the
# baseline entry.
regularizer_histories = {'Tiny': size_histories['Tiny']}

In [None]:
def _l2_dense(**dense_kwargs):
    """Return a 512-unit ELU Dense layer with an L2(0.001) kernel regularizer.

    Extra keyword arguments (e.g. ``input_shape``) are forwarded to ``Dense``.
    """
    return layers.Dense(512, activation='elu',
                        kernel_regularizer=regularizers.l2(0.001),
                        **dense_kwargs)


# Four L2-regularized hidden layers followed by a single logit output unit.
l2_model = tf.keras.Sequential([
    _l2_dense(input_shape=(FEATURES,)),
    _l2_dense(),
    _l2_dense(),
    _l2_dense(),
    layers.Dense(1),
])

regularizer_histories['l2'] = compile_and_fit(l2_model, name="regularizers/l2")

#### Dropout model



In [None]:
def _dense_dropout(**dense_kwargs):
    """Return ``[Dense(512, elu), Dropout(0.5)]`` as a two-layer list.

    Extra keyword arguments (e.g. ``input_shape``) are forwarded to ``Dense``.
    """
    return [
        layers.Dense(512, activation='elu', **dense_kwargs),
        layers.Dropout(0.5),
    ]


# Four Dense+Dropout pairs followed by a single logit output unit.
dropout_model = tf.keras.Sequential([
    *_dense_dropout(input_shape=(FEATURES,)),
    *_dense_dropout(),
    *_dense_dropout(),
    *_dense_dropout(),
    layers.Dense(1),
])

regularizer_histories['dropout'] = compile_and_fit(dropout_model, name="regularizers/dropout")