# Setup

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# Common imports
import numpy as np
import os
import time

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ann"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

# Keras 介绍


> keras与tf.keras的区别：keras多后端，tf.keras单后端（只能是TensorFlow）下面的教程中使用的是tf.keras, 因此它可以使用处了keras以外的其他功能

![image-20210310093333481](images/image-20210310093333481.png)

导入方法：
```python
from tensorflow import keras
```

keras 在使用过程中，主要考虑以下几点：

1. 模型创建方法
2. 模型细节查看
3. 损失函数，优化器定义，模型编译
4. 训练，评估及预测
5. 保存和还原模型
6. 使用回调函数
7. Tensorbload可视化
8. 参数调整


# 模型创建方法









* 数据准备

1. fetch_california_housing：加载加州住房数据集， 该数据集仅包含数字特征，没有缺失值
2. train_test_split: 分割数据集为训练集，验证集，测试集
3. StandardScaler：标准化数据集，x-μ/σ



In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
tf.random.set_seed(42)

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
X_new = X_test[:3]

## Sequential API

最简单的keras模型，仅由顺序连接的单层堆栈组成，又称为顺序API

In [5]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=X_train.shape[1:]),
    keras.layers.Dense(1)
])

In [6]:
model.compile(loss="mean_squared_error",
              optimizer=keras.optimizers.SGD(lr=1e-3))

history = model.fit(X_train, y_train,
                    epochs=20,
                    validation_data=(X_valid, y_valid))

mse_test = model.evaluate(X_test, y_test)

y_pred = model.predict(X_new)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Functional API



### 单输入单输出

![image-20210314172807709](images/image-20210314172807709.png)

In [8]:
keras.backend.clear_session()

input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_], outputs=[output])

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 8)]          0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 30)           270         input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 30)           930         dense[0][0]                      
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 38)           0           input_1[0][0]                    
                                                                 dense_1[0][0]                

In [9]:
model.compile(loss="mean_squared_error",
              optimizer=keras.optimizers.SGD(lr=1e-3))

history = model.fit(X_train, y_train,
                    epochs=20,
                    validation_data=(X_valid, y_valid))

mse_test = model.evaluate(X_test, y_test)

y_pred = model.predict(X_new)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### 多输入单输出

假设通过宽路径送入5个特征，深路径送入6个特征

![image-20210314172915275](images/image-20210314172915275.png)

In [14]:
X_train_A.shape

(11610, 5)

In [15]:
# 为input_A, input_B拆分数据，此时因为是多输入单输出，所以输入数据需要调整，如下
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, : 5], X_valid[:, 2: ]
X_test_A, X_test_B = X_test[:, : 5], X_test[:, 2: ]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]

In [16]:
keras.backend.clear_session()

input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="output")(concat)
model = keras.models.Model(inputs=[input_A, input_B], outputs=[output])

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
deep_input (InputLayer)         [(None, 6)]          0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 30)           210         deep_input[0][0]                 
__________________________________________________________________________________________________
wide_input (InputLayer)         [(None, 5)]          0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 30)           930         dense[0][0]                      
______________________________________________________________________________________________

In [17]:
model.compile(loss="mse",
              optimizer=keras.optimizers.SGD(lr=1e-3))

history = model.fit((X_train_A, X_train_B), y_train,
                    epochs=20,
                    validation_data=((X_valid_A, X_valid_B), y_valid))

mse_test = model.evaluate((X_test_A, X_test_B), y_test)

y_pred = model.predict((X_new_A, X_new_B))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### 多输入多输出

需要多个输出的场景：
1. 基于同一数据的多个独立任务，当然可以为每个任务训练一个神经网络，但是在许多情况下，通过训练每个任务一个输出的单个神经网络效果更好
2. 训练约束：例如在神经网络结构中添加一些辅助输出，以确保网络的主要部分自己能学习到有用的东西，而不依赖网络的其余部分
3. 在图片中定位和分类，其中定位是一个回归问题例如长宽，同时分类是一个分类任务

![image-20210314173755752](images/image-20210314173755752.png)

In [18]:
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]

In [19]:
keras.backend.clear_session()

input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
aux_output = keras.layers.Dense(1, name="aux_output")(hidden2)
model = keras.models.Model(inputs=[input_A, input_B],
                           outputs=[output, aux_output])

In [20]:
# 每个输出都需要自己的损失，如果传递单个损失，会将所有输出使用同一个损失，这样不好，
# 同时如果我主要关注main-output的损失，可以为不同的损失添加权重，即loss_weights
model.compile(loss=["mse", "mse"], loss_weights=[0.9, 0.1],
              optimizer=keras.optimizers.SGD(lr=1e-3))

history = model.fit([X_train_A, X_train_B], [y_train, y_train],
                    epochs=20,
                    validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))

# 评估模型时，keras将返回总损失以及所有单个损失
total_loss, main_loss, aux_loss = model.evaluate(
    [X_test_A, X_test_B], [y_test, y_test])

y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## SubClass API

一些模型涉及循环，变化的形状，条件分支，和其他动态行为，子类API会很实用

```python

class SomeModel(keras.Model):
    def __init__(self, **kwargs):
        # 构造器
        # ...
        
    def call(self, inputs):
        # some code
        
```

* 构造器中创建所需的层，相当于各个层节点
* call函数中执行每个层上的计算，相当于层与层之间的连接关系
        


In [21]:
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[: 3], X_test_B[: 3]

In [22]:
keras.backend.clear_session()

class WideAndDeepModel(keras.models.Model):
    def __init__(self, units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(units, activation=activation)
        self.hidden2 = keras.layers.Dense(units, activation=activation)
        self.main_output = keras.layers.Dense(1)
        self.aux_output = keras.layers.Dense(1)

    def call(self, inputs):
        input_A, input_B = inputs
        hidden1 = self.hidden1(input_B)
        hidden2 = self.hidden2(hidden1)
        concat = keras.layers.concatenate([input_A, hidden2])
        main_output = self.main_output(concat)
        aux_output = self.aux_output(hidden2)
        return main_output, aux_output


model = WideAndDeepModel(30, activation="relu")

In [23]:
model.compile(loss="mse", loss_weights=[0.9, 0.1],
              optimizer=keras.optimizers.SGD(lr=1e-3))

history = model.fit((X_train_A, X_train_B), (y_train, y_train),
                    epochs=10,
                    validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))

total_loss, main_loss, aux_loss = model.evaluate(
    (X_test_A, X_test_B), (y_test, y_test))

y_pred_main, y_pred_aux = model.predict((X_new_A, X_new_B))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# 模型细节查看

## 网络层

- `model.layers`会返回层列表
- `model.get_layer(...)`通过名称获取层


## 网络权重

- `layer_name.get_weights`访问给定层的权重（包含weights和bias）
- `layer_name.set_weights`设置给定层的权重（包含weights和bias）

注意：Dense层已经默认设置了随机化权重，如果需要使用其他初始化方法，创建层时使用`kernal_initializer`


## 查看参数

- `model.summay()`会显示模型所有层，包括每个层的名称，输出形状，参数数量

# 损失函数，优化器定义，模型编译

## 损失函数定义

- 损失函数的定义通过`model.compile(loss=...)`来进行定义
- 有两种方式给定：
    - 一种是给定缩写名称，例如mean squared error可以是`loss='mean_squared_error'`
    - 另一种是给定keras的引用路径，`loss=keras.losses.MeanSquaredError`

损失函数的完整列表参照文档：[keras.losses](https://keras.io/api/losses/)


## 优化器定义
- 优化器的定义通过`model.compile(optimizer=...)`来进行定义
- 有两种方式给定：
    - 一种是给定缩写名称，例如sgd可以是`optimizer='sgd'`
    - 另一种是给定keras的引用路径，`optimizer=keras.optimizers.SGD()`,如果是这种方式可以通过加入参数`lr=...`来调整学习率
- 

优化器的完整列表参照文档：[keras.optimizers](https://keras.io/api/optimizers/)


## 评价指标定义
- 评价指标定义通过`model.compile(metrics=[...])`来进行定义
- 有两种方式给定：
    - 一种是给定缩写名称，例如sgd可以是`metrics=['accuracy']`
    - 另一种是给定keras的引用路径，`metrics=[keras.metrics.Accuracy]`

评价指标的完整列表参照文档：[keras.metrics](https://keras.io/api/metrics/)



## 模型编译

- `model.compile(loss=..., optimizer=..., metrics=[...], etc)`

# 训练，评估及预测

## 训练

- `model.fit(training data, training label, epoch=..., validation_data=(valid data, valid label))`
- 如果使用`validation_split=0.x`,则不需要使用`validation_data`
- 如果label不平衡，可以增加`class_weight`参数为不同的类别增加权重
- 调用`model.fit`后，会返回一个`history`对象，里面的`history.history`可以用于绘制epoch-loss学习曲线, 例如`pd.DataFrame(history.history).plot(figsize=(8,5))`


## 评估

- `model.evaluate(test data, test label)`


## 预测

- `model.predict(new data)` or `model.predict_classes(new data)`

# 保存和还原模型






In [24]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
tf.random.set_seed(42)

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
X_new = X_test[:3]

keras.backend.clear_session()

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

model.compile(loss="mse",
              optimizer=keras.optimizers.SGD(lr=1e-3))

history = model.fit(X_train, y_train,
                    epochs=10,
                    validation_data=(X_valid, y_valid))

mse_test = model.evaluate(X_test, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 保存模型

- keras使用HDF5保存模型结构（仅限于顺序API和函数式API）
    - 包括每一层的超参数
    - 每一层的连接权重和偏置
    - 优化器等（以及各自的超参数或可能的状态）

In [26]:
model.save("models/my_keras_model.h5")

- 子类API至少可以使用`save_weights()`保存模型参数

In [27]:
model.save_weights("models/my_keras_weights.ckpt")

## 还原模型

In [30]:
model = keras.models.load_model("models/my_keras_model.h5")

model.predict(X_new)



array([[0.5398443],
       [1.6505727],
       [3.0097804]], dtype=float32)

In [36]:
model.load_weights("models/my_keras_weights.ckpt")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2050eb7d070>

# 回调函数

- fit()方法使用callbacks的方式，`fit(..., callbacks=[...])`
- 目的是为了在训练过程中执行某些操作，例如定期模型保存，提前停止，tensorboard等
- 定义方式是`keras.callbacks.xxx`


In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
tf.random.set_seed(42)

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
X_new = X_test[:3]



model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

## ModelCheckpoint - 定期保存模型

- 训练期间，`ModelCheckpoint`回调会定期保存检查点
- 设置`save_best_only=True`时，只有在验证集上模型性能达到目前最好时，才会保存模型，这样就不用担心训练过长而过拟合训练集，只需还原最后一个模型，即为验证集中最佳模型
- 相当于实现了提前停止



In [3]:
keras.backend.clear_session()

model.compile(loss="mse",
              optimizer=keras.optimizers.SGD(lr=1e-3))

checkpoint_cb = keras.callbacks.ModelCheckpoint("models/my_keras_model.h5", 
                                                save_best_only=True)

history = model.fit(X_train, y_train, 
                    epochs=10,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb])

# rollback to best model
model = keras.models.load_model("models/my_keras_model.h5")
mse_test = model.evaluate(X_test, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## EarlyStopping - 提前停止

- 另一种实现提前停止的方法是使用`EarlyStopping`回调
- 该回调的实现提前停止的方式是如果在多个轮次（通过`patience`定义）的验证集上没有任何进展，它将中断训练，并且回滚到最佳模型
- 使用方式：通常会和`ModelCheckpoint`回调一起使用，既可以保存最优模型，又可以防止浪费计算资源




In [5]:
keras.backend.clear_session()

model.compile(loss="mse", 
              optimizer=keras.optimizers.SGD(lr=1e-3))

early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)

history = model.fit(X_train, y_train, 
                    epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, early_stopping_cb])

mse_test = model.evaluate(X_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100


## 自定义回调函数

- 自定义回调通过继承`keras.callbacks.Callback`实现

In [19]:
keras.backend.clear_session()


class CustomCallback(keras.callbacks.Callback):
    def on_train_begin(self, logs=None):
        print("Starting training")

    def on_train_end(self, logs=None):
        print("Stop training")

    def on_epoch_begin(self, epoch, logs=None):
        print("Start epoch {} of training".format(epoch))

    def on_epoch_end(self, epoch, logs=None):
        print("End epoch {} of training; loss: {}".format(epoch, logs['loss']))
    
    def on_train_batch_begin(self, batch, logs=None):
        print("...Training: start of batch {}".format(batch))

    def on_train_batch_end(self, batch, logs=None):
        print("...Training: end of batch {}; loss: {}".format(batch, logs['loss']))

    def on_test_begin(self, logs=None):
        print("Start testing")

    def on_test_end(self, logs=None):
        print("Stop testing")

    def on_test_batch_begin(self, batch, logs=None):
        print("...Evaluating: start of batch {}".format(batch))

    def on_test_batch_end(self, batch, logs=None):
        print("...Evaluating: end of batch {}; loss: {}".format(batch, logs['loss']))
        
    def on_predict_begin(self, logs=None):
        print("Start predicting")

    def on_predict_end(self, logs=None):
        keys = list(logs.keys())
        print("Stop predicting")

    def on_predict_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Predicting: start of batch {}".format(batch))

    def on_predict_batch_end(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Predicting: end of batch {}; output: {}".format(batch, logs['outputs']))
        
custom_cb = CustomCallback()

history = model.fit(X_train, y_train, 
                    epochs=1,
                    validation_data=(X_valid, y_valid),
                    callbacks=[custom_cb])
model.predict(X_new, callbacks=[custom_cb])

Starting training
Start epoch 0 of training
...Training: start of batch 0
  1/363 [..............................] - ETA: 1s - loss: 0.5215...Training: end of batch 0; loss: 0.5215221643447876
...Training: start of batch 1
...Training: end of batch 1; loss: 0.38260430097579956
...Training: start of batch 2
...Training: end of batch 2; loss: 0.41070976853370667
...Training: start of batch 3
...Training: end of batch 3; loss: 0.39290082454681396
...Training: start of batch 4
...Training: end of batch 4; loss: 0.39595308899879456
...Training: start of batch 5
...Training: end of batch 5; loss: 0.404376357793808
...Training: start of batch 6
...Training: end of batch 6; loss: 0.38314396142959595
...Training: start of batch 7
...Training: end of batch 7; loss: 0.36870065331459045
...Training: start of batch 8
...Training: end of batch 8; loss: 0.3704698383808136
...Training: start of batch 9
...Training: end of batch 9; loss: 0.374715656042099
...Training: start of batch 10
...Training: end

...Training: end of batch 98; loss: 0.33025291562080383
...Training: start of batch 99
...Training: end of batch 99; loss: 0.33085164427757263
...Training: start of batch 100
...Training: end of batch 100; loss: 0.3292800486087799
...Training: start of batch 101
...Training: end of batch 101; loss: 0.3296352028846741
...Training: start of batch 102
...Training: end of batch 102; loss: 0.3301103413105011
...Training: start of batch 103
...Training: end of batch 103; loss: 0.3301202058792114
...Training: start of batch 104
...Training: end of batch 104; loss: 0.3298487663269043
...Training: start of batch 105
...Training: end of batch 105; loss: 0.3308788239955902
...Training: start of batch 106
...Training: end of batch 106; loss: 0.329477459192276
...Training: start of batch 107
...Training: end of batch 107; loss: 0.32812774181365967
...Training: start of batch 108
...Training: end of batch 108; loss: 0.32802730798721313
...Training: start of batch 109
...Training: end of batch 109; l

...Training: start of batch 200
...Training: end of batch 200; loss: 0.3336067795753479
...Training: start of batch 201
...Training: end of batch 201; loss: 0.33309072256088257
...Training: start of batch 202
...Training: end of batch 202; loss: 0.3329603970050812
...Training: start of batch 203
...Training: end of batch 203; loss: 0.33207282423973083
...Training: start of batch 204
...Training: end of batch 204; loss: 0.331658273935318
...Training: start of batch 205
...Training: end of batch 205; loss: 0.3310810327529907
...Training: start of batch 206
...Training: end of batch 206; loss: 0.3315809369087219
...Training: start of batch 207
...Training: end of batch 207; loss: 0.3312411904335022
...Training: start of batch 208
...Training: end of batch 208; loss: 0.3308991491794586
...Training: start of batch 209
...Training: end of batch 209; loss: 0.3312266170978546
...Training: start of batch 210
...Training: end of batch 210; loss: 0.3312881290912628
...Training: start of batch 211

...Training: end of batch 291; loss: 0.33016228675842285
...Training: start of batch 292
...Training: end of batch 292; loss: 0.329924076795578
...Training: start of batch 293
...Training: end of batch 293; loss: 0.3297005593776703
...Training: start of batch 294
...Training: end of batch 294; loss: 0.3293306231498718
...Training: start of batch 295
...Training: end of batch 295; loss: 0.3289760649204254
...Training: start of batch 296
...Training: end of batch 296; loss: 0.32887816429138184
...Training: start of batch 297
...Training: end of batch 297; loss: 0.3283041715621948
...Training: start of batch 298
...Training: end of batch 298; loss: 0.3279137909412384
...Training: start of batch 299
...Training: end of batch 299; loss: 0.32732394337654114
...Training: start of batch 300
...Training: end of batch 300; loss: 0.32727906107902527
...Training: start of batch 301
...Training: end of batch 301; loss: 0.32672950625419617
...Training: start of batch 302
...Training: end of batch 30

...Evaluating: end of batch 20; loss: 0.32957571744918823
...Evaluating: start of batch 21
...Evaluating: end of batch 21; loss: 0.32394033670425415
...Evaluating: start of batch 22
...Evaluating: end of batch 22; loss: 0.32553213834762573
...Evaluating: start of batch 23
...Evaluating: end of batch 23; loss: 0.3289577066898346
...Evaluating: start of batch 24
...Evaluating: end of batch 24; loss: 0.33220797777175903
...Evaluating: start of batch 25
...Evaluating: end of batch 25; loss: 0.3358355760574341
...Evaluating: start of batch 26
...Evaluating: end of batch 26; loss: 0.3319954574108124
...Evaluating: start of batch 27
...Evaluating: end of batch 27; loss: 0.32664206624031067
...Evaluating: start of batch 28
...Evaluating: end of batch 28; loss: 0.3233335614204407
...Evaluating: start of batch 29
...Evaluating: end of batch 29; loss: 0.32625457644462585
...Evaluating: start of batch 30
...Evaluating: end of batch 30; loss: 0.32139989733695984
...Evaluating: start of batch 31
...

End epoch 0 of training; loss: 0.3269706964492798
Stop training
Start predicting
...Predicting: start of batch 0
...Predicting: end of batch 0; output: [[0.6734809]
 [1.7383174]
 [4.2687154]]
Stop predicting


array([[0.6734809],
       [1.7383174],
       [4.2687154]], dtype=float32)

# TensorBoard可视化

用途：
1. 在训练期间查看学习曲线
2. 比较多次运行的学习曲线
3. 可视化计算图
4. 分析训练统计数据
5. 查看由模型生成的图像
6. 高维数据投影到3D空间
7. 聚类并可视化


使用方式分为三步：

1. 定义tensorboard的日志目录以及可以生成当前日期和时间的子目录函数
2. 定义一个`TensorBoard()`回调
3. 开始训练，然后启动tensorboard服务

每一次运行model.fit,都会产生一个目录，目录结构如下所示（显示了两次运行结果）：

- 一个是用于训练日志的子目录
- 一个是用于验证日志的子目录

![image-20210315075556996](images/image-20210315075556996.png)

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
tf.random.set_seed(42)

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
X_new = X_test[:3]

keras.backend.clear_session()

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

model.compile(loss="mse", 
              optimizer=keras.optimizers.SGD(lr=1e-3))

## 定义日志目录

In [3]:
def get_run_logdir(logpath="my_logs"):
    root_logdir = os.path.join(os.curdir, logpath)
    
    try:
        os.mkdir(root_logdir)
    except OSError:
        print ("Creation of the directory %s failed" % root_logdir)
    else:
        print ("Successfully created the directory %s " % root_logdir)

    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)

run_logdir = get_run_logdir()
run_logdir

Creation of the directory .\my_logs failed


'.\\my_logs\\run_2021_03_15-08_19_31'

## 定义TensorBoard回调


In [4]:
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
checkpoint_cb = keras.callbacks.ModelCheckpoint('models/my_keras_model.h5', save_best_only=True)
earlystop_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, earlystop_cb, tensorboard_cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100


## 启动TensorBoard服务



### 终端启动


```bash
$ tensorboard --logdir=./my_logs --port=6006
```



### jupyter 中启动

In [7]:
%load_ext tensorboard
%tensorboard --logdir=./my_logs --port=6006

ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 16912.

## 比较两次运行过程

例如模拟如下场景，通过调整学习率后再次训练模型，通过如下的tensorboard可以看到两次的收敛过程，很明显第二次的学习率更好一点

![image-20210315081519157](images/image-20210315081519157.png)

In [5]:
keras.backend.clear_session()


model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

model.compile(loss="mse", 
              optimizer=keras.optimizers.SGD(lr=0.05))

In [6]:
run_logdir2 = get_run_logdir()
run_logdir2

tensorboard_cb = keras.callbacks.TensorBoard(run_logdir2)
history = model.fit(X_train, y_train, 
                    epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, earlystop_cb, tensorboard_cb])

Creation of the directory .\my_logs failed
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


## 查看Tensorboard的其他设置

help(keras.callbacks.TensorBoard.__init__)

# 参数调整



In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
tf.random.set_seed(42)

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
X_new = X_test[:3]



## 隐藏层数


- 简单问题：单层MLP
- 复杂问题：深层MLP



## 单元数

通常做法：选择一个比你实际需要的层和神经元更多的模型，然后使用提前停止和其他正则化技术防止模型过拟合

注意：通常增加层数而不是每层的神经元数，将获得更多收益



## Batch Size


大批量：更利于GPU加速，在GPU显存允许的情况下，但是大批量会导致训练不稳定

通常做法：尝试使用大批量，慢慢增加学习率，如果训练不稳定或最终表现不佳，可以使用小批量



## 结合sklearn进行随机搜索

模型参数和超参数到底选择哪些，通常最简单的思路就是尝试各种参数的组合，然后查看哪种对验证集最有效，可以使用GridSearchCV或RandomizedSearchCV,但是要使用scikit-learn中的这两个功能，需要将keras模型包装在常规scikit-learn对象中。

那么为了包装这个模型需要进行两步：
- 构建一个函数，函数中包括两部分，模型构建以及模型编译 
- 基于该函数, 如果是回归模型，使用`keras.wrappers.scikit_learn.KerasRegressor`,如果是分类模型，使用`keras.wrappers.scikit-learn.KerasClassifier`

In [7]:
keras.backend.clear_session()

# step 1: build model function
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(lr=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    return model

# step 2: wrap model function
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)

# wrap后的对象，可以像一般的scikit-learn模型一样训练，评估，预测，如下方
# keras_reg.fit(X_train, y_train, 
#               epochs=100,
#               validation_data=(X_valid, y_valid),
#               callbacks=[keras.callbacks.EarlyStopping(patience=10)])

# mse_test = keras_reg.score(X_test, y_test)

# y_pred = keras_reg.predict(X_new)

In [8]:
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_hidden": [0, 1, 2, 3],
    "n_neurons": np.arange(1, 100),
    "learning_rate": reciprocal(3e-4, 3e-2),
}

rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, 
                                   n_iter=10, 
                                   cv=3, 
                                   verbose=2)
rnd_search_cv.fit(X_train, y_train, epochs=100,
                  validation_data=(X_valid, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])



Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] learning_rate=0.001683454924600351, n_hidden=0, n_neurons=15 ....
Epoch 1/100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
[CV]  learning_rate=0.001683454924600351, n_hidden=0, n_neurons=15, total=   9.3s
[CV] learning_rate=0.001683454924600351, n_hidden=0, n_neurons=15 ....
Epoch 1/100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.2s remaining:    0.0s


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
[CV]  learning_rate=0.001683454924600351, n_hidden=0, n_neurons=15, total=  28.9s
[CV] learning_rate=0.00168345

Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
[CV]  learning_rate=0.001683454924600351, n_hidden=0, n_neurons=15, total=  14.6s
[CV] learning_rate=0.008731907739399206, n_hidden=0, n_neurons=21 ....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
[CV]  learning_rate=0.008731907739399206, n_hidden=0, n_neurons=21, total=   4.8s
[CV] learning_rate=0.008731907739399206, n_hidden=0, n_neurons=21 ....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
[CV]  learning_rate=0.008731907739399206, n_hidden=0, n_neurons

Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
[CV]  learning_rate=0.0006154014789262348, n_hidden=2, n_neurons=87, total=  32.1s
[CV] learning_rate=0.0006154014789262348, n_hidden=2, n_neurons=87 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/1

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
[CV]  learning_rate=0.0006154014789262348, n_hidden=2, n_neurons=87, total=  50.4s
[CV] learning_rate=0.0003920021771415983, n_hidden=3, n_neurons=24 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

[CV]  learning_rate=0.0003920021771415983, n_hidden=3, n_neurons=24, total=  45.2s
[CV] learning_rate=0.0003920021771415983, n_hidden=3, n_neurons=24 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/1

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
[CV]  learning_rate=0.0003920021771415983, n_hidden=3, n_neurons=24, total=  55.4s
[CV] learning_rate=0.006010328378268217, n_hidden=0, n_neurons=2 .....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
[CV]  learning_rate=0.006010328378268217, n_hidden=0, n_neurons=2, total=   5.2s
[CV] learning_rate=0.006010328378268217, n_hidden=0, n_neurons=2 .....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
[CV]  learning_rate=0.006010328378268217, n_hidden=0, n_neurons=2, total=   4.7s
[CV] learning_rate=0.006010328378268217, n_hi

Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
[CV]  learning_rate=0.008339092654580042, n_hidden=1, n_neurons=38, total=  10.4s
[CV] learning_rate=0.008339092654580042, n_hidden=1, n_neurons=38 ....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
[CV]  learning_rate=0.008339092654580042, n_hidden=1, n_neurons=38, total=   7.5s
[CV] learning_rate=0.008339092654580042, n_hidden=1, n_neurons=38 ....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/1

Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
[CV]  learning_rate=0.008339092654580042, n_hidden=1, n_neurons=38, total=  26.9s
[CV] learning_rate=0.00030107783636342726, n_hidden=3, n_neurons=21 ..
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/10

Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
[CV]  learning_rate=0.00030107783636342726, n_hidden=3, n_neurons=21, total=  55.3s
[CV] learning_rate=0.00030107783636342726, n_hidden=3, n_neurons=21 ..
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35

Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
[CV]  learning_rate=0.00030107783636342726, n_hidden=3, n_neurons=21, total=  33.4s
[CV] learning_rate=0.00030107783636342726, n_hidden=3, n_neurons=21 ..
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/

Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
[CV]  learning_rate=0.00030107783636342726, n_hidden=3, n_neurons=21, total=  55.5s
[CV] learning_rate=0.005153286333701512, n_hidden=1, n_neurons=22 ....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30

Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
[CV]  learning_rate=0.005153286333701512, n_hidden=1, n_neurons=22, total=  38.2s
[CV] learning_rate=0.005153286333701512, n_hidden=1, n_neurons=22 ....
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
[CV]  learning_rate=0.005153286333701512, n_hidden=1, n_neurons=22, total=   6.8s
[CV] learning_rate=0.005153286333701512, n_hidden=1, n_neurons=22 ....
E

Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
[CV]  learning_rate=0.005153286333701512, n_hidden=1, n_neurons=22, total=  16.1s
[CV] learning_rate=0.0003099230412972121, n_hidden=0, n_neurons=49 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
[CV]  learning_rate=0.0003099230412972121, n_hidden=0, n_neurons=49, total=  10.7s
[CV] learning_rate=0.0003099230412972121, n_hidden=0, n_neurons=49 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
[CV]  learning_rate=0.0003099230412972121, n_hidden=0, n_neurons=49, total=  14.8s
[CV] learning_rate=0.0033625641252688094, n_hidden=2, n_neurons=42 ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
[CV]  learning_rate=0.0033625641252688094, n_hidden=2, n_neurons=42, total=  11.1s
[CV] learning_rate=0.0033625641252688094, n_hidden=2

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
[CV]  learning_rate=0.0033625641252688094, n_hidden=2, n_neurons=42, t

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 10.9min finished


RuntimeError: Cannot clone object <tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x000001EE9B169910>, as the constructor either does not set or modifies parameter learning_rate

In [1]:
rnd_search_cv.best_params_




NameError: name 'rnd_search_cv' is not defined

In [None]:
rnd_search_cv.best_score_

In [None]:
rnd_search_cv.best_estimator_

In [None]:
rnd_search_cv.score(X_test, y_test)

In [None]:
model = rnd_search_cv.best_estimator_.model
model

In [None]:
model.evaluate(X_test, y_test)

# 练习：基于MLP的MNIST分类任务

> todo:
> - load dataset
> - normalize dataset
> - create exponential learning rate callback
> - create model using sequential
> - train model
> - create plot loss function vs learning rate
> - based loss and learning rate relation to adjust learning rate 
> - create early stopping, model checkpoint, tensorboard callback
> - re-training model
> - from checkpoint load best model
> - evalute model
> - using tensorboard check learning curve



In [1]:
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
import tensorflow as tf

In [2]:
def get_run_logdir(logpath):
    root_logdir = os.path.join(os.curdir, logpath)
    
    try:
        os.mkdir(root_logdir)
    except OSError:
        print ("Creation of the directory %s failed" % root_logdir)
    else:
        print ("Successfully created the directory %s " % root_logdir)

    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)

In [3]:
# load dataset
mnist = keras.datasets.mnist.load_data()

# normalize dataset
(X_train_all, y_train_all), (X_test, y_test) = mnist
X_train, X_valid, X_test = X_train_all[:50000]/255.0, X_train_all[50000:]/255.0, X_test/255.0
y_train, y_valid = y_train_all[:50000], y_train_all[50000:]

K = keras.backend
class ExponentialLearningRate(keras.callbacks.Callback):
    def __init__(self, factor):
        self.rates = []
        self.losses = []
        self.factor = factor
        
    def on_batch_end(self, batch, logs):
        self.losses.append(logs['loss'])
        self.rates.append(K.get_value(self.model.optimizer.lr))
        K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)


# create model using sequential
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(28,28)),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

# model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
#               optimizer=keras.optimizers.Adam(lr=0.001),
#               metrics=['accuracy'])

# expon_lr = ExponentialLearningRate(factor=1.005)

# history = model.fit(X_train, y_train,
#                     epochs=1,
#                     validation_data=(X_valid, y_valid),
#                     callbacks=[expon_lr])

# plt.plot(expon_lr.rates, expon_lr.losses)
# plt.gca().set_xscale('log')
# plt.hlines(min(expon_lr.losses), min(expon_lr.rates), max(expon_lr.rates))
# plt.axis([min(expon_lr.rates), max(expon_lr.rates), 0, expon_lr.losses[0]])
# plt.xlabel("Learning rate")
# plt.ylabel("Loss")

In [4]:
# keras.backend.clear_session()
# np.random.seed(42)
# tf.random.set_seed(42)

model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])

modelcheck_cb = keras.callbacks.ModelCheckpoint('models/mnist_keras_model.h5',
                                                save_best_only=True)
earlystop_cb = keras.callbacks.EarlyStopping(patience=10,
                                             restore_best_weights=True)
tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir('mnist_logs'))

history = model.fit(X_train, y_train,
                    epochs=30,
                    validation_data=(X_valid, y_valid),
                    callbacks=[modelcheck_cb, earlystop_cb, tensorboard_cb])

Creation of the directory .\mnist_logs failed
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30


In [5]:
model.evaluate(X_test, y_test)



[0.11641941219568253, 0.9668999910354614]