## 手动实现模型优化过程

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Record the TensorFlow build, the interpreter version, and the version of
# every major library this notebook relies on.
print(tf.__version__, sys.version_info, sep='\n')
for module in (mpl, np, pd, sklearn, tf, keras):
    print(' '.join((module.__name__, module.__version__)))

2.0.0-alpha0
sys.version_info(major=3, minor=6, micro=8, releaselevel='final', serial=0)
matplotlib 3.1.0
numpy 1.14.5
pandas 0.24.2
sklearn 0.21.2
tensorflow 2.0.0-alpha0
tensorflow.python.keras.api._v2.keras 2.2.4-tf


In [2]:
from sklearn.datasets import fetch_california_housing

# Load the California housing regression dataset through scikit-learn.
housing = fetch_california_housing()
# Show the dataset description, then the shapes of the feature matrix and
# of the target vector (one per line).
print(housing.DESCR)
print(housing.data.shape, housing.target.shape, sep='\n')

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block
        - HouseAge      median house age in block
        - AveRooms      average number of rooms
        - AveBedrms     average number of bedrooms
        - Population    block population
        - AveOccup      average house occupancy
        - Latitude      house block latitude
        - Longitude     house block longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/

The target variable is the median house value for California districts.

This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bur

In [3]:
from sklearn.model_selection import train_test_split

# With no explicit test_size, train_test_split uses a 3:1 split.
# First hold out the test set, then carve a validation set out of what remains.
X_train_all, X_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state=7)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_all, y_train_all, random_state=11)
# Print (features shape, targets shape) for the train, validation and test splits.
for features, targets in ((X_train, y_train), (X_valid, y_valid), (X_test, y_test)):
    print(features.shape, targets.shape)

(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)


In [4]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same fitted
# transform to the training, validation and test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_valid_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_valid, X_test)
)

## keras中metric的使用

In [5]:
metric = keras.metrics.MeanSquaredError()
# A metric object accumulates by default: each call folds the new sample into
# the running result, so the second call reports the mean over both calls.
for first, second in (([5.], [2.]), ([0.], [1.])):
    print(metric(first, second))
print(metric.result())

# Clear the accumulated state so the next result reflects only new samples.
metric.reset_states()
metric([1.], [3.])
print(metric.result())

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [11]:
# What the optimization process (i.e. the fit function) does:
# 1. Iterate over the dataset batch by batch, computing the loss and any other metrics
#    1.1 Compute gradients by automatic differentiation
# 2. At the end of every epoch, evaluate on the validation set

# Hyper-parameters and helper objects used by the manual training loop.
batch_size = 32
epochs = 100
# Number of full batches that fit into the training set.
steps_per_epoch = X_train_scaled.shape[0] // batch_size
opt = keras.optimizers.SGD()
# Metric object used to track the training MSE.
metric = keras.metrics.MeanSquaredError()

# Batch sampling. (fit shuffles the dataset before batching; this helper
# approximates that by drawing random row indices instead.)
def random_batch(x, y, batch_size=32):
    """Draw a random mini-batch of aligned rows from ``x`` and ``y``.

    Indices are sampled uniformly *with replacement* from ``[0, len(x))``, so
    a row may appear more than once in a batch and ``batch_size`` may exceed
    ``len(x)``.

    Args:
        x: Indexable array of features whose first axis enumerates samples.
        y: Indexable array of targets aligned with ``x`` along the first axis.
        batch_size: Number of rows to draw.

    Returns:
        A ``(x_batch, y_batch)`` tuple selected with the same random indices.
    """
    # Size the index range from the array actually passed in, not from a
    # notebook-level global, so the helper works for any dataset.
    random_idx = np.random.randint(0, len(x), size=batch_size)
    return x[random_idx], y[random_idx]

# Build the model: one hidden ReLU layer of 30 units feeding a single linear
# output unit. The input shape is taken from the scaled training features.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=X_train_scaled.shape[1:]),
    keras.layers.Dense(1),
])


# Model optimization: a hand-written version of the training loop.
for epoch in range(epochs):
    # The metric accumulates across calls, so clear it at the start of each
    # epoch; the value printed below is the running training MSE of this epoch.
    metric.reset_states()
    for step in range(steps_per_epoch):
        X_batch, y_batch = random_batch(X_train_scaled,
                                        y_train,
                                        batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch)
            # The model output has shape (batch_size, 1) while y_batch has
            # shape (batch_size,); drop the trailing axis so they line up.
            y_pred = tf.squeeze(y_pred, 1)
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
        # Keras metrics take (y_true, y_pred). The update is bookkeeping only,
        # so it is done outside the tape to avoid recording it.
        metric(y_batch, y_pred)
        # Differentiate the loss with respect to the trainable parameters only;
        # non-trainable variables would produce None gradients.
        trainable_vars = model.trainable_variables
        grads = tape.gradient(loss, trainable_vars)
        # Pair each gradient with its variable and apply one optimizer step.
        opt.apply_gradients(zip(grads, trainable_vars))
        print("\rEpoch", epoch, "train mse:",
              metric.result().numpy(), end='')
    # At the end of every epoch, compute the metric on the validation set.
    y_valid_pred = model(X_valid_scaled)
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
    # Keras losses take (y_true, y_pred): labels first, predictions second.
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\t valid mse: ", valid_loss.numpy())

Epoch 0 train mse: 2.0203366	 valid mse:  0.9557379326975552
Epoch 1 train mse: 0.8204608	 valid mse:  0.7786624292807339
Epoch 2 train mse: 0.6920122	 valid mse:  0.7379015203381617
Epoch 3 train mse: 0.66292644	 valid mse:  0.703234844544089
Epoch 4 train mse: 0.62197036	 valid mse:  0.674487303459918
Epoch 5 train mse: 0.632427	 valid mse:  0.6479788840299442
Epoch 6 train mse: 0.5877473	 valid mse:  0.6271420756871153
Epoch 7 train mse: 0.5785055	 valid mse:  0.6046631285897986
Epoch 8 train mse: 0.537586	 valid mse:  0.5889433030525009
Epoch 9 train mse: 0.53496933	 valid mse:  0.5725628142400243
Epoch 10 train mse: 0.5075732	 valid mse:  0.5597182269399109
Epoch 11 train mse: 0.51765627	 valid mse:  0.5477711630669624
Epoch 12 train mse: 0.4907182	 valid mse:  0.5374188098930682
Epoch 13 train mse: 0.5124846	 valid mse:  0.5270803572211568
Epoch 14 train mse: 0.48795757	 valid mse:  0.5190491943117357
Epoch 15 train mse: 0.48253918	 valid mse:  0.5118230910023864
Epoch 16 train m