tf.GradientTape与tf.keras结合使用

In [2]:
# 使用tf.keras搭建回归模型，数据集使用加利福尼亚的房价预测
#导入必要的库即版本

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras
#import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd,sklearn,tf,keras:
    print(module.__name__, module.__version__)

2.0.0-beta1
sys.version_info(major=3, minor=7, micro=0, releaselevel='final', serial=0)
matplotlib 2.2.3
numpy 1.16.4
pandas 0.23.4
sklearn 0.19.2
tensorflow 2.0.0-beta1
tensorflow.python.keras.api._v2.keras 2.2.4-tf


In [3]:
# 导入数据
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()

print(housing.DESCR)
print(housing.data.shape)
print(housing.target.shape)

California housing dataset.

The original database is available from StatLib

    http://lib.stat.cmu.edu/datasets/

The data contains 20,640 observations on 9 variables.

This dataset contains the average house value as target variable
and the following input variables (features): average income,
housing average age, average rooms, average bedrooms, population,
average occupation, latitude, and longitude in that order.

References
----------

Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
Statistics and Probability Letters, 33 (1997) 291-297.


(20640, 8)
(20640,)


In [4]:
# 打印数据
import pprint

pprint.pprint(housing.data[:5])
pprint.pprint(housing.target[:5])

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02],
       [ 5.64310000e+00,  5.20000000e+01,  5.81735160e+00,
         1.07305936e+00,  5.58000000e+02,  2.54794521e+00,
         3.78500000e+01, -1.22250000e+02],
       [ 3.84620000e+00,  5.20000000e+01,  6.28185328e+00,
         1.08108108e+00,  5.65000000e+02,  2.18146718e+00,
         3.78500000e+01, -1.22250000e+02]])
array([4.526, 3.585, 3.521, 3.413, 3.422])


In [5]:
# 拆分训练集、测试集、验证集
from sklearn.model_selection import train_test_split

# 默认按3：1的比例拆分
x_train_all, x_test, y_train_all, y_test = train_test_split(housing.data, housing.target, random_state=7)
x_train, x_valid, y_train, y_valid = train_test_split(x_train_all, y_train_all, random_state=11)

print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)

(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)


### 数据归一化

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# 训练机上获得均值和方差，测试集和验证集上用相同的均值和方差
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

### 构建模型

In [14]:
# metric使用
metric = keras.metrics.MeanSquaredError()
# 可以累加使用
print(metric([5.],[2.]))
print(metric([0.],[1.]))
print(metric.result())
# 不想累加 reset
metric.reset_states()
metric([1.],[3.])
print(metric.result())

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [17]:
# 修改model中的fit部分
# 1、按照batch的形式遍历训练集，每次训练都会得到一个指标 metric
#    1.1自动求导，
# 2、一个epoch结束，验证集验证，输出metric
epochs = 100
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()

# 随机遍历取数据
def random_batch(x,y,batch_size=32):
    # 随机索引
    idx = np.random.randint(0,len(x),size=batch_size)
    return x[idx],y[idx]

model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=x_train.shape[1:]),
    #最后一层一个神经元
    keras.layers.Dense(1),
])

for epoch in range(epochs):
    metric.reset_states()
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        
        with tf.GradientTape() as tape:
            
            y_pred = model(x_batch)
            loss = tf.reduce_mean(
                keras.losses.mean_squared_error(y_batch,y_pred))
            metric(y_batch,y_pred)
        grads = tape.gradient(loss, model.variables)
        grads_and_vars = zip(grads, model.variables)
        optimizer.apply_gradients(grads_and_vars)
        print('\rEpoch', epoch, 'train mse', metric.result().numpy(),end='')
    
    y_valid_pred = model(x_valid_scaled)
    valid_loss = tf.reduce_mean(
        keras.losses.mean_squared_error(y_valid_pred,y_valid))
    print('\t','valid mse:', valid_loss.numpy())
    
        
    
model.summary()
# 编译，目标函数使用均方差，优化方法使用随机梯度下降，也可以自己定义
model.compile(loss='mean_squared_error', optimizer='sgd')
# 使用earlystopping
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3)]

Epoch 0 train mse 1.5235689	 valid mse: 1.4166715688935319
Epoch 1 train mse 1.7961156	 valid mse: 1.45398741139984
Epoch 2 train mse 1.2464117	 valid mse: 1.4197592354390824
Epoch 3 train mse 1.2720236	 valid mse: 1.4110270663545423
Epoch 4 train mse 1.22343643	 valid mse: 1.3928980161174214
Epoch 5 train mse 1.2563934	 valid mse: 1.3887724574502907
Epoch 6 train mse 1.269748	 valid mse: 1.3885760220974723
Epoch 7 train mse 1.2529523	 valid mse: 1.3874902406251794
Epoch 8 train mse 1.2494687	 valid mse: 1.390298433298022
Epoch 9 train mse 1.2713538	 valid mse: 1.386875324155706
Epoch 10 train mse 1.3050228	 valid mse: 1.390824866527549
Epoch 11 train mse 1.2404352 train mse 1.2600132	 valid mse: 1.3860032837728455
Epoch 12 train mse 1.251884	 valid mse: 1.386308964300463
Epoch 13 train mse 1.2474616	 valid mse: 1.388066388448043
Epoch 14 train mse 1.2356681	 valid mse: 1.3952145639953155
Epoch 15 train mse 1.2545469	 valid mse: 1.3951665591222697
Epoch 16 train mse 1.2410151	 valid ms