In [1]:
import tensorflow as tf

In [6]:
# 3.3.1 Generate the dataset
# Fix TensorFlow's global seed so that re-running this cell (or the whole
# notebook from a fresh kernel) reproduces the same synthetic data.
tf.random.set_seed(42)

num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
# Features X: normal samples of shape (num_examples, num_inputs).
features = tf.random.normal((num_examples, num_inputs), stddev=1)
# Noise-free targets: y = w[0] * X[:, 0] + w[1] * X[:, 1] + b.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# Add a small Gaussian noise term (stddev 0.01) to the targets.
labels += tf.random.normal(labels.shape, stddev=0.01)
# Each row of `features` is a length-2 vector; each entry of `labels` is a scalar.


2022-04-07 16:22:46.727050: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-07 16:22:46.727927: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


In [7]:
# 3.3.2 Read the data in mini-batches
# NOTE: the training loop further down iterates over `dataset`, so this cell
# has to run before it.
from tensorflow import data as tfdata
batch_size = 10
# Pair every feature row with its label, shuffle, then group into mini-batches.
# buffer_size should be at least the number of examples.
dataset = (
    tfdata.Dataset.from_tensor_slices((features, labels))
    .shuffle(buffer_size=num_examples)
    .batch(batch_size)
)
data_iter = iter(dataset)

# Show the first mini-batch only, then stop.
for (batch, (X, y)) in enumerate(dataset):
    print(X, y)
    break

tf.Tensor(
[[ 1.9801545   0.28648812]
 [-1.1141671   0.07752962]
 [ 1.8002766   0.6286295 ]
 [-0.8636981  -0.48647034]
 [ 1.4503883   1.8294076 ]
 [ 0.7715016   0.14329799]
 [-1.0409961   0.54246694]
 [ 0.69592255 -0.735941  ]
 [-0.9899787   0.52541214]
 [-0.30471757  1.4439908 ]], shape=(10, 2), dtype=float32) tf.Tensor(
[ 7.182835    1.7167048   5.6569357   4.144114    0.8862296   5.2446136
  0.2783067   8.071374    0.42869046 -1.3098644 ], shape=(10,), dtype=float32)


In [8]:
# 3.3.3 Define the model and initialise its parameters
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow import initializers as init
# Linear regression is a single Dense unit; its kernel is drawn from a
# normal distribution with stddev 0.01.
model = keras.Sequential([
    layers.Dense(1, kernel_initializer=init.RandomNormal(stddev=0.01)),
])

In [9]:
# 3.3.4 Define the loss function
from tensorflow import losses
# Mean-squared-error loss object, used by the training cell below.
loss = losses.MeanSquaredError()

In [13]:
# 3.3.5 Define the optimizer
from tensorflow.keras import optimizers
# SGD optimizer with a learning rate of 0.03.
trainer = optimizers.SGD(learning_rate=0.03)


In [16]:
# 3.3.6 Train the model
num_epoches = 3
for epoch in range(1, num_epoches + 1):
    for X, y in dataset:
        # Record the forward pass so gradients can be taken w.r.t. the weights.
        with tf.GradientTape() as tape:
            # Keras losses are called as loss(y_true, y_pred). The labels are
            # reshaped to (batch, 1) so they line up with the Dense(1) output
            # explicitly instead of relying on implicit rank reconciliation.
            batch_loss = loss(tf.reshape(y, (-1, 1)), model(X, training=True))
        # Gradients of the mini-batch loss for every trainable variable.
        grads = tape.gradient(batch_loss, model.trainable_variables)
        # One optimizer step: apply the gradients to the matching variables.
        trainer.apply_gradients(zip(grads, model.trainable_variables))

    # Loss over the full dataset after this epoch.
    l = loss(tf.reshape(labels, (-1, 1)), model(features))
    print('epoch %d. loss: %f' %(epoch, l))

epoch 1. loss: 0.000103
epoch 2. loss: 0.000104
epoch 3. loss: 0.000103


In [18]:
# Compare the learned parameters with the true ones.
# get_weights()[0] is the first weight array of the model (the Dense kernel here).
true_w, model.get_weights()[0]

([2, -3.4],
 array([[ 2.0002832],
        [-3.40012  ]], dtype=float32))

In [19]:
# get_weights()[1] is the second weight array of the model (the Dense bias here).
true_b, model.get_weights()[1]

(4.2, array([4.200269], dtype=float32))