In [30]:
# 以 p51 範例為例子
from tensorflow.keras import optimizers, layers, datasets
import tensorflow as tf

# step1: load the MNIST images and labels
(x, y), (x_val, y_val) = datasets.mnist.load_data()

# Images -> float32 tensor, scaled by 1/255
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
# Labels -> int32 tensor, then one-hot encoded with 10 classes
y = tf.one_hot(tf.convert_to_tensor(y, dtype=tf.int32), depth=10)

print(x.shape)  # (60000, 28, 28)
# Flatten each image into one row; tf.reshape(x, [-1, 28 * 28]) is equivalent
x = tf.reshape(x, [x.shape[0], -1])
print(x.shape)  # (60000, 784)

# Slice the (image, label) pairs into batches of 200
train_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(200)

(60000, 28, 28)
(60000, 784)


In [31]:

# step2: create the trainable parameters of the 784 -> 256 -> 128 -> 10 network
def _init_layer(n_in, n_out):
    """Return ``(weights, bias)`` Variables for one fully connected layer.

    Weights have shape [n_in, n_out] and are drawn from a truncated normal
    with stddev 0.1; the bias has shape [n_out] and starts at zero.
    """
    weights = tf.Variable(tf.random.truncated_normal([n_in, n_out], stddev=0.1))
    bias = tf.Variable(tf.zeros([n_out]))
    return weights, bias


w1, b1 = _init_layer(784, 256)
w2, b2 = _init_layer(256, 128)
w3, b3 = _init_layer(128, 10)

In [32]:
# step3: train for a single epoch
def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):
  """Run one pass of plain gradient descent over ``train_dataset``.

  The model is a three-layer fully connected network
  (``relu(x @ w1 + b1)`` -> ``relu(. @ w2 + b2)`` -> ``. @ w3 + b3``) trained
  with a mean-squared-error loss. The six parameters are updated in place.

  Args:
    epoch: Index of the current epoch. Not used by the computation; kept so
      existing callers keep working.
    train_dataset: Iterable yielding ``(x, y)`` batches, where ``x`` can be
      matrix-multiplied with ``w1`` and ``y`` has the same shape as the
      network output.
    w1, b1, w2, b2, w3, b3: Weights and biases of the three layers. Each must
      support ``assign_sub`` (e.g. ``tf.Variable``).
    lr: Learning rate applied to every gradient step.

  Returns:
    The loss of the last processed batch, converted with ``.numpy()``.

  Raises:
    ValueError: If ``train_dataset`` yields no batches.
  """
  params = [w1, b1, w2, b2, w3, b3]
  loss = None

  for x, y in train_dataset:
    with tf.GradientTape() as tape:
      # Layer 1: [b, d_in] @ [d_in, h1] + [h1] => [b, h1]. The bias is
      # broadcast over the batch axis implicitly, so no layer width is
      # hard-coded here.
      h1 = tf.nn.relu(x @ w1 + b1)
      # Layer 2: [b, h1] => [b, h2]
      h2 = tf.nn.relu(h1 @ w2 + b2)
      # Output layer: [b, h2] => [b, n_out], no activation
      out = h2 @ w3 + b3

      # Mean squared error between labels and outputs, reduced to a scalar
      loss = tf.reduce_mean(tf.square(y - out))

    # Differentiate outside the tape context so the gradient computation
    # itself is not recorded.
    grads = tape.gradient(loss, params)

    # Gradient-descent step: assign_sub subtracts the given value in place.
    for param, grad in zip(params, grads):
      param.assign_sub(lr * grad)

  if loss is None:
    raise ValueError('train_dataset is empty; no training step was run')
  return loss.numpy()

In [33]:
# step4: train for all epochs, recording the loss returned after each one
# NOTE(review): `plt` is not imported in the visible cells — presumably
# `import matplotlib.pyplot as plt` ran in an earlier cell; confirm.
losses = []
epochs = 100
for epoch in range(epochs):
    loss = train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001)
    print('epoch:', epoch, 'loss:', loss)
    losses.append(loss)

# Plot the loss curve. A dedicated name is used for the x-axis so the
# training tensor `x` from the data-loading cell is not overwritten, and the
# axis length is taken from `losses` so the two always match.
epoch_axis = list(range(len(losses)))
plt.plot(epoch_axis, losses, color='blue', marker='s', label='训练')
plt.xlabel('Epoch')
plt.ylabel('MSE')
plt.legend()
plt.show()

KeyboardInterrupt: 