In [1]:
import tensorflow as tf
from tensorflow.keras import layers, datasets, Sequential, losses, optimizers

In [2]:
# Load MNIST as two (images, labels) pairs: the training split and the test split.
mnist_train, mnist_test = datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test
# Report the shape of the training images.
print(f"训练集维度为：{x_train.shape}")

训练集维度为：(60000, 28, 28)


MNIST 数据集加载

In [3]:
# Per-sample preprocessing applied through Dataset.map
def preprocess(x, y):
    """Prepare one (image, label) pair for the network.

    Args:
        x: image data; cast to float32 and divided by 255.
        y: label; cast to int32.

    Returns:
        Tuple (image, label) with the converted tensors.
    """
    image = tf.cast(x, tf.float32) / 255.
    label = tf.cast(y, tf.int32)
    return image, label

In [4]:
# Number of samples per mini-batch
batchsz = 128

# Training pipeline: preprocess each sample, shuffle with a 10000-element
# buffer, then group into mini-batches.
train_db = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .map(preprocess)
    .shuffle(10000)
    .batch(batchsz)
)

# Test pipeline: same preprocessing and batching, but no shuffling.
test_db = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess)
    .batch(batchsz)
)

![](img/16.jpg)

In [8]:
# Small convolutional classifier: two conv/pool/ReLU stages followed by
# three fully connected layers. The last layer emits 10 raw logits
# (no softmax), one per digit class.
model = Sequential()

# Stage 1: 6 filters of 3x3, then 2x2 max pooling, then ReLU
model.add(layers.Conv2D(filters=6, kernel_size=3, strides=1))
model.add(layers.MaxPool2D(pool_size=2, strides=2))
model.add(layers.ReLU())

# Stage 2: 16 filters of 3x3, then 2x2 max pooling, then ReLU
model.add(layers.Conv2D(filters=16, kernel_size=3, strides=1))
model.add(layers.MaxPool2D(pool_size=2, strides=2))
model.add(layers.ReLU())

# Flatten the feature maps into a vector for the dense layers
model.add(layers.Flatten())
model.add(layers.Dense(units=120, activation='relu'))
model.add(layers.Dense(units=84, activation='relu'))
model.add(layers.Dense(units=10))

# Create the weights for batches of 28x28 single-channel images
model.build(input_shape=(None, 28, 28, 1))

In [14]:
# Parameter statistics: about 60k parameters in total, the vast majority of
# which belong to the fully connected layers.
#
# Layer-by-layer count (input is 28*28*1):
#   conv 3x3, 1 -> 6 channels  : (3*3*1 + 1) * 6  = 60
#   conv 3x3, 6 -> 16 channels : (3*3*6 + 1) * 16 = 880
#   flatten                    : 5*5*16 = 400 features (no parameters)
#   dense 400 -> 120           : 400*120 + 120    = 48120
#   dense 120 -> 84            : 120*84 + 84      = 10164
#   dense 84 -> 10             : 84*10 + 10       = 850
#
# Fraction of the 60074 parameters held by the first two dense layers.
print(sum((48120, 10164)) / 60074)
model.summary()

0.9702034157871958
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 26, 26, 6)         60        
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 6)         0         
_________________________________________________________________
re_lu_2 (ReLU)               (None, 13, 13, 6)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 11, 11, 16)        880       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 16)          0         
_________________________________________________________________
re_lu_3 (ReLU)               (None, 5, 5, 16)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None,

In [49]:
# Optimizer and loss shared by every call to train_epoch. Building the
# optimizer inside the function would hand each epoch a brand-new Adam
# instance and throw away the moment estimates and step count accumulated
# during the previous epochs.
_epoch_optimizer = optimizers.Adam(learning_rate=0.001)
# from_logits=True: the loss applies softmax itself, so the model outputs raw logits.
_epoch_criterion = losses.CategoricalCrossentropy(from_logits=True)


def _test_accuracy():
    """Return the fraction of samples in test_db that model classifies correctly."""
    total_correct, total_num = 0, 0
    for x, y in test_db:
        # Add the channel axis: [b, 28, 28] -> [b, 28, 28, 1]
        x = tf.expand_dims(x, axis=3)
        logits = model(x)
        # softmax is monotonic, so the argmax of the logits is already the
        # predicted class; cast to int32 to match the labels from preprocess.
        pred = tf.cast(tf.argmax(logits, axis=1), dtype=tf.int32)
        # Number of correct predictions in this batch
        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
        total_correct += int(correct)
        # Number of samples seen so far
        total_num += x.shape[0]
    return total_correct / total_num


# Train on the whole training set once, then report test accuracy
def train_epoch(epoch):
    """Run one pass over train_db, then evaluate on test_db.

    Prints the loss every 100 steps and the test accuracy at the end.

    Args:
        epoch: index of the current epoch; only used in the progress output.

    Returns:
        Test-set accuracy (float) measured after this epoch.
    """
    for step, (x, y) in enumerate(train_db):
        # Add the channel axis: [b, 28, 28] -> [b, 28, 28, 1]
        x = tf.expand_dims(x, axis=3)
        y_onehot = tf.one_hot(y, depth=10)
        with tf.GradientTape() as tape:
            # Forward pass: [b, 28, 28, 1] -> [b, 10]
            out = model(x)
            # The loss object already averages over the batch, so no extra
            # tf.reduce_mean is needed.
            loss = _epoch_criterion(y_onehot, out)
        # Back-propagate and let the optimizer update the weights
        grads = tape.gradient(loss, model.trainable_variables)
        _epoch_optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, "loss",loss.numpy())

    # Accuracy on the test set
    acc = _test_accuracy()
    print(f"准确率为：{acc}")
    return acc

In [None]:
# Number of full passes over the training set; tune here.
NUM_EPOCHS = 10

# train_epoch prints the running loss and the test accuracy for each epoch.
for epoch in range(NUM_EPOCHS):
    train_epoch(epoch)

In [None]:
# Accuracy on the test set
def evaluate_accuracy(net, db):
    """Return the fraction of samples in db that net classifies correctly.

    Args:
        net: callable model mapping [b, 28, 28, 1] images to [b, 10] logits.
        db: iterable of (images, labels) batches, images shaped [b, 28, 28]
            and labels int32, as produced by the preprocess pipeline.

    Returns:
        Accuracy as a float: correct predictions / total samples.
    """
    total_correct, total_num = 0, 0
    for x, y in db:
        # Add the channel axis: [b, 28, 28] -> [b, 28, 28, 1]
        x = tf.expand_dims(x, axis=3)
        logits = net(x)
        # softmax is monotonic, so the argmax of the logits is already the
        # predicted class; cast to int32 to match the labels.
        pred = tf.cast(tf.argmax(logits, axis=1), dtype=tf.int32)
        # Number of correct predictions in this batch
        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
        total_correct += int(correct)
        # Number of samples seen so far
        total_num += x.shape[0]
    return total_correct / total_num


acc = evaluate_accuracy(model, test_db)
print(f"准确率为：{acc}")


以下为训练过程的分解

In [None]:
# Create the loss object; call the instance directly to compute the loss.
# from_logits=True folds the softmax into the loss computation, so no softmax
# activation has to be added to the model by hand.
criteon = losses.CategoricalCrossentropy(from_logits=True)
# Take one preprocessed mini-batch from the training pipeline. The raw
# x_train / y_train arrays have not gone through preprocess (no float32 cast,
# no /255 scaling) and hold all 60000 samples, so they must not be fed to the
# model directly.
x, y = next(iter(train_db))
# Training step: record the forward pass so gradients can be taken afterwards
with tf.GradientTape() as tape:
    # Add the channel axis: [b, 28, 28] -> [b, 28, 28, 1]
    x = tf.expand_dims(x, axis=3)
    # Forward pass: [b, 28, 28, 1] -> [b, 10]
    out = model(x)
    y_onehot = tf.one_hot(y, depth=10)
    # Compute the loss for this batch
    loss = criteon(y_onehot, out)

获得损失值后，通过 TF 的梯度记录器 tf.GradientTape() 计算损失函数 loss 对网络参数 para_m 的梯度，并通过 optimizer 对象自动更新网络权值

In [None]:
# Optimizer setup: Adam with a learning rate of 1e-3.
# Plain stochastic gradient descent is a drop-in alternative:
#   optimizers.SGD(learning_rate=0.001)
optimizer = optimizers.Adam(learning_rate=1e-3)

In [None]:
# All trainable weights of the network
para_m = model.trainable_variables
# Gradient of the recorded loss with respect to every trainable weight
grads = tape.gradient(target=loss, sources=para_m)
# Let the optimizer apply one update step using the (gradient, variable) pairs
optimizer.apply_gradients(grads_and_vars=zip(grads, para_m))