# 载入数据

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
mnist = tf.keras.datasets.mnist
(train_images,train_labels),(test_images,test_labels) = mnist.load_data()

# 数据集划分

In [3]:
# 划分验证集
total_num = len(train_images)
valid_split = 0.2           # 验证集的比例占20%
train_num = int(total_num*(1-valid_split))  # 训练集的数目

train_x = train_images[:train_num]
train_y = train_labels[:train_num]

valid_x = train_images[train_num:]
valid_y = train_labels[train_num:]

test_x = test_images
test_y = test_labels

# 数据塑形

In [4]:
# 把(28,28)的结构拉直为一行 784
train_x = train_x.reshape(-1,784)
valid_x = valid_x.reshape(-1,784)
test_x = test_x.reshape(-1,784)

# 特征数据归一化

In [5]:
train_x = tf.cast(train_x/255.0,tf.float32)
valid_x = tf.cast(valid_x/255.0,tf.float32)
test_x = tf.cast(test_x/255.0,tf.float32)

# 标签数据独热编码

In [6]:
#对标签数据进行独热编码
train_y = tf.one_hot(train_y,depth = 10)
valid_y = tf.one_hot(valid_y,depth = 10)
test_y = tf.one_hot(test_y,depth = 10)

# 创建待优化变量

In [7]:
# 定义第一层隐藏层权重和偏置项变量
Input_Dim = 784
H1_NN = 64
W1 = tf.Variable(tf.random.normal([Input_Dim, H1_NN],mean=0.0,stddev=1.0,dtype=tf.float32))
B1 = tf.Variable(tf.zeros([H1_NN]),dtype = tf.float32)

In [8]:
# 定义第2层隐藏层权重和偏置项变量
H2_NN = 32
W2 = tf.Variable(tf.random.normal([H1_NN,H2_NN],mean=0.0,stddev=1.0,dtype=tf.float32))
B2 = tf.Variable(tf.zeros([H2_NN]),dtype=tf.float32)

In [9]:
# 定义输出层权重和偏置项变量
Output_Dim = 10
W3 = tf.Variable(tf.random.normal([H2_NN, Output_Dim],mean=0.0,stddev=1.0,dtype=tf.float32))
B3 = tf.Variable(tf.zeros([Output_Dim]),dtype = tf.float32)

In [10]:
# 建立待优化变量列表
W = [W1, W2,W3]
B = [B1, B2,B3]

# 定义模型前向计算

In [11]:
def model(x,w,b):
    x = tf.matmul(x,w[0]) + b[0]
    x = tf.nn.relu(x)
    x = tf.matmul(x,w[1]) + b[1]
    x = tf.nn.relu(x)
    x = tf.matmul(x,w[2]) + b[2]
    pred = tf.nn.softmax(x)
    return pred

# 定义损失函数

In [12]:
# 定义交叉熵损失函数

def loss(x,y,w,b):
    pred = model(x,w,b)   # 计算模型预测值和标签值的差异
    loss_  = tf.keras.losses.categorical_crossentropy(y_true=y,y_pred=pred)
    return tf.reduce_mean(loss_) # 求均值，得出均方差

# 设置训练超参数

In [13]:
train_epochs = 30 #训练轮数
batch_size  = 50  #单次训练样本数（批次大小）
learning_rate = 0.005 # 学习率

# 定义梯度计算函数

In [14]:
# 计算样本数据[x，y]在参数[w,b]点上的梯度
def grad(x,y,w,b):
    var_list = w + b
    with tf.GradientTape() as tape:
        loss_ = loss(x,y,w,b)
    return tape.gradient(loss_,var_list) # 返回梯度向量

# 选择优化器

In [15]:
# Adam优化器

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# 定义准确率

In [16]:
def accuracy(x,y,w,b):
    pred = model(x,w,b)# 计算模型预测值和标签值的差异
    # 检查预测值类别tf.argmax(pred,1)与实际类别tf.argmax(pred,1)的匹配情况
    correct_prediction = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
    # 准确率，将布尔值转换为浮点数，并计算平均值
    return tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

# 训练模型

In [17]:
steps = int(train_num/batch_size)     #一轮训练有多少批次

loss_list_train = []  #用于保存训练集loss值的列表
loss_list_valid = []  #用于保存验证集loss值的列表
acc_list_train = []   #用于保存训练集acc值的列表
acc_list_valid = []   #用于保存验证集Acc值的列表

for epoch in range(train_epochs):
    for step in range(steps):
        xs = train_x[step*batch_size:(step+1)*batch_size]
        ys = train_y[step*batch_size:(step+1)*batch_size]
        grads = grad(xs,ys,W,B)
        optimizer.apply_gradients(zip(grads,W+B))  # 优化器根据梯度自动调节值
    
    loss_train = loss(train_x,train_y,W,B).numpy()
    loss_valid = loss(valid_x,valid_y,W,B).numpy()
    acc_train = accuracy(train_x,train_y,W,B).numpy()
    acc_valid = accuracy(valid_x,valid_y,W,B).numpy()
    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)
    acc_list_train.append(acc_train)
    acc_list_valid.append(acc_valid)
    print("epoch={:3d},train_loss={:.4f},train_acc={:.4f},val_loss={:.4f},val_acc={:.4f}".format(epoch+1,loss_train,acc_train,loss_valid,acc_valid))

epoch=  1,train_loss=5.9346,train_acc=0.6282,val_loss=5.8398,val_acc=0.6343
epoch=  2,train_loss=5.7867,train_acc=0.6390,val_loss=5.7049,val_acc=0.6436
epoch=  3,train_loss=5.6381,train_acc=0.6484,val_loss=5.5691,val_acc=0.6532
epoch=  4,train_loss=5.5167,train_acc=0.6561,val_loss=5.4788,val_acc=0.6582
epoch=  5,train_loss=5.5073,train_acc=0.6570,val_loss=5.5567,val_acc=0.6544
epoch=  6,train_loss=5.4747,train_acc=0.6592,val_loss=5.4273,val_acc=0.6620
epoch=  7,train_loss=5.3582,train_acc=0.6665,val_loss=5.3229,val_acc=0.6687
epoch=  8,train_loss=5.3214,train_acc=0.6689,val_loss=5.2860,val_acc=0.6711
epoch=  9,train_loss=5.2692,train_acc=0.6721,val_loss=5.2594,val_acc=0.6727
epoch= 10,train_loss=5.3484,train_acc=0.6672,val_loss=5.3207,val_acc=0.6692
epoch= 11,train_loss=5.3244,train_acc=0.6686,val_loss=5.3386,val_acc=0.6678
epoch= 12,train_loss=5.2254,train_acc=0.6752,val_loss=5.2083,val_acc=0.6763
epoch= 13,train_loss=5.3530,train_acc=0.6671,val_loss=5.3367,val_acc=0.6678
epoch= 14,tr