In [5]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
# Load MNIST as (images, labels) pairs for the training and test splits.
(x_train, label_train), (x_test, label_test) = mnist.load_data()
# label_train still holds the raw labels here; it has not been one-hot
# encoded yet, hence the 1-D shape printed on the second line.
print(x_train.shape, label_train.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [15]:
# Flatten every image from [N, 28, 28] to [N, 784] and scale the pixel
# values into [0, 1].
def _flatten_and_scale(images):
    """Return `images` reshaped to [-1, 784] and divided by 255.

    The division is applied only while the array still has an integer
    dtype (the raw pixel data). After the first run the array is
    floating point, so re-running this cell leaves the data unchanged
    instead of dividing by 255 a second time.
    """
    flat = images.reshape([-1, 784])
    # 'u' / 'i' are NumPy's dtype kinds for unsigned / signed integers.
    if flat.dtype.kind in "ui":
        flat = flat / 255.
    return flat


x_train, x_test = _flatten_and_scale(x_train), _flatten_and_scale(x_test)
print(x_train)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [7]:
# Trainable parameters of the logistic-regression model, both initialised
# to ones: a [784, 10] weight matrix and a [10] bias vector.
W = tf.Variable(tf.ones(shape=(784, 10)))
b = tf.Variable(tf.ones(shape=(10,)))
print(W.shape, b.shape, sep="\n")
# With these shapes, X @ W + b yields an [n, 10] matrix,
# where n is the size of the batch being processed.

(784, 10)
(10,)


In [8]:
# Logistic regression (linear part).
def logistic_regression(x):
    """Return the affine scores `x @ W + b` using the global `W` and `b`."""
    scores = tf.matmul(x, W)
    return scores + b
# MNIST digit recognition is a multi-class problem, so a softmax is applied
# on top of the logistic-regression scores.
def predict_y(x):
    """Apply softmax to `x` and return the result."""
    probabilities = tf.nn.softmax(x)
    return probabilities

In [9]:
# Build the mini-batch pipeline so the parameters can be adjusted after
# every batch instead of only after the whole training set has been seen.
train_data = tf.data.Dataset.from_tensor_slices((x_train, label_train))
print(train_data)
# Same stages, in the same order, as a single chained call:
# repeat -> shuffle(buffer of 5000) -> batch(256) -> prefetch(1).
train_data = train_data.repeat()
train_data = train_data.shuffle(5000)
train_data = train_data.batch(256)
train_data = train_data.prefetch(1)
print(train_data)

<TensorSliceDataset shapes: ((784,), ()), types: (tf.float64, tf.uint8)>
<PrefetchDataset shapes: ((None, 784), (None,)), types: (tf.float64, tf.uint8)>


In [10]:
# Cross-entropy: the usual loss function for multi-class classification.
def cross_entropy(y_pred, y_true):
    """Return the mean cross-entropy between `y_pred` and the labels `y_true`.

    `y_true` holds class indices and is one-hot encoded here with depth 10;
    `y_pred` is clipped to [1e-9, 1] before the logarithm is taken.
    """
    # One-hot encode the true labels.
    targets = tf.one_hot(y_true, depth=10)
    # Clip so that log(0) can never occur.
    safe_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    # Sum along axis 1 (across each row) to get one loss value per row,
    # then average those values.
    per_row_loss = -tf.reduce_sum(targets * tf.math.log(safe_pred), 1)
    return tf.reduce_mean(per_row_loss)
# Accuracy: how often the model's top prediction matches the label.
def accuracy(y_pred, y_true):
    """Return the fraction of rows of `y_pred` whose argmax equals `y_true`."""
    # Index of the largest entry in each row of y_pred; the labels are
    # compared as plain class indices, so no one-hot encoding is needed.
    predicted_class = tf.argmax(y_pred, 1)
    expected_class = tf.cast(y_true, tf.int64)
    matches = tf.equal(predicted_class, expected_class)
    # Cast the booleans to float32 and average them to get the accuracy.
    return tf.reduce_mean(tf.cast(matches, tf.float32))

In [11]:
# Optimizer: stochastic gradient descent with a learning rate of 0.01.
optimizer = tf.optimizers.SGD(learning_rate=0.01)
# One optimisation step: updates the values of the parameters W and b.
def run_optimize(x, y):
    """Run one gradient step on the global `W` and `b` for the batch (x, y)."""
    params = [W, b]
    with tf.GradientTape() as tape:
        probabilities = predict_y(logistic_regression(x))
        batch_loss = cross_entropy(probabilities, y)
    # Compute the gradients of the loss with respect to W and b.
    grads = tape.gradient(batch_loss, params)
    # Apply the gradients to update W and b.
    optimizer.apply_gradients(zip(grads, params))

In [17]:
# Start training: 1000 optimisation steps, each on a batch of 256 examples.
x_test = tf.cast(x_test, tf.float32)
for step, (batch_x, batch_y) in enumerate(train_data.take(1000), 1):
    batch_x = tf.cast(batch_x, tf.float32)
    run_optimize(batch_x, batch_y)
    if step % 10 == 0:
        # The metrics are only reported every 10th step, so the extra forward
        # pass (with the freshly updated W and b) is only done on those steps.
        pred = predict_y(logistic_regression(batch_x))
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
print(W,b)
# The per-step figures above are measured on training mini-batches (the
# recorded run reported roughly 89%). Evaluate once on the held-out test set
# so the final number reflects data the model was not trained on.
test_pred = predict_y(logistic_regression(x_test))
print("Test Accuracy: %f" % accuracy(test_pred, label_test))

step: 10, loss: 0.522170, accuracy: 0.894531
step: 20, loss: 0.566009, accuracy: 0.871094
step: 30, loss: 0.520456, accuracy: 0.902344
step: 40, loss: 0.650000, accuracy: 0.835938
step: 50, loss: 0.622721, accuracy: 0.867188
step: 60, loss: 0.594598, accuracy: 0.871094
step: 70, loss: 0.599342, accuracy: 0.867188
step: 80, loss: 0.497922, accuracy: 0.890625
step: 90, loss: 0.581602, accuracy: 0.878906
step: 100, loss: 0.620264, accuracy: 0.835938
step: 110, loss: 0.573070, accuracy: 0.882812
step: 120, loss: 0.642184, accuracy: 0.835938
step: 130, loss: 0.631358, accuracy: 0.851562
step: 140, loss: 0.645019, accuracy: 0.816406
step: 150, loss: 0.676627, accuracy: 0.839844
step: 160, loss: 0.578804, accuracy: 0.863281
step: 170, loss: 0.559333, accuracy: 0.875000
step: 180, loss: 0.613397, accuracy: 0.859375
step: 190, loss: 0.575548, accuracy: 0.898438
step: 200, loss: 0.549319, accuracy: 0.875000
step: 210, loss: 0.480967, accuracy: 0.867188
step: 220, loss: 0.531869, accuracy: 0.8828