In [3]:
import tensorflow as tf
import numpy as np

class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()
        # MNIST中的图像默认为uint8（0-255的数字）。以下代码将其归一化到0-1之间的浮点数，并在最后增加一维作为颜色通道
        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):
        # 从数据集中随机取出batch_size个元素并返回
        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]
    



In [4]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    # Flatten层将除第一维（batch_size）以外的维度展平
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [5]:
num_epochs = 5
batch_size = 50
learning_rate = 0.001

In [7]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

checkpoint = tf.train.Checkpoint(myAwesomeModel=model)
# 使用tf.train.CheckpointManager来管理CheckPoint
manager = tf.train.CheckpointManager(checkpoint, directory='./save', max_to_keep=3)

num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
    #使用checkpoint把model.variables存储起来
    path = manager.save()
    print("model saved to %s" % path)
    
# loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)

loss = tf.keras.losses.categorical_crossentropy(
    y_true=tf.one_hot(y, depth=tf.shape(y_pred)[-1]),
    y_pred=y_pred
)

batch 0: loss 2.398695
model saved to ./save\ckpt-1
batch 1: loss 2.378308
model saved to ./save\ckpt-2
batch 2: loss 2.213981
model saved to ./save\ckpt-3
batch 3: loss 2.139094
model saved to ./save\ckpt-4
batch 4: loss 2.036330
model saved to ./save\ckpt-5
batch 5: loss 2.022789
model saved to ./save\ckpt-6
batch 6: loss 1.992425
model saved to ./save\ckpt-7
batch 7: loss 1.841374
model saved to ./save\ckpt-8
batch 8: loss 1.950590
model saved to ./save\ckpt-9
batch 9: loss 1.888216
model saved to ./save\ckpt-10
batch 10: loss 1.758012
model saved to ./save\ckpt-11
batch 11: loss 1.589187
model saved to ./save\ckpt-12
batch 12: loss 1.567833
model saved to ./save\ckpt-13
batch 13: loss 1.533204
model saved to ./save\ckpt-14
batch 14: loss 1.397109
model saved to ./save\ckpt-15
batch 15: loss 1.529853
model saved to ./save\ckpt-16
batch 16: loss 1.459800
model saved to ./save\ckpt-17
batch 17: loss 1.483278
model saved to ./save\ckpt-18
batch 18: loss 1.449621
model saved to ./save\c

model saved to ./save\ckpt-152
batch 152: loss 0.203906
model saved to ./save\ckpt-153
batch 153: loss 0.353706
model saved to ./save\ckpt-154
batch 154: loss 0.341884
model saved to ./save\ckpt-155
batch 155: loss 0.313355
model saved to ./save\ckpt-156
batch 156: loss 0.313145
model saved to ./save\ckpt-157
batch 157: loss 0.190414
model saved to ./save\ckpt-158
batch 158: loss 0.333955
model saved to ./save\ckpt-159
batch 159: loss 0.342028
model saved to ./save\ckpt-160
batch 160: loss 0.275797
model saved to ./save\ckpt-161
batch 161: loss 0.247955


KeyboardInterrupt: 

In [8]:
# 从checkpoint中加载回之前的model.variables
model = MLP()
checkpoint = tf.train.Checkpoint(myAwesomeModel=model)      
checkpoint.restore(tf.train.latest_checkpoint('./save'))

sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

test accuracy: 0.899800
