In [31]:
import tensorflow as tf
from tensorflow import keras
from keras.datasets import mnist
# Load MNIST, then flatten every image into a 28*28-element row, cast to
# float32 and divide by 255.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape(60000, 28 * 28).astype("float32") / 255
test_images = test_images.reshape(10000, 28 * 28).astype("float32") / 255

训练图像和测试图像都保存在float32类型的NumPy张量中，形状分别为`(60000, 28*28)`和`(10000, 28*28)`，像素值已除以255。

In [32]:
#模型部分
from keras import layers
# Two Dense layers chained together: 512 units with ReLU, then 10 units with
# softmax.
model = keras.Sequential(
    [
        layers.Dense(512, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)

模型包含两个链接在一起的Dense层，每层都对输入数据做了一些简单的张量运算，这些运算都涉及权重张量。

In [33]:
# Configure training: RMSprop optimizer, sparse categorical cross-entropy
# loss, and accuracy as the reported metric.
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

sparse_categorical_crossentropy是损失函数，是用于学习权重张量的反馈信号，在训练过程中应该将其最小化。
降低损失值是通过小批量随机梯度下降（SGD）实现的，具体的梯度下降规则由compile的第一个参数指定，即rmsprop优化器。

In [34]:
# Training loop: 5 epochs over the training data in batches of 128.
model.fit(
    train_images,
    train_labels,
    epochs=5,
    batch_size=128,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1697c00d0>

在调用fit的过程中，模型在训练数据上不断迭代，一共迭代了5轮，每一轮叫做一个epoch。对于每个批量，模型会计算损失相对于权重的梯度，并将权重沿着减小该批量对应损失值的方向移动。

## 使用TensorFlow从头实现第一个例子

In [35]:
#简单的Dense类
import tensorflow as tf
class NaiveDense:
    """Minimal dense layer computing ``activation(matmul(inputs, W) + b)``.

    ``W`` is a ``(input_size, output_size)`` variable drawn from
    ``tf.random.uniform`` with ``minval=0`` and ``maxval=1e-1``; ``b`` is an
    ``(output_size,)`` variable filled with zeros.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # Weight matrix W, randomly initialised with small values.
        kernel_shape = (input_size, output_size)
        self.W = tf.Variable(
            tf.random.uniform(kernel_shape, minval=0, maxval=1e-1)
        )

        # Bias vector b, initialised to zeros.
        bias_shape = (output_size,)
        self.b = tf.Variable(tf.zeros(bias_shape))

    def __call__(self, inputs):
        """Forward pass: affine transform followed by the activation."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """Convenience accessor returning the layer's variables as ``[W, b]``."""
        return [self.W, self.b]


## 简单的Sequential类

In [36]:
# 创建NaiveSequential类，将这些层链接起来
class NaiveSequential:
    """Chains layers: each layer is called on the previous layer's output."""

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        outputs = inputs
        for current_layer in self.layers:
            outputs = current_layer(outputs)
        return outputs

    @property
    def weights(self):
        """Flat list of every layer's weights, in layer order (new list per access)."""
        return [
            weight
            for current_layer in self.layers
            for weight in current_layer.weights
        ]

In [37]:
# Build a Keras-like model out of the NaiveDense and NaiveSequential classes.
model = NaiveSequential(
    [
        NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
        NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax),
    ]
)

# Two layers, each exposing [W, b].
assert len(model.weights) == 4

## 批量生成器

In [38]:
import math
# 对MNIST数据进行小批量迭代
class BatchGenerator:
    """Hands out consecutive ``batch_size``-long slices of images and labels."""

    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.index = 0  # start offset of the next batch
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        # Round up so that a trailing partial batch is counted as well.
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the offset."""
        start = self.index
        stop = start + self.batch_size
        self.index = stop
        return self.images[start:stop], self.labels[start:stop]

In [39]:
learning_rate = 1e-3


def update_weights(gradients, weights):
    """Subtract ``gradient * learning_rate`` from each weight, in place.

    ``gradients`` and ``weights`` are paired positionally with ``zip``;
    ``learning_rate`` is read from the module-level variable.
    """
    for grad, var in zip(gradients, weights):
        # assign_sub is the TensorFlow-variable equivalent of "-=".
        var.assign_sub(grad * learning_rate)

In [40]:
def one_training_step(model, images_batch, labels_batch):
    """Run one training step on a single batch and return its mean loss.

    The forward pass and loss are computed under a ``tf.GradientTape``; the
    gradients of the mean loss with respect to ``model.weights`` are then
    passed to ``update_weights``.

    Args:
        model: callable returning predictions for ``images_batch`` and
            exposing a ``weights`` list.
        images_batch: batch of inputs passed to ``model``.
        labels_batch: labels passed to ``sparse_categorical_crossentropy``.

    Returns:
        The batch's average loss, as computed by ``tf.reduce_mean``.
    """
    # Only the forward pass and the loss need to be recorded by the tape.
    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, predictions
        )
        average_loss = tf.reduce_mean(per_sample_losses)

    # Differentiate and update after leaving the tape context, so neither
    # the gradient computation nor the weight update runs while recording.
    weights = model.weights
    # gradients is a list with one entry per element of model.weights.
    gradients = tape.gradient(average_loss, weights)
    update_weights(gradients, weights)
    return average_loss

In [41]:
# 实践中用Keras的Optimizer更新权重
from tensorflow import keras
from keras import optimizers
# Module-level Keras SGD optimizer (learning rate 1e-3) used by update_weights.
optimizer = optimizers.SGD(learning_rate=1e-3)


def update_weights(gradients, weights):
    """Apply one optimizer step, pairing each gradient with its weight.

    Note: this rebinds ``update_weights``, superseding the manual
    ``assign_sub`` version defined earlier in the notebook.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)

## 完整的循环训练

In [42]:
# 一轮训练就是对训练数据的每个批量都重复上述训练步骤
# 完整的训练循环就是重复多轮训练
def fit(model, images, labels, epochs, bath_size=128):
    """Train ``model`` for ``epochs`` passes over ``images`` / ``labels``.

    Each epoch creates a fresh ``BatchGenerator`` and calls
    ``one_training_step`` on every batch; the loss is printed every 100
    batches.

    Args:
        model: model passed through to ``one_training_step``.
        images: training inputs.
        labels: training labels, same length as ``images``.
        epochs: number of passes over the data.
        bath_size: number of samples per batch. The misspelled name is kept
            because existing callers pass it as a keyword.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # Forward the requested batch size; it was previously ignored, so
        # the generator always fell back to its default of 128.
        batch_generator = BatchGenerator(images, labels, batch_size=bath_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}:{loss:.2f}")

In [44]:
from tensorflow import keras
from keras.datasets import mnist
import numpy as np
# Reload MNIST and repeat the preprocessing: flatten each image to 28*28
# values, cast to float32 and divide by 255.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255

# Train the from-scratch model for 10 epochs.
fit(model, train_images, train_labels, epochs=10, bath_size=128)

Epoch 0
loss at batch 0:0.64
loss at batch 100:0.66
loss at batch 200:0.56
loss at batch 300:0.62
loss at batch 400:0.70
Epoch 1
loss at batch 0:0.60
loss at batch 100:0.62
loss at batch 200:0.53
loss at batch 300:0.59
loss at batch 400:0.67
Epoch 2
loss at batch 0:0.57
loss at batch 100:0.59
loss at batch 200:0.50
loss at batch 300:0.57
loss at batch 400:0.65
Epoch 3
loss at batch 0:0.55
loss at batch 100:0.56
loss at batch 200:0.48
loss at batch 300:0.55
loss at batch 400:0.63
Epoch 4
loss at batch 0:0.53
loss at batch 100:0.54
loss at batch 200:0.46
loss at batch 300:0.53
loss at batch 400:0.61
Epoch 5
loss at batch 0:0.51
loss at batch 100:0.52
loss at batch 200:0.44
loss at batch 300:0.51
loss at batch 400:0.60
Epoch 6
loss at batch 0:0.50
loss at batch 100:0.50
loss at batch 200:0.43
loss at batch 300:0.49
loss at batch 400:0.59
Epoch 7
loss at batch 0:0.48
loss at batch 100:0.48
loss at batch 200:0.41
loss at batch 300:0.48
loss at batch 400:0.58
Epoch 8
loss at batch 0:0.47
los

In [47]:
# Evaluate the model
# Run the test images through the model and convert the result to NumPy.
predictions = model(test_images).numpy()
# The predicted label is the index of the largest value along axis 1.
predicted_labels = np.argmax(predictions, axis=1)
matches = predicted_labels == test_labels
print(f"accuracy: {matches.mean():.2f}")

accuracy: 0.86
