##### 简单dense类

In [2]:
import tensorflow as tf

class NaiveDense:
    """Minimal fully connected layer computing ``activation(x @ w + b)``.

    Attributes:
        activation: Callable applied to the affine output.
        w: ``tf.Variable`` of shape ``(input_size, output_size)``, initialised
            with uniform random values between 0 and 0.1.
        b: ``tf.Variable`` of shape ``(output_size,)``, initialised to zeros.
    """

    def __init__(self, input_size, output_size, activation):
        """Create the kernel and bias variables for the layer.

        Args:
            input_size: Size of the first dimension of the kernel.
            output_size: Size of the second dimension of the kernel and
                length of the bias vector.
            activation: Callable applied to the result of ``x @ w + b``.
        """
        # Small uniform random values for the kernel.
        kernel_init = tf.random.uniform(
            (input_size, output_size), minval=0, maxval=1e-1
        )
        # The bias starts at zero.
        bias_init = tf.zeros((output_size,))

        self.activation = activation
        self.w = tf.Variable(kernel_init)
        self.b = tf.Variable(bias_init)

    def __call__(self, x):
        """Apply the layer to ``x`` and return the activated output.

        ``x`` is multiplied by the kernel with ``tf.matmul``, so its last
        dimension presumably has to equal ``input_size`` (not checked here).
        """
        return self.activation(tf.matmul(x, self.w) + self.b)

    def weights(self):
        """Return the layer's variables as the list ``[w, b]``."""
        return [self.w, self.b]

##### 简单Sequential类

In [3]:
class NaiveSequential:
    """Chain of layers applied one after another.

    Every layer must be callable on the previous layer's output and expose a
    ``weights()`` method returning an iterable of its variables.
    """

    def __init__(self, layers):
        """Store the ordered collection of layers that make up the model."""
        self.layers = layers

    def __call__(self, x):
        """Feed ``x`` through each layer in order and return the final output."""
        output = x
        for current_layer in self.layers:
            output = current_layer(output)
        return output

    def weights(self):
        """Return a new flat list of all layers' weights, in layer order."""
        return [
            weight
            for current_layer in self.layers
            for weight in current_layer.weights()
        ]
        

In [4]:
# Two-layer classifier: 28*28 inputs -> 512 units (ReLU) -> 10 units (softmax).
model = NaiveSequential(
    layers=[
        NaiveDense(28 * 28, 512, activation=tf.nn.relu),
        NaiveDense(512, 10, activation=tf.nn.softmax),
    ]
)

# Sanity check: each dense layer contributes a kernel and a bias,
# so the two layers together must expose four variables.
assert len(model.weights()) == 4


##### 批量生成器

In [5]:
import math

class BatchGenerator:
    """Sequentially slice paired ``images`` / ``labels`` into fixed-size batches.

    Batches are produced in the order the data is given (no shuffling). The
    last batch is shorter when the number of samples is not a multiple of
    ``batch_size``.

    Attributes:
        index: Offset of the first sample of the next batch.
        images: The full collection of inputs.
        labels: The full collection of targets, aligned with ``images``.
        batch_size: Number of samples per batch.
        num_batches: Number of ``next()`` calls needed to cover all samples.
    """

    def __init__(self, images, labels, batch_size):
        """Validate the inputs and compute how many batches they yield.

        Args:
            images: Sized, sliceable collection of inputs.
            labels: Sized, sliceable collection of targets; must have the
                same length as ``images``.
            batch_size: Positive number of samples per batch.

        Raises:
            ValueError: If ``batch_size`` is not positive, or if ``images``
                and ``labels`` differ in length.
        """
        # Zero would divide by zero below; a negative value would silently
        # produce a non-positive batch count.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        # Mismatched lengths would pair samples with the wrong (or no) labels.
        if len(images) != len(labels):
            raise ValueError(
                "images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )

        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor.

        Calling this more than ``num_batches`` times returns empty slices
        rather than raising.
        """
        stop = self.index + self.batch_size
        images = self.images[self.index:stop]
        labels = self.labels[self.index:stop]
        self.index = stop
        return images, labels

##### 单步训练（一个批次）

In [6]:
def one_train_step(model, images_batch, labels_batch, learning_rate=1e-3):
    """Run one gradient-descent update on a single batch and return its loss.

    The forward pass is recorded on a ``tf.GradientTape``; the loss is the
    mean sparse categorical cross-entropy between ``labels_batch`` and the
    model's predictions. Each weight is then updated in place with
    ``w -= learning_rate * grad``.

    Args:
        model: Callable returning predictions for ``images_batch`` and
            exposing ``weights()`` that returns the variables to train.
        images_batch: Inputs for this step.
        labels_batch: Targets for this step, passed as ``y_true`` to
            ``sparse_categorical_crossentropy`` (presumably integer class
            indices -- confirm against the caller).
        learning_rate: Step size of the update. Defaults to ``1e-3``, the
            value that was previously hard-coded.

    Returns:
        The scalar loss tensor computed before the update.
    """
    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        per_sample_loss = tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, predictions
        )
        loss = tf.reduce_mean(per_sample_loss)

    # Fetch the variables once so gradients and updates use the same list.
    weights = model.weights()
    grads = tape.gradient(loss, weights)
    for var, grad in zip(weights, grads):
        # tape.gradient yields None for a variable the loss does not depend
        # on; there is nothing to subtract for it.
        if grad is None:
            continue
        var.assign_sub(learning_rate * grad)
    return loss

##### 完整训练

In [7]:
def fit(model, images, labels, epochs, batch_size):
    """Train ``model`` for ``epochs`` sequential passes over the data.

    Every epoch builds a fresh ``BatchGenerator`` over ``images`` / ``labels``
    and calls ``one_train_step`` once per batch, in the order the data is
    given (no shuffling). After each epoch the loss of the *last* batch is
    printed; it is not an average over the epoch.

    Args:
        model: Model passed through to ``one_train_step``.
        images: Full collection of training inputs.
        labels: Full collection of training targets, aligned with ``images``.
        epochs: Number of passes over the data.
        batch_size: Number of samples per batch.
    """
    # One-based epoch counter so the header ("Epoch 1/5") agrees with the
    # summary line ("Epoch 1, Loss: ...") and ends at "Epoch N/N".
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")
        batch_generator = BatchGenerator(images, labels, batch_size)
        loss = None
        for _ in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_train_step(model, images_batch, labels_batch)
        if loss is None:
            # No batch was produced (empty dataset): there is no loss to report.
            print(f"Epoch {epoch}, Loss: n/a (no batches)")
        else:
            print(f"Epoch {epoch}, Loss: {loss.numpy():.4f}")

In [8]:
from tensorflow.keras.datasets import mnist
# Load the MNIST train / test splits as (images, labels) pairs.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every image to a 28*28 vector, convert to float32 and divide by 255
# (presumably scaling 8-bit pixel values into [0, 1]). Using -1 lets the
# sample count be inferred instead of hard-coding 60000 / 10000.
train_images = train_images.reshape((-1, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((-1, 28 * 28)).astype("float32") / 255

# Train the model defined above: 5 passes over the data, 128 samples per batch.
fit(model, train_images, train_labels, epochs=5, batch_size=128)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 0/5
Epoch 1, Loss: 2.0106
Epoch 1/5
Epoch 2, Loss: 1.7608
Epoch 2/5
Epoch 3, Loss: 1.5313
Epoch 3/5
Epoch 4, Loss: 1.3413
Epoch 4/5
Epoch 5, Loss: 1.1946


##### 模型评估

In [9]:
# Forward pass over the whole test set in a single call.
predictions = model(test_images)

# Predicted class = index of the largest output along axis 1.
predicted_labels = tf.argmax(input=predictions, axis=1)

# Element-wise comparison with the ground-truth labels.
matches = tf.equal(predicted_labels, test_labels)

# Accuracy is the mean of the matches once cast to float32.
accuracy = tf.reduce_mean(input_tensor=tf.cast(matches, dtype=tf.float32))
print(f"Test accuracy: {accuracy.numpy():.4f}")

Test accuracy: 0.7604
