# 利用自动微分机制建立自定义训练模型

In [4]:
import tensorflow as tf

In [2]:
# Load MNIST through the Keras dataset helper and unpack it into the
# training pair and the test pair of (images, labels).
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

In [3]:
# Images: append a trailing axis (the model's first layer is declared with a
# single input channel), convert to float32 and scale the pixel values by 1/255.
train_images = tf.cast(tf.expand_dims(train_images, axis=-1), dtype=tf.float32) / 255.0
test_images = tf.cast(tf.expand_dims(test_images, axis=-1), dtype=tf.float32) / 255.0

# Labels: stored as int64 class indices for the sparse loss and accuracy metric.
train_labels = tf.cast(train_labels, dtype=tf.int64)
test_labels = tf.cast(test_labels, dtype=tf.int64)

In [4]:
# Batch size shared by the training and evaluation pipelines.
BATCH_SIZE = 32

# Training pipeline. The shuffle buffer is sized from the data itself (instead
# of a hard-coded 60000) so it always spans the whole training set, whatever
# its length.
dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
dataset = dataset.shuffle(buffer_size=train_images.shape[0]).batch(BATCH_SIZE)

# Evaluation pipeline: batched only, no shuffling.
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = test_dataset.batch(BATCH_SIZE)

In [5]:
# Convolutional classifier. The first layer leaves height and width unspecified
# (None) and expects one input channel; the final Dense layer has 10 units and
# no activation, so the model outputs raw logits.
model = tf.keras.Sequential([
    # Convolution stage 1.
    tf.keras.layers.Conv2D(16, [3, 3], input_shape=(None, None, 1), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32, [3, 3], activation="relu"),
    tf.keras.layers.MaxPooling2D(),
    # Convolution stage 2.
    tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
    tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
    tf.keras.layers.MaxPooling2D(),
    # Classification head.
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(10),
])

In [6]:
# Adam optimizer built with its default hyperparameters; train_step uses this
# global to apply gradients to the model's trainable variables.
optimizer = tf.keras.optimizers.Adam()

In [7]:
# from_logits=True means the predictions are the network's raw outputs, i.e.
# they have not been converted to probabilities by sigmoid or softmax.
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [8]:
# Running aggregates for the training batches: mean loss and sparse accuracy.
train_loss, train_acc = (
    tf.keras.metrics.Mean(),
    tf.keras.metrics.SparseCategoricalAccuracy(),
)

# The same pair of aggregates for the evaluation batches.
test_loss, test_acc = (
    tf.keras.metrics.Mean(),
    tf.keras.metrics.SparseCategoricalAccuracy(),
)

In [9]:
def train_step(model, x, y):
    """Run one optimization step on a single batch and update the train metrics.

    The forward pass and loss are recorded on a gradient tape, the gradients
    with respect to ``model.trainable_variables`` are applied through the
    global ``optimizer``, and the batch loss and predictions are accumulated
    into the global ``train_loss`` and ``train_acc`` metrics.

    Args:
        model: Keras model to optimize.
        x: Batch of inputs fed to ``model``.
        y: Labels for ``x``, passed to ``loss_func`` and ``train_acc``.
    """
    with tf.GradientTape() as tape:
        # training=True puts layers that behave differently in training and
        # inference (this file's model contains BatchNormalization) into
        # training mode; without it they run in inference mode.
        pred = model(x, training=True)
        loss_step = loss_func(y, pred)
    grads = tape.gradient(loss_step, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss_step)
    train_acc(y, pred)

In [10]:
def test_step(model, x, y):
    """Evaluate a single batch and update the test metrics; no weights change.

    Args:
        model: Keras model to evaluate.
        x: Batch of inputs fed to ``model``.
        y: Labels for ``x``, passed to ``loss_func`` and ``test_acc``.
    """
    # Request inference mode explicitly so the evaluation never depends on
    # ambient Keras state and mirrors the explicit flag a training step needs.
    pred = model(x, training=False)
    loss_step = loss_func(y, pred)
    test_loss(loss_step)
    test_acc(y, pred)

In [11]:
def train(epochs=10):
    """Train and evaluate the global ``model``, printing metrics once per epoch.

    Each epoch runs ``train_step`` over every batch of ``dataset``, then
    ``test_step`` over every batch of ``test_dataset``, and prints the
    accumulated loss/accuracy of both passes.

    Args:
        epochs: Number of passes over ``dataset``. Defaults to 10, the value
            that was previously hard-coded.

    Note:
        The metrics are cleared at the *start* of every epoch, so after the
        function returns they still hold the last epoch's values.
    """
    metrics = (train_loss, train_acc, test_loss, test_acc)
    for epoch in range(epochs):
        # Reset before accumulating rather than after printing: if a previous
        # run was interrupted mid-epoch, its partial sums must not leak into
        # this epoch's averages.
        for metric in metrics:
            # Newer Keras releases name this method `reset_state`; fall back to
            # the `reset_states` spelling this notebook was written against.
            reset = getattr(metric, "reset_state", None) or metric.reset_states
            reset()
        for x, y in dataset:
            train_step(model, x, y)
        for x, y in test_dataset:
            test_step(model, x, y)
        print("Epoch {} loss is {}, accuracy is {}, test_loss is {}, test_acc is {}".format(
            epoch,
            train_loss.result(),
            train_acc.result(),
            test_loss.result(),
            test_acc.result()
        ))

In [12]:
# Run the training/evaluation loop; one summary line is printed per epoch.
train()

Epoch 0 loss is 0.3330059349536896, accuracy is 0.8883166909217834, test_loss is 0.08115518093109131, test_acc is 0.975600004196167
Epoch 1 loss is 0.08101185411214828, accuracy is 0.9747999906539917, test_loss is 0.06435876339673996, test_acc is 0.9797000288963318


KeyboardInterrupt: 

### 利用梯度磁带和优化器求最小值

In [79]:
# Minimize y = a*x**2 + b*x + c over x using tf.GradientTape and an optimizer.
# With a=1, b=-2, c=1 the minimum is at x = 1.
x = tf.Variable(0.0, name="x", dtype=tf.float32)
a = tf.constant(1.0)
b = tf.constant(-2.0)
c = tf.constant(1.0)

# Use a dedicated name: rebinding the global `optimizer` here would replace the
# Adam instance that train_step looks up at call time.
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

# Number of gradient-descent updates. A single step from x = 0 moves x to 0.02
# (gradient -2, learning rate 0.01); raise this to actually approach x = 1.
num_steps = 1
for _ in range(num_steps):
    with tf.GradientTape() as tape:
        y = a * tf.pow(x, 2) + b * x + c
    dy_dx = tape.gradient(y, x)
    sgd_optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])

0
0.02
