In [None]:
import tensorflow as tf

tf.GradientTape是在eager模式下计算梯度用的

In [None]:
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x) # track tensor
    y = x * x
dy_dx = g.gradient(y, x)

In [None]:
dy_dx

gradient(target, source)

* target 被微分的Tensor, 可以理解为loss值
* sources: Tensors 或者 Variables列表

返回：

一个列表表示各个变量的梯度值

# 案例1 模型自动求导

In [None]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(num_classes)
        
    def call(self, inputs):
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [None]:
import numpy as np

data = np.random.random((1000, 32))
labels = np.random.random((1000, 10))

In [None]:
model = MyModel(num_classes=10)

loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

with tf.GradientTape() as tape:
    predictions = model(data)
    loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))

In [None]:
model.trainable_variables

# 案例2:使用GradientType自定义训练模型

In [126]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(num_classes)
        
    def call(self, inputs):
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [127]:
import numpy as np

data = np.random.random((1000, 32))
labels = np.random.random((1000, 10))

In [128]:
model = MyModel(num_classes=10)

optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

loss_fn = tf.keras.losses.CategoricalCrossentropy()

batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((data, labels))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

In [129]:
epochs = 3
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        
        with tf.GradientTape() as tape:
            
            logits = model(x_batch_train, training=True)
            
            loss_value = loss_fn(y_batch_train, logits)
            
        grads = tape.gradient(loss_value, model.trainable_weights)
        
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            print('Seen so far: %s samples' % ((step + 1) * 64))

Start of epoch 0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Training loss (for one batch) at step 0: 28.78843116760254
Seen so far: 64 samples
Start of epoch 1
Training loss (for one batch) at step 0: 19.241195678710938
Seen so far: 64 samples
Start of epoch 2
Training loss (for one batch) at step 0: 12.845794677734375
Seen so far: 64 samples


# 案例3: 加入评估函数

* 再循环开始时初始化metrics
* metric.update_state():每batch之后更新
* metric.result():需要现实metric的当前值时调用
* metric.reset_states():需要清除metric状态时重置

In [130]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(num_classes)
        
    def call(self, inputs):
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [133]:
import numpy as np

x_train = np.random.random((1000, 32))
y_train = np.random.random((1000, 10))
x_val = np.random.random((200, 32))
y_val = np.random.random((200, 10))
x_test = np.random.random((200, 32))
y_test = np.random.random((200, 10))

optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

loss_fn = tf.keras.losses.CategoricalCrossentropy()

train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
val_acc_metric = tf.keras.metrics.CategoricalAccuracy()

batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)

In [134]:
model = MyModel(num_classes=10)
epochs = 3
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        
        with tf.GradientTape() as tape:
            
            logits = model(x_batch_train, training=True)
            
            loss_value = loss_fn(y_batch_train, logits)
            
        grads = tape.gradient(loss_value, model.trainable_weights)
        
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        
        train_acc_metric(y_batch_train, logits)
        
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            print('Seen so far: %s samples' % ((step + 1) * 64))
            
    train_acc = train_acc_metric.result()
    print('Training acc over epoch: %s' % (float(train_acc),))
    
    train_acc_metric.reset_states()
    
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val)
        
        val_acc_metric(y_batch_val, val_logits)
        
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print('Validation acc: %s' % (float(val_acc),))

Start of epoch 0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Training loss (for one batch) at step 0: 35.33246612548828
Seen so far: 64 samples
Training acc over epoch: 0.11299999803304672
Validation acc: 0.08500000089406967
Start of epoch 1
Training loss (for one batch) at step 0: 25.33914566040039
Seen so far: 64 samples
Training acc over epoch: 0.11100000143051147
Validation acc: 0.08500000089406967
Start of epoch 2
Training loss (for one batch) at step 0: 21.316570281982422
Seen so far: 64 samples
Training acc over epoch: 0.11100000143051147
Validation acc: 0.09000000357627869


In [135]:
help(model)

Help on MyModel in module __main__ object:

class MyModel(tensorflow.python.keras.engine.training.Model)
 |  MyModel(*args, **kwargs)
 |  
 |  `Model` groups layers into an object with training and inference features.
 |  
 |  There are two ways to instantiate a `Model`:
 |  
 |  1 - With the "functional API", where you start from `Input`,
 |  you chain layer calls to specify the model's forward pass,
 |  and finally you create your model from inputs and outputs:
 |  
 |  ```python
 |  import tensorflow as tf
 |  
 |  inputs = tf.keras.Input(shape=(3,))
 |  x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
 |  outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
 |  model = tf.keras.Model(inputs=inputs, outputs=outputs)
 |  ```
 |  
 |  2 - By subclassing the `Model` class: in that case, you should define your
 |  layers in `__init__` and you should implement the model's forward pass
 |  in `call`.
 |  
 |  ```python
 |  import tensorflow as tf
 |  
 |  class MyMod