In [1]:
import tensorflow as tf

### $$y = 2x^2$$
### $$\frac{dy}{dx} = 4x$$

In [2]:
# A GradientTape generally only differentiates with respect to tf.Variable
# objects; this keeps the bookkeeping focused on what training optimizes.
x = tf.Variable(3.0)

with tf.GradientTape() as tape:
    x_squared = x ** 2
    y = 2 * x_squared

# dy/dx = 4x, evaluated at the current value of x.
d_yx = tape.gradient(y, x)
print(d_yx)

tf.Tensor(12.0, shape=(), dtype=float32)


In [3]:
# Differentiating with respect to a constant requires tape.watch().
x = tf.constant(3.0)

# First pass: the constant is not watched, so the gradient comes back as None.
# Second pass: the constant is watched explicitly, so the gradient is 4x.
for should_watch in (False, True):
    with tf.GradientTape() as tape:
        if should_watch:
            tape.watch(x)
        y = 2 * x ** 2
    d_yx = tape.gradient(y, x)
    print(d_yx)

None
tf.Tensor(12.0, shape=(), dtype=float32)


### $$y=2x^2$$
### $$\frac{d^2y}{dx^2} = 4$$

In [4]:
# Higher-order derivatives are obtained by nesting tapes: the inner tape
# yields dy/dx, and the outer tape differentiates that result again.
x = tf.Variable(3.0)

with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        y = 2 * x ** 2
    # Computed while outer_tape is still recording, so that the first
    # derivative itself can be differentiated afterwards.
    d_yx = inner_tape.gradient(y, x)

d2_yx2 = outer_tape.gradient(d_yx, x)
print(d2_yx2)

tf.Tensor(4.0, shape=(), dtype=float32)


如何用优化器进行梯度下降，这里用SGD优化举例。

根据上一节提出的公式，参数的变化量为：

### $$\Delta w = -lr \cdot \nabla w$$

In [5]:
# Compare one hand-computed SGD step with the step taken by the optimizer.
learning_rate = 0.01
SGD_opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)

x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = 2 * x ** 2
d_y = tape.gradient(y, x)

# Evaluate the manual update first: apply_gradients below modifies x in place.
manual_x = x - learning_rate * d_y
print('用公式推出的新x：', manual_x)

# The optimizer takes (gradient, variable) pairs.
SGD_opt.apply_gradients([(d_y, x)])
print('用优化器更新得到的x：', x)

用公式推出的新x： tf.Tensor(2.88, shape=(), dtype=float32)
用优化器更新得到的x： <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.88>


接下来，我们来看看如何将梯度带用到模型的训练中，然后有请我们的老朋友MNIST。

In [6]:
# Train a small dense classifier with a hand-written GradientTape loop.
import input_data

# NOTE(review): input_data is a local helper module; presumably it yields
# flattened images and one-hot labels as arrays -- confirm against the module.
dataset = input_data.read_data_sets('mnist_data', one_hot=True)

# Functional-API model: 784 input features -> 128 ReLU units -> 10-way softmax.
input_ = tf.keras.Input(shape=(784, ))
dense = tf.keras.layers.Dense(128, activation='relu')(input_)
out = tf.keras.layers.Dense(10, activation='softmax')(dense)

model = tf.keras.Model(inputs=input_, outputs=out)
optimizor = tf.keras.optimizers.Adam()

total_steps = 2000
batch_size = 128
log_every = 100  # report the loss once every `log_every` steps
print('开始训练')
for i in range(total_steps):
    train_image, train_label = dataset.train.next_batch(batch_size)
    with tf.GradientTape() as tape:
        # training=True makes layers run in training mode; the batch output
        # gets its own name so the symbolic `out` tensor is not overwritten.
        pred = model(train_image, training=True)
        loss = tf.reduce_mean(
            tf.keras.losses.categorical_crossentropy(train_label, pred))
    # Differentiate the scalar loss w.r.t. every trainable weight and let the
    # optimizer apply the update.
    grad = tape.gradient(loss, model.trainable_variables)
    optimizor.apply_gradients(zip(grad, model.trainable_variables))
    if i % log_every == 0:
        # '\r' with end='' redraws the same line instead of stacking log lines.
        print('\rsteps: %d, losses: %f' % (i, loss.numpy()), end='')
# Terminate the in-place progress line so later output starts on a new line.
print()
        

Extracting mnist_data\train-images-idx3-ubyte.gz
Extracting mnist_data\train-labels-idx1-ubyte.gz
Extracting mnist_data\t10k-images-idx3-ubyte.gz
Extracting mnist_data\t10k-labels-idx1-ubyte.gz
开始训练
steps: 1900, losses: 0.054487