In [20]:
import numpy as np
import pandas as pd
import os
import gzip
import tensorflow as tf
from tensorflow import keras
import sklearn
import matplotlib as mpl
import matplotlib.pyplot as plt

In [21]:
# Float literals are used on purpose: the original author notes that without
# floats the computation can easily overflow.
def f(x):
    """Evaluate the quadratic 3*x**2 + 2*x - 1 at ``x``."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1

def approximate_derivative(f, x, eps=1e-3):
    """Estimate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable of one argument.
        x: Point at which the slope is estimated.
        eps: Half-width of the sampling interval around ``x``.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps)
    """
    ahead = f(x + eps)
    behind = f(x - eps)
    return (ahead - behind) / (2. * eps)

# Numerical slope of f at x = 1; analytically f'(x) = 6x + 2, i.e. 8 at this point.
print(approximate_derivative(f, x=1.))

7.999999999999119


In [22]:
def g(x1, x2):
    """Return (x1 + 5) * x2**2, the two-variable function differentiated below."""
    shifted_x1 = x1 + 5
    squared_x2 = x2 ** 2
    return shifted_x1 * squared_x2

def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of ``g`` at ``(x1, x2)``.

    Each partial derivative is obtained by holding one argument fixed and
    handing the resulting one-argument function to ``approximate_derivative``.

    Args:
        g: Callable of two arguments.
        x1: First coordinate of the evaluation point.
        x2: Second coordinate of the evaluation point.
        eps: Step forwarded to ``approximate_derivative``.

    Returns:
        Tuple ``(dg/dx1, dg/dx2)``.
    """
    def along_x1(value):
        # x2 stays fixed; only the first argument varies.
        return g(value, x2)

    def along_x2(value):
        # x1 stays fixed; only the second argument varies.
        return g(x1, value)

    partial_x1 = approximate_derivative(along_x1, x1, eps)
    partial_x2 = approximate_derivative(along_x2, x2, eps)
    return partial_x1, partial_x2

# Numerical gradient of g at (2, 3); analytically (x2**2, 2*x2*(x1 + 5)) = (9, 42).
print(approximate_gradient(g, x1=2., x2=3.))

(8.999999999993236, 41.999999999994486)


In [8]:
# TensorFlow: differentiate g with tf.GradientTape.
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape() as tape:
    # Evaluate the function under the tape.
    z = g(x1, x2)

# Partial derivative of z with respect to x1.
dz_x1 = tape.gradient(target=z, sources=x1)
print(dz_x1)

# A default tape is single-use: once tape.gradient has been called it cannot be
# queried again, so this second call raises RuntimeError (printed below).
try:
    dz_x2 = tape.gradient(target=z, sources=x2)
except RuntimeError as tape_error:
    print(tape_error)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [11]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
# With persistent=True the tape can be queried more than once, but it then has
# to be deleted by hand afterwards.
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
# Query the same tape once per variable, x1 first and then x2.
dz_x1, dz_x2 = [tape.gradient(z, source) for source in (x1, x2)]
print(dz_x1, dz_x2)

del tape

tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)


In [12]:
# Obtain the partial derivatives for all variables in a single call.
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
# Passing a list of sources yields a list of gradients in the same order.
dz_x1_x2 = tape.gradient(target=z, sources=[x1, x2])
print(dz_x1_x2)

[<tf.Tensor: id=163, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=169, shape=(), dtype=float32, numpy=42.0>]


In [13]:
# Partial derivatives with respect to constants.
x1, x2 = tf.constant(2.0), tf.constant(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
# The constants are not watched here, so both entries come back as None.
dz_x1_x2 = tape.gradient(target=z, sources=[x1, x2])
print(dz_x1_x2)

[None, None]


In [14]:
# Explicitly watch the constants so that their derivatives are available.
x1, x2 = tf.constant(2.0), tf.constant(3.0)
with tf.GradientTape() as tape:
    # Register both constants with the tape before z is evaluated.
    tape.watch([x1, x2])
    z = g(x1, x2)
dz_x1_x2 = tape.gradient(target=z, sources=[x1, x2])
print(dz_x1_x2)

[<tf.Tensor: id=185, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=191, shape=(), dtype=float32, numpy=42.0>]


In [15]:
# Differentiate two functions with respect to the same variable.
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1, z2 = 3 * x, x ** 2
# The result is the sum of the two derivatives (3 + 2 * 5 = 13 at x = 5).
tape.gradient(target=[z1, z2], sources=x)

<tf.Tensor: id=214, shape=(), dtype=float32, numpy=13.0>

In [16]:
# Second-order derivatives via nested tapes.
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # First-order gradients are taken inside the outer tape's context so that
    # the outer tape can differentiate them again.
    inner_grads = inner_tape.gradient(target=z, sources=[x1, x2])
# One row per first-order gradient: its derivatives with respect to x1 and x2.
outer_grads = [
    outer_tape.gradient(target=first_order, sources=[x1, x2])
    for first_order in inner_grads
]
print(outer_grads)
# Both tapes are persistent, so they are deleted explicitly.
del inner_tape, outer_tape

[[None, <tf.Tensor: id=251, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=262, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=260, shape=(), dtype=float32, numpy=14.0>]]


In [None]:
# Hand-written gradient descent on f.
learning_rate = 0.1
x = tf.Variable(0.0)

for i in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(target=z, sources=x)
    # Step against the gradient, updating x in place.
    x.assign_sub(learning_rate * dz_dx)
print(x)

# Result: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>

In [None]:
# Same minimisation of f, with the update step delegated to a Keras optimizer.
learning_rate = 0.1
x = tf.Variable(0.0)

# The step size is passed as `learning_rate`; the older `lr` alias is deprecated
# and is rejected by newer Keras releases.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for i in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients takes a list of pairs; each pair holds a gradient and the
    # variable it belongs to, with the gradient first.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)

# Result: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>