## 自定义求导

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.0.0-alpha0
sys.version_info(major=3, minor=6, micro=8, releaselevel='final', serial=0)
matplotlib 3.1.0
numpy 1.14.5
pandas 0.24.2
sklearn 0.21.2
tensorflow 2.0.0-alpha0
tensorflow.python.keras.api._v2.keras 2.2.4-tf


### 先回顾近似求导方法   
给定一个偏差epsilon，计算当前点左右各偏差eps的两个点，计算这两个点的斜率就是该点的导数，当eps足够小时，求得的导数也就足够精确  

In [2]:
# 定义函数 f(x) = 3x^2+2x-1
def f(x):
    return 3. * x ** 2 + 2. * x - 1

def approximate_derivative(f, x, eps=1e-3):
    return (f(x + eps) - f(x - eps)) / (2. * eps) 

print(approximate_derivative(f, 1.))

7.999999999999119


In [3]:
# 对多元函数求偏导
# g(x1, x2) = (x1 + 5) * (x2^2)
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

def approximate_gradient(g, x1, x2, eps=1e-3):
    """分别求g对x1，x2的偏导"""
    # 计算x1偏导时，先将x2固定
    dg_x1 = approximate_derivative(lambda x : g(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x : g(x1, x), x2, eps)
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))

(8.999999999993236, 41.999999999994486)


### 使用tf.GradientTape实现求导

每个tape只能实现一次求导操作，需要多次使用，需要将persistent参数设置为True，但是需要手动del tape   

In [4]:
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)
with tf.GradientTape() as tape:
    # 定义目标函数
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1)
print(dz_x1)

# 定义的tape只能时使用一次，否则会抛出异常 
try:
    dz_x1 = tape.gradient(z, x2)
except RuntimeError as ex:
    print(ex)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [5]:
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)

# 手动释放tape
del tape

tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)


可以对两个目标函数同时求导

In [6]:
x = tf.Variable(5.)

with tf.GradientTape(persistent=True) as tape:
    z1 = 3 * x
    z2 = x ** 2
# 同时求z1和z2对x的导数，最后的结果是两个导数之和
print(tape.gradient([z1, z2], x))
del tape

tf.Tensor(13.0, shape=(), dtype=float32)


tf.GradientTape嵌套求二阶导  

In [8]:
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grads, [x1, x2])
              for inner_grad in inner_grads]
print(outer_grads)
del inner_tape
del outer_tape

[[<tf.Tensor: id=293, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=289, shape=(), dtype=float32, numpy=20.0>], [<tf.Tensor: id=358, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=354, shape=(), dtype=float32, numpy=20.0>]]


使用tf.GradientTape模拟梯度下降

In [9]:
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    # 求出导数
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


结合keras使用tf.GradientTape

In [11]:
learning_rate = 0.1
x = tf.Variable(0.0)

opt = keras.optimizers.SGD(learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # optimizers.apply_gradients方法传参是一个列表，列表元素是tuple，每个tuple对应一个目标变量和它的导数
    opt.apply_gradients([(dz_dx, x)])
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
