In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import os
import sys
import time
import sklearn
import tensorflow as tf

from tensorflow import keras

# Report the interpreter and library versions this notebook was executed with.
print(tf.__version__)
print(sys.version_info)
# `model` iterates over the imported packages (modules), not over ML models.
for model in (mpl, np, pd, sklearn, keras):
    print(f"{model.__name__} {model.__version__}")

2.0.0
sys.version_info(major=3, minor=7, micro=7, releaselevel='final', serial=0)
matplotlib 3.2.2
numpy 1.19.0
pandas 1.0.5
sklearn 0.23.1
tensorflow_core.keras 2.2.4-tf


In [3]:
# Hand-rolled differentiation.
# Numerical approximation: use the slope between two points x1 and x2 that
# are sufficiently close to each other.
def f(x):
    """Evaluate the quadratic 3x^2 + 2x - 1 at ``x``."""
    squared_term = 3. * x ** 2
    linear_term = 2. * x
    return squared_term + linear_term - 1

def approximae_derivate(f, x, eps = 1e-3):
    """Estimate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable taking a single argument.
        x: Point at which the slope is estimated.
        eps: Half-width of the sampling interval around ``x``.

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)``.
    """
    forward = f(x + eps)
    backward = f(x - eps)
    return (forward - backward) / (2. * eps)

# Expect a value close to 8: f'(x) = 6x + 2, so f'(1) = 8.
print(approximae_derivate(f, 1.))

7.999999999999119


In [4]:
# Hand-rolled differentiation for a function of two variables.
def g(x1, x2):
    """Evaluate (x1 + 5) * x2^2."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared

def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of a two-argument function.

    Each partial is obtained by holding the other argument fixed and passing
    the resulting one-argument function to ``approximae_derivate``.

    Args:
        g: Callable taking two arguments.
        x1: Value of the first argument at the evaluation point.
        x2: Value of the second argument at the evaluation point.
        eps: Step forwarded to ``approximae_derivate``.

    Returns:
        Tuple ``(dg/dx1, dg/dx2)`` evaluated at ``(x1, x2)``.
    """
    def along_first(a):
        # x2 is held fixed; only the first argument varies.
        return g(a, x2)

    def along_second(b):
        # x1 is held fixed; only the second argument varies.
        return g(x1, b)

    return (
        approximae_derivate(along_first, x1, eps),
        approximae_derivate(along_second, x2, eps),
    )

# Expect roughly (9, 42): dg/dx1 = x2^2 = 9 and dg/dx2 = 2 * (x1 + 5) * x2 = 42.
print(approximate_gradient(g, 2., 3.))

(8.999999999993236, 41.999999999994486)


In [5]:
# Differentiation with tf.GradientTape (non-persistent by default).
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(target=z, sources=x1)
print(dz_x1)
try:
    # A non-persistent tape supports only one gradient() call; this second
    # call is expected to raise RuntimeError, which is printed below.
    dz_x2 = tape.gradient(target=z, sources=x2)
    print(dz_x2)
except RuntimeError as err:
    print(err)


tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [6]:
# tf.GradientTape with persistent=True: the tape can be queried repeatedly.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

# First gradient() call on the tape.
dz_x1 = tape.gradient(target=z, sources=x1)
print(dz_x1)
# Second gradient() call on the same tape.
dz_x2 = tape.gradient(target=z, sources=x2)
print(dz_x2)

# Drop the reference to the persistent tape once it is no longer needed.
del tape

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [7]:
# Both inputs are tf.Variable objects here.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
# Passing a list of sources yields both partial derivatives in a single call.
dz_x1x2 = tape.gradient(target=z, sources=[x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]


In [8]:
# Both inputs are constants here rather than variables.
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    # Because the inputs are constants, they are registered with the tape
    # explicitly via watch() before the computation.
    tape.watch([x1, x2])
    z = g(x1, x2)

dz_x1x2 = tape.gradient(target=z, sources=[x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=104, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=110, shape=(), dtype=float32, numpy=42.0>]


In [9]:
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2

# Several targets with respect to one variable: the single value printed is
# the sum of the individual derivatives, 3 + 2 * 5 = 13.
dz1z2_x = tape.gradient(target=[z1, z2], sources=x)
print(dz1z2_x)

tf.Tensor(13.0, shape=(), dtype=float32)


In [10]:
# Second-order derivatives via nested tapes.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # First-order partials; computed inside the outer tape's context so the
    # outer tape can differentiate them again.
    inner_grads = inner_tape.gradient(target=z, sources=[x1, x2])
# Second order: differentiate each first-order partial w.r.t. both variables.
# dz/dx1 = x2^2 does not involve x1, so that entry of the result is None.
outer_grads = [
    outer_tape.gradient(target=inner_grad, sources=[x1, x2])
    for inner_grad in inner_grads
]
print(outer_grads)

# Drop both persistent tapes to free what they hold.
del outer_tape, inner_tape

[[None, <tf.Tensor: id=170, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=181, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=179, shape=(), dtype=float32, numpy=14.0>]]


In [11]:
# Gradient descent by hand: repeatedly move x against the derivative of f.
learning_rate = 0.1
x = tf.Variable(0.0)

for i in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_x = tape.gradient(target=z, sources=x)
    # In-place update: x <- x - learning_rate * dz_x.
    x.assign_sub(learning_rate * dz_x)
# f'(x) = 6x + 2 vanishes at x = -1/3, which is where x should end up.
print(x)


<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


In [12]:
# Minimise f again, this time letting a Keras SGD optimizer apply the update.
learning_rate = 0.1
x = tf.Variable(0.0)
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for i in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_x = tape.gradient(target=z, sources=x)
    # apply_gradients takes a list of (gradient, variable) pairs.
    optimizer.apply_gradients(grads_and_vars=[(dz_x, x)])
print(x)


<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
