In [1]:
import matplotlib as mpl #画图用的库
import matplotlib.pyplot as plt
#下面这一句是为了可以在notebook中画图
%matplotlib inline
import numpy as np
import sklearn   #机器学习算法库
import pandas as pd #处理数据的库   
import os
import sys
import time
import tensorflow as tf
 
from tensorflow import keras   #使用tensorflow中的keras
#import keras #单纯的使用keras
 
# Report the interpreter and library versions this notebook was run with,
# so the recorded outputs below can be matched to an environment.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, sklearn, pd, tf, keras):
    print(f"{module.__name__} {module.__version__}")

2.0.0
sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)
matplotlib 3.1.2
numpy 1.18.0
sklearn 0.21.3
pandas 0.25.3
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


In [2]:
############ Differentiating a single-variable function ############
def f(x):
    """Evaluate the quadratic 3*x**2 + 2*x - 1 at ``x``.

    Only arithmetic operators are applied to ``x``, so it may be a plain
    Python number or any object that supports ``*``, ``**``, ``+`` and ``-``.
    """
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1

# Numerical (approximate) differentiation
def approximate_derivative(f, x, eps = 1e-3):
    """Estimate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: callable taking a single argument.
        x: point at which the derivative is estimated.
        eps: half-width of the interval around ``x`` used for the difference.

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)``.
    """
    ahead = f(x + eps)
    behind = f(x - eps)
    return (ahead - behind) / (2. * eps)

# Analytically f'(x) = 6x + 2, so the estimate at x = 1 should be close to 8.
print(approximate_derivative(f, 1.))

7.999999999999119


In [3]:
############ Differentiating a two-variable function ############
def g(x1, x2):
    """Evaluate ``(x1 + 5) * x2**2``.

    Only arithmetic operators are applied to the arguments, so they may be
    plain numbers or any objects supporting ``+``, ``**`` and ``*``.
    """
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared
def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of a two-argument function.

    Each partial is obtained by freezing the other argument and handing the
    resulting one-argument function to ``approximate_derivative``.

    Args:
        g: callable taking two arguments.
        x1: value of the first argument at the evaluation point.
        x2: value of the second argument at the evaluation point.
        eps: step passed through to ``approximate_derivative``.

    Returns:
        Tuple ``(dg/dx1, dg/dx2)`` evaluated at ``(x1, x2)``.
    """
    def g_along_x1(x):
        # x2 is held fixed at the evaluation point
        return g(x, x2)

    def g_along_x2(x):
        # x1 is held fixed at the evaluation point
        return g(x1, x)

    return (
        approximate_derivative(g_along_x1, x1, eps),
        approximate_derivative(g_along_x2, x2, eps),
    )

# Analytically dg/dx1 = x2**2 and dg/dx2 = 2*(x1 + 5)*x2, i.e. (9, 42) at (2, 3).
print(approximate_gradient(g, 2., 3.))
    

(8.999999999993236, 41.999999999994486)


In [8]:
##### Gradients w.r.t. variables (1): a non-persistent tape #####

x1, x2 = tf.Variable(2.0), tf.Variable(3.0)

# Record the forward computation so it can be differentiated afterwards.
with tf.GradientTape() as tape:
    z = g(x1, x2)

# Partial derivative of z with respect to x1.
dz_x1 = tape.gradient(z, x1)
print(dz_x1)

# The tape was created without persistent=True, so it can serve only one
# gradient() call; the second call raises RuntimeError, which is caught and
# printed here on purpose.
try:
    dz_x2 = tape.gradient(z, x2)
except RuntimeError as err:
    print(err)

# Record the computation again on a fresh tape to obtain dz/dx2.
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x2 = tape.gradient(z, x2)
print(dz_x2)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.
tf.Tensor(42.0, shape=(), dtype=float32)


In [11]:
##### Gradients w.r.t. variables (2): a persistent tape #####

x1, x2 = tf.Variable(2.0), tf.Variable(3.0)

# persistent=True allows gradient() to be called more than once on this tape.
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

# One gradient() call per variable, in the order x1 then x2.
dz_x1, dz_x2 = [tape.gradient(z, source) for source in (x1, x2)]
print(dz_x1, dz_x2)

# Drop the reference explicitly so the persistent tape can be released.
del tape

tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)


In [12]:
##### Gradients w.r.t. variables (3): several sources in one call #####

x1, x2 = tf.Variable(2.0), tf.Variable(3.0)

# Only one gradient() call is made below, so a non-persistent tape suffices.
with tf.GradientTape() as tape:
    z = g(x1, x2)

# Passing a list of sources returns a list of partials: [dz/dx1, dz/dx2].
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=288, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=294, shape=(), dtype=float32, numpy=42.0>]


In [13]:
# Gradients w.r.t. constants
x1, x2 = tf.constant(2.0), tf.constant(3.0)

# Only one gradient() call is made below, so a non-persistent tape suffices.
with tf.GradientTape() as tape:
    # Constant tensors are watched explicitly here, before they are used,
    # so the tape records the operations applied to them.
    tape.watch([x1, x2])
    z = g(x1, x2)

# Two sources are passed, so the result is a list of two partials.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=303, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=309, shape=(), dtype=float32, numpy=42.0>]


In [15]:
# Gradient of several targets with respect to one variable.
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1, z2 = 3 * x, x ** 2
# With a list of targets, gradient() returns the sum of their derivatives:
# d(3x)/dx + d(x**2)/dx = 3 + 2*5 = 13 at x = 5.
tape.gradient([z1, z2], x)

<tf.Tensor: id=355, shape=(), dtype=float32, numpy=13.0>

In [17]:
# Second-order derivatives of g with nested tapes.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# The outer tape records the gradient computation performed through the inner
# tape, which makes the first-order partials themselves differentiable.
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # First-order partials [dz/dx1, dz/dx2], computed while outer_tape records.
    inner_grads = inner_tape.gradient(z, [x1, x2])
# outer_tape.gradient() is called once per first-order partial, which is why
# the outer tape has to be persistent.
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2]) for inner_grad in inner_grads]

# Row i holds the partials of inner_grads[i] with respect to [x1, x2].
# An entry is None when that first-order partial does not depend on the
# variable (dz/dx1 = x2**2 contains no x1 term).
print(outer_grads)

# Release the persistent tapes. Previously the gradient results were deleted
# instead, which left both tapes referenced.
del inner_tape
del outer_tape

[[None, <tf.Tensor: id=444, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=455, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=453, shape=(), dtype=float32, numpy=14.0>]]


In [19]:
# Gradient descent by hand on f(x) = 3x**2 + 2x - 1, whose minimum is at x = -1/3.
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(100):
    # A fresh tape records the forward pass on every step; each tape serves
    # exactly one gradient() call.
    with tf.GradientTape() as tape:
        z=f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx) # in-place update: x <- x - learning_rate * dz_dx
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


In [20]:
# Gradient descent on f again, this time letting a Keras optimizer apply the update.
learning_rate = 0.1
x = tf.Variable(0.0)

# Use the canonical `learning_rate` keyword; `lr` is only a deprecated alias
# and is not accepted by newer Keras releases.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(100):
    # A fresh tape records the forward pass on every step.
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients takes (gradient, variable) pairs and updates x in place.
    optimizer.apply_gradients([(dz_dx, x)])

print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
