## 一，利用梯度磁带求导数

In [3]:
import numpy as np 
import tensorflow as tf 

# Evaluate dy/dx for y = a*x^2 + b*x + c at x = 0.
# x is a tf.Variable, so the tape tracks it without an explicit watch() call.
x = tf.Variable(0.0)
a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

with tf.GradientTape() as tape:
    y = a * tf.pow(x, 2) + b * x + c

# The bare last expression makes the gradient tensor the cell's displayed output.
dy_dx = tape.gradient(y, x)
dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=-2.0>

In [5]:
# Gradients with respect to the constant coefficients as well as x.
# The constants are registered with the tape explicitly via watch() before y is built.
with tf.GradientTape() as tape:
    tape.watch(a)
    tape.watch(b)
    tape.watch(c)
    y = a * tf.pow(x, 2) + b * x + c

# tape.gradient returns one gradient per source, in the order the sources are listed.
dy_dx, dy_da, dy_db, dy_dc = tape.gradient(y, [x, a, b, c])
(dy_dx, dy_da, dy_db, dy_dc)

(<tf.Tensor: shape=(), dtype=float32, numpy=-2.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>)

In [6]:
# Second derivative d2y/dx2 using two nested tapes.
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        y = a * tf.pow(x, 2) + b * x + c
    # Taken inside the outer tape's context so the first-derivative computation
    # is itself recorded and can be differentiated again.
    dy_dx = inner_tape.gradient(y, x)

dy2_dx2 = outer_tape.gradient(dy_dx, x)
dy2_dx2

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

In [11]:
@tf.function
def f(x):
    """Evaluate y = x**2 - 2*x + 1 and its derivative at ``x``.

    Args:
        x: value at which to evaluate; it is cast to ``tf.float32`` first.

    Returns:
        A ``(dy_dx, y)`` tuple of tensors.
    """
    x = tf.cast(x, tf.float32)
    quad, lin, const = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

    with tf.GradientTape() as g:
        # x is a plain tensor here, so it is registered with the tape explicitly.
        g.watch(x)
        y = quad * x**2 + lin * x + const

    return g.gradient(y, x), y

f(tf.constant(0.0)), f(tf.constant(1.0))


((<tf.Tensor: shape=(), dtype=float32, numpy=-2.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=1.0>),
 (<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=0.0>))

## 二，利用梯度磁带和优化器求最小值

In [16]:
# Minimise y = a*x^2 + b*x + c: compute the gradient with a tape and let the
# optimizer apply it to x, repeated for 1000 steps.
x = tf.Variable(0.0)
a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

for _ in range(1000):
    with tf.GradientTape() as tape:
        y = a * x**2 + b * x + c
    dy_dx = tape.gradient(y, x)
    optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])

# y holds the value computed in the final iteration, before that iteration's update of x.
(y, x)

(<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.99999857>)

In [24]:
x = tf.Variable(0.0)

def f():
    """Return a*x**2 + b*x + c (a=1, b=-2, c=1) at the current value of the global ``x``."""
    a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
    return a * x**2 + b * x + c
    
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    # minimize() takes the loss as a callable and the list of variables to update.
    optimizer.minimize(f, [x])

# Evaluate f() explicitly for display. The `y` assigned inside f is local to f,
# so a bare `y` here would show a leftover value from an earlier cell
# (or raise NameError on a fresh kernel).
x, f()

(<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.99999857>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)

In [29]:
x = tf.Variable(0.0)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def minimizef():
    """Run 1000 SGD steps on y = x**2 - 2*x + 1 over the global variable ``x``.

    Returns:
        A ``(x, y)`` tuple: the value of ``x`` after the loop and the loss
        re-evaluated at that value.
    """
    a = tf.constant(1.0)
    b = tf.constant(-2.0)
    c = tf.constant(1.0)

    # Use tf.range rather than Python range: inside tf.function a Python range is
    # unrolled at trace time (1000 copies of the step in the graph), whereas
    # tf.range is converted by AutoGraph into a single graph loop.
    for _ in tf.range(1000):
        with tf.GradientTape() as tape:
            # x is a tf.Variable, so no explicit tape.watch(x) is needed.
            y = a*tf.pow(x, 2) + b*x + c
        dy_dx = tape.gradient(y, x)
        optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])

    # Re-evaluate the loss at the final x so the returned y matches the returned x.
    y = a*tf.pow(x, 2) + b*x + c
    return x, y

minimizef()

(<tf.Tensor: shape=(), dtype=float32, numpy=0.9999985>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)

In [30]:
x = tf.Variable(0.0)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def f():
    """Return a*x**2 + b*x + c (a=1, b=-2, c=1) at the current value of the global ``x``."""
    a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
    return a * x**2 + b * x + c

@tf.function
def train(epochs):
    """Run ``epochs`` optimizer steps minimising ``f`` over the global variable ``x``.

    Args:
        epochs: number of ``optimizer.minimize`` steps to perform.

    Returns:
        A ``(x, y)`` tuple: the value of ``x`` after training and ``f()``
        evaluated at that value.
    """
    # tf.range keeps this a single graph loop; a Python range would be unrolled
    # into `epochs` copies of the step when the function is traced.
    for _ in tf.range(epochs):
        optimizer.minimize(f, [x])
    # Compute the loss here instead of reading a global `y`, which this function
    # never assigns and which would only exist as a leftover from another cell.
    return x, f()

train(1000)

(<tf.Tensor: shape=(), dtype=float32, numpy=0.9999985>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)