# Auto Gradient in TF
## Tensors & Variables in TF for Gradient Descent

M. Amintoosi

In [24]:
import tensorflow as tf

<div dir="rtl">
تفاوت Tensor و Variable:

1. tf.Tensor: معمولاً به عنوان ورودی یا خروجی به لایه‌ها و عملگرها مورد استفاده قرار می‌گیرد.

2. tf.Variable: معمولاً برای نگهداری وزن‌ها و بایاس‌ها در شبکه‌های عصبی استفاده می‌شود. یک متغیر قابل تغییر است، ولی فقط با عملگرهای خاصی مانند assign و assign_sub به‌روزرسانی می‌شود.
</div>

## Auto gradient using Tensors

In [25]:
# Differentiate y = 3x^2 + 1 at x = 2, where x is a plain tensor.
x = tf.convert_to_tensor(2.0)
print(x)

with tf.GradientTape() as recorder:
    # x is not a tf.Variable, so the tape is asked explicitly to track it.
    recorder.watch(x)
    y = 3 * x ** 2 + 1
dy_dx = recorder.gradient(y, x)

report = "x = {:-5.2f}, y = {:-5.2f}, dy_dx = {:-5.2f} "
print(report.format(*(t.numpy() for t in (x, y, dy_dx))))


tf.Tensor(2.0, shape=(), dtype=float32)
x =  2.00, y = 13.00, dy_dx = 12.00 


## Auto gradient using Variables

In [26]:
# Same derivative as in the tensor example, but x is now a tf.Variable.
x = tf.Variable(2.0)
print(x)

# No watch() call is made here; the gradient is still obtained for the variable.
with tf.GradientTape() as recorder:
    y = 3 * x ** 2 + 1
dy_dx = recorder.gradient(y, x)

report = "x = {:-5.2f}, y = {:-5.2f}, dy_dx = {:-5.2f} "
print(report.format(*(t.numpy() for t in (x, y, dy_dx))))

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0>
x =  2.00, y = 13.00, dy_dx = 12.00 


تبدیل مقادیر عددی معمولی  به تنسور

In [27]:
# Start from ordinary Python numbers ...
x = 2.0
w = 0.5
y = 3 * x ** 2 + 1

# ... and turn each of them into a scalar tensor.
x, y, w = (tf.convert_to_tensor(float(value)) for value in (x, y, w))

# Bare last expression so the notebook displays the three tensors.
(x, y, w)

(<tf.Tensor: shape=(), dtype=float32, numpy=2.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=13.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.5>)

<div dir="rtl">

## الگوریتم گرادیان کاهشی برای پیدا کردن کمینه‌ی تابع

هدف: پیدا کردن کمینه تابع 
زیر در حول و حوش صفر.
</div>

$$
\large y = 3x^2+1
$$

<div dir = "rtl">
کافیست از یک نقطه
 (ایکس)
 تصادفی شروع و در خلاف جهت مشتق تابع حرکت کنیم
</div>

$$
\large
\begin{aligned}
x = x - \eta \frac{\textrm{d}y}{\textrm{d}x}
\end{aligned}
$$

In [28]:
# Gradient descent on y = 3x^2 + 1: repeatedly step against dy/dx.
x = tf.convert_to_tensor(3.0)  # starting point
step_size = 0.1       # eta in the update rule x = x - eta * dy/dx
stop_below = 1.0001   # stop once y is this close to its minimum value 1
max_steps = 25

for _ in range(max_steps):
    with tf.GradientTape() as recorder:
        recorder.watch(x)
        y = 3 * x ** 2 + 1
    dy_dx = recorder.gradient(y, x)
    print("x = {:-5.2f}, y = {:-5.2f}, dy_dx = {:-5.2f} ".format(x.numpy(), y.numpy(), dy_dx.numpy()))

    # The current state is reported before the stopping test, so the final
    # (converged) iterate is printed as well.
    if y.numpy() < stop_below:
        break
    x = x - step_size * dy_dx

x =  3.00, y = 28.00, dy_dx = 18.00 
x =  1.20, y =  5.32, dy_dx =  7.20 
x =  0.48, y =  1.69, dy_dx =  2.88 
x =  0.19, y =  1.11, dy_dx =  1.15 
x =  0.08, y =  1.02, dy_dx =  0.46 
x =  0.03, y =  1.00, dy_dx =  0.18 
x =  0.01, y =  1.00, dy_dx =  0.07 
x =  0.00, y =  1.00, dy_dx =  0.03 


<div dir="rtl">

 ## الگوریتم گرادیان کاهشی برای پیدا کردن ضرايب بهینه معادله

در مدل زیر 
$x$
و
$y$
معلوم هستند، هدف پیدا کردن  ضریب 
$x$
هست:
</div>

$$
\large y = 2x 
$$

<div dir="rtl">

به فرض
$x\times w$
خروجی مدل ما ( که می‌تواند یک نورون باشد) هست،‌ به دنبال 
$w$ی بهینه هستیم؛
یعنی مقداری که مربع خطای زیر (تابع ضرر) را کمینه کند:

</div>

$$
\large error = wx - y
$$

$$
\large
\begin{aligned}
loss &= {error}^2
= (w x - y)^2
\end{aligned}
$$

<div dir = "rtl">
کافیست از یک نقطه (دبلیو) تصادفی شروع و در خلاف جهت مشتق تابع ضرر حرکت کنیم
</div>

$$
\large
\begin{aligned}
w = w - \eta \frac{\textrm{d}loss}{\textrm{d}{w}}
\end{aligned}
$$

In [29]:
# Known data point: y is produced by the true model y = 2x.
x = tf.convert_to_tensor(3.0)
y = tf.convert_to_tensor(2 * x)

# Fit w in the model w * x by gradient descent on the squared error.
w = tf.convert_to_tensor(0.5)  # starting point
learning_rate = 0.01
for _ in range(20):
    with tf.GradientTape() as recorder:
        recorder.watch(w)
        error = w * x - y
        loss = error ** 2
    d_loss_d_w = recorder.gradient(loss, w)
    print("Loss = {:-5.2f},  w = {:-5.2f} ".format(loss.numpy(), w.numpy()))
    # Tensors are immutable: the update rebinds w to a new tensor.
    w = w - learning_rate * d_loss_d_w



Loss = 20.25,  w =  0.50 
Loss = 13.62,  w =  0.77 
Loss =  9.16,  w =  0.99 
Loss =  6.16,  w =  1.17 
Loss =  4.14,  w =  1.32 
Loss =  2.78,  w =  1.44 
Loss =  1.87,  w =  1.54 
Loss =  1.26,  w =  1.63 
Loss =  0.85,  w =  1.69 
Loss =  0.57,  w =  1.75 
Loss =  0.38,  w =  1.79 
Loss =  0.26,  w =  1.83 
Loss =  0.17,  w =  1.86 
Loss =  0.12,  w =  1.89 
Loss =  0.08,  w =  1.91 
Loss =  0.05,  w =  1.92 
Loss =  0.04,  w =  1.94 
Loss =  0.02,  w =  1.95 
Loss =  0.02,  w =  1.96 
Loss =  0.01,  w =  1.97 


## Using tf.Variable

In [30]:
# Same fit as the tensor-based version, with w stored in a tf.Variable.
# x and y remain constants (built with tf.constant here; the previous cell
# built the same values with tf.convert_to_tensor).
x = tf.constant(3.0)
y = tf.constant(2 * x)
w = tf.Variable(0.5)

learning_rate = 0.01
for _ in range(20):
    # Unlike the tensor version, no tape.watch(w) call is made here.
    with tf.GradientTape() as recorder:
        error = w * x - y
        loss = error ** 2
    d_loss_d_w = recorder.gradient(loss, w)
    print("Loss = {:-5.2f},  w = {:-5.2f} ".format(loss.numpy(), w.numpy()))
    # A Variable is updated in place through assign_sub instead of `w -= ...`.
    w.assign_sub(learning_rate * d_loss_d_w)

Loss = 20.25,  w =  0.50 
Loss = 13.62,  w =  0.77 
Loss =  9.16,  w =  0.99 
Loss =  6.16,  w =  1.17 
Loss =  4.14,  w =  1.32 
Loss =  2.78,  w =  1.44 
Loss =  1.87,  w =  1.54 
Loss =  1.26,  w =  1.63 
Loss =  0.85,  w =  1.69 
Loss =  0.57,  w =  1.75 
Loss =  0.38,  w =  1.79 
Loss =  0.26,  w =  1.83 
Loss =  0.17,  w =  1.86 
Loss =  0.12,  w =  1.89 
Loss =  0.08,  w =  1.91 
Loss =  0.05,  w =  1.92 
Loss =  0.04,  w =  1.94 
Loss =  0.02,  w =  1.95 
Loss =  0.02,  w =  1.96 
Loss =  0.01,  w =  1.97 


# حل مثال کمینه سازی تابع اول با کراس

In [31]:
# Minimise y = 3x^2 + 1 with the Keras SGD optimizer.
x = tf.Variable(3.0)
sgd = tf.keras.optimizers.SGD(learning_rate=0.1)


def loss():
    """Return the objective 3x^2 + 1 evaluated at the current value of x."""
    return 3 * x ** 2 + 1


for _ in range(10):
    # The gradient is taken with an explicit tape and handed to
    # apply_gradients() instead of calling sgd.minimize(loss, var_list=[x]):
    # `minimize` is not part of the Keras 3 optimizer API (TF >= 2.16),
    # whereas apply_gradients() is available on both old and new optimizers.
    with tf.GradientTape() as tape:
        current_loss = loss()
    print("Loss = {:-5.2f},  x = {:-5.2f} ".format(current_loss.numpy(), x.numpy()))
    grads = tape.gradient(current_loss, [x])
    sgd.apply_gradients(zip(grads, [x]))



Loss = 28.00,  x =  3.00 
Loss =  5.32,  x =  1.20 
Loss =  1.69,  x =  0.48 
Loss =  1.11,  x =  0.19 
Loss =  1.02,  x =  0.08 
Loss =  1.00,  x =  0.03 
Loss =  1.00,  x =  0.01 
Loss =  1.00,  x =  0.00 
Loss =  1.00,  x =  0.00 
Loss =  1.00,  x =  0.00 


مشتق‌گیری برحسب ایکس به جای دبلیو

In [32]:
# Here w stays fixed and the loss is differentiated with respect to x.
x = tf.convert_to_tensor(3.0)
# y is computed once from the initial x and is not recomputed in the loop.
y = tf.convert_to_tensor(2 * x)
w = tf.convert_to_tensor(0.5)

step_size = 0.1
for _ in range(100):
    with tf.GradientTape() as recorder:
        recorder.watch(x)
        loss = (w * x - y) ** 2
    d_loss_d_x = recorder.gradient(loss, x)
    print("Loss = {:-5.2f},  x = {:-5.2f} ".format(loss.numpy(), x.numpy()))
    x = x - step_size * d_loss_d_x



Loss = 20.25,  x =  3.00 
Loss = 18.28,  x =  3.45 
Loss = 16.49,  x =  3.88 
Loss = 14.89,  x =  4.28 
Loss = 13.43,  x =  4.67 
Loss = 12.12,  x =  5.04 
Loss = 10.94,  x =  5.38 
Loss =  9.88,  x =  5.71 
Loss =  8.91,  x =  6.03 
Loss =  8.04,  x =  6.33 
Loss =  7.26,  x =  6.61 
Loss =  6.55,  x =  6.88 
Loss =  5.91,  x =  7.14 
Loss =  5.34,  x =  7.38 
Loss =  4.82,  x =  7.61 
Loss =  4.35,  x =  7.83 
Loss =  3.92,  x =  8.04 
Loss =  3.54,  x =  8.24 
Loss =  3.20,  x =  8.43 
Loss =  2.88,  x =  8.60 
Loss =  2.60,  x =  8.77 
Loss =  2.35,  x =  8.93 
Loss =  2.12,  x =  9.09 
Loss =  1.91,  x =  9.23 
Loss =  1.73,  x =  9.37 
Loss =  1.56,  x =  9.50 
Loss =  1.41,  x =  9.63 
Loss =  1.27,  x =  9.75 
Loss =  1.15,  x =  9.86 
Loss =  1.03,  x =  9.97 
Loss =  0.93,  x = 10.07 
Loss =  0.84,  x = 10.16 
Loss =  0.76,  x = 10.26 
Loss =  0.69,  x = 10.34 
Loss =  0.62,  x = 10.43 
Loss =  0.56,  x = 10.51 
Loss =  0.50,  x = 10.58 
Loss =  0.45,  x = 10.65 
Loss =  0.41

کمینه‌سازی فوق با کمینه‌ساز کراس

In [36]:
# Minimise (w*x - y)^2 with respect to x using the Keras SGD optimizer.
x = tf.Variable(3.0)
# y is computed once from the initial x; it is not recomputed when x changes.
y = tf.convert_to_tensor(2 * x)
w = tf.convert_to_tensor(0.5)

sgd = tf.keras.optimizers.SGD(learning_rate=0.1)


def loss():
    """Return the squared error (w*x - y)^2 at the current value of x."""
    return (w * x - y) ** 2


for _ in range(100):
    # Explicit tape + apply_gradients() replaces sgd.minimize(loss, var_list=[x]):
    # `minimize` is not part of the Keras 3 optimizer API (TF >= 2.16),
    # whereas apply_gradients() is available on both old and new optimizers.
    with tf.GradientTape() as tape:
        current_loss = loss()
    print(
        "Loss = {:-5.2f},  x = {:-5.2f},  y = {:-5.2f},  w = {:-5.2f} ".format(
            current_loss.numpy(), x.numpy(), y.numpy(), w.numpy()
        )
    )
    grads = tape.gradient(current_loss, [x])
    sgd.apply_gradients(zip(grads, [x]))



Loss = 20.25,  x =  3.00,  y =  6.00,  w =  0.50 
Loss = 18.28,  x =  3.45,  y =  6.00,  w =  0.50 
Loss = 16.49,  x =  3.88,  y =  6.00,  w =  0.50 
Loss = 14.89,  x =  4.28,  y =  6.00,  w =  0.50 
Loss = 13.43,  x =  4.67,  y =  6.00,  w =  0.50 
Loss = 12.12,  x =  5.04,  y =  6.00,  w =  0.50 
Loss = 10.94,  x =  5.38,  y =  6.00,  w =  0.50 
Loss =  9.88,  x =  5.71,  y =  6.00,  w =  0.50 
Loss =  8.91,  x =  6.03,  y =  6.00,  w =  0.50 
Loss =  8.04,  x =  6.33,  y =  6.00,  w =  0.50 
Loss =  7.26,  x =  6.61,  y =  6.00,  w =  0.50 
Loss =  6.55,  x =  6.88,  y =  6.00,  w =  0.50 
Loss =  5.91,  x =  7.14,  y =  6.00,  w =  0.50 
Loss =  5.34,  x =  7.38,  y =  6.00,  w =  0.50 
Loss =  4.82,  x =  7.61,  y =  6.00,  w =  0.50 
Loss =  4.35,  x =  7.83,  y =  6.00,  w =  0.50 
Loss =  3.92,  x =  8.04,  y =  6.00,  w =  0.50 
Loss =  3.54,  x =  8.24,  y =  6.00,  w =  0.50 
Loss =  3.20,  x =  8.43,  y =  6.00,  w =  0.50 
Loss =  2.88,  x =  8.60,  y =  6.00,  w =  0.50 
