# Differentiation & Gradient

In [30]:
import numpy as np

import tensorflow as tf

## Tensor Basics

### Tensor Variable
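- Weights (the kernel) are initialized with random values; the bias is initialized to zero
- The shape is inferred from the initial value. `tf.Variable` will not reshape a flat vector into a matrix: an explicit `shape` must be compatible with the initial value's own shape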

In [2]:
# One Dense unit over a single input feature; the layer creates its own kernel and bias
model = tf.keras.Sequential(
    layers=[tf.keras.layers.Dense(units=1, input_shape=(1,))]
)

# Inspect the variables created by the layer
model.variables

[<tf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[0.76252973]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

In [3]:
tf.Variable(initial_value=[1,2])

<tf.Variable 'Variable:0' shape=(2,) dtype=int32, numpy=array([1, 2], dtype=int32)>

In [4]:
tf.Variable(initial_value=[1,2], dtype=tf.float32)

<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([1., 2.], dtype=float32)>

In [5]:
# A flat list of 4 values is not reshaped to (2, 2): the explicit `shape` must be
# compatible with the initial value's own shape, so this construction raises.
# The error is the point of the demo, so catch and display it rather than
# letting it abort a full notebook run.
try:
    tf.Variable(initial_value=[1, 2, 3, 4], dtype=tf.float32, shape=(2, 2))
except ValueError as err:
    print(f'{type(err).__name__}: {err}')

ValueError: In this `tf.Variable` creation, the initial value's shape ((4,)) is not compatible with the explicitly supplied `shape` argument ((2, 2)).

In [None]:
tf.Variable(initial_value=[[1, 2], [3, 4]], dtype=tf.float32)

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>

### Tensor Constant

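- A constant tensor cannot be updated after it is created
- Unlike `tf.Variable`, `tf.constant` accepts a `shape` argument that reshapes the given value (a scalar is expanded to fill the shape)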

In [None]:
tf.constant([1, 2, 3])

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>

In [None]:
tf.constant([[1, 2], [3, 4]], shape=(2, 2))

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4]], dtype=int32)>

In [None]:
tf.constant(-1, shape=(2, 2))

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[-1, -1],
       [-1, -1]], dtype=int32)>

### Tensor Operations

In [6]:
x = np.arange(25)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [7]:
tf.constant(x)

<tf.Tensor: shape=(25,), dtype=int64, numpy=
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])>

In [8]:
tf.square(x)

<tf.Tensor: shape=(25,), dtype=int64, numpy=
array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144,
       169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576])>

In [9]:
tf.reshape(x, (5, 5))

<tf.Tensor: shape=(5, 5), dtype=int64, numpy=
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])>

In [10]:
tf.cast(x, tf.float32)

<tf.Tensor: shape=(25,), dtype=float32, numpy=
array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.],
      dtype=float32)>

In [11]:
tf.multiply(
    tf.constant(x),
    tf.constant(x)
)

<tf.Tensor: shape=(25,), dtype=int64, numpy=
array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144,
       169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576])>

In [12]:
tf.constant(x) + tf.constant(x)

<tf.Tensor: shape=(25,), dtype=int64, numpy=
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38, 40, 42, 44, 46, 48])>

### Eager-Based

- TensorFlow supports both eager and graph execution modes
- Eager mode is `on` by default
- Eager mode can be switched `off`, in which case code is executed in graph mode

#### Tensor Evaluation

- Because eager mode is the default, tensors are evaluated immediately, so their values can be used directly (for example inside a `print` statement)

In [13]:
print(f'Hello {tf.square(5)}')

Hello 25


#### Broadcast values

In [14]:
tf.add(
    tf.constant([1, 2, 3, 4]),
    1
)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([2, 3, 4, 5], dtype=int32)>

#### Overload Operators

In [15]:
tf.constant([1, 2, 3]) ** 2

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 4, 9], dtype=int32)>

#### Compatibility with NumPy

In [16]:
np.multiply(
    tf.constant(5),
    tf.constant(5)
)

25

In [17]:
# Start from a 3x3 NumPy array of ones
ndarray = np.ones((3, 3))
print(ndarray)

# TensorFlow ops accept the NumPy array directly and return a tf.Tensor
tensor = tf.multiply(ndarray, 5)
print(tensor)

# .numpy() converts the tensor back into a NumPy array
tensor.numpy()

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
tf.Tensor(
[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]], shape=(3, 3), dtype=float64)


array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

### Evaluating Variables

In [18]:
tf.Variable(1) + 1

<tf.Tensor: shape=(), dtype=int32, numpy=2>

In [19]:
tf.Variable(1).assign_add(3)

<tf.Variable 'UnreadVariable' shape=() dtype=int32, numpy=4>

In [20]:
# Increment a fresh variable in place, then read the updated value back as a NumPy value
(
    tf.Variable(1)
    .assign_add(1)
    .read_value()
    .numpy()
)

2

In [21]:
class MyLayer(tf.keras.layers.Layer):
    """Minimal Keras layer holding one variable directly and three more in a list.

    It exists to show which variables a layer reports through `variables`:
    both the one assigned as a plain attribute and the ones nested in a list.
    """

    def __init__(self):
        super().__init__()
        # Scalar variable stored as a direct attribute
        self.my_var = tf.Variable(10)
        # Variables with values 0, 1, 2 stored inside a list attribute
        self.my_list = [tf.Variable(value) for value in range(3)]

m = MyLayer()

[variable.numpy() for variable in m.variables]

[10, 0, 1, 2]

### Change Data Type

In [23]:
tf.cast(tf.constant(5), dtype=tf.float32).dtype

tf.float32

## Gradient Tape

In [25]:
# Toy regression data sampled from the line y = 2x - 1
x = np.arange(start=-1, stop=5)
y = x * 2 - 1

y

array([-3, -1,  1,  3,  5,  7])

In [34]:
# Trainable parameters: weight and bias of the line, each started from a random float.
# NOTE(review): no random seed is set, so the starting point (and the logged losses) vary between runs.

w = tf.Variable(initial_value=np.random.random(), trainable=True)
b = tf.Variable(initial_value=np.random.random(), trainable=True)

# Loss Function

def simple_loss(y_pred, y_true):
    """Return the element-wise absolute difference between prediction and target."""
    residual = y_pred - y_true
    return tf.abs(residual)

# Learning rate: factor applied to each gradient before it is subtracted from a parameter
LEARNING_RATE = 1e-3

# Train on data
def fit_data(x, y_true):
    """Run one gradient-descent step on the global parameters `w` and `b`.

    Args:
        x: Input values fed to the linear model `w * x + b`.
        y_true: Target values the prediction is compared against.

    Returns:
        The loss returned by `simple_loss` for this step, computed before
        the parameters are updated.
    """
    # Both gradients are requested in a single gradient() call below, so a
    # one-shot tape is enough; persistent=True is not needed.
    with tf.GradientTape() as tape:
        # Make prediction
        y_pred = w * x + b

        # Compute Loss
        reg_loss = simple_loss(y_pred, y_true)

    # Calculate gradients for both parameters in one backward pass.
    # NOTE: when the target is not a scalar, tape.gradient differentiates
    # the sum of its elements.
    w_gradient, b_gradient = tape.gradient(reg_loss, [w, b])

    # Update variables in place: step against the gradient
    w.assign_sub(LEARNING_RATE * w_gradient)
    b.assign_sub(LEARNING_RATE * b_gradient)

    return reg_loss

# Training loop: 1000 update steps, logging the loss on every 100th step
for i in range(1000):
    loss = fit_data(x, y)

    if (i + 1) % 100 == 0:
        print(f'Loss {loss} @ iteration {i}')

# Weight & bias after training
print(f'Weight {w} Bias {b}')

Loss [2.2937498 1.6268195 0.9598893 0.2929592 0.373971  1.0409012] @ iteration 99
Loss [1.7777526  1.33482    0.8918874  0.44895482 0.00602198 0.43691063] @ iteration 199
Loss [1.3377558  1.00482    0.6718842  0.33894825 0.00601244 0.32692337] @ iteration 299
Loss [0.89775896 0.6748199  0.45188093 0.22894192 0.0060029  0.21693611] @ iteration 399
Loss [0.457762   0.34481972 0.23187757 0.11893535 0.00599289 0.10694933] @ iteration 499
Loss [0.02176547 0.0148201  0.00787473 0.00092936 0.00601578 0.01296139] @ iteration 599
Loss [0.00576544 0.00117993 0.00812519 0.01507044 0.02201557 0.02896118] @ iteration 699
Loss [0.00576544 0.00117993 0.00812519 0.01507044 0.02201557 0.02896118] @ iteration 799
Loss [0.00576544 0.00117993 0.00812519 0.01507044 0.02201557 0.02896118] @ iteration 899
Loss [0.00576544 0.00117993 0.00812519 0.01507044 0.02201557 0.02896118] @ iteration 999
Weight <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0040548> Bias <tf.Variable 'Variable:0' shape=() dty

### `persistent=True` & Higher Order Gradients

In [47]:
x = tf.Variable(3, dtype=tf.float32)

# persistent=True allows gradient() to be called more than once on the same tape.
# No tape.watch(x) is needed: trainable tf.Variables are watched automatically.
with tf.GradientTape(persistent=True) as tape:
    y = x * x  # y = x^2
    z = y * y  # z = y^2 = x^4

dz_dx = tape.gradient(z, x)  # 4 * x^3
dy_dx = tape.gradient(y, x)  # 2 * x

print(dz_dx, dy_dx)

# A persistent tape holds its resources until it is garbage collected,
# so drop the reference explicitly once the gradients have been read.
del tape

tf.Tensor(108.0, shape=(), dtype=float32) tf.Tensor(6.0, shape=(), dtype=float32)


$ y = x^3 $

$ \frac{\partial y}{\partial x} = 3x^2 $

$ \frac{\partial^2 y}{\partial x^2} = 6x $

In [48]:
x = tf.Variable(3, dtype=tf.float32)

# Nest two tapes: the inner tape yields dy/dx, and because that gradient is
# computed while the outer tape is still recording, the outer tape can
# differentiate it again to obtain the second derivative.
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        y = x * x * x
    dy_dx = inner_tape.gradient(y, x)
d2y_dx2 = outer_tape.gradient(dy_dx, x)

(dy_dx, d2y_dx2)

(<tf.Tensor: shape=(), dtype=float32, numpy=27.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=18.0>)