<a href="https://colab.research.google.com/github/dbaweja/dbaweja.github.io/blob/main/TensorFlow_Advacned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np

### 5. Reshaping and Broadcasting

#### 5.1. Reshaping

In [None]:
# tf.reshape lays the same elements out (in row-major order) in a new shape;
# the total number of elements has to stay the same.
x = tf.Variable([[-3, 2, 4], [2, 0, -2], [5, 2, 3]], dtype=tf.float32)
print("Initial tensor:", x)

# A (3, 3) tensor holds 9 values, so any shape with 9 elements is a valid target,
# e.g. (9,), (9, 1), (1, 9) or (1, 3, 3). Here we turn it into a column of 9 rows.
x_reshaped = tf.reshape(x, (9, 1))
print("Reshaped tensor:", x_reshaped)

Initial tensor: <tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[-3.,  2.,  4.],
       [ 2.,  0., -2.],
       [ 5.,  2.,  3.]], dtype=float32)>
Reshaped tensor: tf.Tensor(
[[-3.]
 [ 2.]
 [ 4.]
 [ 2.]
 [ 0.]
 [-2.]
 [ 5.]
 [ 2.]
 [ 3.]], shape=(9, 1), dtype=float32)


In [None]:
# Asking for a shape whose element count differs from the input's makes tf.reshape fail.
# The failure is the point of this cell, so it is caught and its message displayed.
try:
    x_reshape = tf.reshape(x, (3, 4))  # 12 slots requested, but x only has 9 values
except Exception as err:
    print("Exception raised while reshaping:\n", str(err))

Exception raised while reshaping:
 Input to reshape is a tensor with 9 values, but the requested shape has 12 [Op:Reshape]


#### 5.2. Broadcasting

##### Element-wise mathematical operations on tensors require compatible shapes; otherwise an exception is raised. However, broadcasting can resolve shape incompatibilities in some situations. Let us see an example.

In [None]:
# A (3, 3) matrix and a one-element vector: the shapes differ, but they can be broadcast
x = tf.Variable([[3, 2, 4], [2, 0, 2], [4, 2, 3]], dtype=tf.float32)
k = tf.Variable([2.1])

print("Shape of x:", x.shape)
print("Shape of k:", k.shape)

# Element-wise multiplication by broadcasting, which repeats the only element in k
# along both axes of x to resolve the shape incompatibility
y = x * k
print("Shape of y:", y.shape)  # the label promises a shape, so print the shape itself
print("y:", y)                 # and show the resulting values separately

Shape of x: (3, 3)
Shape of k: (1,)
Shape of y: tf.Tensor(
[[6.2999997 4.2       8.4      ]
 [4.2       0.        4.2      ]
 [8.4       4.2       6.2999997]], shape=(3, 3), dtype=float32)


In [None]:
# A second example: a (3,) vector multiplied with the (3, 3) matrix x from the previous cell
k = tf.Variable([2.1, 3.0, -1.4])

# Broadcasting treats k as a single row and repeats it for each of the three rows of x,
# so column j of x ends up scaled by k[j]
y = tf.multiply(x, k)
print("y:", y)

y: tf.Tensor(
[[ 6.2999997  6.        -5.6      ]
 [ 4.2        0.        -2.8      ]
 [ 8.4        6.        -4.2      ]], shape=(3, 3), dtype=float32)


In [None]:
# Broadcasting a rank-2 tensor so it can be multiplied with a rank-3 tensor

# Rank-2 tensor of shape (3, 3): this is the operand that gets broadcast
k = tf.Variable([[2.1, 3.0, -1.4],
                 [1.1, 3.2, -1.1],
                 [0.67, 2.1, -0.03]])

# Rank-3 tensor of shape (2, 3, 3)
x = tf.Variable(
    [
        [[3, 2, 4], [2, 0, 2], [4, 2, 3]],
        [[0, -2, 1], [4, 0, 2], [6, -2, 3]],
    ],
    dtype=tf.float32,
)

# Shapes are aligned from the right: k is treated as (1, 3, 3) and repeated along the
# new leading axis, so each of the two (3, 3) slices of x is multiplied element-wise by k.
y = tf.multiply(x, k)
print("y:", y)

y: tf.Tensor(
[[[ 6.2999997  6.        -5.6      ]
  [ 2.2        0.        -2.2      ]
  [ 2.68       4.2       -0.09     ]]

 [[ 0.        -6.        -1.4      ]
  [ 4.4        0.        -2.2      ]
  [ 4.02      -4.2       -0.09     ]]], shape=(2, 3, 3), dtype=float32)


In [None]:
# Alternatively, we can add the new dimension explicitly before multiplying.
# The expanded tensor gets its own name instead of overwriting k, so re-running this
# cell always starts from the rank-2 k and does not keep stacking extra axes onto it.
print("Old shape:", k.shape)
k_expanded = tf.expand_dims(k, axis=0)  # insert a new leading axis to create a rank 3 tensor
print("New shape:", k_expanded.shape)

y = x * k_expanded
print("y:", y)

Old shape: (3, 3)
New shape: (1, 3, 3)
y: tf.Tensor(
[[[ 6.2999997  6.        -5.6      ]
  [ 2.2        0.        -2.2      ]
  [ 2.68       4.2       -0.09     ]]

 [[ 0.        -6.        -1.4      ]
  [ 4.4        0.        -2.2      ]
  [ 4.02      -4.2       -0.09     ]]], shape=(2, 3, 3), dtype=float32)


### 6. Automatic Differentiation

#### Gradients (partial derivatives) are used to train linear models and neural networks with gradient-based optimization.
#### TensorFlow's tf.GradientTape records computations and then computes gradients using reverse-mode differentiation.
#### Let us see how it works.

####  
#### 6.1 How to use tf.GradientTape?

In [None]:
# Variable with respect to which the gradient is required
x = tf.Variable(3.0)

# Computations performed inside the tf.GradientTape context are recorded on the tape
with tf.GradientTape() as tape:
    y = 3.0 * tf.math.log(x)

# Differentiate the recorded computation: for y = 3*log(x), dy/dx = 3/x, i.e. 1.0 at x = 3
dy_dx = tape.gradient(target=y, sources=x)
# dy_dx is a scalar tensor; dy_dx.numpy() would return the plain numeric value instead
print("Gradient dy_dx:", dy_dx)

Gradient dy_dx: tf.Tensor(1.0, shape=(), dtype=float32)


In [None]:
# For a function of several variables, the tape returns one partial derivative per variable
x = tf.Variable(3.0)
y = tf.Variable(4.0)

with tf.GradientTape() as tape:
    r = x**2 + y**2

# target is the function to differentiate; sources is the list of variables.
# The gradients come back in the same order as the sources.
dr_dx, dr_dy = tape.gradient(target=r, sources=[x, y])
print("dr_dx:", dr_dx.numpy())  # ∂r/∂x = 2x
print("dr_dy:", dr_dy.numpy())  # ∂r/∂y = 2y

dr_dx: 6.0
dr_dy: 8.0


In [None]:
# tf.GradientTape applies the chain rule through intermediate results
x = tf.Variable(3.0)

with tf.GradientTape() as tape:
    y = x ** 2  # y depends on x
    r = y ** 2  # r depends on y, and therefore on x through y

# dr/dx = dr/dy × dy/dx = 2y × 2x = 2(x²) × 2x = 4x³
dr_dx = tape.gradient(target=r, sources=x)
print("dr_dx:", dr_dx.numpy())

dr_dx: 108.0


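#### Note that a non-persistent tf.GradientTape releases its recorded resources as soon as tape.gradient() has been called once. To compute more than one
#### gradient from the same recording (e.g. both dr/dx and dr/dy, where 'y' is an intermediate result), the tape needs to be created with persistent=True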

In [None]:
# Persistent tf.GradientTape: gradient() may be called more than once on the same recording
x = tf.Variable(3.0)

with tf.GradientTape(persistent=True) as tape:
    y = x ** 2
    r = y ** 2

dr_dx = tape.gradient(r, x)  # dr/dx = 4x³
dr_dy = tape.gradient(r, y)  # dr/dy = 2y, gradient with respect to the intermediate result y

# A persistent tape keeps its recorded resources alive for as long as it is referenced,
# so drop the reference once all required gradients have been computed.
del tape

print("dr_dx:", dr_dx.numpy())
print("dr_dy:", dr_dy.numpy())

dr_dx: 108.0
dr_dy: 18.0


####  
#### 6.2 How to use tf.GradientTape for model training?

In [None]:
# Let us see this in action in the context of Logistic Regression

# Points to note:
# 1. For logistic regression, the probability P(y=1|X) = 1/[1+exp{-(WX+b)}], where W and b are the weights and bias of the model.
# 2. The loss function is L = -y*log[P(y=1|X)] - (1-y)*log[1-P(y=1|X)], otherwise known as binary log loss.
# 3. To train the model using Stochastic Gradient Descent, we need to compute ∂L/∂W and ∂L/∂b.

# Let us see how we can compute the gradients using tf.GradientTape

In [None]:
# declare weights and bias of the model
num_features = 10 # the model has 10 features
# wrapping the random numbers with tf.Variable makes W and b trainable variables, which
# tf.GradientTape watches automatically
W = tf.Variable(tf.random.normal(shape=(num_features, ))) # one weight per feature
b = tf.Variable(tf.random.normal(shape=(1, )))            # a single bias term

# number of training samples
num_samples = 100

# generate training samples randomly and wrap them up in tf.constant; constant tensors are
# not watched by tf.GradientTape, so no gradients are computed with respect to the samples
X = tf.constant(tf.random.uniform(shape=(num_samples, num_features)))
# targets must be 0 or 1: a uniform draw above 0.80 is labelled 1, so roughly 20% are positives
y = tf.constant(tf.cast(tf.greater(tf.random.uniform(shape=(num_samples,)), 0.80), dtype=tf.float32))

In [None]:
# compute gradients of the binary log loss with respect to W and b
with tf.GradientTape() as tape:
    # W * X broadcasts W across the rows of X; summing over axis=1 then yields one
    # log-odds value per sample, i.e. a vector of shape (num_samples,).
    # b has shape (1,) and is broadcast onto every sample.
    log_odds = tf.reduce_sum(W * X, axis=1) + b

    probas = tf.sigmoid(log_odds) # P(y=1|X), kept for inspection

    # Binary log loss -y*log(p) - (1-y)*log(1-p) with p = sigmoid(log_odds), averaged over
    # the samples. It is computed directly from the log-odds because taking log(p) or
    # log(1-p) explicitly yields inf/NaN once the sigmoid saturates to exactly 0 or 1.
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=log_odds)
    )

[dloss_dW, dloss_db]  = tape.gradient(loss, [W, b])

print("dloss_dW:", dloss_dW.numpy())
print("dloss_db:", dloss_db.numpy())

dloss_dW: [0.20875391 0.18559754 0.20375727 0.17207277 0.16018161 0.23883408
 0.16166784 0.17670268 0.15549302 0.14451382]
dloss_db: [0.3515248]


####  
#### 6.3 How to disable gradient computation for selected variables?

In [None]:
# Re-use the earlier two-variable example to show how gradient computation can be
# switched off for a selected variable
x = tf.Variable(3.0, trainable=True)  # trainable=True is the default, so it was not spelled out earlier
y = tf.Variable(4.0, trainable=False) # trainable=False tells tf.GradientTape not to track this variable

with tf.GradientTape() as tape:
    r = x**2 + y**2

# The failure below is the demonstration itself, so it is caught and its message displayed
try:
    dr_dx, dr_dy = tape.gradient(target=r, sources=[x, y])
    print("dr_dx:", dr_dx.numpy())
    print("dr_dy:", dr_dy.numpy()) # dr_dy is None for the untracked variable, so .numpy() raises here
except Exception as err:
    print("Error occurred during gradient computation:\n", str(err))

dr_dx: 6.0
Error occurred during gradient computation:
 'NoneType' object has no attribute 'numpy'
