## Computing Gradients using AutoDiff

This section is to understand how to compute gradients automatically using tensorflow

In [23]:
from tensorflow import keras
import tensorflow as tf

In [24]:
def f(w1, w2):
    return 3 * w1 ** 2 + 2 * w1 * w2

# of course we can find the derivative at specific points by approximating using a small delta
w1, w2 = 5, 3
eps = 1e-6
(f(w1 + eps, w2) - f(w1, w2)) / eps, (f(w1, w2 + eps) - f(w1, w2)) / eps

(36.000003007075065, 10.000000003174137)

In [25]:
## Using tensorflow autodiff

w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z = f(w1, w2)

gradients = tape.gradient(z, [w1, w2])

gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [26]:
# gradient is automatically ereased when gradient is called
##--> tape.gradient(z, w1)

# another way is to set persistent=True so the gradient is over ereased, but 
# need to remember to delete the tape
with tf.GradientTape(persistent=True) as tape:
    z = f(w1, w2)
    
g1 = tape.gradient(z, [w1])
g2 = tape.gradient(z, [w2])

g1, g2

del tape

In [27]:
## Normally only tracks variables. but can also be set to track constant.
## this can be useful for some case. for example, implement a regularisation loss that 
## panelises activations that very a lot when inputs vary little
c1, c2 = tf.constant(5.), tf.constant(3.)
with tf.GradientTape() as tape:
    tape.watch(c1)
    tape.watch(c2)
    z = f(c1, c2)
    
gradients = tape.gradient(z, [c1, c2])
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

## Custom Training Loops

In [28]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
housing = fetch_california_housing()

keras.backend.clear_session()

# test_size is default to 0.25
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full)

X_train = StandardScaler().fit_transform(X_train)
X_valid = StandardScaler().fit_transform(X_valid)
X_test = StandardScaler().fit_transform(X_test)

In [57]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input


keras.backend.clear_session()
l2_reg = keras.regularizers.l2(1.0)


model = keras.models.Sequential([
    Dense(16, input_shape = X_train.shape[1:] , activation = 'relu'),
    Dense(1)
])

model.summary()
# how we would compile and run the model
model.compile(optimizer="adam", loss = 'mse', metrics=['mse'])
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=40)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                144       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 161
Trainable params: 161
Non-trainable params: 0
_________________________________________________________________
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x1b61cfae340>

In [58]:
import numpy as np
def random_batch(X, y, batch_size = 30):
    idx = np.random.randint(len(X), size = batch_size)
    return X[idx], y[idx]

def print_status_bar(iteration, total, loss, metrics = None):
    metrics = "-".join([f"{m.name}: {m.result()}"
                        for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics, end = end)

In [62]:
keras.backend.clear_session()

n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Adam(lr = 0.01)
loss_fn = keras.losses.mean_squared_error
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [76]:
# 2 loops, top is for epochs, and second is for steps (see above)
for epoch in range(1, n_epochs + 1):
    print(f"Epoch: {epoch}/{n_epochs}")
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train)               # grab a random batch
        with tf.GradientTape() as tape:                                 # gradient tape created for loss
            y_pred = model(X_batch, training = True)                    # calculate prediction
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))        # calculate loss for the batch
            loss = tf.add_n([main_loss] + model.losses)                 # add other losses (for regularisation for example)
            print(main_loss, loss)
            break
        gradients = tape.gradient(loss, model.trainable_variables)      # get gradients of all trainable variables
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))  # apply gradients, smart, as this can be optimiser specific such as learning rate, momentum, etc
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
    break
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch: 1/5
tf.Tensor(1.7556337, shape=(), dtype=float32) tf.Tensor(1.7556337, shape=(), dtype=float32)


In [66]:
!git add 9.1_auto_diff.ipynb
!git commit -m "initial commit"
!git push

The file will have its original line endings in your working directory


[master 964b691] initial commit
 1 file changed, 436 insertions(+)
 create mode 100644 tensorflow/9.1_auto_diff.ipynb


To https://github.com/auslei/python.git
   61e93b1..964b691  master -> master


In [48]:
X_train.shape

(11610, 8)

In [72]:
for i in range(1,10):
    tf.add_n(i)

TypeError: 'int' object is not iterable