## Chapter 12 Custom models

using tensorflow like a numpy

In [7]:
import tensorflow as tf

Tensors and Operations

In [8]:
tensor_1 = tf.constant([[1.,2.,3.],[4.,5.,6.]])
tensor_1

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [9]:
tf.constant(42)

<tf.Tensor: shape=(), dtype=int32, numpy=42>

In [10]:
print(tensor_1.shape)
print(tensor_1.dtype)

(2, 3)
<dtype: 'float32'>


In [11]:
tensor_1[...,1,tf.newaxis]

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[2.],
       [5.]], dtype=float32)>

In [12]:
tensor_1 + 10

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[11., 12., 13.],
       [14., 15., 16.]], dtype=float32)>

In [13]:
tf.square(tensor_1)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)>

In [14]:
tensor_1 @ tf.transpose(tensor_1)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[14., 32.],
       [32., 77.]], dtype=float32)>

Tensosrs and Numpy

In [15]:
import numpy as np

a = np.array([2., 4., 5.])

tf.constant(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([2., 4., 5.])>

In [16]:
tensor_1.numpy() # or np.array(tensor_1)
t = tensor_1

In [17]:
np.square(t)

array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)

In [18]:
# Variables
v = tf.Variable([[1., 2., 3.], [4.,  5., 6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [19]:
v.assign(2 * v)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [20]:
v[0,1].assign(42)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [21]:
v[:, 2].assign([0., 1.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>

In [22]:
v.scatter_nd_update(indices=[[0, 0], [1, 2]], updates=[100., 200.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8.,  10., 200.]], dtype=float32)>

In [23]:
tf.where([False],12.,21.)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([21.], dtype=float32)>

In [24]:
def huber_fn(y_true,y_pred): # this a huber loss
  error = y_true - y_pred
  is_small_error = tf.abs(error) < 1
  squared_loss = tf.square(error) / 2
  linear_loss = tf.abs(error) - 0.5
  return tf.where(is_small_error, squared_loss, linear_loss)

In [25]:
t

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

# Exercises

Implement a custom layer that perform layer normalization:


In [52]:
from tensorflow import keras
class CustomDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = activation


    def build(self, batch_input_shape):
        self.alpha =  self.add_weight(
            name="alpha", shape=batch_input_shape[-1:],
            initializer="ones",
            dtype = "float32"
        )
        self.beta =  self.add_weight(
            name="beta", shape=batch_input_shape[-1:],
            initializer="zeros",
            dtype = "float32"
        )
        super().build(batch_input_shape)
    

    def call(self, X):
        mean, std = tf.nn.moments(X, axes=-1, keepdims=True)
        std = tf.math.sqrt(std)
        epsilon = 0.001
        return tf.math.multiply(self.alpha, (X - mean)) / (std + epsilon) + self.beta


In [53]:
# Custom training loop

In [85]:
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.utils import to_categorical
# load the dataset
(X_train, y_train), (X_test, y_test) = load_data()

In [86]:
# Normalized the data to range 0-1
X_train.astype('float32')
X_test.astype('float32')
X_train = X_train / 255.0
X_test = X_test / 255.0
y_train = to_categorical(y_train,10)
y_test = to_categorical(y_test,10)

In [87]:
l2_reg = keras.regularizers.l2(0.05)
model = keras.models.Sequential(
    [
     keras.layers.Flatten(input_shape=(28,28)),
     CustomDense(30,'elu'),
     CustomDense(30,'elu'),
     keras.layers.Dense(10,'softmax')   
    ]
)

In [88]:
# randomly sample a batch
def random_batch(X,y,batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

In [89]:
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = ' - '.join(["{}: {:.4f}".format(m.name, m.result())
                         for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics,
          end=end)

In [90]:
# hyperparams
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [91]:
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train,y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
        for metric in [mean_loss] + metrics :
            metric.reset_states() 

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
20160/60000 - mean: 0.7788 - mean_absolute_error: 0.0394(32, 10) (32, 10)
20192/60000 - mean: 0.3947 - mean_absolute_error: 0.0314(32, 10) (32, 10)
20224/60000 - mean: 0.2478 - mean_absolute_error: 0.0243(32, 10) (32, 10)
20256/60000 - mean: 0.2164 - mean_absolute_error: 0.0240(32, 10) (32, 10)
20288/60000 - mean: 0.2893 - mean_absolute_error: 0.0271(32, 10) (32, 10)
20320/60000 - mean: 0.4143 - mean_absolute_error: 0.0270(32, 10) (32, 10)
20352/60000 - mean: 0.3211 - mean_absolute_error: 0.0176(32, 10) (32, 10)
20384/60000 - mean: 0.3630 - mean_absolute_error: 0.0342(32, 10) (32, 10)
20416/60000 - mean: 0.2492 - mean_absolute_error: 0.0192(32, 10) (32, 10)
20448/60000 - mean: 0.1728 - mean_absolute_error: 0.0204(32, 10) (32, 10)
20480/60000 - mean: 0.2126 - mean_absolute_error: 0.0225(32, 10) (32, 10)
20512/60000 - mean: 0.2327 - mean_absolute_error: 0.0187(32, 10) (32, 10)
20544/60000 - mean: 0.2835 - mean_absolute_erro