In [1]:
import tensorflow as tf
import numpy as np

# Tensorflow

### Constants

In [31]:
# Build a constant (immutable) tensor holding a 2x3 matrix
t = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
print(t.shape)
print(t.dtype)

# Slicing follows numpy semantics: every row, columns 1 onward
print(t[:, 1:])

# `...` stands for all the leading dimensions (here: all rows),
# `1` selects the second column,
# and tf.newaxis appends a new axis of size 1 to the result
print(t[..., 1, tf.newaxis])

# Adding a scalar is element-wise and returns a new tensor (t itself is untouched)
print(t + 10)

# Matrix product of t with its transpose
print(t @ tf.transpose(t))

(2, 3)
<dtype: 'float32'>
tf.Tensor(
[[2. 3.]
 [5. 6.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[2.]
 [5.]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[11. 12. 13.]
 [14. 15. 16.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[14. 32.]
 [32. 77.]], shape=(2, 2), dtype=float32)


### Tensors and Numpy

In [20]:
# Converting between numpy arrays and tensors, and vice versa

# numpy -> tensorflow
a = np.array([2., 4., 5.])
print(tf.constant(a))

# tensorflow -> numpy
t = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
print(t.numpy())

# The two libraries also accept each other's types directly:
# a tensorflow function applied to a numpy array...
print(tf.square(a))

# ...and a numpy function applied to a tensor
print(np.square(t))

tf.Tensor([2. 4. 5.], shape=(3,), dtype=float64)
[[1. 2. 3.]
 [4. 5. 6.]]
tf.Tensor([ 4. 16. 25.], shape=(3,), dtype=float64)
[[ 1.  4.  9.]
 [16. 25. 36.]]


### Type conversions

Types must match exactly in any TensorFlow operation: TF never converts types implicitly, because casting can hurt
performance and TF does not want it to go unnoticed. Use `tf.cast()` when a conversion is really needed.

In [None]:
# Trying to add float32 (tf default float) and float64 results in an error.
# The exception is caught and displayed so that the notebook still runs from top to bottom.
try:
  tf.constant(2.) + tf.constant(4., dtype=tf.float64)
except (tf.errors.InvalidArgumentError, TypeError) as err:
  print(type(err).__name__, err)

# The fix is to cast explicitly so that both operands share the same dtype
print(tf.constant(2.) + tf.cast(tf.constant(4., dtype=tf.float64), tf.float32))

### Variables

All the tensors we've seen so far are immutable. This means that we cannot use them to implement weights in a NN,
since they have to be tweaked when doing backpropagation.

TF variables are like tensors, they work with all the tf functions we've seen. The main difference is that they support 
in-place operations using the `assign()` method and friends.

In [38]:
v = tf.Variable([[1., 2., 3.],
                 [4., 5., 6.]])

# Every assign call below modifies v in place

# Double every element
v.assign(2 * v)
print(v)

# Update only a slice in place: add 100 to columns 1 onward
v[:, 1:].assign(v[:, 1:] + 100)
print(v)

# Update individual cells in place: cell (0, 0) becomes 100 and cell (1, 2) becomes 200
v.scatter_nd_update(indices=[[0, 0], [1, 2]], updates=[100., 200.])
print(v)

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[  2., 104., 106.],
       [  8., 110., 112.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[100., 104., 106.],
       [  8., 110., 200.]], dtype=float32)>


# Customizing Models and Training Algorithms

### Customizing Loss Functions

Suppose that you have a regression task and that your dataset is ridden with outliers. The MSE loss function penalizes large errors
too much, causing the model to become imprecise. The MAE loss doesn't penalize the outliers enough and training takes too long
to converge.

Instead you decide to implement the Huber loss (which is already available in Keras, by the way). This loss function is a mix between MSE and MAE:
* It computes the error on the given batch (e.g. error = y_true - y_pred)
* For small errors (absolute value below a threshold, 1 here), it uses a squared loss
* For large errors, it uses a linear loss

**These custom components are not saved automatically when serializing the model. See the book for how to save/load models 
  with custom components**


In [40]:
# Huber loss function
def huber_fn(y_true, y_pred):
  """Element-wise Huber loss with a fixed threshold of 1.

  With error = y_true - y_pred, each element of the result is
  error**2 / 2 where |error| < 1, and |error| - 0.5 elsewhere.
  """
  residual = y_true - y_pred
  abs_residual = tf.abs(residual)

  # Quadratic branch, selected for the small residuals
  quadratic = tf.square(residual) / 2
  # Linear branch, selected for the large residuals
  linear = abs_residual - 0.5

  # Pick, element by element, the branch that matches the residual's magnitude
  return tf.where(abs_residual < 1, quadratic, linear)


# CODE DOES NOT RUN!
# Illustration only: a custom loss function is passed to compile() in place of a loss name.
# NOTE(review): the Sequential model below is created with an empty layer list and is never fitted in this cell.
model = tf.keras.Sequential([])
model.compile(loss=huber_fn, optimizer="nadam")

In [41]:
# Huber loss function with a configurable parameter
def create_huber(threshold=1.0):
  """Return a Huber loss function configured with the given threshold.

  The returned function takes (y_true, y_pred) and yields, per element,
  error**2 / 2 where |error| < threshold and
  threshold * |error| - threshold**2 / 2 elsewhere.
  """
  def huber_fn(y_true, y_pred):
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    quadratic = tf.square(residual) / 2
    linear = threshold * abs_residual - threshold ** 2 / 2
    return tf.where(abs_residual < threshold, quadratic, linear)
  return huber_fn

# Compile the model with a Huber loss whose threshold is 2.0
model.compile(loss=create_huber(2.0), optimizer="nadam")

### Custom Metrics

### Streaming metrics

These maintain the overall metric throughout training. For example, the precision metric below treats each call
as the result of one training batch (true, pred) and keeps track of the precision accumulated so far.

In [43]:
precision = tf.keras.metrics.Precision()

# Batch 1: 5 positive predictions, 4 of them correct -> precision = 4 / 5 = 0.8
batch1_y_true = [0, 1, 1, 1, 0, 1, 0, 1]
batch1_y_pred = [1, 1, 0, 1, 0, 1, 0, 1]
print(precision(batch1_y_true, batch1_y_pred))

# Batch 2 on its own has 0% precision, yet 0.5 is printed because the metric is
# accumulated over both batches:
#   Batch 1 had 4 TP, 1 FP
#   Batch 2 had 0 TP, 3 FP
#   Precision = 4 / (4 + 1 + 3) = 0.5
batch2_y_true = [0, 1, 0, 0, 1, 0, 1, 1]
batch2_y_pred = [1, 0, 1, 1, 0, 0, 0, 0]
print(precision(batch2_y_true, batch2_y_pred))


tf.Tensor(0.8, shape=(), dtype=float32)
tf.Tensor(0.5, shape=(), dtype=float32)


### Custom Metric

In [44]:
# Custom Huber metric (taking a mean of the Huber loss and using it as a metric)

def create_huber(threshold=1.0):
  """Return a Huber loss function configured with the given threshold.

  The returned function takes (y_true, y_pred) and yields, per element,
  error**2 / 2 where |error| < threshold and
  threshold * |error| - threshold**2 / 2 elsewhere.
  """
  def huber_fn(y_true, y_pred):
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    quadratic = tf.square(residual) / 2
    linear = threshold * abs_residual - threshold ** 2 / 2
    return tf.where(abs_residual < threshold, quadratic, linear)
  return huber_fn

class HuberMetric(tf.keras.metrics.Metric):
  """Streaming metric: the mean Huber loss over every element seen so far.

  Args:
    threshold: threshold handed to create_huber() to build the per-element loss.
    **kwargs: forwarded to tf.keras.metrics.Metric (e.g. name, dtype).
  """

  def __init__(self, threshold=1.0, **kwargs):
    super().__init__(**kwargs)
    self.threshold = threshold
    self.huber_fn = create_huber(threshold)
    # Similar to a tf.Variable, only that it is tracked by the metric and meant for state storage.
    # The name is passed by keyword: the first positional parameter of add_weight() is not
    # the name in every Keras version, so a positional "total" can be taken for a shape.
    self.total = self.add_weight(name="total", initializer="zeros")
    self.count = self.add_weight(name="count", initializer="zeros")

  def update_state(self, y_true, y_pred, sample_weight=None):
    """Accumulate the sum of the Huber losses and the number of elements. Gets called first.

    sample_weight is accepted for API compatibility but is not used: every
    element contributes equally to the mean.
    """
    sample_metrics = self.huber_fn(y_true, y_pred)
    self.total.assign_add(tf.reduce_sum(sample_metrics))
    self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

  def result(self):
    """Return the mean Huber loss so far. Gets called after update_state.

    Yields 0 instead of NaN when nothing has been accumulated yet (count == 0).
    """
    return tf.math.divide_no_nan(self.total, self.count)

  def get_config(self):
    """Return the base config extended with the threshold, for saving/loading models."""
    base_config = super().get_config()
    return {**base_config, "threshold": self.threshold}


### Custom Layers

Good for building layers that Keras doesn't offer, and also for grouping a number of layers into a single reusable
block.

In [45]:
# Layers without weights (e.g. Flatten or ReLU) can be built by wrapping a function in a Keras Lambda layer.
# Example: a layer that applies the exponential function to its inputs
# (Keras already provides this through activation="exponential")

exponential_layer = tf.keras.layers.Lambda(lambda inputs: tf.exp(inputs))


In [None]:
# Building a layer with weights. For example, building our own version of the Dense layer

class MyDense(tf.keras.layers.Layer):
  """Simplified re-implementation of a fully connected (Dense) layer.

  Args:
    units: number of output units.
    activation: anything accepted by tf.keras.activations.get().
    **kwargs: forwarded to tf.keras.layers.Layer.
  """

  def __init__(self, units, activation=None, **kwargs):
    super().__init__(**kwargs)
    self.units = units
    self.activation = tf.keras.activations.get(activation)

  def build(self, batch_input_shape):
    """Create the weights. Called once, the first time the layer is used."""
    n_inputs = batch_input_shape[-1]
    # Weight matrix mapping the n_inputs input features to the layer's units
    self.kernel = self.add_weight(
      name="kernel",
      shape=[n_inputs, self.units],
      initializer="glorot_normal"
    )
    # One bias term per unit
    self.bias = self.add_weight(
      name="bias",
      shape=[self.units],
      initializer="zeros"
    )

  def call(self, X):
    """Compute activation(X @ kernel + bias). Used during training and inference."""
    linear_output = X @ self.kernel + self.bias
    return self.activation(linear_output)

  def get_config(self):
    """Return the config needed to serialize/deserialize this custom layer."""
    return {
      **super().get_config(),
      "units": self.units,
      "activation": tf.keras.activations.serialize(self.activation),
    }

In [None]:
# A custom implementation of the GaussianNoise layer that Keras offers. This layer adds noise during training but not
# during inference. It's an example of a layer that behaves differently during training and inference.

class MyGaussianNoise(tf.keras.layers.Layer):
  """Adds zero-mean Gaussian noise to its inputs while training; identity otherwise.

  Args:
    stddev: standard deviation of the noise.
    **kwargs: forwarded to tf.keras.layers.Layer.
  """

  def __init__(self, stddev, **kwargs):
    super().__init__(**kwargs)
    self.stddev = stddev

  # Keras will pass the training parameter set to True during training and we can have a different
  # behavior based on it.
  def call(self, X, training=False):
    """Return X plus noise when training is truthy, and X unchanged otherwise."""
    if training:
      noise = tf.random.normal(tf.shape(X), stddev=self.stddev)
      return X + noise
    else:
      return X

  # Used for serialization/deserialization of this custom layer: without it,
  # stddev would not be part of the saved config.
  def get_config(self):
    """Return the base config extended with stddev."""
    base_config = super().get_config()
    return {**base_config, "stddev": self.stddev}

### Custom Models


In [None]:
# Example of a model with residual blocks - a residual block is a stack of dense layers (two here) that adds its input to its output.
# The first residual block is applied 4 times in a row (range(1+3)), then a second residual block is applied once

# We start by creating a residual block layer that will contain multiple Dense layers
class ResidualBlock(tf.keras.layers.Layer):
  """Stack of Dense layers whose output is added to the block's input.

  Args:
    n_layers: number of Dense layers in the block.
    n_neurons: number of units in each Dense layer.
    **kwargs: forwarded to tf.keras.layers.Layer.
  """

  def __init__(self, n_layers, n_neurons, **kwargs):
    super().__init__(**kwargs)
    dense_layers = []
    for _ in range(n_layers):
      dense_layers.append(
        tf.keras.layers.Dense(n_neurons, activation="relu", kernel_initializer="he_normal")
      )
    self.hidden = dense_layers

  def call(self, inputs):
    """Run the inputs through every Dense layer, then add the skip connection."""
    outputs = inputs
    for dense in self.hidden:
      outputs = dense(outputs)
    return inputs + outputs
  
# Next we define the custom model that will use this residual block
class ResidualRegressor(tf.keras.Model):
  """Regressor made of a Dense layer, two residual blocks and a Dense output layer.

  Args:
    output_dim: number of units of the output layer.
    **kwargs: forwarded to tf.keras.Model.
  """

  # The layers are created in the constructor
  def __init__(self, output_dim, **kwargs):
    super().__init__(**kwargs)
    self.hidden1 = tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")
    self.block1 = ResidualBlock(2, 30)
    self.block2 = ResidualBlock(2, 30)
    self.out = tf.keras.layers.Dense(output_dim)

  # The layer-to-layer data flow is defined here
  def call(self, inputs):
    """Forward pass: hidden1, then block1 four times, then block2, then the output layer."""
    activations = self.hidden1(inputs)
    # The same block1 instance is applied 1 + 3 = 4 times in a row
    for _ in range(1 + 3):
      activations = self.block1(activations)
    activations = self.block2(activations)
    return self.out(activations)


### Loss and Metrics that are based on the model's internals

So far we've used loss and metrics that are based on the labels and predictions. We can define models where the loss and metrics
are defined based on internal layers.

One such model uses the "reconstruction loss" - it's the mean squared difference between the reconstruction and the inputs. It is
used in auto-encoder models and encourages a model to preserve as much information as possible through the hidden layers.

In [None]:
class ReconstructingRegressor(tf.keras.Model):
  """Regressor with an auxiliary reconstruction loss computed from its last hidden layer.

  Args:
    output_dim: number of units of the output layer.
    **kwargs: forwarded to tf.keras.Model.
  """

  def __init__(self, output_dim, **kwargs):
    super().__init__(**kwargs)
    # 5 hidden Dense layers
    hidden_layers = []
    for _ in range(5):
      hidden_layers.append(
        tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")
      )
    self.hidden = hidden_layers
    self.out = tf.keras.layers.Dense(output_dim)

    # Keeps track of the reconstruction error during training
    self.reconstruction_mean = tf.keras.metrics.Mean(name="reconstruction_error")

  def build(self, batch_input_shape):
    """Create the reconstruction layer.

    This is done here rather than in the constructor because the layer needs as
    many units as there are input features, which is only known from the input shape.
    """
    self.reconstruct = tf.keras.layers.Dense(batch_input_shape[-1])

  def call(self, inputs, training=False):
    """Return the predictions and register the scaled reconstruction loss."""
    # Run the inputs through the hidden layers
    features = inputs
    for dense in self.hidden:
      features = dense(features)

    # Reconstruct the inputs from the last hidden layer and measure the
    # mean squared difference with the actual inputs
    reconstruction = self.reconstruct(features)
    squared_diff = tf.square(reconstruction - inputs)
    recon_loss = tf.reduce_mean(squared_diff)
    # Add the reconstruction loss to the model's losses, scaled down by 0.05
    self.add_loss(0.05 * recon_loss)

    # The running mean of the reconstruction error is only updated when training
    if training:
      self.add_metric(self.reconstruction_mean(recon_loss))
    return self.out(features)


### Computing Gradients Using Autodiff

In [4]:
# Working with a toy function to understand how to use autodiff

def f(w1, w2):
  """Toy function f(w1, w2) = 3 * w1**2 + 2 * w1 * w2."""
  quadratic_term = 3 * w1 ** 2
  cross_term = 2 * w1 * w2
  return quadratic_term + cross_term

# Reverse-mode autodiff, evaluated at the point (w1, w2) = (5, 3)
w1 = tf.Variable(5.)
w2 = tf.Variable(3.)

# GradientTape records every operation that involves a variable inside the `with` block.
# By default the tape is erased as soon as gradient() has been called once;
# persistent=True keeps it, so gradient() can be called more than once.
with tf.GradientTape(persistent=True) as tape:
  z = f(w1, w2)

# Gradients of the result z with respect to the inputs w1 and w2
gradients = tape.gradient(z, [w1, w2])

print(gradients)

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>, <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]
