# CHAPTER 12 - Custom Models and Training with TensorFlow

# A Quick Tour of TensorFlow

# Using TensorFlow like NumPy

## Tensors and Operations

In [2]:
import tensorflow as tf

t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
t

2025-12-12 05:34:52.109595: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-12 05:34:52.141986: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-12 05:34:53.379726: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
W0000 00:00:1765528494.355345   35387 gpu_device.cc:2342] Cannot dlopen some GPU

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [3]:
t[:, 1:]

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[2., 3.],
       [5., 6.]], dtype=float32)>

In [4]:
t + 10

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[11., 12., 13.],
       [14., 15., 16.]], dtype=float32)>

In [5]:
tf.square(t)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)>

In [6]:
t @ tf.transpose(t)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[14., 32.],
       [32., 77.]], dtype=float32)>

In [7]:
tf.constant(42)

<tf.Tensor: shape=(), dtype=int32, numpy=42>

## Tensors and NumPy

In [10]:
import numpy as np

a = np.array([2., 4., 5.])
tf.constant(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([2., 4., 5.])>

In [11]:
t.numpy()

array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

## Type Conversions

In [12]:
# Adding a float32 tensor to an int32 tensor is rejected rather than silently
# converted. The error is the point of this cell, so it is caught and printed
# instead of being left to abort a "Restart & Run All".
try:
    tf.constant(2.) + tf.constant(10)
except tf.errors.InvalidArgumentError as ex:
    # Same "ExceptionName: message" shape as the last line of a traceback.
    print(f"{type(ex).__name__}: {ex}")

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a float tensor but is a int32 tensor [Op:AddV2] name: 

In [13]:
# Even float32 + float64 is rejected: the two operands must share a dtype.
# The error is the demonstration, so catch and print it rather than letting it
# interrupt a "Restart & Run All".
try:
    tf.constant(2.) + tf.constant(10, dtype=tf.float64)
except tf.errors.InvalidArgumentError as ex:
    # Same "ExceptionName: message" shape as the last line of a traceback.
    print(f"{type(ex).__name__}: {ex}")

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a float tensor but is a double tensor [Op:AddV2] name: 

In [14]:
# Operands must share a dtype, so the float64 tensor is cast to float32
# explicitly before the addition.
t2 = tf.constant(64., dtype=tf.float64)
tf.constant(2.) + tf.cast(t2, tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=66.0>

## Variables

In [15]:
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [16]:
v.assign(2 * v)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [17]:
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [20]:
# Individual cells and slices of a variable are modified in place by calling
# .assign() on the indexed view.
v[0, 1].assign(42.)        # single element: row 0, column 1
v[:, 2].assign([0., 1.])   # whole last column, one value per row
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>

In [21]:
# Each entry of `indices` is paired with the entry of `updates` at the same
# position: v[0, 0] becomes 100. and v[1, 2] becomes 200.
v.scatter_nd_update(indices=[[0, 0], [1, 2]], updates=[100., 200.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8.,  10., 200.]], dtype=float32)>

## Other Data Structures

# Customizing Models and Training Algorithms

## Custom Loss Functions

In [23]:
def huber_fn(y_true, y_pred):
    """Element-wise Huber loss with a fixed threshold of 1.

    Where the absolute error is below 1 the loss is quadratic
    (``error ** 2 / 2``); everywhere else it is linear (``|error| - 0.5``).

    Args:
        y_true: Target values.
        y_pred: Predicted values; must be subtractable from ``y_true``.

    Returns:
        A tensor of per-element losses with the shape of ``y_true - y_pred``.
    """
    residual = y_true - y_pred
    magnitude = tf.abs(residual)
    quadratic_branch = tf.square(residual) / 2
    linear_branch = magnitude - 0.5
    return tf.where(magnitude < 1, quadratic_branch, linear_branch)

# Illustrative usage (needs a model and training data to be defined first):
# model.compile(loss=huber_fn, optimizer="nadam")
# model.fit(X_train, y_train, [...])

In [24]:
# Small fully connected network used only to demonstrate compiling, saving and
# loading a model with custom losses: 28x28 inputs are flattened, then passed
# through two 100-unit ReLU layers (He-normal initialization) and a 10-unit
# softmax output. A dropout layer (rate 0.2) precedes every Dense layer.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer([28, 28]),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(10, activation="softmax")
])

model.summary()

In [25]:
model.compile(loss=huber_fn, optimizer="nadam")

In [29]:
model.save('../models/custom_model.keras')

In [31]:
model = tf.keras.models.load_model('../models/custom_model.keras', custom_objects={'huber_fn': huber_fn})

  saveable.load_own_variables(weights_store.get(inner_path))


In [32]:
model.summary()

If you decorate the `huber_fn()` function with
`@tf.keras.utils.register_keras_serializable()`, it will automatically be
available to the `load_model()` function: there’s no need to include it in the
`custom_objects` dictionary.

In [34]:
def create_huber(threshold=1.0):
    """Build a Huber loss function with a configurable threshold.

    Args:
        threshold: Absolute error below which the loss is quadratic; at or
            above it the loss grows linearly. Defaults to 1.0.

    Returns:
        A function ``huber_fn(y_true, y_pred)`` returning the element-wise
        Huber loss. Its ``__name__`` is always ``"huber_fn"``, whatever the
        threshold.
    """
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        squared_loss = tf.square(error) / 2
        # Linear branch: threshold * |error| - threshold**2 / 2.
        linear_loss = threshold * tf.abs(error) - threshold ** 2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn

model.compile(loss=create_huber(2.0), optimizer="nadam")
model.save('../models/custom_model.keras')
# The custom_objects key is "huber_fn" because that is the name of the inner
# function returned by create_huber(); the loss is rebuilt here with the
# threshold given explicitly again.
# NOTE(review): presumably the threshold itself is not stored in the saved
# file, which is why it has to be repeated here; confirm.
model = tf.keras.models.load_model('../models/custom_model.keras', custom_objects={'huber_fn': create_huber(2.0)})

  saveable.load_own_variables(weights_store.get(inner_path))


In [36]:
fn = create_huber(2.)
fn.__name__

'huber_fn'

In [37]:
class HuberLoss(tf.keras.losses.Loss):
    """Huber loss whose threshold is kept as part of the loss configuration.

    The loss is quadratic (``error ** 2 / 2``) where the absolute error is
    below ``threshold`` and linear
    (``threshold * |error| - threshold ** 2 / 2``) everywhere else.

    Args:
        threshold: Absolute error at which the loss switches from quadratic
            to linear. Defaults to 1.0.
        **kwargs: Forwarded unchanged to ``tf.keras.losses.Loss``.
    """

    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        """Return the element-wise Huber loss between targets and predictions."""
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold ** 2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def get_config(self):
        """Return the base loss config extended with the ``threshold`` value."""
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

In [38]:
model.compile(loss=HuberLoss(2.), optimizer="nadam")

In [39]:
model.save('../models/custom_model.keras')
model = tf.keras.models.load_model('../models/custom_model.keras', custom_objects={'HuberLoss': HuberLoss})

  saveable.load_own_variables(weights_store.get(inner_path))


## Custom Activation Functions, Initializers, Regularizers, and Constraints

## Computing Gradients Using Autodiff

In [41]:
# Toy function f(w1, w2) = 3*w1**2 + 2*w1*w2 used to compare gradient methods.
# Its exact partial derivatives are df/dw1 = 6*w1 + 2*w2 and df/dw2 = 2*w1.
def f(w1, w2):
    return 3 * w1 ** 2 + 2 * w1 * w2

In [42]:
w1, w2 = 5, 3
eps = 1e-6
# Forward finite-difference estimate of df/dw1 at (w1, w2) = (5, 3):
# nudge w1 by eps, keep w2 fixed, and divide the change in f by eps.
(f(w1 + eps, w2) - f(w1, w2)) /eps

36.000003007075065

In [43]:
# Same finite-difference estimate for df/dw2: only w2 is nudged by eps.
(f(w1, w2 + eps) - f(w1, w2)) /eps

10.000000003174137

In [45]:
# Rebind w1 and w2 as TensorFlow variables, evaluate f inside a gradient tape,
# then ask the tape for the gradients of z with respect to both variables.
w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z = f(w1, w2)

gradients = tape.gradient(z, [w1, w2])

In [46]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [47]:
z

<tf.Tensor: shape=(), dtype=float32, numpy=105.0>

In [48]:
dir(f)

['__annotations__',
 '__builtins__',
 '__call__',
 '__class__',
 '__closure__',
 '__code__',
 '__defaults__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__get__',
 '__getattribute__',
 '__getstate__',
 '__globals__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__kwdefaults__',
 '__le__',
 '__lt__',
 '__module__',
 '__name__',
 '__ne__',
 '__new__',
 '__qualname__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__type_params__']

In [56]:
for line in f.__code__.co_lines():
    print(line)

(0, 2, 1)
(2, 36, 2)


In [57]:
import dis

dis.dis(f)

  1           0 RESUME                   0

  2           2 LOAD_CONST               1 (3)
              4 LOAD_FAST                0 (w1)
              6 LOAD_CONST               2 (2)
              8 BINARY_OP                8 (**)
             12 BINARY_OP                5 (*)
             16 LOAD_CONST               2 (2)
             18 LOAD_FAST                0 (w1)
             20 BINARY_OP                5 (*)
             24 LOAD_FAST                1 (w2)
             26 BINARY_OP                5 (*)
             30 BINARY_OP                0 (+)
             34 RETURN_VALUE


In [59]:
import inspect

inspect.getsource(f)

'def f(w1, w2):\n    return 3 * w1 ** 2 + 2 * w1 * w2\n'