# Explore multi-backend Keras - Keras 3.0

## Install dependencies

In [1]:
! pip install -q keras-nightly

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m972.9/972.9 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h

## Set the backend
In this tutorial we will use TensorFlow as our backend. You can change it to "jax" or "torch" to use JAX or PyTorch, respectively.

In [2]:
import os

# Backend selection: swap the value for "jax" or "torch" to try another
# backend instead of "tensorflow".
os.environ.update(KERAS_BACKEND="tensorflow")

In [4]:
import tensorflow as tf
import keras

import numpy as np

## Use a Keras layer

Layers are callable

In [5]:
# A Dense layer with two units, ReLU activation and a zero-initialised bias.
layer = keras.layers.Dense(
    units=2, activation="relu", bias_initializer="zeros"
)
# Random 3x3 array of values drawn uniformly from [0, 1).
data = np.random.uniform(size=(3, 3))
# Call the layer like a function; the cell displays the resulting tensor.
layer(data)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.58891773, 0.03872069],
       [0.        , 0.24556068],
       [0.00214158, 0.29279238]], dtype=float32)>

## Write your custom layer

In [6]:
class Linear(keras.layers.Layer):
    """Fully connected layer computing `matmul(inputs, w) + b`.

    Args:
        units: Size of the last dimension of the output.
        **kwargs: Forwarded unchanged to `keras.layers.Layer`
            (for example `name` or `dtype`).
    """

    def __init__(self, units, **kwargs):
        # Forward base-layer options instead of silently rejecting them.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the weights once the size of the last input axis is known."""
        input_dim = input_shape[-1]
        self.w = self.add_weight(
            shape=(input_dim, self.units),
            initializer="glorot_uniform",
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="zeros",
        )

    def call(self, inputs):
        """Apply the affine transform using `keras.ops`."""
        return keras.ops.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

# Instantiate the custom layer with two output units and apply it to `data`;
# the cell displays the resulting tensor.
layer = Linear(units=2)
layer(data)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[-0.06542293,  0.22975203],
       [-0.28972575,  0.25333095],
       [-0.30414742,  0.36889717]], dtype=float32)>

## Different ways of building the model

*  Sequential API
*  [Preferred] Functional API
*   Subclassing model

In this demo, we are building a 2-class CNN image classifier.

In [7]:
# Sequential model: layers are listed in the order they are applied.
sequential_model = keras.Sequential(
    [
        keras.Input([28, 28, 3]),
        keras.layers.Conv2D(8, 2),
        keras.layers.MaxPool2D(2),
        keras.layers.Flatten(),
        keras.layers.Dense(2),
        keras.layers.Softmax(),
    ]
)

# `summary()` prints the table itself and returns None, so wrapping it in
# `print` only adds a stray "None" line to the output.
sequential_model.summary()
print(sequential_model(np.random.uniform(size=[1, 28, 28, 3])))

None
tf.Tensor([[0.82510257 0.17489742]], shape=(1, 2), dtype=float32)


In [8]:

# Functional model: each layer is called on the output of the previous one,
# starting from a symbolic `keras.Input`.
inputs = keras.Input([28, 28, 3])
x = keras.layers.Conv2D(8, 2)(inputs)
x = keras.layers.MaxPool2D(2)(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(2)(x)
outputs = keras.layers.Softmax()(x)

functional_model = keras.Model(inputs=inputs, outputs=outputs)
# `summary()` prints the table itself and returns None, so wrapping it in
# `print` only adds a stray "None" line to the output.
functional_model.summary()
print(functional_model(np.random.uniform(size=[1, 28, 28, 3])))

None
tf.Tensor([[0.3255077 0.6744923]], shape=(1, 2), dtype=float32)


In [9]:
# Multi-input model: two image inputs, each with its own convolution, whose
# feature maps are summed before pooling and classification.
input_1 = keras.Input([28, 28, 3])
input_2 = keras.Input([28, 28, 3])
x_1 = keras.layers.Conv2D(8, 2)(input_1)
x_2 = keras.layers.Conv2D(8, 2)(input_2)
x = keras.layers.MaxPool2D(2)(x_1 + x_2)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(2)(x)
outputs = keras.layers.Softmax()(x)

# The model's inputs must be the two tensors declared in this cell; `inputs`
# belongs to the single-input functional model and does not feed `outputs`.
multi_input_functional_model = keras.Model(
    inputs=[input_1, input_2],
    outputs=outputs,
)
# `summary()` prints the table itself and returns None, so it is not wrapped
# in `print`.
multi_input_functional_model.summary()

None


In [10]:
# Subclassing `keras.Model`

class MyModel(keras.Model):
    """Subclassed CNN classifier: conv -> max-pool -> flatten -> dense -> softmax."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Layers are created once here and reused on every call.
        self._conv = keras.layers.Conv2D(filters=8, kernel_size=2)
        self._max_pool = keras.layers.MaxPool2D(pool_size=2)
        self._flatten = keras.layers.Flatten()
        self._dense = keras.layers.Dense(units=2)
        self._softmax = keras.layers.Softmax()

    def call(self, inputs):
        """Feed `inputs` through the layers in order and return the softmax output."""
        stages = (
            self._conv,
            self._max_pool,
            self._flatten,
            self._dense,
            self._softmax,
        )
        x = inputs
        for stage in stages:
            x = stage(x)
        return x

subclass_model = MyModel()

# Call the model on a sample input before asking for its summary.
print(subclass_model(np.random.uniform(size=[1, 28, 28, 3])))
# `summary()` prints the table itself and returns None, so wrapping it in
# `print` only adds a stray "None" line to the output.
subclass_model.summary()


tf.Tensor([[0.65585333 0.34414667]], shape=(1, 2), dtype=float32)


None


## Models are callable.
`keras.Model` instances are also callable. You can call a model as if it were a function, no matter which way you used to build it.

In [11]:
# One random input of shape (1, 28, 28, 3), passed to each of the three models.
sample_data = np.random.uniform(size=(1, 28, 28, 3))

# Print each model's output on its own line.
print(
    *(
        built_model(sample_data)
        for built_model in (sequential_model, functional_model, subclass_model)
    ),
    sep="\n",
)

tf.Tensor([[0.8242522 0.1757478]], shape=(1, 2), dtype=float32)
tf.Tensor([[0.4378252 0.5621748]], shape=(1, 2), dtype=float32)
tf.Tensor([[0.6455979  0.35440212]], shape=(1, 2), dtype=float32)


In [12]:
# Locate the convolution layer by type rather than by its auto-generated name
# ("conv2d_1"), which depends on how many Conv2D layers were created earlier
# in the session and therefore changes when cells are re-run.
conv_layer = next(
    candidate
    for candidate in functional_model.layers
    if isinstance(candidate, keras.layers.Conv2D)
)

# A new model that shares the functional model's input and stops at the
# convolution layer's output.
sliced_model = keras.Model(
    inputs=functional_model.inputs,
    outputs=conv_layer.output,
)
sliced_model.summary()

## Train the model
Now we have defined the model. For actual training, we need the following pieces:

* Data
* Loss/Target function
* Optimizer

In [13]:

# Synthetic dataset for a 2-class problem: 20 random arrays of shape
# (28, 28, 3) with one integer label in {0, 1} per sample.
data = np.random.uniform(size=(20, 28, 28, 3))
label = np.random.randint(0, 2, size=20)

In [14]:

# Let's use the functional model defined above.
# This binds a second name to the same object (no copy is made), so anything
# done to `model` below is also visible through `functional_model`.
model = functional_model

In [15]:
# Attach the optimizer, loss and metric used by `fit`. The labels are integer
# class ids, hence the "sparse" loss and accuracy.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

In [16]:
# Train for two epochs in batches of five samples; the returned History
# object is shown as the cell output.
model.fit(x=data, y=label, epochs=2, batch_size=5)

Epoch 1/2
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.7766 - sparse_categorical_accuracy: 0.3333  
Epoch 2/2
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.7927 - sparse_categorical_accuracy: 0.3800 


<keras.src.callbacks.history.History at 0x7c6cd74b1c90>

# Reading material

Eager mode and graph mode (demo with TF syntax)
Debug in eager mode, then run the actual job in graph mode. Read more in the [TensorFlow guide on graphs and `tf.function`](https://www.tensorflow.org/guide/basics#graphs_and_tffunction).

In [17]:

# Eager mode: call the model directly on the first sample (kept as a batch of one).
print(model(data[:1]))

# Graph mode: run the same sample through `predict`.
print(model.predict(data[:1]))

tf.Tensor([[0.50335336 0.4966467 ]], shape=(1, 2), dtype=float32)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[[0.5033533  0.49664667]]


In [18]:

@tf.function(jit_compile=True)
def forward_pass(x):
    """Run `model` on `x` inside a `tf.function` and return the predictions.

    The `print` call is deliberate: in the recorded output it shows a symbolic
    `Tensor(...)` rather than concrete values.
    """
    predictions = model(x)
    print(predictions)
    return predictions

# Run the compiled forward pass on the first sample (kept as a batch of one).
print(forward_pass(data[:1]))

Tensor("functional_2_1/softmax_1_1/Softmax:0", shape=(1, 2), dtype=float32)
tf.Tensor([[0.5033533  0.49664667]], shape=(1, 2), dtype=float32)


In [19]:

# `fit` and `predict` can run in eager mode too: recompile the model with
# `run_eagerly=True`.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    run_eagerly=True,
)

# With the flag set above, this `predict` call runs in eager mode.
print(model.predict(data[:1]))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[[0.50335336 0.4966467 ]]


## Write your custom training loop (demo with TF syntax)
We encourage using `fit` for training/fine-tuning as much as possible, but it is still important to know how to write your own custom training loop. For example, if you want to debug the training loop or have finer control over each step, you can use a custom training loop.

In [20]:

# Input pipeline: slice the arrays into (sample, label) pairs, group them in
# batches of five, then cache and prefetch the batches.
train_data = (
    tf.data.Dataset.from_tensor_slices((data, label))
    .batch(5)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)


In [21]:

# Stand-alone loss, optimizer and metric objects for the custom training loop.
loss_fn = keras.losses.SparseCategoricalCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate=0.001)
metric = keras.metrics.SparseCategoricalAccuracy()

In [22]:

@tf.function
def train_step(data):
    """Run one optimisation step on a single batch.

    Args:
        data: An `(inputs, labels)` pair, unpacked into `x` and `y`.

    Returns:
        The loss computed by `loss_fn` for this batch.
    """
    x, y = data
    with tf.GradientTape() as tape:
        # Run the forward pass in training mode. The layers used in this
        # notebook behave the same either way, but a training step should not
        # depend on the default value of `training`.
        outputs = model(x, training=True)
        loss = loss_fn(y, outputs)

    trainable_variables = model.trainable_variables
    gradients = tape.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))
    # Fold this batch into the running accuracy held by `metric`.
    metric.update_state(y, outputs)

    return loss


# Use a dedicated loop variable: naming it `data` would overwrite the NumPy
# array `data` defined earlier, so cells that index into it could no longer
# be re-run after this loop.
for i, batch in enumerate(train_data):
    loss = train_step(batch)
    print(f"Batch {i}, loss: {loss:.3f}, accuracy: {metric.result():.3f}")

Batch 0, loss: 0.675, accuracy: 0.400
Batch 1, loss: 0.946, accuracy: 0.400
Batch 2, loss: 0.543, accuracy: 0.533
Batch 3, loss: 0.896, accuracy: 0.500


## Save your model

In [None]:

# Write the weights of `model` to `dummy.weights.h5`.
model.save_weights(filepath="dummy.weights.h5")

In [None]:
# Load the weights file written by the save cell into the sequential model.
sequential_model.load_weights(filepath="dummy.weights.h5")

In [None]:
# Check that the convolution kernels of the two models now match.
# The layers are located by type rather than by auto-generated name
# ("conv2d", "conv2d_1"), because those names depend on how many layers were
# created earlier in the session.
sequential_conv = next(
    candidate
    for candidate in sequential_model.layers
    if isinstance(candidate, keras.layers.Conv2D)
)
trained_conv = next(
    candidate
    for candidate in model.layers
    if isinstance(candidate, keras.layers.Conv2D)
)

# Sum absolute differences: a signed sum could cancel to zero even when the
# kernels differ. The result is 0 only when every entry matches.
tf.reduce_sum(tf.abs(sequential_conv.kernel - trained_conv.kernel))