In [None]:
import os
# Keep using Keras 2 (the `tf-keras` package) rather than Keras 3 (`keras`).
# NOTE(review): presumably this flag has to be set before TensorFlow is first
# imported in order to take effect, which is why it lives in its own cell
# ahead of the import cell -- confirm.
os.environ['TF_USE_LEGACY_KERAS'] = '1'

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import tensorflow as tf
import numpy as np

from tf_agents.environments import random_py_environment
from tf_agents.environments import tf_py_environment
from tf_agents.networks import encoding_network
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import array_spec
from tf_agents.utils import common as common_utils
from tf_agents.utils import nest_utils

2025-01-08 16:52:20.266337: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-08 16:52:20.270122: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-08 16:52:20.344135: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-08 16:52:20.344204: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-08 16:52:20.346009: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

## Custom Networks
To create your own network, you only need to override the `__init__` and `call` methods. Let's use what we learned about `EncodingNetwork`s to build a custom `ActorNetwork` that takes observations containing an image and a vector.

In [None]:
class ActorNetwork(network.Network):
    """Actor network that encodes observations and projects them to one action.

    The network builds an internal `EncodingNetwork` to turn the observation(s)
    into a feature vector, then applies a dense `tanh` projection layer. The
    `tanh` output is rescaled with `common_utils.scale_to_spec`, so the emitted
    actions follow the bounds of the (single) action spec.

    Args:
        observation_spec: A nested structure representing the specifications of the
            observation tensor(s). Used as the network's `input_tensor_spec` and
            forwarded to the encoder.
        action_spec: A nested structure representing the specifications of the action
            space. Must contain exactly one floating-point action.
        preprocessing_layers (optional): Forwarded unchanged to `EncodingNetwork`;
            layers applied to the observation(s) before any other feature
            processing. Defaults to None.
        preprocessing_combiner (optional): Forwarded unchanged to `EncodingNetwork`;
            a layer that combines the outputs of the preprocessing layers.
            Defaults to None.
        conv_layer_params (optional): Forwarded unchanged to `EncodingNetwork`;
            a tuple or list of tuples where each tuple denotes
            (filters, kernel_size, stride) for a convolutional layer.
            Defaults to None (no convolutional layers).
        fc_layer_params (optional): Forwarded unchanged to `EncodingNetwork`;
            a tuple of fully connected layer sizes. Defaults to (75, 40).
        dropout_layer_params (optional): Forwarded unchanged to `EncodingNetwork`;
            dropout configuration for the fully connected layers. Defaults to None.
        activation_fn (optional): Activation function (callable) used by the encoder.
            Defaults to tf.keras.activations.relu.
        enable_last_layer_zero_initializer (optional): Accepted for signature
            compatibility only; this implementation does not read it, so it has
            no effect. Defaults to False.
        name (optional): String name of the network. Defaults to 'ActorNetwork'.

    Raises:
        ValueError: If the action specification does not contain exactly one
            action, or if the action data type is not tf.float32 / tf.float64.
    """

    def __init__(self,
                 observation_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 enable_last_layer_zero_initializer=False,
                 name='ActorNetwork'):
        super(ActorNetwork, self).__init__(
            input_tensor_spec=observation_spec, state_spec=(), name=name
        )

        # For simplicity only a single float action output is supported.
        self._action_spec = action_spec
        flat_action_spec = tf.nest.flatten(action_spec)

        # `!= 1` (rather than `> 1`) so that an empty action spec is reported
        # as a ValueError instead of an IndexError on the lookup below.
        if len(flat_action_spec) != 1:
            raise ValueError('Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
            raise ValueError('Only float actions are supported by this network.')

        kernel_initializer = tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform'
        )

        # The encoder is built with batch_squash=False because `call` flattens
        # and restores the outer dimensions itself.
        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=False
        )

        # Small uniform initialization for the final projection layer.
        initializer = tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003
        )

        # One output unit per element of the action; tanh bounds the raw
        # output to [-1, 1] before it is rescaled to the spec in `call`.
        self._action_projection_layer = tf.keras.layers.Dense(
            self._single_action_spec.shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=initializer,
            name='action'
        )

    def call(self, observation, step_type=(), network_state=()):
        """Computes actions for the given observation(s).

        Args:
            observation: Observation tensor(s) matching `input_tensor_spec`,
                with one or more outer dimensions.
            step_type (optional): Forwarded to the encoder. Defaults to ().
            network_state (optional): Forwarded to the encoder. Defaults to ().

        Returns:
            A tuple `(actions, network_state)`: the actions packed into the
            structure of the action spec, and the state returned by the encoder.
        """
        outer_rank = nest_utils.get_outer_rank(observation, self.input_tensor_spec)
        # Collapse the outer dimensions into one before encoding, and restore
        # them on the resulting actions afterwards.
        batch_squash = utils.BatchSquash(outer_rank)
        observations = tf.nest.map_structure(batch_squash.flatten, observation)

        state, network_state = self._encoder(
            observations, step_type=step_type, network_state=network_state
        )
        actions = self._action_projection_layer(state)
        # Map the tanh output onto the bounds declared by the action spec.
        actions = common_utils.scale_to_spec(actions, self._single_action_spec)
        actions = batch_squash.unflatten(actions)

        # `network.Network.__call__` expects `call` to return an
        # (outputs, new_state) pair, so the encoder state is returned too.
        return tf.nest.pack_sequence_as(self._action_spec, [actions]), network_state