Inicialización de pesos en las capas --- 0:00 min
===

* Última modificación: Marzo 7, 2022 | YouTube

* Adaptado de: https://keras.io/api/layers/initializers/

Importación de librerías
---

In [1]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf

tf.__version__

'2.8.0'

Uso de inicializadores
--

In [2]:
#
# Opción 1
#
layer = tf.keras.layers.Dense(
    units=64,
    kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
    bias_initializer=tf.keras.initializers.Zeros(),
)

In [3]:
#
# Opción 2
#
layer = tf.keras.layers.Dense(
    units=64, kernel_initializer="random_normal", bias_initializer="zeros"
)

Inicializadores disponibles
--

**RandomNormal**

In [4]:
initializer = tf.keras.initializers.RandomNormal(
    mean=0.0,
    stddev=1.0,
)
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.48938656,  0.4790997 ],
       [-0.7081518 ,  0.09535532]], dtype=float32)>

In [5]:
initializer = tf.keras.initializers.RandomNormal(
    mean=0.0,
    stddev=1.0,
)
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**RandomUniform**

In [6]:
initializer = tf.keras.initializers.RandomUniform(
    minval=0.0,
    maxval=1.0,
)
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.99648523, 0.46839   ],
       [0.16169655, 0.143978  ]], dtype=float32)>

In [7]:
initializer = tf.keras.initializers.RandomUniform(
    minval=0.0,
    maxval=1.0,
)
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**TruncatedNormal**

In [8]:
#
# Los valores que se alejan más de dos desviaciones estándar de la media se
# descartan y se vuelven a muestrear
#
initializer = tf.keras.initializers.TruncatedNormal(
    mean=0.0,
    stddev=1.0,
)
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 0.07993031, -0.6222534 ],
       [-0.10998045, -1.5632824 ]], dtype=float32)>

In [9]:
initializer = tf.keras.initializers.TruncatedNormal(
    mean=0.0,
    stddev=1.0,
)
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**Zeros**

In [10]:
initializer = tf.keras.initializers.Zeros()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 0.]], dtype=float32)>

In [11]:
initializer = tf.keras.initializers.Zeros()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**Ones**

In [12]:
initializer = tf.keras.initializers.Ones()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 1.],
       [1., 1.]], dtype=float32)>

In [13]:
initializer = tf.keras.initializers.Ones()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**GlorotNormal**

In [14]:
#
# Muestras aleatorias de una distribución normal truncada, centrada en 0 y con
# una stddev = sqrt(2 / (fan_in + fan_out)), donde fan_in es el número de
# neuronas de entrada y fan_out es el número de neuronas de salida
#
initializer = tf.keras.initializers.GlorotNormal()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 1.0747541 , -0.9629612 ],
       [-0.23607732, -0.02812083]], dtype=float32)>

In [15]:
initializer = tf.keras.initializers.GlorotNormal()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**GlorotUniform**

In [16]:
#
# Muestras aleatorias de una distribución uniforme en [-limit, limit], donde
# limit = sqrt(6 / (fan_in + fan_out))
#
initializer = tf.keras.initializers.GlorotUniform()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.621108  ,  0.5849397 ],
       [ 0.40255153, -0.707111  ]], dtype=float32)>

In [17]:
initializer = tf.keras.initializers.GlorotUniform()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**HeNormal**

In [18]:
#
# Muestras aleatorias de una distribución normal truncada con centro en 0 y
# stddev = sqrt(2 / fan_in)
#
initializer = tf.keras.initializers.HeNormal()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 1.2207036, -2.129395 ],
       [ 0.7974296,  1.0268037]], dtype=float32)>

In [19]:
initializer = tf.keras.initializers.HeNormal()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**HeUniform**

In [20]:
#
# Muestras aleatorias de una distribución uniforme en [-limit, limit], donde
# limit = sqrt(6 / fan_in)
#
initializer = tf.keras.initializers.HeUniform()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 1.362542  ,  0.54803836],
       [-0.74073255, -1.5826889 ]], dtype=float32)>

In [21]:
initializer = tf.keras.initializers.HeUniform()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**Identity**

In [22]:
initializer = tf.keras.initializers.Identity()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 0.],
       [0., 1.]], dtype=float32)>

In [23]:
initializer = tf.keras.initializers.Identity()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**Orthogonal**

In [24]:
initializer = tf.keras.initializers.Orthogonal()
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 0.6444479 ,  0.7646481 ],
       [ 0.7646481 , -0.64444804]], dtype=float32)>

In [25]:
initializer = tf.keras.initializers.Orthogonal()
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**Constant**

In [26]:
initializer = tf.keras.initializers.Constant(3.0)
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[3., 3.],
       [3., 3.]], dtype=float32)>

In [27]:
initializer = tf.keras.initializers.Constant(3.0)
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**VarianceScaling**

In [28]:
#
# Inicializador adaptable al tamaño de los tensores de pesos.
# Los valores son muestreados de una distribución normal
# (distribution="truncated_normal" o "untruncated_normal") con
# media cero y stddev = sqrt(scale / n), donde n puede ser:
#
#   * mode="fan_in": número de neuronas de la capa de entrada.
#   * mode="fan_out": número de neuronas de la capa de salida.
#   * mode="fan_avg": promedio del número de neuronas de entrada y salida.
#
# Los valores también pueden ser muestreados de una distribución
# uniforme con limit = sqrt(3 * scale / n).
#
initializer = tf.keras.initializers.VarianceScaling(
    scale=0.1,
    mode="fan_in",
    distribution="uniform",
)
initializer(
    shape=(2, 2),
)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.04169232, -0.3864935 ],
       [ 0.31988037,  0.19993657]], dtype=float32)>

In [29]:
initializer = tf.keras.initializers.VarianceScaling(
    scale=0.1,
    mode="fan_in",
    distribution="uniform",
)
layer = tf.keras.layers.Dense(
    3,
    kernel_initializer=initializer,
)

**Inicializadores personalizados**

In [30]:
def my_init(shape, dtype=None):
    """Return a tensor of the requested shape sampled with ``tf.random.normal``.

    The sample uses ``tf.random.normal``'s own default mean and standard
    deviation, since neither is overridden here.

    Args:
        shape: Shape of the tensor to generate.
        dtype: Dtype of the generated tensor. When ``None``, the Keras default
            float type (``tf.keras.backend.floatx()``) is used.

    Returns:
        The tensor produced by ``tf.random.normal``.
    """
    if dtype is None:
        # Resolve a missing dtype explicitly instead of forwarding None to
        # tf.random.normal, so the function also works when called directly
        # (outside a layer, which supplies the dtype itself).
        dtype = tf.as_dtype(tf.keras.backend.floatx())
    return tf.random.normal(
        shape,
        dtype=dtype,
    )


layer = tf.keras.layers.Dense(
    64,
    kernel_initializer=my_init,
)

In [31]:
class ExampleRandomNormal(tf.keras.initializers.Initializer):
    """Custom initializer that samples values from a normal distribution.

    Args:
        mean: Mean forwarded to ``tf.random.normal``.
        stddev: Standard deviation forwarded to ``tf.random.normal``.
    """

    def __init__(self, mean, stddev):
        self.mean = mean
        self.stddev = stddev

    def __call__(self, shape, dtype=None):
        """Return a tensor of ``shape`` sampled with the stored mean and stddev.

        Args:
            shape: Shape of the tensor to generate.
            dtype: Dtype of the generated tensor. When ``None``, the Keras
                default float type (``tf.keras.backend.floatx()``) is used.

        Returns:
            The tensor produced by ``tf.random.normal``.
        """
        if dtype is None:
            # Resolve a missing dtype explicitly instead of forwarding None to
            # tf.random.normal, so the initializer can be called on its own.
            dtype = tf.as_dtype(tf.keras.backend.floatx())
        return tf.random.normal(
            shape,
            mean=self.mean,
            stddev=self.stddev,
            dtype=dtype,
        )

    def get_config(self):
        """Return the constructor arguments as a dict.

        The keys match the ``__init__`` parameter names, so the dict can be
        expanded back into the constructor to rebuild an equivalent instance.
        """
        return {
            "mean": self.mean,
            "stddev": self.stddev,
        }