Capa IntegerLookup --- 0:00 min
===

* Última modificación: Marzo 7, 2022 | YouTube

* Adaptado de: https://keras.io/api/layers/preprocessing_layers/categorical/integer_lookup/

Importación de librerías
---

In [1]:
import os

# Configure TensorFlow's native logging through the environment. This is done
# before the tensorflow import below so the setting is already in place when
# the library loads.
# NOTE(review): level "2" is expected to hide INFO and WARNING messages —
# confirm against the TensorFlow documentation.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf

# Display the installed TensorFlow version as the cell output.
tf.__version__

'2.8.0'

Ejemplos de uso
---

Mapea características enteras a rangos contiguos de índices.

In [2]:
#
# Build the layer from a known vocabulary.
# Vocabulary entries are numbered from 1 in the order given; a value
# that is not in the vocabulary (1000 here) is mapped to index 0.
#
vocab = [12, 36, 1138, 42]
layer = tf.keras.layers.IntegerLookup(vocabulary=vocab)
data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
layer(data)

<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[1, 3, 4],
       [4, 0, 2]])>

In [3]:
#
# Build the layer without a vocabulary and learn it from the data
# with adapt(); get_vocabulary() then lists what was learned.
# NOTE(review): the leading -1 in the result looks like the
# out-of-vocabulary placeholder — confirm against the Keras docs.
#
layer = tf.keras.layers.IntegerLookup()
data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
layer.adapt(data)
layer.get_vocabulary()

[-1, 42, 1138, 1000, 36, 12]

In [4]:
#
# Multiple OOV indices.
# With num_oov_indices=2 the values missing from the vocabulary
# (37 and 1000) land on indices 0 and 1, and the vocabulary entries
# are numbered starting at 2.
#
vocab = [12, 36, 1138, 42]
layer = tf.keras.layers.IntegerLookup(num_oov_indices=2, vocabulary=vocab)
data = tf.constant([[12, 1138, 42], [37, 1000, 36]])
layer(data)

<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[2, 4, 5],
       [1, 0, 3]])>

In [5]:
#
# One-hot output.
# Every input value becomes one row with a single 1; column 0 is
# the slot used by the out-of-vocabulary value (7 here).
#
vocab = [12, 36, 1138, 42]
layer = tf.keras.layers.IntegerLookup(output_mode="one_hot", vocabulary=vocab)
data = tf.constant([12, 36, 1138, 42, 7])
layer(data)

<tf.Tensor: shape=(5, 5), dtype=float32, numpy=
array([[0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.]], dtype=float32)>

In [6]:
#
# Multi-hot output.
# Each row of the input collapses into one 0/1 vector that marks
# which indices occur in it; repeated values still produce a 1.
#
vocab = [12, 36, 1138, 42]
layer = tf.keras.layers.IntegerLookup(output_mode="multi_hot", vocabulary=vocab)
data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]])
layer(data)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[0., 1., 0., 1., 1.],
       [1., 0., 1., 0., 1.]], dtype=float32)>

In [7]:
#
# Token count output.
# Same layout as multi-hot, but each position holds how many times
# the corresponding index occurs in the row (42 twice -> 2.).
#
vocab = [12, 36, 1138, 42]
layer = tf.keras.layers.IntegerLookup(output_mode="count", vocabulary=vocab)
data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]])
layer(data)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[0., 1., 0., 1., 2.],
       [2., 0., 1., 0., 1.]], dtype=float32)>

In [8]:
#
# TF-IDF output.
# idf_weights supplies one weight per vocabulary entry, in the same
# order as vocab; each output position is the occurrence count times
# that weight (42 appears twice in the first row: 2 * 0.4 = 0.8).
# NOTE(review): the OOV column (7 twice -> 1.0) appears to use the
# mean of the supplied weights — confirm against the Keras docs.
#
vocab = [12, 36, 1138, 42]
idf_weights = [0.25, 0.75, 0.6, 0.4]
layer = tf.keras.layers.IntegerLookup(
    vocabulary=vocab,
    idf_weights=idf_weights,
    output_mode="tf_idf",
)
data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]])
layer(data)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[0.  , 0.25, 0.  , 0.6 , 0.8 ],
       [1.  , 0.  , 0.75, 0.  , 0.4 ]], dtype=float32)>

In [9]:
#
# Inverse lookup.
# With invert=True the layer takes indices and returns the matching
# vocabulary values; index 0 (the OOV slot) comes back as -1.
#
vocab = [12, 36, 1138, 42]
layer = tf.keras.layers.IntegerLookup(invert=True, vocabulary=vocab)
data = tf.constant([[1, 3, 4], [4, 0, 2]])
layer(data)

<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[  12, 1138,   42],
       [  42,   -1,   36]])>