https://keras.io/api/layers/attention_layers/attention/

In [1]:
import tensorflow as tf

In [2]:
from tensorflow import keras

In [3]:
# Symbolic input for the query token ids: int32 sequences whose length
# is left unspecified (None).
query_input = tf.keras.Input(shape=(None,), dtype='int32')

In [4]:
# Symbolic input for the value token ids: int32 sequences whose length
# is left unspecified (None).
value_input = tf.keras.Input(shape=(None,), dtype='int32')

In [5]:
# A single embedding layer applied to both the query and the value inputs:
# ids in [0, 1000) are mapped to 64-dimensional vectors.
token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)

In [6]:
# Embed the query token ids with the embedding layer defined above.
query_embeddings = token_embedding(query_input)

In [7]:
# Embed the value token ids with the same layer, so both inputs share weights.
value_embeddings = token_embedding(value_input)

In [8]:
# Attend from the query embeddings over the value embeddings. Only
# [query, value] is passed; per the Keras Attention docs the value tensor
# is then also used as the key. The result is still a sequence.
query_value_attention_seq = tf.keras.layers.Attention()(
    [query_embeddings, value_embeddings]
)

In [9]:
# Pool the attended sequence down to a single vector per example.
query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
    query_value_attention_seq
)

In [12]:
# Functional model: two inputs (query ids, value ids) and one output,
# the pooled attention vector.
model = tf.keras.Model(
    inputs=[query_input, value_input],
    outputs=query_value_attention,
)

In [13]:
# Print the layer table (output shapes, parameter counts, connectivity).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 64)     64000       ['input_1[0][0]',                
                                                                  'input_2[0][0]']                
                                                                                                  
 attention (Attention)          (None, None, 64)     0           ['embedding[0][0]',          

# test with data

In [14]:
import numpy as np

In [21]:
# Inspect the model's input tensors to confirm the order and dtype to feed.
model.inputs

[<KerasTensor: shape=(None, None) dtype=int32 (created by layer 'input_1')>,
 <KerasTensor: shape=(None, None) dtype=int32 (created by layer 'input_2')>]

In [27]:
# Random stand-in for the query batch: a 5 x 10 integer array with
# values drawn from [0, 100).
query_data = np.random.randint(low=0, high=100, size=(5, 10))
query_data

array([[97, 47,  9, 54, 94, 42, 45, 39, 89, 81],
       [ 4, 45, 67, 94, 25, 18, 13, 90, 93, 57],
       [13, 21, 78, 59, 78, 83, 50, 51, 36, 81],
       [29, 48,  4, 39, 31, 96, 39,  5, 40, 91],
       [91,  4, 94,  2, 76, 15, 96, 58, 38, 40]])

In [32]:
# Random stand-in for the value batch: a 5 x 10 integer array with
# values drawn from [0, 100).
value_data = np.random.randint(low=0, high=100, size=(5, 10))
value_data

array([[65, 42, 68, 42, 64, 26, 31, 49,  7, 21],
       [48, 30, 75, 52, 17, 66, 49, 87, 35, 90],
       [88, 82, 74, 34, 81, 46, 25, 86, 55, 60],
       [86, 21, 93, 95, 23, 13, 82,  7, 87, 66],
       [17, 20, 18,  7, 58, 72, 66, 51, 23, 55]])

In [33]:
# Model inputs in the same order as the model's inputs list: query, then value.
x = [query_data, value_data]

In [35]:
# Run a forward pass on the sample batch; the embedding weights are untrained.
y = model.predict(x)



In [36]:
# Display the predicted values.
y

array([[ 6.89369347e-03,  8.79287533e-03, -6.83840015e-04,
        -1.77517645e-02,  4.08895407e-03,  2.56739883e-03,
        -9.00619198e-03, -6.93041924e-03, -2.24864506e-03,
         1.13645298e-02,  1.57058500e-02,  6.11115899e-03,
        -4.28590749e-04,  1.09027857e-02, -1.00573357e-02,
         1.21797500e-02, -1.80692188e-02,  9.84489452e-05,
        -1.18698236e-02,  1.17561659e-02,  9.85429343e-03,
        -3.61977005e-03,  8.21386639e-04, -2.50515970e-03,
         1.50050619e-03, -3.10167030e-04, -5.59354341e-03,
         9.72165633e-03,  1.23127857e-02, -6.77626405e-04,
        -1.15508754e-02,  2.25424441e-03, -7.75272120e-03,
         8.43576435e-03,  1.30589101e-02, -4.46171174e-03,
        -6.59923162e-03, -1.30670727e-03, -3.07497033e-03,
         2.50749802e-03,  1.59555022e-02, -1.24135660e-02,
         6.79527503e-03, -3.45776929e-03,  1.66326426e-02,
         1.88657548e-02,  4.42145485e-03, -2.45717214e-03,
         2.90108100e-03,  4.69363574e-03, -2.03316449e-0

In [37]:
# Check the prediction shape: one row per example, one column per embedding dimension.
y.shape

(5, 64)

# end