https://keras.io/api/layers/attention_layers/additive_attention/

In [1]:
import tensorflow as tf

In [2]:
from tensorflow import keras

In [3]:
query_input = tf.keras.layers.Input(
    shape = (100,),
    dtype = 'int32',
    )

In [4]:
value_input = tf.keras.layers.Input(
    shape = (100,),
    dtype = 'int32',
    )

In [5]:
token_embedding = tf.keras.layers.Embedding(
    input_dim = 10000,
    output_dim = 20,
    )

In [6]:
query_embeddings = token_embedding(
    query_input,
    )

In [7]:
value_embeddings = token_embedding(
    value_input,
    )

# attention layer

In [8]:
query_value_attention_seq = tf.keras.layers.AdditiveAttention()(
    [
        query_embeddings,
        value_embeddings,        
    ]
    )

In [9]:
query_value_attention_seq

<KerasTensor: shape=(None, 100, 20) dtype=float32 (created by layer 'additive_attention')>

In [10]:
output_layer = tf.keras.layers.Concatenate()(
    [
        query_embeddings,
        query_value_attention_seq,
    ]
    )

In [11]:
model = tf.keras.Model(
    inputs = [
        query_input,
        value_input,
    ],
    outputs = output_layer,
    )

In [12]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 100, 20)      200000      ['input_1[0][0]',                
                                                                  'input_2[0][0]']                
                                                                                                  
 additive_attention (AdditiveAt  (None, 100, 20)     20          ['embedding[0][0]',          

# test with dat

In [13]:
import numpy as np

In [14]:
model.inputs

[<KerasTensor: shape=(None, 100) dtype=int32 (created by layer 'input_1')>,
 <KerasTensor: shape=(None, 100) dtype=int32 (created by layer 'input_2')>]

In [15]:
query_x = np.random.rand(
    138, 100
    )
query_x.shape

(138, 100)

In [16]:
value_x = np.random.rand(
    138, 100
    )
value_x.shape

(138, 100)

In [17]:
x = [
    query_x,
    value_x,
]

In [18]:
y = model.predict(x)



In [19]:
y[0].shape

(100, 40)

In [20]:
y[1].shape

(100, 40)

In [21]:
y[1][0]

array([ 0.04045716,  0.0343726 ,  0.01133993,  0.02722334,  0.01381728,
        0.00960364,  0.01077826,  0.03809568,  0.02761099,  0.04245938,
       -0.02111628, -0.00386981,  0.03077605, -0.04683714, -0.01664792,
        0.02786559,  0.0355078 , -0.01177136,  0.01471904,  0.00146531,
        0.04047493,  0.03437011,  0.01133969,  0.02722749,  0.01381977,
        0.00960745,  0.01078263,  0.03809407,  0.02762427,  0.042459  ,
       -0.02112268, -0.00387083,  0.0307835 , -0.04685447, -0.01665088,
        0.02786847,  0.03553004, -0.01177467,  0.01472022,  0.00146516],
      dtype=float32)

# end