## EmbeddingLayer

In [32]:
import tensorflow as tf
import numpy as np

In [33]:
# A bare Keras Layer, used below only as a container from which the shared
# weight matrix is created via `add_weight`.
layer = tf.keras.layers.Layer()

In [34]:
# Shared weight matrix: one row of length `hidden_size` per vocabulary id,
# initialised from a normal distribution with stddev hidden_size ** -0.5.
vocab_size = 100
hidden_size = 8
# A fixed seed keeps the initial weights -- and every value derived from them
# in the cells below -- identical on each Restart & Run All.
shared_weights = layer.add_weight(
    name="weights",
    shape=[vocab_size, hidden_size],
    initializer=tf.random_normal_initializer(
        mean=0., stddev=hidden_size**-0.5, seed=0))
shared_weights.shape

TensorShape([100, 8])

In [35]:
# Toy batch of ids: 2 sequences of length 3. The id 0 in the first sequence is
# the position that the mask further down zeroes out.
inputs = np.array([[1, 0, 2],
                   [2, 3, 4]], dtype=np.int32)
inputs.shape

(2, 3)

### Embedding

inputs: an integer tensor with shape `[batch_size, length]` (documented as int64; the example below uses int32)

- `embeddings = tf.gather(shared_weights, inputs)`
- `embeddings *= tf.expand_dims(mask, -1)` # zero out the embeddings at positions where the input id is 0
- `embeddings *= hidden_size ** 0.5`

In [36]:
# Row 1 of the weight matrix: the vector the lookup below returns for id 1.
shared_weights[1]

<tf.Tensor: shape=(8,), dtype=float32, numpy=
array([ 0.08372386,  0.05048609,  0.16432548,  0.4777148 ,  0.31101075,
       -0.30477154,  0.03502798, -0.5772347 ], dtype=float32)>

In [37]:
# Row 0 of the weight matrix: the vector for id 0, which the mask further down
# multiplies by zero.
shared_weights[0]

<tf.Tensor: shape=(8,), dtype=float32, numpy=
array([-0.22813846, -0.2596496 , -0.38707775,  0.272889  , -0.03881969,
        0.21126126, -0.42600307, -0.3824733 ], dtype=float32)>

In [38]:
# Look up one row of `shared_weights` per id in `inputs`:
# [batch_size, length] -> [batch_size, length, hidden_size].
embeddings = tf.gather(params=shared_weights, indices=inputs)
embeddings

<tf.Tensor: shape=(2, 3, 8), dtype=float32, numpy=
array([[[ 0.08372386,  0.05048609,  0.16432548,  0.4777148 ,
          0.31101075, -0.30477154,  0.03502798, -0.5772347 ],
        [-0.22813846, -0.2596496 , -0.38707775,  0.272889  ,
         -0.03881969,  0.21126126, -0.42600307, -0.3824733 ],
        [-0.3283202 ,  0.49591866,  0.35914728, -0.25279596,
          0.05099809,  0.06368607,  0.45409086,  0.43724275]],

       [[-0.3283202 ,  0.49591866,  0.35914728, -0.25279596,
          0.05099809,  0.06368607,  0.45409086,  0.43724275],
        [-0.10552198,  0.66613936, -0.05832133, -0.29209968,
          0.5103657 , -0.01987784, -0.10523101,  0.10913604],
        [ 0.06957933,  0.39914945,  0.34985012, -0.42831638,
          0.20860526, -0.25281206,  0.4204362 ,  0.2888742 ]]],
      dtype=float32)>

In [39]:
# 1.0 where the input id is non-zero, 0.0 where it is 0, in the same dtype as
# the embeddings so the two can be multiplied directly.
mask = tf.cast(
    tf.not_equal(x=inputs, y=0),
    dtype=embeddings.dtype,
)
mask

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 0., 1.],
       [1., 1., 1.]], dtype=float32)>

In [40]:
# Append a trailing axis of size 1 so the mask broadcasts over hidden_size.
tf.expand_dims(mask, axis=-1)

<tf.Tensor: shape=(2, 3, 1), dtype=float32, numpy=
array([[[1.],
        [0.],
        [1.]],

       [[1.],
        [1.],
        [1.]]], dtype=float32)>

In [41]:
# Zero the embedding vectors at positions whose input id is 0; all other
# positions are multiplied by 1 and stay unchanged.
embeddings = embeddings * tf.expand_dims(mask, axis=-1)
embeddings

<tf.Tensor: shape=(2, 3, 8), dtype=float32, numpy=
array([[[ 0.08372386,  0.05048609,  0.16432548,  0.4777148 ,
          0.31101075, -0.30477154,  0.03502798, -0.5772347 ],
        [-0.        , -0.        , -0.        ,  0.        ,
         -0.        ,  0.        , -0.        , -0.        ],
        [-0.3283202 ,  0.49591866,  0.35914728, -0.25279596,
          0.05099809,  0.06368607,  0.45409086,  0.43724275]],

       [[-0.3283202 ,  0.49591866,  0.35914728, -0.25279596,
          0.05099809,  0.06368607,  0.45409086,  0.43724275],
        [-0.10552198,  0.66613936, -0.05832133, -0.29209968,
          0.5103657 , -0.01987784, -0.10523101,  0.10913604],
        [ 0.06957933,  0.39914945,  0.34985012, -0.42831638,
          0.20860526, -0.25281206,  0.4204362 ,  0.2888742 ]]],
      dtype=float32)>

In [42]:
# Scale the masked embeddings by sqrt(hidden_size).
# The masked lookup is rebuilt first so that this cell is idempotent: applying
# `embeddings *= ...` to its own previous result would multiply in another
# factor of sqrt(hidden_size) every time the cell is re-run.
embeddings = tf.gather(shared_weights, inputs) * tf.expand_dims(mask, -1)
embeddings *= hidden_size ** 0.5
embeddings

<tf.Tensor: shape=(2, 3, 8), dtype=float32, numpy=
array([[[ 0.23680682,  0.14279622,  0.46478263,  1.3511815 ,
          0.8796712 , -0.86202407,  0.09907408, -1.6326662 ],
        [-0.        , -0.        , -0.        ,  0.        ,
         -0.        ,  0.        , -0.        , -0.        ],
        [-0.92862976,  1.4026698 ,  1.0158219 , -0.71501493,
          0.14424439,  0.18013139,  1.2843629 ,  1.2367092 ]],

       [[-0.92862976,  1.4026698 ,  1.0158219 , -0.71501493,
          0.14424439,  0.18013139,  1.2843629 ,  1.2367092 ],
        [-0.29846123,  1.8841267 , -0.16495763, -0.82618266,
          1.4435322 , -0.05622302, -0.29763824,  0.30868334],
        [ 0.19680007,  1.1289651 ,  0.98952556, -1.2114617 ,
          0.59002477, -0.7150605 ,  1.1891731 ,  0.81705964]]],
      dtype=float32)>

### Linear

inputs: A float32 tensor with shape [batch_size, length, hidden_size]

- `x = tf.reshape(inputs, [-1, hidden_size])`
- `logits = tf.matmul(x, shared_weights, transpose_b=True)`

In [43]:
# Tensor-valued sizes of the two axes of `inputs`.
batch_size, length = tf.shape(inputs)[0], tf.shape(inputs)[1]

In [44]:
# Display the two scalar tensors taken from tf.shape(inputs).
batch_size, length

(<tf.Tensor: shape=(), dtype=int32, numpy=2>,
 <tf.Tensor: shape=(), dtype=int32, numpy=3>)

In [45]:
# Shape of the embedding output that is flattened for the projection below.
embeddings.shape

TensorShape([2, 3, 8])

In [46]:
# Shape of the integer id batch, for comparison with the embedding shape.
inputs.shape

(2, 3)

In [48]:
# Flatten [batch_size, length, hidden_size] to [batch_size * length, hidden_size].
# The `embeddings` tensor computed above is used as the input here.
x = tf.reshape(embeddings, shape=[-1, hidden_size])
x.shape

TensorShape([6, 8])

In [50]:
# Shape of the shared weight matrix that the projection below uses transposed.
shared_weights.shape

TensorShape([100, 8])

In [52]:
# Project onto the vocabulary by reusing the embedding matrix, transposed:
# [batch_size * length, hidden_size] x [hidden_size, vocab_size].
logits = tf.matmul(a=x, b=shared_weights, transpose_b=True)
logits.shape

TensorShape([6, 100])

### FFN-Layer

In [53]:
# Feed-forward network settings.
hidden_size = 8      # int: output dimension of the second (output) dense layer
filter_size = 4      # int: width of the inner (first) dense layer
relu_dropout = 0.5   # float: dropout rate applied during training

In [65]:
# Inner dense layer of the FFN: filter_size units with bias and ReLU activation.
filter_dense_layer = tf.keras.layers.Dense(
    units=filter_size,
    activation=tf.nn.relu,
    use_bias=True,
    name="filter_layer",
)

In [66]:
# Output dense layer of the FFN: hidden_size units with bias, no activation.
output_dense_layer = tf.keras.layers.Dense(
    units=hidden_size,
    use_bias=True,
    name="output_layer",
)

In [76]:
# The FFN below is applied to the embedding output from the section above.
embeddings.shape

TensorShape([2, 3, 8])

In [77]:
# Inner projection with ReLU: the last axis goes from hidden_size to filter_size.
output = filter_dense_layer(embeddings)
output.shape

TensorShape([2, 3, 4])

In [78]:
# training
# Dropout is applied to a freshly computed filter-layer activation rather than
# to whatever `output` currently holds, so re-running this cell cannot stack
# dropout (and its rescaling of the kept units) on an already-dropped tensor.
output = tf.nn.dropout(filter_dense_layer(embeddings), rate=relu_dropout)
output.shape

TensorShape([2, 3, 4])

In [79]:
# evaluate
# No dropout on this path: filter layer followed directly by the output layer,
# which brings the last axis back to hidden_size.
output = output_dense_layer(filter_dense_layer(embeddings))
output.shape

TensorShape([2, 3, 8])

### AttentionLayer