In [32]:
# Create the file for the next task (or just refresh its timestamp if it
# already exists), then mark every .py file in the working directory as
# executable.
!touch 1-self_attention.py
!chmod +x *.py

In [30]:
import tensorflow as tf

class RNNEncoder(tf.keras.layers.Layer):
    """ Encoder for machine translation: an embedding feeding a GRU """

    def __init__(self, vocab, embedding, units, batch):
        """
        Store the configuration and create the sub-layers.

        @vocab: integer, size of the input vocabulary
        @embedding: integer, dimensionality of the embedding vectors
        @units: integer, number of hidden units in the GRU
        @batch: integer, batch size
        """
        super().__init__()

        # Plain configuration values kept as public attributes.
        self.vocab = vocab
        self.units = units
        self.batch = batch

        # Turns token ids into vectors of size `embedding`.
        self.embedding = tf.keras.layers.Embedding(vocab, embedding)

        # Both return flags are enabled, so calling the GRU yields the
        # per-step outputs together with the final hidden state.
        self.gru = tf.keras.layers.GRU(
            units,
            return_sequences=True,
            return_state=True,
            kernel_initializer="glorot_uniform",
            recurrent_initializer="glorot_uniform",
        )

    def initialize_hidden_state(self):
        """
        Initializes the hidden state for the RNN cell.

        Returns: a tensor of zeros of shape (batch, units)
        """
        state_shape = (self.batch, self.units)
        return tf.zeros(state_shape)

    def call(self, x, initial):
        """
        Embeds the inputs and runs them through the GRU.

        @x: tensor of word indices fed to the embedding layer
        @initial: tensor used as the initial hidden state of the GRU
        Returns: whatever the GRU returns, i.e. the full sequence of
                 outputs followed by the last hidden state
        """
        embedded = self.embedding(x)
        return self.gru(embedded, initial_state=initial)

In [31]:
#!/usr/bin/env python3

import numpy as np
import tensorflow as tf
#RNNEncoder = __import__('0-rnn_encoder').RNNEncoder

# Positional arguments are (vocab, embedding, units, batch).
encoder = RNNEncoder(1024, 128, 256, 32)
print(encoder.batch)
print(encoder.units)
print(type(encoder.embedding))
print(type(encoder.gru))

# Zero-filled starting state of shape (batch, units).
initial = encoder.initialize_hidden_state()
print(initial)
# 320 random ids drawn from [0, 1024), arranged as 32 rows of 10 ids each.
# NOTE(review): no random seed is set, so the ids differ between runs.
x = tf.convert_to_tensor(np.random.choice(1024, 320).reshape((32, 10)))
# The encoder call hands back two values: the per-step outputs and the
# final hidden state.
outputs, hidden = encoder(x, initial)
print(outputs)
print(hidden)

32
256
<class 'tensorflow.python.keras.layers.embeddings.Embedding'>
<class 'tensorflow.python.keras.layers.recurrent.GRU'>
Tensor("zeros_14:0", shape=(32, 256), dtype=float32)
Tensor("rnn_encoder_5/gru_14/transpose_1:0", shape=(32, 10, 256), dtype=float32)
Tensor("rnn_encoder_5/gru_14/while/Exit_3:0", shape=(32, 256), dtype=float32)


In [37]:
import tensorflow as tf

class SelfAttention(tf.keras.layers.Layer):
    """ Additive attention layer for machine translation """

    def __init__(self, units):
        """
        Create the three dense layers of the alignment model.

        @units: integer, number of hidden units in the alignment model
        """
        super().__init__()
        # W projects the previous decoder hidden state to `units` features.
        self.W = tf.keras.layers.Dense(units)
        # U projects the encoder hidden states to `units` features.
        self.U = tf.keras.layers.Dense(units)
        # V reduces tanh(W(...) + U(...)) to a single score per position.
        self.V = tf.keras.layers.Dense(1)

    def call(self, s_prev, hidden_states):
        """
        Compute the context vector and the attention weights.

        @s_prev: tensor of shape (batch, units) holding the previous
                 decoder hidden state
        @hidden_states: tensor of shape (batch, input_seq_len, units)
                        holding the outputs of the encoder
        Returns:
                context: tensor of shape (batch, units), the context
                         vector for the decoder
                weights: tensor of shape (batch, input_seq_len, 1), the
                         attention weights
        """
        # Insert a length-1 axis at position 1 so the projected decoder
        # state broadcasts across every encoder position when summed.
        query = tf.expand_dims(s_prev, axis=1)

        projected_query = self.W(query)
        projected_states = self.U(hidden_states)
        scores = self.V(tf.nn.tanh(projected_query + projected_states))

        # Normalise the scores along axis 1 (the input positions).
        weights = tf.nn.softmax(scores, axis=1)

        # Weighted sum of the encoder outputs over the same axis.
        context = tf.reduce_sum(weights * hidden_states, axis=1)
        return context, weights




In [38]:
#!/usr/bin/env python3

import numpy as np
import tensorflow as tf
#SelfAttention = __import__('1-self_attention').SelfAttention

# Attention layer whose alignment model uses 256 hidden units.
attention = SelfAttention(256)
print(attention.W)
print(attention.U)
print(attention.V)
# Random stand-ins for the previous decoder state, shape (32, 256), and the
# encoder outputs, shape (32, 10, 256).
# NOTE(review): no random seed is set, so the values differ between runs.
# NOTE(review): `preferred_dtype` is the TF 1.x keyword; TF 2.x calls it
# `dtype_hint` -- confirm which TensorFlow version this notebook targets.
s_prev = tf.convert_to_tensor(np.random.uniform(size=(32, 256)), preferred_dtype='float32')
hidden_states = tf.convert_to_tensor(np.random.uniform(size=(32, 10, 256)), preferred_dtype='float32')
# The layer returns the context vector and the attention weights.
context, weights = attention(s_prev, hidden_states)
print(context)
print(weights)

<tensorflow.python.keras.layers.core.Dense object at 0x7f5a0ffc6ba8>
<tensorflow.python.keras.layers.core.Dense object at 0x7f5a0ffc69e8>
<tensorflow.python.keras.layers.core.Dense object at 0x7f5a0f267390>
Tensor("self_attention_1/Sum:0", shape=(32, 256), dtype=float32)
Tensor("self_attention_1/transpose_1:0", shape=(32, 10, 1), dtype=float32)
