In [1]:
from keras.datasets import imdb
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences

In [5]:
# Hyperparameters for the data pipeline: `vocab_size` is passed to the loader
# as `num_words`, `max_len` is the target length handed to `pad_sequences`.
vocab_size = 10_000
max_len = 500

# Load the IMDB train/test splits.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Report the longest and the mean review length of the training split;
# these numbers motivate the choice of `max_len`.
print(max(len(review) for review in X_train))
print(sum(len(review) for review in X_train) / len(X_train))

# Run both splits through `pad_sequences` with the same `maxlen` so that
# train and test inputs share one sequence length.
X_train_padded, X_test_padded = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

2494
238.71364


# Bahdanau Attention

In [8]:
import tensorflow as tf
from keras import Model
from keras.layers import Dense

In [6]:
class BahdanauAttention(Model):
    """Additive attention that pools a sequence into one context vector.

    A score is computed for every time step as ``V(tanh(W1(values) +
    W2(query)))``, the scores are normalised with a softmax over the time
    axis, and the weighted ``values`` are summed over that axis.

    Args:
        units: Output width of the two projection layers ``W1`` and ``W2``.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = Dense(units)  # projects `values`
        self.W2 = Dense(units)  # projects `query`
        self.V = Dense(1)       # collapses each projected step to one score

    def call(self, values, query):
        """Apply attention to ``values`` conditioned on ``query``.

        Args:
            values: Sequence to attend over; assumed to be shaped
                (batch_size, max_length, hidden_size) -- TODO confirm
                against the caller.
            query: Conditioning state; assumed to be shaped
                (batch_size, hidden_size) -- TODO confirm against the caller.

        Returns:
            A ``(context_vector, attention_weights)`` tuple: the weighted sum
            of ``values`` over axis 1, and the softmax weights over axis 1.
        """
        # Insert a length-1 axis at position 1 so the projected query can be
        # broadcast-added to the projected values below.
        query_with_time_axis = tf.expand_dims(query, 1)

        # Additive combination of both projections; last axis has `units`
        # entries before `self.V` reduces it to a single score per step.
        projected = self.W1(values) + self.W2(query_with_time_axis)
        score = self.V(tf.nn.tanh(projected))

        # Normalise over axis 1 (the time axis under the shape assumption
        # above), so the weights of one example sum to 1 across its steps.
        attention_weights = tf.nn.softmax(score, axis=1)

        # Weight every step and sum axis 1 away to obtain the context vector.
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)

        return context_vector, attention_weights

### super(BahdanauAttention, self).__init__()
 - super() 라는 함수는 super class 즉, 부모 클래스의 임시적인 객체를 반환하여 부모클래스의 메소드를 사용할 수 있게 하는 것.
 - ``super(BahdanauAttention, self)`` : returns a temporary object of the superclass, which in this case is ``tf.keras.Model``, as ``BahdanauAttention`` is a subclass of tf.keras.Model.
 - ``super(BahdanauAttention, self).__init__()`` :  calls the constructor of the superclass ``(tf.keras.Model)``. This is necessary to ensure that the initialization code in the base class ``(tf.keras.Model)`` is executed.
 - In summary, the line ``super(BahdanauAttention, self).__init__()`` in the ``BahdanauAttention`` class ensures that the class inherits and initializes all necessary properties and methods from its parent class ``tf.keras.Model``.

### call
 - The ``call`` method runs whenever you call an instance of the ``BahdanauAttention`` class like a function, e.g. ``attention(values, query)``.
 - By subclassing the `Model` class: in that case, you should define your
    layers in `__init__()` and you should implement the model's forward pass
    in `call()`. (https://www.tensorflow.org/api_docs/python/tf/keras/Model)

### tf.keras.layers.Dense
 - Dense implements the operation ``output = activation(dot(input, kernel) + bias)``, where the terms below are all attributes of Dense:
  - ``activation`` is the element-wise activation function passed as the activation argument.
  - ``kernel`` is a weights matrix created by the layer.
  - ``bias`` is a bias vector created by the layer (only applicable if use_bias is True). 
 - If the input to the layer has a rank greater than 2, then Dense computes the dot product between the inputs and the kernel along the last axis of the inputs and axis 0 of the kernel (using tf.tensordot). For example, if input has dimensions (batch_size, d0, d1), then we create a kernel with shape (d1, units), and the kernel operates along axis 2 of the input, on every sub-tensor of shape (1, 1, d1) (there are batch_size * d0 such sub-tensors). The output in this case will have shape (batch_size, d0, units). (https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense?hl=en)