<a href="https://colab.research.google.com/github/jojivk/The-Ramp/blob/master/masked_lm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

@tf.keras.utils.register_keras_serializable(package='keras_nlp')
class MaskedLM(tf.keras.layers.Layer):
  """Masked language-model head that reuses an embedding table as its output projection.

  The layer gathers the hidden states at the masked positions, passes them
  through a dense transform followed by layer normalization, and projects the
  result onto the vocabulary by multiplying with the transposed embedding
  table and adding a learned bias.

  Args:
    embedding_table: 2-D weight of shape `(vocab_size, hidden_size)`; it is
      used (transposed) as the output projection matrix.
    activation: Activation passed to the dense transform layer.
    initializer: Kernel initializer for the dense transform layer; anything
      accepted by `tf.keras.initializers.get`.
    output: Either `'logits'` (return raw logits) or `'predictions'`
      (return `log_softmax` of the logits).
    name: Optional layer name.
    **kwargs: Forwarded to `tf.keras.layers.Layer`.

  Raises:
    ValueError: If `output` is neither `'logits'` nor `'predictions'`.
  """

  def __init__(self,
               embedding_table,
               activation=None,
               initializer='glorot_uniform',
               output='logits',
               name=None,
               **kwargs):
    # Earlier revisions of this signature misspelled these two keyword names;
    # keep accepting the old spellings so keyword callers do not break.
    if 'activaton' in kwargs:
      activation = kwargs.pop('activaton')
    if 'inititalizer' in kwargs:
      initializer = kwargs.pop('inititalizer')

    super().__init__(name=name, **kwargs)
    self.embedding_table = embedding_table
    self.activation = activation
    self.initializer = tf.keras.initializers.get(initializer)

    if output not in ('predictions', 'logits'):
      raise ValueError(
          ('Unknown `output` value "%s". `output` can be either "logits" or '
           '"predictions"') % output
      )
    # Must be set outside the validation branch: `call` reads it on every
    # invocation.
    self._output_type = output

  def build(self, input_shape):
    """Creates the dense transform, the layer norm and the output bias."""
    self._vocab_size, hidden_size = self.embedding_table.shape
    self.dense = tf.keras.layers.Dense(
        hidden_size,
        activation=self.activation,
        kernel_initializer=self.initializer,
        name='transform/dense'
    )
    self.layer_norm = tf.keras.layers.LayerNormalization(
        axis=-1, epsilon=1e-12, name='transform/layerNorm'
    )
    # One bias entry per vocabulary item, added after the projection onto the
    # embedding table.
    self.bias = self.add_weight(
        name='output_bias/bias',
        shape=(self._vocab_size,),
        initializer='zeros',
        trainable=True
    )

    super().build(input_shape)

  def call(self, sequence_data, masked_positions):
    """Computes vocabulary scores for every masked position.

    Args:
      sequence_data: Rank-3 tensor `(batch, seq_length, width)` of hidden
        states.
      masked_positions: Integer tensor `(batch, num_predictions)` holding,
        for each example, the sequence indices to predict.

    Returns:
      Tensor of shape `(batch, num_predictions, vocab_size)` containing logits
      when the layer was built with `output='logits'`, otherwise the
      `log_softmax` of those logits.
    """
    masked_lm_input = self._gather_indexes(sequence_data, masked_positions)
    lm_data = self.dense(masked_lm_input)
    lm_data = self.layer_norm(lm_data)
    # Project back onto the vocabulary with the (transposed) embedding table.
    lm_data = tf.matmul(lm_data, self.embedding_table, transpose_b=True)
    logits = tf.nn.bias_add(lm_data, self.bias)
    # Prefer the static dimension; fall back to the dynamic one when it is
    # unknown at graph-construction time.
    masked_positions_length = (masked_positions.shape.as_list()[1] or
                               tf.shape(masked_positions)[1])
    logits = tf.reshape(logits,
                        [-1, masked_positions_length, self._vocab_size])
    if self._output_type == 'logits':
      return logits
    return tf.nn.log_softmax(logits)

  def get_config(self):
    """Always raises: the layer holds a shared embedding table.

    Raises:
      NotImplementedError: Unconditionally.
    """
    raise NotImplementedError('MaskedLM cannot be directly serialized because '
                              'it has variable sharing logic.')

  def _gather_indexes(self, sequence_tensor, positions):
    """Gathers the vectors at the given per-example positions.

    Args:
      sequence_tensor: Rank-3 tensor `(batch, seq_length, width)`.
      positions: Integer tensor `(batch, num_predictions)` of indices into
        the `seq_length` axis.

    Returns:
      Tensor of shape `(batch * num_predictions, width)`.
    """
    sequence_shape = tf.shape(sequence_tensor)
    batch_size, seq_length = sequence_shape[0], sequence_shape[1]
    width = sequence_tensor.shape.as_list()[2] or sequence_shape[2]

    # Offset of each example's first token inside the flattened
    # (batch * seq_length) axis.
    flat_offsets = tf.reshape(
        tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
    # The offsets are int32; cast so int64 positions do not raise a dtype
    # mismatch on the addition.
    positions = tf.cast(positions, tf.int32)
    flat_positions = tf.reshape(positions + flat_offsets, [-1])
    flat_sequence_tensor = tf.reshape(sequence_tensor,
                                      [batch_size * seq_length, width])
    output_tensor = tf.gather(flat_sequence_tensor, flat_positions)

    return output_tensor
    












