In [None]:
import math
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
from keras import initializers, layers
import keras.backend as K
import sys

class ContextGating(layers.Layer):
    """Keras layer that rescales its input by a learned sigmoid gate.

    The gate is ``sigmoid(dot(inputs, W) + b)``, where ``W`` is a square
    matrix sized by the last input dimension and ``b`` is a bias vector of the
    same width. The layer output is the element-wise product of the inputs
    and that gate, so the output shape equals the input shape.
    """

    def __init__(self, **kwargs):
        super(ContextGating, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the gating matrix and bias from the last input dimension."""
        width = input_shape[-1]
        # Both parameters use a normal initializer whose stddev shrinks with
        # the feature width.
        spread = 1 / math.sqrt(width)

        self.gating_weights = self.add_weight(
            name='kernel_W1',
            shape=(width, width),
            initializer=tf.random_normal_initializer(stddev=spread),
            trainable=True)
        self.gating_biases = self.add_weight(
            name='kernel_B1',
            shape=(width,),
            initializer=tf.random_normal_initializer(stddev=spread),
            trainable=True)

        super(ContextGating, self).build(input_shape)

    def call(self, inputs):
        """Return ``inputs`` multiplied element-wise by their sigmoid gates."""
        pre_gate = K.dot(inputs, self.gating_weights) + self.gating_biases
        return tf.multiply(inputs, tf.sigmoid(pre_gate))

    def compute_output_shape(self, input_shape):
        """Gating does not change the shape of the tensor."""
        return tuple(input_shape)

class NetVLAD(layers.Layer):
    """Keras layer implementing a NetVLAD pooling block.

    Every sample descriptor is softly assigned to ``cluster_size`` clusters.
    The residuals between the descriptors and the learned cluster centres are
    accumulated per cluster, L2-normalised, flattened and finally projected
    to ``output_dim`` features.

    Args:
        feature_size: size of each input descriptor.
        max_samples: number of descriptors pooled together per example.
        cluster_size: number of clusters.
        output_dim: size of the pooled output vector.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, feature_size, max_samples, cluster_size, output_dim, **kwargs):
        self.feature_size = feature_size
        self.max_samples = max_samples
        self.output_dim = output_dim
        self.cluster_size = cluster_size
        super(NetVLAD, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the assignment, cluster-centre and projection weights."""
        # Soft-assignment projection: descriptor -> cluster logits.
        self.cluster_weights = self.add_weight(name='kernel_W1',
                                      shape=(self.feature_size, self.cluster_size),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.feature_size)),
                                      trainable=True)
        self.cluster_biases = self.add_weight(name='kernel_B1',
                                      shape=(self.cluster_size,),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.feature_size)),
                                      trainable=True)
        # Cluster centres; the leading axis of size 1 broadcasts over the batch.
        self.cluster_weights2 = self.add_weight(name='kernel_W2',
                                      shape=(1, self.feature_size, self.cluster_size),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.feature_size)),
                                      trainable=True)
        # Final projection of the flattened VLAD descriptor to ``output_dim``.
        self.hidden1_weights = self.add_weight(name='kernel_H1',
                                      shape=(self.cluster_size * self.feature_size, self.output_dim),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.cluster_size)),
                                      trainable=True)
        super(NetVLAD, self).build(input_shape)

    def call(self, reshaped_input):
        """Forward pass of a NetVLAD block.

        Args:
            reshaped_input: tensor of shape
                'batch_size*max_samples' x 'feature_size'. If your input has
                the form 'batch_size' x 'max_samples' x 'feature_size',
                reshape it first with:
                reshaped_input = tf.reshape(input, [-1, feature_size])

        Returns:
            vlad: the pooled vector of size 'batch_size' x 'output_dim'.
        """
        # Soft assignment of each descriptor to the clusters.
        activation = K.dot(reshaped_input, self.cluster_weights)
        activation += self.cluster_biases
        activation = tf.nn.softmax(activation)
        activation = tf.reshape(activation,
                                [-1, self.max_samples, self.cluster_size])

        # Total assignment mass per cluster: (batch, 1, cluster_size).
        # K.sum is used instead of tf.reduce_sum(..., keep_dims=True) because
        # the `keep_dims` keyword is deprecated and removed in later
        # TensorFlow releases; the backend call is the same reduction.
        a_sum = K.sum(activation, axis=-2, keepdims=True)
        # Centres weighted by assignment mass: (batch, feature_size, cluster_size).
        a = tf.multiply(a_sum, self.cluster_weights2)

        # (batch, cluster_size, max_samples)
        activation = tf.transpose(activation, perm=[0, 2, 1])

        reshaped_input = tf.reshape(reshaped_input,
                                    [-1, self.max_samples, self.feature_size])
        # Assignment-weighted sum of descriptors: (batch, cluster_size, feature_size).
        vlad = tf.matmul(activation, reshaped_input)
        # (batch, feature_size, cluster_size), matching the layout of `a`.
        vlad = tf.transpose(vlad, perm=[0, 2, 1])
        vlad = tf.subtract(vlad, a)

        # Normalise along the feature axis for each cluster, then flatten and
        # normalise the whole descriptor.
        vlad = tf.nn.l2_normalize(vlad, 1)
        vlad = tf.reshape(vlad, [-1, self.cluster_size * self.feature_size])
        vlad = tf.nn.l2_normalize(vlad, 1)

        # Project to the requested output width.
        vlad = K.dot(vlad, self.hidden1_weights)
        return vlad

    def compute_output_shape(self, input_shape):
        """Return ``(None, output_dim)``; the batch size is left unspecified."""
        return (None, self.output_dim)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Without this, a model containing the layer cannot be rebuilt from its
        saved config because the required constructor arguments are missing.
        """
        config = {
            'feature_size': self.feature_size,
            'max_samples': self.max_samples,
            'cluster_size': self.cluster_size,
            'output_dim': self.output_dim,
        }
        base_config = super(NetVLAD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))




            