In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, BatchNormalization, Activation, Dropout, Dense, Embedding
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import zeros, glorot_normal, glorot_uniform

In [2]:
class Linear(Layer):
    """Linear (first-order) logit from sparse embeddings and dense values.

    Args:
        use_bias: When True, a single zero-initialised bias weight is added
            to the logit.
        reg_l2: L2 regularisation factor for the kernel applied to the dense
            values.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, use_bias=False, reg_l2=0.0, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.use_bias = use_bias
        self.reg_l2 = reg_l2

    def build(self, input_shape):
        # The Dense kernel has no bias of its own; the optional bias is a
        # separate weight created below.
        self.dense = Dense(1, use_bias=False, kernel_regularizer=l2(self.reg_l2))

        if self.use_bias:
            self.bias = self.add_weight(name='linear_bias', shape=(1,), initializer=zeros())
        super(Linear, self).build(input_shape)

    def call(self, inputs):
        """Combine both parts of the input into one logit.

        Args:
            inputs: A pair ``[sparse_emb, dense_value]`` laid out as
                ``[batch_size * feat_num * emb_size, batch_size * feat_num]``.

        Returns:
            ``reduce_sum(sparse_emb, axis=1) + Dense(1)(dense_value)``, plus
            the bias when ``use_bias`` is set.
        """
        sparse_emb, dense_value = inputs
        # NOTE(review): the sum below keeps the emb_size axis, so the declared
        # (None, 1) output shape presumably relies on emb_size == 1 - confirm.
        linear_logit = tf.reduce_sum(sparse_emb, axis=1) + self.dense(dense_value)
        if self.use_bias:
            linear_logit += self.bias

        return linear_logit

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(Linear, self).get_config()
        config.update({'use_bias': self.use_bias, 'reg_l2': self.reg_l2})
        return config

In [3]:
class FM(Layer):
    """Second-order factorization-machine interaction term.

    Takes a ``batch_size * feat_num * emb_size`` tensor and reduces it to a
    single logit per sample.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, inputs):
        """Return 0.5 * sum_k[(sum_i v_ik)^2 - sum_i v_ik^2], shape kept as (batch, 1)."""
        field_sum = tf.reduce_sum(inputs, axis=1)
        squared_field_sum = tf.square(field_sum)
        summed_squares = tf.reduce_sum(tf.multiply(inputs, inputs), axis=1)

        interaction = squared_field_sum - summed_squares
        return 0.5 * tf.reduce_sum(interaction, axis=1, keepdims=True)

    def compute_output_shape(self, input_shape):
        return (None, 1)

In [4]:
class AFMLayer(Layer):
    """Attention-weighted pooling of pairwise embedding interactions.

    Args:
        attention_factor: Width of the hidden attention projection.
        reg_l2: L2 regularisation factor applied to ``attention_W``.
        dropout_rate: Dropout rate applied to the pooled interaction vector.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, attention_factor=4, reg_l2=0.0, dropout_rate=0.0, **kwargs):
        super(AFMLayer, self).__init__(**kwargs)
        self.attention_factor = attention_factor
        self.reg_l2 = reg_l2
        self.dropout_rate = dropout_rate

    def build(self, input_shape):
        emb_size = input_shape[-1]
        # Attention network: relu(x W + b) followed by a projection onto h.
        self.attention_W = self.add_weight(name="attention_W",
                                           shape=(emb_size, self.attention_factor),
                                           initializer=glorot_normal(),
                                           regularizer=l2(self.reg_l2))
        self.attention_b = self.add_weight(name="attention_b",
                                           shape=(self.attention_factor,),
                                           initializer=zeros())
        self.projection_h = self.add_weight(name="projection_h",
                                            shape=(self.attention_factor, 1),
                                            initializer=glorot_normal())
        # Final projection of the pooled interaction vector to one logit.
        self.projection_p = self.add_weight(name="projection_p",
                                            shape=(emb_size, 1),
                                            initializer=glorot_normal())
        self.dropout = Dropout(self.dropout_rate)
        super(AFMLayer, self).build(input_shape)

    def call(self, inputs, training=None):
        """Compute the AFM logit.

        Args:
            inputs: Rank-3 tensor indexed as (batch, field, embedding); the
                field dimension must be statically known.
            training: Keras training flag, forwarded to the dropout layer.

        Returns:
            Tensor with a trailing dimension of 1 holding the logit.
        """
        field_size = inputs.shape[1]
        # Element-wise product for every unordered field pair (i < j),
        # stacked on a new axis 1.
        attention_inputs = tf.stack(
            [tf.multiply(inputs[:, i], inputs[:, j])
             for i in range(field_size)
             for j in range(i + 1, field_size)],
            axis=1)

        attention_temp = tf.nn.relu(tf.nn.bias_add(
            tf.tensordot(attention_inputs, self.attention_W, axes=(-1, 0)), self.attention_b))
        # Softmax over the pair axis so the scores of one sample sum to 1.
        attention_score = tf.nn.softmax(
            tf.tensordot(attention_temp, self.projection_h, axes=(-1, 0)), axis=1)
        attention_output = tf.reduce_sum(tf.multiply(attention_inputs, attention_score), axis=1)
        attention_output = self.dropout(attention_output, training=training)

        afm_logit = tf.tensordot(attention_output, self.projection_p, axes=(-1, 0))
        return afm_logit

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(AFMLayer, self).get_config()
        config.update({'attention_factor': self.attention_factor,
                       'reg_l2': self.reg_l2,
                       'dropout_rate': self.dropout_rate})
        return config

In [5]:
class BiInteractionPooling(Layer):
    """Bi-interaction pooling: pairwise interactions pooled into one vector.

    Takes a ``batch_size * feat_num * emb_size`` tensor and removes the
    ``feat_num`` axis, keeping the embedding axis.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, inputs):
        """Return 0.5 * [(sum_i v_i)^2 - sum_i v_i^2], computed element-wise."""
        field_sum = tf.reduce_sum(inputs, axis=1)
        squared_field_sum = tf.square(field_sum)
        summed_squares = tf.reduce_sum(tf.multiply(inputs, inputs), axis=1)

        return 0.5 * (squared_field_sum - summed_squares)

    def compute_output_shape(self, input_shape):
        return (None, input_shape[-1])

In [6]:
class FFMLayer(Layer):
    """Field-aware factorization-machine interaction term.

    Args:
        field_size: Number of fields (columns of the index input).
        feat_size: Vocabulary size of every embedding table.
        emb_size: Width of each latent vector.
        reg_l2: L2 regularisation factor applied to the embedding tables.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, field_size, feat_size, emb_size=4, reg_l2=0.0, **kwargs):
        super(FFMLayer, self).__init__(**kwargs)
        self.field_size = field_size
        self.feat_size = feat_size
        self.emb_size = emb_size
        self.reg_l2 = reg_l2

    def build(self, input_shape):
        # One embedding table per field. A list is used instead of an
        # int-keyed dict because TensorFlow's attribute tracking does not
        # checkpoint layers held under non-string dict keys; ``self.V[i]``
        # indexing is unaffected.
        self.V = [Embedding(self.feat_size, self.emb_size,
                            embeddings_regularizer=l2(self.reg_l2))
                  for _ in range(self.field_size)]
        super(FFMLayer, self).build(input_shape)

    def call(self, inputs):
        """Sum the field-aware dot products over all field pairs.

        Args:
            inputs: Feature indices laid out as ``batch_size * feat_num``.

        Returns:
            Sum over every pair (i < j) of the dot product between the
            feature in column i looked up in table j and the feature in
            column j looked up in table i, kept as a trailing dimension of 1.
        """
        feat_index = inputs
        ffm_logit = []
        for i in range(self.field_size):
            for j in range(i + 1, self.field_size):
                vi_fj = self.V[j](feat_index[:, i])
                vj_fi = self.V[i](feat_index[:, j])
                w = tf.reduce_sum(tf.multiply(vi_fj, vj_fi), axis=-1, keepdims=True)
                ffm_logit.append(w)

        # The list is implicitly stacked on axis 0 and then summed over it.
        ffm_logit = tf.reduce_sum(ffm_logit, axis=0)
        return ffm_logit

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(FFMLayer, self).get_config()
        config.update({'field_size': self.field_size,
                       'feat_size': self.feat_size,
                       'emb_size': self.emb_size,
                       'reg_l2': self.reg_l2})
        return config

In [7]:
class DNN(Layer):
    """Stack of fully connected layers: matmul + bias, optional BN, ReLU, dropout.

    Args:
        hidden_units: Output width of each layer, in order.
        reg_l2: L2 regularisation factor applied to every kernel.
        dropout_rate: Dropout rate applied after each activation.
        use_bn: When True, batch normalisation is applied before the activation.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, hidden_units=[128, 128], reg_l2=0.0, dropout_rate=0.0, use_bn=False, **kwargs):
        super(DNN, self).__init__(**kwargs)
        # Copy so the shared default list (or a caller's list) is never aliased.
        self.hidden_units = list(hidden_units)
        self.reg_l2 = reg_l2
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn

    def build(self, input_shape):
        input_size = input_shape[-1]
        # layer_dims[i] -> layer_dims[i + 1] is the shape of kernel i.
        layer_dims = [input_size] + list(self.hidden_units)
        num_layers = len(self.hidden_units)
        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(layer_dims[i], layer_dims[i + 1]),
                                        initializer=glorot_uniform(),
                                        regularizer=l2(self.reg_l2),
                                        ) for i in range(num_layers)]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.hidden_units[i],),
                                     initializer=zeros(),
                                     ) for i in range(num_layers)]
        if self.use_bn:
            self.bn_layers = [BatchNormalization() for _ in range(num_layers)]
        self.activation_layers = [Activation('relu') for _ in range(num_layers)]
        self.dropout_layers = [Dropout(self.dropout_rate) for _ in range(num_layers)]

        super(DNN, self).build(input_shape)

    def call(self, inputs, training=None):
        """Run the input through every hidden layer.

        Args:
            inputs: Rank-2 tensor laid out as ``batch_size * feat_num``.
            training: Keras training flag, forwarded to the batch-norm and
                dropout layers.

        Returns:
            Output of the last hidden layer, or ``inputs`` unchanged when
            ``hidden_units`` is empty.
        """
        # Start from the input so an empty stack is a pass-through rather
        # than an unbound local.
        dnn_output = inputs
        for i in range(len(self.hidden_units)):
            dnn_output = tf.nn.bias_add(tf.matmul(dnn_output, self.kernels[i]), self.bias[i])
            if self.use_bn:
                dnn_output = self.bn_layers[i](dnn_output, training=training)
            dnn_output = self.activation_layers[i](dnn_output)
            dnn_output = self.dropout_layers[i](dnn_output, training=training)

        return dnn_output

    def compute_output_shape(self, input_shape):
        if not self.hidden_units:
            return input_shape
        # Replace the last dimension with the width of the final layer. The
        # width must be wrapped in a tuple; adding a bare int raises TypeError.
        return tuple(input_shape[:-1]) + (self.hidden_units[-1],)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(DNN, self).get_config()
        config.update({'hidden_units': list(self.hidden_units),
                       'reg_l2': self.reg_l2,
                       'dropout_rate': self.dropout_rate,
                       'use_bn': self.use_bn})
        return config

In [8]:
class CrossNet(Layer):
    """Cross network: repeated explicit feature crossing with a residual term.

    Each layer computes ``x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l``.

    Args:
        layer_num: Number of cross layers.
        reg_l2: L2 regularisation factor applied to every kernel.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, layer_num=2, reg_l2=0.0, **kwargs):
        super(CrossNet, self).__init__(**kwargs)
        self.layer_num = layer_num
        self.reg_l2 = reg_l2

    def build(self, input_shape):
        input_size = input_shape[-1]
        # One (input_size, 1) kernel and one (input_size, 1) bias per layer.
        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(input_size, 1),
                                        initializer=glorot_uniform(),
                                        regularizer=l2(self.reg_l2),
                                        ) for i in range(self.layer_num)]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(input_size, 1),
                                     initializer=zeros(),
                                     ) for i in range(self.layer_num)]
        super(CrossNet, self).build(input_shape)

    def call(self, inputs):
        """Apply ``layer_num`` cross layers.

        Args:
            inputs: Rank-2 tensor laid out as ``batch_size * feat_num``.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        # Work on column vectors: (batch, feat_num, 1).
        x_0 = tf.expand_dims(inputs, axis=-1)
        x_l = x_0
        for i in range(self.layer_num):
            # x_l^T w is one scalar per sample; it scales x_0.
            dot = tf.matmul(x_0, tf.matmul(tf.transpose(x_l, perm=[0, 2, 1]), self.kernels[i]))
            x_l = dot + self.bias[i] + x_l

        output = tf.squeeze(x_l, axis=-1)
        return output

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CrossNet, self).get_config()
        config.update({'layer_num': self.layer_num, 'reg_l2': self.reg_l2})
        return config

In [32]:
class CIN(Layer):
    """Compressed Interaction Network over a stack of field embeddings.

    Args:
        layer_size: Number of feature maps produced by each CIN layer.
        activation: Activation applied to every layer's feature maps.
        split_half: When True, every layer except the last splits its feature
            maps in two: one half feeds the next layer, the other half goes
            to the output. The last layer sends all of its maps to the output.
        reg_l2: L2 regularisation factor applied to the filters.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, layer_size=[128, 128], activation='relu', split_half=True, reg_l2=1e-5, **kwargs):
        super(CIN, self).__init__(**kwargs)
        # Copy so the shared default list (or a caller's list) is never aliased.
        self.layer_size = list(layer_size)
        self.split_half = split_half
        self.activation = activation
        self.reg_l2 = reg_l2

    def build(self, input_shape):
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % len(input_shape))
        if self.split_half and any(size % 2 for size in self.layer_size[:-1]):
            # An odd size would only fail later, inside tf.split in call().
            raise ValueError(
                "layer_size must be even number except for the last layer when split_half=True")

        # field_nums[k] is the number of feature maps entering layer k;
        # field_nums[0] is the number of input fields.
        self.field_nums = [int(input_shape[1])]
        self.filters = []
        self.bias = []
        for i, size in enumerate(self.layer_size):

            self.filters.append(self.add_weight(name='filter' + str(i),
                                                shape=[1, self.field_nums[-1] * self.field_nums[0], size],
                                                dtype=tf.float32,
                                                initializer=glorot_uniform(),
                                                regularizer=l2(self.reg_l2)))

            self.bias.append(self.add_weight(name='bias' + str(i),
                                             shape=[size],
                                             dtype=tf.float32,
                                             initializer=zeros()))

            if self.split_half:
                self.field_nums.append(size // 2)
            else:
                self.field_nums.append(size)

        self.activation_layers = [Activation(self.activation) for _ in self.layer_size]

        super(CIN, self).build(input_shape)

    def call(self, inputs):
        """Run the CIN layers and sum-pool each feature map over the embedding axis.

        Args:
            inputs: Rank-3 tensor indexed as (batch, field, embedding) with a
                statically known embedding size.

        Returns:
            Rank-2 tensor with one value per output feature map.
        """
        dim = int(inputs.get_shape()[-1])
        hidden_nn_layers = [inputs]
        final_result = []

        # Slice the input along the embedding axis: dim tensors of (batch, H0, 1).
        split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)
        for idx, layer_size in enumerate(self.layer_size):
            split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)

            # Outer product per embedding slice: (dim, batch, H0, Hk).
            dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True)

            dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]])

            # (batch, dim, H0 * Hk)
            dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])

            # Width-1 convolution compresses H0 * Hk channels to layer_size maps.
            curr_out = tf.nn.conv1d(dot_result, filters=self.filters[idx], stride=1, padding='VALID')

            curr_out = tf.nn.bias_add(curr_out, self.bias[idx])

            curr_out = self.activation_layers[idx](curr_out)

            # (batch, layer_size, dim)
            curr_out = tf.transpose(curr_out, perm=[0, 2, 1])

            if self.split_half:
                if idx != len(self.layer_size) - 1:
                    next_hidden, direct_connect = tf.split(
                        curr_out, 2 * [layer_size // 2], 1)
                else:
                    # The last layer sends everything to the output; the
                    # placeholder appended below is never read.
                    direct_connect = curr_out
                    next_hidden = 0
            else:
                direct_connect = curr_out
                next_hidden = curr_out

            final_result.append(direct_connect)
            hidden_nn_layers.append(next_hidden)

        result = tf.concat(final_result, axis=1)
        result = tf.reduce_sum(result, -1)

        return result

    def compute_output_shape(self, input_shape):
        if self.split_half:
            featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]
        else:
            featuremap_num = sum(self.layer_size)
        return (None, featuremap_num)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CIN, self).get_config()
        config.update({'layer_size': list(self.layer_size),
                       'activation': self.activation,
                       'split_half': self.split_half,
                       'reg_l2': self.reg_l2})
        return config