In [39]:
import tensorflow as tf
import numpy as np

In [46]:
class EmbeddingLayer(tf.keras.layers.Layer):
    """Embeds user, behavior-sequence, candidate-ad and context feature ids.

    Three embedding tables are created (user, ad, context). The ad table is
    shared between the candidate ad and the user's behavior sequence. Each
    table has ``feat_num + 1`` rows; presumably id 0 is reserved for padding
    (the demo data only draws ids from ``1..feat_num`` and pads with 0).

    Args:
        user_feat_num: number of distinct user feature ids.
        user_field_num: number of user fields per example.
        ad_feat_num: number of distinct ad feature ids.
        ad_field_num: number of ad fields per ad.
        context_feat_num: number of distinct context feature ids.
        context_field_num: number of context fields per example.
        emb_dim: embedding size of every single field.
    """
    def __init__(self,user_feat_num,user_field_num,
                 ad_feat_num,ad_field_num,
                 context_feat_num,context_field_num,emb_dim,*args,**kwargs):
        super(EmbeddingLayer,self).__init__(*args,**kwargs)
        self.user_feat_num=user_feat_num
        self.user_field_num=user_field_num
        self.ad_feat_num=ad_feat_num
        self.ad_field_num=ad_field_num
        self.context_feat_num=context_feat_num
        self.context_field_num=context_field_num
        self.emb_dim=emb_dim

        self.user_feat_emb_layer=tf.keras.layers.Embedding(input_dim=self.user_feat_num+1,output_dim=self.emb_dim)
        self.ad_feat_emb_layer=tf.keras.layers.Embedding(input_dim=self.ad_feat_num+1,output_dim=self.emb_dim)
        self.context_feat_emb_layer=tf.keras.layers.Embedding(input_dim=self.context_feat_num+1,output_dim=self.emb_dim)

    def call(self, inputs, **kwargs):
        """Embed every input and flatten the per-field embeddings.

        Args:
            inputs: tuple ``(user_feat_batch, user_behaviors_batch,
                ad_feat_batch, context_feat_batch)`` with shapes
                ``[batch_size, user_field_num]``,
                ``[batch_size, seq_len, ad_field_num]``,
                ``[batch_size, ad_field_num]`` and
                ``[batch_size, context_field_num]``.

        Returns:
            Tuple ``(user_emb, user_behaviors_emb, ad_emb, context_emb)`` with
            shapes ``[batch_size, user_field_num*emb_dim]``,
            ``[batch_size, seq_len, ad_field_num*emb_dim]``,
            ``[batch_size, ad_field_num*emb_dim]`` and
            ``[batch_size, context_field_num*emb_dim]``.
        """
        user_feat_batch,user_behaviors_batch,ad_feat_batch,context_feat_batch=inputs

        user_emb=self.user_feat_emb_layer(user_feat_batch) # [batch_size, user_field_num, emb_dim]
        user_emb=tf.reshape(user_emb,shape=[-1,self.user_field_num*self.emb_dim]) # [batch_size, user_field_num*emb_dim]

        # The behavior sequence shares the ad embedding table with the candidate ad.
        user_behaviors_emb=self.ad_feat_emb_layer(user_behaviors_batch) # [batch_size, seq_len, ad_field_num, emb_dim]
        # Prefer the static sequence length; fall back to the dynamic one when it
        # is unknown at trace time, because tf.reshape cannot take None.
        seq_len=user_behaviors_emb.shape[1]
        if seq_len is None:
            seq_len=tf.shape(user_behaviors_emb)[1]
        user_behaviors_emb=tf.reshape(user_behaviors_emb,shape=[-1,seq_len,self.ad_field_num*self.emb_dim]) # [batch_size, seq_len, ad_field_num*emb_dim]

        ad_emb=self.ad_feat_emb_layer(ad_feat_batch) # [batch_size, ad_field_num, emb_dim]
        ad_emb=tf.reshape(ad_emb,shape=[-1,self.ad_field_num*self.emb_dim]) # [batch_size, ad_field_num*emb_dim]

        context_emb=self.context_feat_emb_layer(context_feat_batch) # [batch_size, context_field_num, emb_dim]
        context_emb=tf.reshape(context_emb,shape=[-1,self.context_field_num*self.emb_dim]) # [batch_size, context_field_num*emb_dim]

        return (user_emb,user_behaviors_emb,ad_emb,context_emb)

# Toy configuration used to smoke-test EmbeddingLayer.
user_feat_num=3
user_field_num=3
ad_feat_num=4
ad_field_num=2
context_feat_num=5
context_field_num=3
emb_dim=6
batch_size=10
seq_len=7

# Feature ids are drawn from 1..feat_num; 0 is left free for padding.
user_feat_batch=np.random.randint(1,1+user_feat_num,size=[batch_size,user_field_num]).astype(np.int32)

# Each user gets a random-length history (1..seq_len) of ads, each ad having
# ad_field_num field ids. The per-ad width must follow ad_field_num, otherwise
# the reshape inside EmbeddingLayer fails as soon as ad_field_num changes.
user_behaviors=[]
for _ in range(batch_size):
    seq=np.random.choice(range(1,1+ad_feat_num),size=[np.random.randint(1,1+seq_len),ad_field_num]).astype(np.int32).tolist()
    user_behaviors.append(seq)
# Zero-pad at the end so every history has exactly seq_len steps.
user_behaviors=tf.keras.preprocessing.sequence.pad_sequences(user_behaviors,maxlen=seq_len,padding="post")

ad_feat_batch=np.random.randint(1,1+ad_feat_num,size=[batch_size,ad_field_num]).astype(np.int32)
context_feat_batch=np.random.randint(1,1+context_feat_num,size=[batch_size,context_field_num]).astype(np.int32)

emb_layer=EmbeddingLayer(user_feat_num=user_feat_num,
                         user_field_num=user_field_num,
                         ad_feat_num=ad_feat_num,
                         ad_field_num=ad_field_num,
                         context_feat_num=context_feat_num,
                         context_field_num=context_field_num,
                         emb_dim=emb_dim)

inputs=(user_feat_batch,user_behaviors,ad_feat_batch,context_feat_batch)
user_emb,user_behaviors_emb,ad_emb,context_emb=emb_layer(inputs)
print("user_emb.shape",user_emb.shape)
print("user_behaviors_emb.shape",user_behaviors_emb.shape)
print("ad_emb.shape",ad_emb.shape)
print("context_emb.shape",context_emb.shape)

user_emb.shape (10, 18)
user_behaviors_emb.shape (10, 7, 12)
ad_emb.shape (10, 12)
context_emb.shape (10, 18)


In [62]:
import math
class PRelu:
    """Callable activation that applies a leaky ReLU with a fixed negative slope.

    Note that, despite the name, ``alpha`` is a plain Python attribute here and
    is not a trainable parameter.
    """

    def __init__(self, alpha=0.2):
        # Slope applied to negative inputs.
        self.alpha = alpha

    def __call__(self, x, *args, **kwargs):
        # Extra positional/keyword arguments (e.g. ``training``) are accepted
        # and ignored so this cell is interchangeable with Dice.
        negative_slope = self.alpha
        return tf.nn.leaky_relu(x, alpha=negative_slope)

class Dice:
    """Dice activation: a data-adaptive gate between ``x`` and ``alpha * x``.

    Computes ``p = sigmoid((x - mean) / sqrt(var + epsilon))`` and returns
    ``p * x + alpha * (1 - p) * x``. While training, ``mean``/``var`` are the
    statistics of the current batch (reduced over axis 0) and are also fed to
    an exponential moving average; otherwise the moving averages are used.

    Args:
        epsilon: added to the variance before the square root. Note that the
            default ``10e-8`` equals ``1e-7``.
        alpha: slope applied to the part of ``x`` that is gated off.
        decay: decay of the exponential moving average.
    """
    def __init__(self,epsilon=10e-8,alpha=0.2,decay=0.99):
        self.epsilon=epsilon
        self.alpha=alpha
        # Created lazily on the first call, once the shape of the statistics is known.
        self.variable_mean=None
        self.variable_var=None
        self.ema=tf.train.ExponentialMovingAverage(decay=decay)

    def __call__(self,x,training=True, *args, **kwargs):
        """Apply Dice to ``x``.

        Args:
            x: input tensor; statistics are reduced over axis 0 (the batch).
            training: when truthy, use batch statistics and update the moving
                averages; otherwise use the moving averages.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Batch statistics. Inside InterestLayer the cell runs after a Dense
        # layer, so these have shape [1, seq_len, units].
        mean=tf.reduce_mean(x,axis=0,keepdims=True)
        var=tf.math.reduce_variance(x,axis=0,keepdims=True)
        # The EMA only accepts Variables, but `mean` is a Tensor, so it has to go
        # through a Variable. Do NOT use tf.Variable(mean) directly: that would
        # create a brand-new Variable on every call, and the EMA would track a
        # different variable each time. At test time tf.Variable(mean) would also
        # be a new variable the EMA knows nothing about, so ema.average(...) would
        # return None. Hence a single Variable is created once and re-assigned on
        # every call, so the EMA always sees the same object.
        if self.variable_mean is None:
            print("initialize variable_mean and variable_var")
            self.variable_mean=tf.Variable(tf.zeros_like(mean))
            self.variable_var=tf.Variable(tf.zeros_like(var))
        if training:
            self.variable_mean.assign(mean)
            self.variable_var.assign(var)
            self.ema.apply([self.variable_mean,self.variable_var])
        else:
            ema_mean=self.ema.average(self.variable_mean)
            ema_var=self.ema.average(self.variable_var)
            # ema.average returns None until ema.apply has run at least once
            # (i.e. inference before any training step). In that case keep the
            # batch statistics instead of crashing on `x - None`.
            if ema_mean is not None and ema_var is not None:
                mean=ema_mean
                var=ema_var
        # Gate probability, same shape as x. Equivalent to 1/(1+e^(-z)).
        p=tf.sigmoid(tf.divide(x-mean,tf.sqrt(var+self.epsilon)))
        return tf.multiply(p,x)+tf.multiply(tf.multiply(self.alpha,1-p),x)

    # WRONG way (kept as a cautionary example, see the explanation in __call__):
    # def __call__(self,x,training=True, *args, **kwargs):
    #
    #     mean=tf.reduce_mean(x,axis=0,keepdims=True) # average over the batch
    #     var=tf.math.reduce_variance(x,axis=0,keepdims=True) # variance over the batch
    #
    #     print("initialize variable_mean and variable_var")
    #     variable_mean=tf.Variable(mean)
    #     variable_var=tf.Variable(var)
    #     if training:
    #         self.ema.apply([variable_mean,variable_var])
    #     else:
    #         mean=self.ema.average(variable_mean) # this returns None
    #         var=self.ema.average(variable_var)
    #     p=-tf.divide(x-mean,tf.sqrt(var+self.epsilon))
    #     p=tf.divide(1,1+tf.pow(math.e,p))
    #     return tf.multiply(p,x)+tf.multiply(tf.multiply(self.alpha,1-p),x)

class InterestLayer(tf.keras.layers.Layer):
    """Attention pooling of a user's behavior sequence against a candidate ad.

    For every step of the sequence an unnormalized score is produced by a small
    MLP over ``[behavior_emb, behavior_emb * ad_emb, ad_emb]``. The behavior
    embeddings are scaled by their scores, padded steps are zeroed out, and the
    result is summed over the sequence axis.

    Args:
        attention_cells: activation callables, one per hidden layer; each is
            called as ``cell(x, training=training)``.
        units_list: hidden sizes of the scoring MLP, one per hidden layer.
            Layers and cells are paired with ``zip``, so the shorter list wins.
    """
    def __init__(self,attention_cells,units_list,*args,**kwargs):
        super(InterestLayer,self).__init__(*args,**kwargs)
        self.dense_layers=[tf.keras.layers.Dense(units=units,activation=None) for units in units_list]
        self.attention_cells=attention_cells
        self.scoring_layer=tf.keras.layers.Dense(units=1,activation=None)

    def call(self, inputs, training=True, **kwargs):
        """Compute the pooled interest embedding.

        Args:
            inputs: tuple ``(user_behaviors, user_behaviors_emb, ad_emb)`` with
                shapes ``[batch_size, seq_len, ad_field_num]``,
                ``[batch_size, seq_len, emb_dim]`` and ``[batch_size, emb_dim]``.
            training: forwarded to every attention cell.

        Returns:
            Tensor of shape ``[batch_size, emb_dim]``.
        """
        user_behaviors,user_behaviors_emb,ad_emb=inputs

        # Repeat the candidate ad along the sequence axis. The dynamic shape is
        # used because the static one may contain None (e.g. the batch
        # dimension under tf.function), which tf.broadcast_to cannot accept.
        ad_emb=tf.broadcast_to(tf.expand_dims(ad_emb,axis=1),shape=tf.shape(user_behaviors_emb)) # [batch_size, seq_len, emb_dim]
        out_product=tf.multiply(user_behaviors_emb,ad_emb) # [batch_size, seq_len, emb_dim]

        # Scoring MLP.
        dense_inputs=tf.concat([user_behaviors_emb,out_product,ad_emb],axis=-1) # [batch_size, seq_len, emb_dim*3]
        for dense_layer,attention_cell in zip(self.dense_layers,self.attention_cells):
            dense_inputs=dense_layer(dense_inputs)
            dense_inputs=attention_cell(dense_inputs,training=training)

        # dense_inputs: [batch_size, seq_len, units]
        attention_scores=self.scoring_layer(dense_inputs) # [batch_size, seq_len, 1]
        interest_emb=tf.multiply(attention_scores,user_behaviors_emb) # [batch_size, seq_len, emb_dim]

        # A step counts as padding when its first field id is 0; such steps
        # contribute nothing to the sum.
        mask=tf.expand_dims(tf.where(tf.not_equal(user_behaviors[:,:,0],0),x=1.,y=0.),axis=-1) # [batch_size, seq_len, 1]
        interest_emb=tf.multiply(interest_emb,mask)
        interest_emb=tf.reduce_sum(interest_emb,axis=1) # [batch_size, emb_dim]
        return interest_emb


# Smoke test for InterestLayer. Relies on `user_behaviors`, `user_behaviors_emb`
# and `ad_emb` produced by the EmbeddingLayer demo cell above.
inputs=(user_behaviors,user_behaviors_emb,ad_emb)
# One hidden layer of 36 units, activated by the fixed-slope leaky ReLU cell.
attention_cells=[PRelu()]
units_list=[36]
interest_layer=InterestLayer(attention_cells=attention_cells,units_list=units_list)
# Run once with the default training=True, then once in inference mode.
interest_emb = interest_layer(inputs)
interest_emb = interest_layer(inputs,training=False)
print(interest_emb.shape)

(10, 12)


In [63]:
class DIN(tf.keras.Model):
    """Deep Interest Network style model built from EmbeddingLayer and InterestLayer.

    Args:
        user_feat_num, user_field_num, ad_feat_num, ad_field_num,
        context_feat_num, context_field_num, emb_dim: forwarded to EmbeddingLayer.
        dense_units_list: hidden sizes of the final MLP.
        dense_cells: activation callables for the final MLP, one per hidden
            layer; each is called as ``cell(x, training=training)``.
        attention_units_list: hidden sizes of the attention MLP (InterestLayer).
        attention_cells: activation callables of the attention MLP.
    """
    def __init__(self,user_feat_num,user_field_num,
                 ad_feat_num,ad_field_num,
                 context_feat_num,context_field_num,emb_dim,
                 dense_units_list,dense_cells,
                 attention_units_list,attention_cells,
                 *args,**kwargs):
        super(DIN,self).__init__(*args,**kwargs)
        self.emb_layer=EmbeddingLayer(user_feat_num=user_feat_num,user_field_num=user_field_num,
                                      ad_feat_num=ad_feat_num,ad_field_num=ad_field_num,
                                      context_feat_num=context_feat_num,context_field_num=context_field_num,
                                      emb_dim=emb_dim)
        self.interest_layer=InterestLayer(units_list=attention_units_list,attention_cells=attention_cells)
        self.dense_layers=[tf.keras.layers.Dense(units=units,activation=None) for units in dense_units_list]
        self.dense_cells=dense_cells
        # Two output units, no activation: raw logits.
        self.scoring_layer=tf.keras.layers.Dense(units=2,activation=None)

    def call(self, inputs, training=True, mask=None):
        """Run the model.

        Args:
            inputs: tuple ``(user_feat_batch, user_behaviors, ad_feat_batch,
                context_feat_batch)`` as expected by EmbeddingLayer.
            training: forwarded to the interest layer and to every dense cell.
            mask: unused.

        Returns:
            Tensor of shape ``[batch_size, 2]`` with unnormalized scores.
        """
        # The raw behavior ids are needed again to build the padding mask.
        _,user_behaviors,_,_=inputs
        user_emb,user_behaviors_emb,ad_emb,context_emb=self.emb_layer(inputs)
        interest_emb=self.interest_layer((user_behaviors,user_behaviors_emb,ad_emb),training=training)

        # NOTE(review): the candidate ad embedding itself is not part of this
        # concat; confirm whether that is intentional.
        dense_inputs=tf.concat([user_emb,interest_emb,context_emb],axis=1)
        for dense_layer,dense_cell in zip(self.dense_layers,self.dense_cells):
            dense_inputs=dense_layer(dense_inputs)
            # Forward the training flag so stateful cells such as Dice use their
            # moving averages (and stop updating them) at inference time.
            dense_inputs=dense_cell(dense_inputs,training=training)
        output=self.scoring_layer(dense_inputs)

        return output

# Toy configuration used to smoke-test the full DIN model.
user_feat_num=3
user_field_num=3
ad_feat_num=4
ad_field_num=2
context_feat_num=5
context_field_num=3
emb_dim=6
batch_size=10
seq_len=7
# Feature ids are drawn from 1..feat_num; 0 is left free for padding.
user_feat_batch=np.random.randint(1,1+user_feat_num,size=[batch_size,user_field_num]).astype(np.int32)

# Random-length histories (1..seq_len) of ads with ad_field_num field ids each.
# The per-ad width follows ad_field_num so the data stays consistent with the
# model when that setting is changed.
user_behaviors=[]
for _ in range(batch_size):
    seq=np.random.choice(range(1,1+ad_feat_num),size=[np.random.randint(1,1+seq_len),ad_field_num]).astype(np.int32).tolist()
    user_behaviors.append(seq)
# Zero-pad at the end so every history has exactly seq_len steps.
user_behaviors=tf.keras.preprocessing.sequence.pad_sequences(user_behaviors,maxlen=seq_len,padding="post")

ad_feat_batch=np.random.randint(1,1+ad_feat_num,size=[batch_size,ad_field_num]).astype(np.int32)
context_feat_batch=np.random.randint(1,1+context_feat_num,size=[batch_size,context_field_num]).astype(np.int32)

# Every Dice instance keeps its own statistics, so each layer gets its own cell.
din=DIN(user_feat_num=user_feat_num,
        user_field_num=user_field_num,
        ad_feat_num=ad_feat_num,
        ad_field_num=ad_field_num,
        context_feat_num=context_feat_num,
        context_field_num=context_field_num,emb_dim=emb_dim,
        attention_units_list=[36],attention_cells=[Dice()],
        dense_units_list=[200,80],dense_cells=[Dice(),Dice()])

inputs=(user_feat_batch,user_behaviors,ad_feat_batch,context_feat_batch)
# Run once with the default training=True, then once in inference mode.
output=din(inputs)
output=din(inputs,training=False)
print(output.shape)

initialize variable_mean and variable_var
initialize variable_mean and variable_var
initialize variable_mean and variable_var
(10, 2)
