In [1]:

import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as nn

from layer import attention, dice, AUGRU
from utils import sequence_mask

array([0, 1])

In [None]:
class Base(tf.keras.Model):
    """Baseline click model: mean-pooled behaviour history + user/item embeddings fed to an MLP.

    The candidate item is represented by the concatenation of its item
    embedding and the embedding of its category; the user's history is the
    length-masked mean of the same kind of vectors.
    """
    def __init__(self, user_count, item_count, cate_count, cate_list,
                       user_dim, item_dim, cate_dim,
                       dim_layers):
        """
        user_count: number of users (size of the user embedding table)
        item_count: number of items (size of the item embedding table and of item_bias)
        cate_count: number of item categories (size of the category embedding table)
        cate_list: lookup indexed by item id that yields the category id of that item
        user_dim, item_dim, cate_dim: embedding widths for users, items and categories
        dim_layers: widths of the final MLP; all but the last layer use a sigmoid
                    activation, the last is linear. Its output is squeezed on the
                    last axis and added to a per-item bias, so dim_layers[-1] is
                    expected to be 1.
        """
        super(Base, self).__init__()
        self.item_dim = item_dim
        self.cate_dim = cate_dim

        # Embedding tables
        self.user_emb = nn.Embedding(user_count, user_dim)
        self.item_emb = nn.Embedding(item_count, item_dim)
        self.cate_emb = nn.Embedding(cate_count, cate_dim)

        # Per-item scalar bias added to the final score
        self.item_bias= tf.Variable(tf.zeros([item_count]), trainable=True)
        self.cate_list = cate_list

        # Layers applied to the pooled history representation
        self.hist_bn = nn.BatchNormalization()
        self.hist_fc = nn.Dense(item_dim+cate_dim)

        # Final MLP over the concatenated embeddings; call as self.fc(x)
        self.fc = tf.keras.Sequential()
        self.fc.add(nn.BatchNormalization())
        for dim_layer in dim_layers[:-1]:
            self.fc.add(nn.Dense(dim_layer, activation='sigmoid'))
        self.fc.add(nn.Dense(dim_layers[-1], activation=None))

    def get_emb(self, user, item, history):
        """Look up every embedding needed for one batch.

        Shape notes below assume `user` and `item` are 1-D batches of ids and
        `history` is a (batch, len_his) matrix of item ids padded to a common
        length.

        Returns (user_emb, item_join_emb, item_bias, hist_join_emb).
        """
        user_emb = self.user_emb(user)  # (None, user_dim)

        item_emb = self.item_emb(item)  # (None, item_dim)

        # Map each candidate item to its category id, then embed the category
        item_cate_emb = self.cate_emb(tf.gather(self.cate_list, item))  # (None, cate_dim)
        item_join_emb = tf.concat([item_emb, item_cate_emb], -1)  # (None, item_dim + cate_dim)

        # Bias of each candidate item
        item_bias= tf.gather(self.item_bias, item)

        # Same item + category lookup for every item id in the history
        hist_emb = self.item_emb(history)  # (None, len_his, item_dim)
        hist_cate_emb = self.cate_emb(tf.gather(self.cate_list, history))  # (None, len_his, cate_dim)
        hist_join_emb = tf.concat([hist_emb, hist_cate_emb], -1)  # (None, len_his, item_dim + cate_dim)

        return user_emb, item_join_emb, item_bias, hist_join_emb

    def call(self, user, item, history, length):
        """
        user, item: user ids and candidate item ids
        history: each user's interaction sequence (item ids)
        length: e.g. [4, 7, 9, 6, ...], one entry per example, giving the number
                of valid (non-padding) entries in that user's history

        Returns (output, logit): the raw score and its sigmoid.
        """
        user_emb, item_join_emb, item_bias, hist_join_emb = self.get_emb(user, item, history)

        # Histories have different valid lengths, so padding positions are
        # zeroed out before pooling. The mask width is taken from the padded
        # history itself rather than from max(length): the two differ whenever
        # the batch is padded beyond its longest valid history, and the builtin
        # max() has to iterate over the tensor in Python.
        max_len = tf.shape(hist_join_emb)[1]
        hist_mask = tf.sequence_mask(length, max_len, dtype=hist_join_emb.dtype)  # (None, len_his)
        # A trailing unit axis lets the mask broadcast over the embedding width
        hist_mask = tf.expand_dims(hist_mask, -1)  # (None, len_his, 1)

        # Sum over the valid positions ...
        hist_sum = tf.reduce_sum(hist_join_emb * hist_mask, 1)  # (None, item_dim + cate_dim)

        # ... and divide by the valid length to get the mean. The length is
        # clamped to 1 so an empty history gives a zero vector instead of 0/0 = NaN.
        safe_length = tf.maximum(tf.cast(length, hist_sum.dtype), 1.0)
        hist_mean = hist_sum / tf.expand_dims(safe_length, -1)  # (None, item_dim + cate_dim)

        # Batch-normalise the pooled history and project it
        hist_hid_emb = self.hist_fc(self.hist_bn(hist_mean))  # (None, item_dim + cate_dim)

        # Concatenate all representations and score them with the MLP
        join_emb = tf.concat([user_emb, item_join_emb, hist_hid_emb], -1)  # (None, user_dim + 2 * (item_dim + cate_dim))

        # Drop only the trailing unit axis of the MLP output before adding the bias
        output = tf.squeeze(self.fc(join_emb), axis=-1) + item_bias
        logit = tf.keras.activations.sigmoid(output)

        return output, logit

In [None]:
# The key component of DIN is the attention layer implemented below.
class attention(tf.keras.layers.Layer):
    """Attention pooling of a key sequence with respect to a single query vector.

    Every key is scored by a small MLP applied to
    [query, key, query - key, query * key]; the scores are masked by the valid
    length, scaled, soft-maxed and used as weights for a sum over the keys.

    NOTE: this definition shadows the `attention` name imported from `layer`
    at the top of the notebook.
    """
    def __init__(self, keys_dim, dim_layers):
        """
        keys_dim: width of the key vectors; scores are divided by sqrt(keys_dim)
        dim_layers: widths of the scoring MLP; all but the last layer use a
                    sigmoid activation, the last is linear. The last width must
                    be 1 so that the scores have shape [B, 1, T].
        """
        super(attention, self).__init__()
        self.keys_dim = keys_dim

        self.fc = tf.keras.Sequential()
        for dim_layer in dim_layers[:-1]:
            self.fc.add(nn.Dense(dim_layer, activation='sigmoid'))
        self.fc.add(nn.Dense(dim_layers[-1], activation=None))

    def call(self, queries, keys, keys_length):
        """
        queries: [B, H] one query vector per example
        keys: [B, T, H] padded key sequence
        keys_length: [B] number of valid keys per example

        Returns the attention-weighted sum of the keys, shape [B, H].
        """
        # Padded sequence length, read from the keys themselves so the mask
        # always matches them (max(keys_length) can be shorter than the padding
        # and requires Python iteration over a tensor).
        seq_len = tf.shape(keys)[1]

        # Repeat the query along the time axis so it lines up with every key
        queries = tf.tile(tf.expand_dims(queries, 1), [1, seq_len, 1])  # [B, T, H]
        din_all = tf.concat([queries, keys, queries-keys, queries*keys], axis=-1)
        outputs = tf.transpose(self.fc(din_all), [0,2,1])  # [B, 1, T]

        # Mask: padding positions get a very large negative score so that their
        # softmax weight is effectively zero
        key_masks = tf.sequence_mask(keys_length, seq_len, dtype=tf.bool)  # [B, T]
        key_masks = tf.expand_dims(key_masks, 1)  # [B, 1, T]
        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        outputs = tf.where(key_masks, outputs, paddings)  # [B, 1, T]

        # Scale
        outputs = outputs / (self.keys_dim ** 0.5)

        # Activation
        outputs = tf.keras.activations.softmax(outputs, -1)  # [B, 1, T]

        # Weighted sum. Only the unit score axis is squeezed: an axis-less
        # squeeze would also drop the batch axis when B == 1.
        outputs = tf.squeeze(tf.matmul(outputs, keys), axis=1)  # [B, H]

        return outputs


In [None]:
class DIN(Base):
    """Base model with the mean-pooled history replaced by attention pooling.

    The history is pooled by `attention`, using the candidate item vector as
    the query, and the final MLP follows each hidden Dense layer with a `dice`
    layer (imported from `layer`) instead of using a sigmoid activation.
    """
    def __init__(self, user_count, item_count, cate_count, cate_list,
                       user_dim, item_dim, cate_dim,
                       dim_layers):
        """Takes the same arguments as Base.__init__ and forwards them unchanged."""
        super(DIN, self).__init__(user_count, item_count, cate_count, cate_list,
                                  user_dim, item_dim, cate_dim,
                                  dim_layers)

        # Attention over the history; keys are item + category vectors
        self.hist_at = attention(item_dim+cate_dim, dim_layers)

        # Replace the MLP built by Base: BatchNorm, then (linear Dense -> dice)
        # for every hidden width, then a linear output layer.
        mlp_layers = [nn.BatchNormalization()]
        for width in dim_layers[:-1]:
            mlp_layers.append(nn.Dense(width, activation=None))
            mlp_layers.append(dice(width))
        mlp_layers.append(nn.Dense(dim_layers[-1], activation=None))
        self.fc = tf.keras.Sequential(mlp_layers)

    def call(self, user, item, history, length):
        """Score a batch; returns (output, logit), the raw score and its sigmoid."""
        user_vec, target_vec, bias, behaviour_seq = self.get_emb(user, item, history)

        # Pool the history with the candidate item as the attention query,
        # then batch-normalise and project the pooled vector.
        interest = self.hist_at(target_vec, behaviour_seq, length)
        interest = self.hist_fc(self.hist_bn(interest))

        features = tf.concat([user_vec, target_vec, interest], -1)

        output = tf.squeeze(self.fc(features)) + bias
        return output, tf.keras.activations.sigmoid(output)