In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as layer
from sklearn.preprocessing import LabelEncoder
# from tensorflow.keras.models import Model
from tensorflow.keras import Model
import tensorflow.keras.backend as K
from sklearn.model_selection import StratifiedKFold

In [2]:
# Load the sampled Criteo CSV from the local ./data directory and preview the first rows.
train = pd.read_csv('./data/criteo_sampled_data.csv')
train.head()

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,...,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,0,1.0,1,5.0,0.0,1382.0,4.0,15.0,2.0,181.0,...,e5ba7672,f54016b9,21ddcdc9,b1252a9d,07b5194c,,3a171ecb,c5c50484,e8b83407,9727dd16
1,0,2.0,0,44.0,1.0,102.0,8.0,2.0,2.0,4.0,...,07c540c4,b04e4670,21ddcdc9,5840adea,60f6221e,,3a171ecb,43f13e8b,e8b83407,731c3655
2,0,2.0,0,1.0,14.0,767.0,89.0,4.0,2.0,245.0,...,8efede7f,3412118d,,,e587c466,ad3062eb,3a171ecb,3b183c5c,,
3,0,,893,,,4392.0,,0.0,0.0,0.0,...,1e88c74f,74ef3502,,,6b3a5ca6,,3a171ecb,9117a34a,,
4,0,3.0,-1,,0.0,2.0,0.0,3.0,0.0,0.0,...,1e88c74f,26b3c7a7,,,21c9516a,,32c7478e,b34f3128,,


In [3]:
# train.info()

In [4]:
# Skip the first column (shown as 'Unnamed: 0' in the preview output); keep every other column name.
cols = train.columns[1:]

In [5]:
# Split the column names by their first character: 'I...' -> dense, 'C...' -> sparse.
dense_feats = list(filter(lambda column: column[0] == 'I', cols))
sparse_feats = list(filter(lambda column: column[0] == 'C', cols))

In [6]:
def process_dense_feats(data, feats):
    """Return a log-scaled copy of the selected numeric columns.

    Missing values are replaced by 0 first. Every value ``x > -1`` then becomes
    ``log(x + 1)``; every other value (``x <= -1``) becomes ``-1``.

    Args:
        data: source DataFrame; it is not modified.
        feats: list of numeric column names to transform.

    Returns:
        A new float DataFrame holding only ``feats``, in the given order and
        with the same index as ``data``.
    """
    # Selecting + fillna already yields a fresh frame, so no explicit copy of
    # the whole input is needed.
    filled = data[feats].fillna(0).astype('float64')
    valid = filled > -1
    # Neutralise the out-of-domain cells before taking the log so that no
    # NaN / -inf (or runtime warning) is produced for them; they are
    # overwritten with -1 right after.
    logged = np.log(filled.where(valid, 0.0) + 1)
    return logged.where(valid, -1.0)
# Transformed copy of the dense columns of `train`.
data_dense = process_dense_feats(train, dense_feats)
    

In [7]:
def process_sparse_feats(data, feats):
    """Label-encode the selected categorical columns.

    Missing values are first replaced by the string '-1'; each column is then
    encoded independently with its own ``LabelEncoder``.

    Args:
        data: source DataFrame; it is not modified.
        feats: list of categorical column names to encode.

    Returns:
        A new DataFrame holding only ``feats`` with integer codes.
    """
    filled = data[feats].fillna('-1')
    codes = {name: LabelEncoder().fit_transform(filled[name]) for name in feats}
    # assign() overwrites the existing columns in place, so column order is kept.
    return filled.assign(**codes)
# Label-encoded copy of the sparse columns of `train`.
data_sparse = process_sparse_feats(train, sparse_feats)

In [8]:
# Side-by-side join of the processed dense and sparse columns, plus the target column.
total_data = pd.concat([data_dense, data_sparse], axis=1).assign(label=train['label'])

In [9]:
# 如果你只是想对流经该层的数据做个变换，而这个变换本身没有什么需要学习的参数，那么直接用Lambda Layer是最合适的了

In [10]:
# Number of distinct values of every categorical feature (used to size the embedding tables).
sparse_feat_config = {name: total_data[name].nunique() for name in sparse_feats}

In [11]:
# Build the hold-out split: rows labelled 0..499999 are used for fitting,
# every later row for validation.
train_data = total_data.loc[:499_999]
valid_data = total_data.loc[500_000:]

# The model inputs are lists with one array per feature.
# (DataFrame alternatives: train_data[dense_feats] / train_data[sparse_feats].)
train_dense_x = [train_data[name].values for name in dense_feats]
train_sparse_x = [train_data[name].values for name in sparse_feats]
train_label = tf.cast(train_data['label'].values, tf.int32)

# Same layout for the validation rows
# (DataFrame alternatives: valid_data[dense_feats] / valid_data[sparse_feats]).
val_dense_x = [valid_data[name].values for name in dense_feats]
val_sparse_x = [valid_data[name].values for name in sparse_feats]
val_label = tf.cast(valid_data['label'].values, tf.int32)


In [12]:
# 构造训练集和测试集
def make_data(total_data, idx, dense_cols=None, sparse_cols=None):
    """Select rows by position and split them into model inputs and labels.

    Args:
        total_data: DataFrame holding the feature columns and a 'label' column.
        idx: positional row indices, e.g. the arrays yielded by
            ``StratifiedKFold.split``.
        dense_cols: dense feature names; defaults to the notebook-level
            ``dense_feats``.
        sparse_cols: sparse feature names; defaults to the notebook-level
            ``sparse_feats``.

    Returns:
        ``(sparse_x, dense_x, label)`` where ``sparse_x`` / ``dense_x`` are
        lists with one array per feature and ``label`` is an array.
    """
    if dense_cols is None:
        dense_cols = dense_feats
    if sparse_cols is None:
        sparse_cols = sparse_feats
    # The splitter hands out positions, not index labels, so use iloc; this
    # matches .loc only while the frame carries a default RangeIndex.
    subset = total_data.iloc[idx]
    sparse_x = [subset[f].values for f in sparse_cols]
    dense_x = [subset[f].values for f in dense_cols]
    return sparse_x, dense_x, subset['label'].values

# 写法一
继承layer,定义不同功能的层

In [13]:
# 独立层：嵌入层
class Embedding_dense(tf.keras.layers.Layer):
    """Holds one embedding table per sparse feature and applies them in order.

    Args:
        sparse_feat_config: mapping ``feature name -> number of distinct ids``.
            Its iteration order defines which element of the list passed to
            ``call`` belongs to which feature.
        embeding_shape: output dimension shared by all embedding tables.
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (``name``, ``dtype``, ...).
    """

    def __init__(self, sparse_feat_config, embeding_shape, **kwargs):
        # get_config() also returns the base-layer keys (name, trainable,
        # dtype); the constructor has to accept them, otherwise rebuilding the
        # layer from its own config raises a TypeError.
        super(Embedding_dense, self).__init__(**kwargs)
        # L2 penalty shared by every embedding table of this layer.
        self.reg_1 = tf.keras.regularizers.l2(0.1)
        self.embed_first = {}
        self.sparse_feat_config = sparse_feat_config
        self.embeding_shape = embeding_shape
        self.sparse_feat = list(sparse_feat_config.keys())
        for key, value in self.sparse_feat_config.items():
            # Each table gets one row more than the configured cardinality.
            self.embed_first[key] = layer.Embedding(value + 1, self.embeding_shape,
                                                    embeddings_regularizer=self.reg_1,
                                                    name='embed' + key)

    def call(self, x_sparse):
        """Look up every feature's ids.

        Args:
            x_sparse: list of id tensors, ordered like ``sparse_feat_config``.

        Returns:
            List with one embedded tensor per feature, in the same order.
        """
        return [self.embed_first[key](x_sparse[i])
                for i, key in enumerate(self.sparse_feat)]

    def get_config(self):
        """Return the constructor arguments on top of the base layer config."""
        config = super().get_config().copy()
        config.update({
            'sparse_feat_config': self.sparse_feat_config,
            'embeding_shape': self.embeding_shape,
        })
        return config
# t = Embedding_dense(sparse_feat_config,1)
# y = t(inputs_sparse)    

In [15]:
# 独立层：一阶交叉
class firsr_cross_dense(tf.keras.layers.Layer):
    """First-order (linear) part: 1-d sparse embeddings plus a Dense(1) on the dense features.

    Args:
        sparse_feat_config: mapping ``feature name -> number of distinct ids``.
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (``name``, ``dtype``, ...).
    """

    def __init__(self, sparse_feat_config, **kwargs):
        # **kwargs lets the base-layer keys emitted by get_config() flow back
        # into the constructor.
        super(firsr_cross_dense, self).__init__(**kwargs)
        self.sparse_feat_config = sparse_feat_config
        self.sparse_feat_embedding = Embedding_dense(self.sparse_feat_config, 1)
        self.dense_out_first = layer.Dense(1, name='dense_feat_first')
        # Weight-free helper layers are created once here rather than on every
        # call. The two Add layers stay separate because they merge inputs of
        # different shapes.
        self.sparse_sum = layer.Add()
        self.dense_concat = layer.Concatenate(axis=1)
        self.parts_sum = layer.Add()
        self.flatten = layer.Flatten()

    def call(self, x_sparse, x_dense):
        """Compute the linear term.

        Args:
            x_sparse: list of sparse id tensors (one per feature).
            x_dense: list of dense value tensors (one per feature).

        Returns:
            The flattened sum of the sparse and dense first-order terms.
        """
        # Sparse side: sum of the 1-d embeddings.
        fst_order_sparse_layer = self.sparse_sum(self.sparse_feat_embedding(x_sparse))
        # Dense side: concatenate the features, then a single linear unit.
        fst_order_dense_layer = self.dense_out_first(self.dense_concat(x_dense))
        # Merge both sides into the final first-order result.
        linear_part = self.parts_sum([fst_order_sparse_layer, fst_order_dense_layer])
        return self.flatten(linear_part)

    def get_config(self):
        """Return the constructor arguments on top of the base layer config."""
        config = super().get_config().copy()
        config.update({
            'sparse_feat_config': self.sparse_feat_config,
        })
        return config
# firsr_cross_dense(sparse_feat_config)(train_sparse_x,train_dense_x)    

In [16]:
# 独立层：二阶交叉
class second_cross_dense(tf.keras.layers.Layer):
    """Second-order FM interaction term over a list of field embeddings.

    Computes ``0.5 * sum_k((sum_i v_ik)^2 - sum_i v_ik^2)``.

    Args:
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (``name``, ``dtype``, ...).
    """

    def __init__(self, **kwargs):
        super(second_cross_dense, self).__init__(**kwargs)

    def call(self, embed_lookup_second):
        """Compute the interaction term.

        Args:
            embed_lookup_second: list of embedded tensors, one per field; they
                are joined along axis 1 (expected result: (batch, n, k) with
                n fields and embedding size k).

        Returns:
            Tensor with the summed pairwise interactions, last axis kept as
            size 1.
        """
        # Inside call() plain tensor ops are fine; no Lambda/Multiply/Subtract
        # layer objects need to be instantiated per invocation.
        stacked = tf.concat(embed_lookup_second, axis=1)

        # Square of the sum over the field axis.
        square_of_sum = tf.square(tf.reduce_sum(stacked, axis=1))
        # Sum of the squares over the field axis.
        sum_of_square = tf.reduce_sum(tf.square(stacked), axis=1)

        # 0.5 * sum over the embedding axis of the difference.
        return 0.5 * tf.reduce_sum(square_of_sum - sum_of_square, axis=1, keepdims=True)

# second_cross_dense()(embed_lookup_second)    

In [18]:
# 独立层：DNN
class DNN(tf.keras.layers.Layer):
    """Stack of Dense layers applied to the concatenated, flattened embeddings.

    Args:
        dnn_config: number of units of each Dense layer, in order.
        hidden_activation: optional activation for every Dense layer except
            the last one. With the default ``None`` all layers are linear
            (the original behaviour), which makes the whole stack equivalent
            to a single linear projection; pass e.g. ``'relu'`` for a
            non-linear network.
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (``name``, ``dtype``, ...).
    """

    def __init__(self, dnn_config=(128, 64, 1), hidden_activation=None, **kwargs):
        super(DNN, self).__init__(**kwargs)
        # Copy so that neither the default nor a caller's list is shared.
        self.dnn_layers_config = list(dnn_config)
        self.hidden_activation = hidden_activation
        last = len(self.dnn_layers_config) - 1
        self.dnn_layers = [
            layer.Dense(units, activation=None if i == last else hidden_activation)
            for i, units in enumerate(self.dnn_layers_config)
        ]
        # Weight-free helpers, created once instead of on every call.
        self.concat = layer.Concatenate(axis=1)
        self.flatten = layer.Flatten()

    def call(self, embed_lookup_second):
        """Run the Dense stack.

        Args:
            embed_lookup_second: list of embedded tensors, one per field.

        Returns:
            Output of the last Dense layer.
        """
        fc_layer = self.flatten(self.concat(embed_lookup_second))
        for dense in self.dnn_layers:
            fc_layer = dense(fc_layer)
        return fc_layer

    def get_config(self):
        """Return the constructor arguments on top of the base layer config."""
        config = super().get_config().copy()
        # Keys must match the __init__ parameter names so that from_config()
        # can rebuild the layer.
        config.update({
            'dnn_config': list(self.dnn_layers_config),
            'hidden_activation': self.hidden_activation,
        })
        return config

In [23]:
class DeepFM:
    """Builds and trains a functional-API DeepFM from the custom layers of this notebook.

    The compiled Keras model is available as ``self.deepFm``. Its inputs are
    all sparse features followed by all dense features, each a ``(1,)``-shaped
    ``Input`` named after the feature.

    Args:
        sparse_feat_config: mapping ``feature name -> number of distinct ids``.
        dense_feats: list of dense feature names.
    """

    def __init__(self, sparse_feat_config, dense_feats):
        self.sparse_feat_config = sparse_feat_config
        self.inputs_sparse, self.inputs_dense = self.build_input(sparse_feat_config, dense_feats)

        # Building blocks: linear part, shared 8-d embeddings, FM term, DNN.
        self.firsr_cross_dense = firsr_cross_dense(self.sparse_feat_config)
        self.Embedding_dense = Embedding_dense(self.sparse_feat_config, 8)
        self.second_cross_dense = second_cross_dense()
        self.DNN = DNN([128, 64, 1])

        self.deepFm = self.build_model()

    def build_input(self, sparse_feat_config, dense_feats):
        """Create one ``Input(shape=(1,))`` per feature.

        Returns:
            ``(inputs_sparse, inputs_dense)``: two lists of Keras inputs.
        """
        inputs_sparse = [layer.Input(shape=(1,), name=key) for key in sparse_feat_config]
        inputs_dense = [layer.Input(shape=(1,), name=key) for key in dense_feats]
        return inputs_sparse, inputs_dense

    def build_model(self):
        """Wire the blocks together and return a compiled ``Model``.

        The output is ``sigmoid(linear + second-order + DNN)``. The model is
        compiled with RMSprop (lr 1e-3), binary cross-entropy and AUC.
        """
        linear_part = self.firsr_cross_dense(self.inputs_sparse, self.inputs_dense)
        # The same embeddings feed both the FM term and the DNN.
        embed_lookup = self.Embedding_dense(self.inputs_sparse)
        snd_order_sparse_layer = self.second_cross_dense(embed_lookup)
        fc_layer = self.DNN(embed_lookup)

        output_layer = layer.Add()([linear_part, snd_order_sparse_layer, fc_layer])
        output_layer = layer.Activation('sigmoid')(output_layer)
        model = Model(self.inputs_sparse + self.inputs_dense, outputs=output_layer)
        model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
                      loss='binary_crossentropy',
                      metrics=['AUC'])
        return model

    def train(self, train_data, train_label, valid_data, valid_label, batch_size, epochs, callbacks):
        """Fit ``self.deepFm``.

        Args:
            train_data: list of input arrays, sparse features first, then dense.
            train_label: training targets.
            valid_data: validation inputs, same layout as ``train_data``.
            valid_label: validation targets.
            batch_size: mini-batch size.
            epochs: number of epochs.
            callbacks: list of Keras callbacks.

        Returns:
            The ``History`` object returned by ``Model.fit``.
        """
        return self.deepFm.fit(train_data, train_label,
                               batch_size=batch_size, epochs=epochs, verbose=1,
                               validation_data=(valid_data, valid_label),
                               callbacks=callbacks)


In [25]:
# 5-fold stratified cross-validation with early stopping and best-weights checkpoints.
# The checkpoint directory must exist before the first save.
tf.io.gfile.makedirs('./model')
skf = StratifiedKFold(n_splits = 5, random_state=1996, shuffle = True)
for idx, (train_idx, val_idx) in enumerate(skf.split(total_data,total_data['label'])):
    print('fold:',idx)
    # Start every fold from a clean Keras state.
    K.clear_session()
    train_sparse_x,train_dense_x,train_label = make_data(total_data,train_idx)
    val_sparse_x,val_dense_x,val_label = make_data(total_data,val_idx)

    # Callbacks
    # Keep only the weights of the epoch with the lowest val_loss.
    file_path = f'./model/{idx}.h5'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True,save_weights_only=True, mode='min')
    # Multiply the learning rate by 0.8 (floor 1e-4) after 1 epoch without val_loss improvement.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=1, min_lr=0.0001, verbose=1)
    # Stop once val_loss has not improved by at least 1e-4 for 2 consecutive epochs.
    earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=2,verbose=1, mode='auto')
    callbacks = [checkpoint, reduce_lr, earlystopping]

    # Fresh model per fold: batch size 128, 1 epoch.
    # NOTE: with a single epoch the LR schedule and early stopping cannot trigger.
    deepfm = DeepFM(sparse_feat_config, dense_feats)
    deepfm.train(train_sparse_x+train_dense_x,train_label,
                 val_sparse_x+val_dense_x,val_label,
                128,1, callbacks=callbacks)

fold: 0


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 00001: val_loss improved from inf to 0.70909, saving model to ./model/0.h5
fold: 1
Epoch 00001: val_loss improved from inf to 0.71248, saving model to ./model/1.h5
fold: 2
Epoch 00001: val_loss improved from inf to 0.71068, saving model to ./model/2.h5
fold: 3
Epoch 00001: val_loss improved from inf to 0.71336, saving model to ./model/3.h5
fold: 4
Epoch 00001: val_loss improved from inf to 0.71458, saving model to ./model/4.h5


In [22]:
# Load saved weights and predict.
# The constructor already builds and compiles the network (`.deepFm`), so reuse
# it instead of calling build_model() a second time.
deepfm = DeepFM(sparse_feat_config, dense_feats).deepFm
deepfm.load_weights('./model/0.h5')
# NOTE(review): val_sparse_x / val_dense_x are whatever the previously executed
# cell left behind (the last fold when run top to bottom) - confirm this is the
# split you want to score with the fold-0 weights.
deepfm.predict(val_sparse_x+val_dense_x)

array([[0.33034733],
       [0.31418633],
       [0.24944091],
       ...,
       [0.75203234],
       [0.83707786],
       [0.73838204]], dtype=float32)

# 写法二
直接继承Model

In [12]:
class DeepFM(Model):
    """DeepFM written as a single subclassed Keras ``Model``.

    Args:
        sparse_feat_config: mapping ``feature name -> number of distinct ids``.
        **kwargs: forwarded to ``tf.keras.Model`` (e.g. ``name``).
    """

    def __init__(self, sparse_feat_config, **kwargs):
        super(DeepFM, self).__init__(**kwargs)
        self.sparse_feat_config = sparse_feat_config

        # First-order part: a 1-d embedding per sparse feature + Dense(1) on the dense features.
        # The two embedding families get distinct name prefixes; the original
        # used 'embed' + key for both, giving two layers with the same name per
        # feature, which breaks anything that keys layers by name.
        self.reg_1 = tf.keras.regularizers.l2(0.5)
        self.embed_first = [
            layer.Embedding(value, 1,
                            embeddings_regularizer=self.reg_1,
                            name='embed_first_' + key)
            for key, value in self.sparse_feat_config.items()
        ]
        self.dense_out_first = layer.Dense(1, name='dense_feat_first')

        # Second-order part: an 8-d embedding per sparse feature (sparse features only).
        self.reg_2 = tf.keras.regularizers.l2(0.5)
        self.embed_second = [
            layer.Embedding(value, 8,
                            embeddings_regularizer=self.reg_2,
                            name='embed_second_' + key)
            for key, value in self.sparse_feat_config.items()
        ]

        # DNN part. The Dense layers have no activation, so this stack is linear.
        self.dnn_layers_out = [128, 64, 1]
        self.dnn_layers = [layer.Dense(units) for units in self.dnn_layers_out]
        # Created once here instead of on every forward pass.
        self.flatten = layer.Flatten()

    def call(self, x_sparse, x_dense):
        """Forward pass.

        Args:
            x_sparse: container indexable by feature name (``x_sparse[key]``)
                that yields the ids of that feature - presumably a DataFrame
                or dict with one 1-D id column per feature; verify against
                the caller.
            x_dense: dense feature values, castable to a float32 tensor.

        Returns:
            ``sigmoid(linear + second-order + DNN)``.
        """
        # Cast the ids once; both embedding families use the same ids.
        ids = [tf.cast(x_sparse[key], tf.int32) for key in self.sparse_feat_config]

        # First order: sum of the 1-d embeddings plus a linear map of the dense features.
        fst_order_sparse_layer = tf.add_n(
            [embed(feature_ids) for embed, feature_ids in zip(self.embed_first, ids)])
        fst_order_dense_layer = self.dense_out_first(tf.cast(x_dense, tf.float32))
        linear_part = fst_order_sparse_layer + fst_order_dense_layer

        # Second order: put the per-feature embeddings on a new axis 1
        # (expected (batch, n, k) with n features and embedding size k).
        concat_sparse_kd_embed = tf.stack(
            [embed(feature_ids) for embed, feature_ids in zip(self.embed_second, ids)],
            axis=1)

        # Square of the sum over the feature axis.
        square_sum_kd_embed = tf.square(tf.reduce_sum(concat_sparse_kd_embed, axis=1))
        # Sum of the squares over the feature axis.
        sum_square_kd_embed = tf.reduce_sum(tf.square(concat_sparse_kd_embed), axis=1)
        # 0.5 * sum over the embedding axis of the difference.
        snd_order_sparse_layer = 0.5 * tf.reduce_sum(
            square_sum_kd_embed - sum_square_kd_embed, axis=1, keepdims=True)

        # DNN on the flattened second-order embeddings.
        fc_layer = self.flatten(concat_sparse_kd_embed)
        for dense in self.dnn_layers:
            fc_layer = dense(fc_layer)

        # Output.
        return tf.sigmoid(linear_part + snd_order_sparse_layer + fc_layer)

In [14]:
num_epoch = 1
batch_size = 25600
optimizer  = tf.keras.optimizers.SGD(learning_rate = 1e-3)
loss_fn = tf.keras.losses.BinaryCrossentropy()
deepfm = DeepFM(sparse_feat_config)

# The subclassed model looks sparse features up by column name, so it is fed
# DataFrames taken straight from the hold-out split rather than the
# per-feature array lists used by the functional model.
fit_sparse = train_data[sparse_feats]
fit_dense = train_data[dense_feats]
fit_label = train_data['label'].values.reshape(-1, 1)
holdout_sparse = valid_data[sparse_feats]
holdout_dense = valid_data[dense_feats]
holdout_label = valid_data['label'].values.reshape(-1, 1)

# NOTE(review): the embeddings' L2 regularizers only surface through
# `deepfm.losses`; this loop optimizes the plain cross-entropy, so they are
# not applied - confirm whether that is intended.
for e in range(num_epoch):
    loss = []
    # Step through the rows by position; the final, shorter batch is included.
    for start in range(0, len(fit_label), batch_size):
        stop = start + batch_size
        with tf.GradientTape() as tape:
            y_pred = deepfm(fit_sparse.iloc[start:stop], fit_dense.iloc[start:stop])
            _loss = loss_fn(fit_label[start:stop], y_pred)
        loss.append(_loss)
        # Differentiate the current batch loss only, not the accumulated list.
        grads = tape.gradient(_loss, deepfm.trainable_variables)
        optimizer.apply_gradients(zip(grads, deepfm.trainable_variables))
    y_pred_val = deepfm(holdout_sparse, holdout_dense)
    val_loss = loss_fn(holdout_label, y_pred_val)
    print('train loss',tf.reduce_mean(loss).numpy(),'valid loss',val_loss.numpy())

train loss 0.9574868 valid loss 0.94290245
