In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import *
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


In [3]:
cd /content/drive/MyDrive/'Colab Notebooks'/DeepFM

/content/drive/MyDrive/Colab Notebooks/DeepFM


In [4]:
ls

criteo_sampled_data.csv  xDeepFM.ipynb


# Read in the dataset

In [5]:
data = pd.read_csv('criteo_sampled_data.csv')

In [6]:
data.head()

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,...,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,0,1.0,1,5.0,0.0,1382.0,4.0,15.0,2.0,181.0,...,e5ba7672,f54016b9,21ddcdc9,b1252a9d,07b5194c,,3a171ecb,c5c50484,e8b83407,9727dd16
1,0,2.0,0,44.0,1.0,102.0,8.0,2.0,2.0,4.0,...,07c540c4,b04e4670,21ddcdc9,5840adea,60f6221e,,3a171ecb,43f13e8b,e8b83407,731c3655
2,0,2.0,0,1.0,14.0,767.0,89.0,4.0,2.0,245.0,...,8efede7f,3412118d,,,e587c466,ad3062eb,3a171ecb,3b183c5c,,
3,0,,893,,,4392.0,,0.0,0.0,0.0,...,1e88c74f,74ef3502,,,6b3a5ca6,,3a171ecb,9117a34a,,
4,0,3.0,-1,,0.0,2.0,0.0,3.0,0.0,0.0,...,1e88c74f,26b3c7a7,,,21c9516a,,32c7478e,b34f3128,,


In [7]:
cols = data.columns.values

# Data Preprocessing

## Define the dense and sparse feature sets

In [8]:
# Column naming in this dataset: dense (numeric) columns are I1..I13 and
# sparse (categorical) columns are C1..C26. `str.startswith` is used instead
# of indexing `f[0]` so an empty column name cannot raise IndexError.
dense_feats = [f for f in cols if f.startswith("I")]
sparse_feats = [f for f in cols if f.startswith("C")]

In [None]:
sparse_feats

## Process the dense feature set

In [10]:
def process_dense_feats(data, feats):
    """Log-scale the dense (numeric) feature columns.

    Missing values are filled with 0, then every value ``x`` is mapped to
    ``log(x + 1)`` when ``x > -1`` and to the sentinel ``-1`` otherwise, so
    inputs for which the logarithm is undefined never produce NaN or -inf.

    @param data: source DataFrame; it is not modified
    @param feats: list of dense column names to transform
    @return: new float DataFrame holding only the columns in ``feats``
    """
    # Selecting with a list of labels already yields a new frame, so no
    # explicit deep copy of the whole input is needed.
    d = data[feats].fillna(0.0)
    values = d.to_numpy(dtype=float)

    # Vectorised over the whole block instead of a per-element Python lambda.
    # The log is evaluated only where it is defined; every other cell keeps
    # the -1 sentinel.
    valid = values > -1
    transformed = np.full(values.shape, -1.0)
    transformed[valid] = np.log1p(values[valid])

    return pd.DataFrame(transformed, index=d.index, columns=d.columns)

In [11]:
data_dense = process_dense_feats(data, dense_feats)

In [12]:
data_dense.head()

Unnamed: 0,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13
0,0.693147,0.693147,1.791759,0.0,7.23201,1.609438,2.772589,1.098612,5.204007,0.693147,1.098612,0.0,1.098612
1,1.098612,0.0,3.806662,0.693147,4.634729,2.197225,1.098612,1.098612,1.609438,0.693147,0.693147,0.0,1.609438
2,1.098612,0.0,0.693147,2.70805,6.64379,4.49981,1.609438,1.098612,5.505332,0.693147,1.386294,1.386294,3.828641
3,0.0,6.795706,0.0,0.0,8.387768,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.386294,-1.0,0.0,0.0,1.098612,0.0,1.386294,0.0,0.0,0.693147,0.693147,0.0,0.0


## Process the sparse feature set

In [13]:
from sklearn.preprocessing import LabelEncoder

In [14]:
def process_sparse_feats(data, feats):
    """Integer-encode the sparse (categorical) feature columns.

    Missing values are replaced by the string "-1" so they form a category of
    their own; each column is then label-encoded independently.

    @param data: source DataFrame; it is not modified
    @param feats: list of sparse column names to encode
    @return: new DataFrame holding only the columns in ``feats``, each replaced
             by the integer codes produced by its own LabelEncoder
    """
    # Selecting with a list of labels already yields a new frame, so a deep
    # copy of the entire input beforehand is redundant.
    d = data[feats].fillna("-1")
    for f in feats:
        # A fresh encoder per column: codes are only meaningful within the
        # column they were fitted on.
        d[f] = LabelEncoder().fit_transform(d[f])

    return d

In [15]:
data_sparse = process_sparse_feats(data, sparse_feats)

In [19]:
data_sparse

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,...,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,470,261,203952,41641,38,6,8961,63,2,16515,...,9,3439,213,3,4954,0,3,24768,52,14364
1,470,498,90258,22218,38,13,5957,19,2,4195,...,0,2465,213,1,60664,0,3,8432,52,10835
2,170,24,2223,65253,38,6,8067,19,2,5767,...,6,738,0,0,143786,9,3,7344,0,0
3,470,93,137623,15635,38,13,1935,19,2,23623,...,1,1648,0,0,67107,0,3,18107,0,0
4,612,368,162265,83638,38,2,7067,19,2,8071,...,1,556,0,0,21257,0,2,22439,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599995,23,66,172718,61471,38,0,577,196,2,11967,...,9,812,213,2,60254,0,2,30065,1,6169
599996,23,120,160098,68483,38,6,7567,7,2,12556,...,8,1194,0,0,39464,0,5,10799,0,0
599997,470,40,170810,7850,95,13,6953,19,2,24655,...,9,2695,0,0,24566,9,3,22603,0,0
599998,673,83,114264,83007,38,13,5212,19,2,14104,...,9,1929,27,3,125582,0,3,7906,1,6920


In [16]:
# Join the processed dense and sparse features with the target column in a
# single step, so this cell by itself yields the complete modelling frame
# (features + label) on a fresh top-to-bottom run.
total_data = pd.concat([data_dense, data_sparse, data['label']], axis=1)

In [19]:
total_data

Unnamed: 0,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,...,C18,C19,C20,C21,C22,C23,C24,C25,C26,label
0,0.693147,0.693147,1.791759,0.000000,7.232010,1.609438,2.772589,1.098612,5.204007,0.693147,...,3439,213,3,4954,0,3,24768,52,14364,0
1,1.098612,0.000000,3.806662,0.693147,4.634729,2.197225,1.098612,1.098612,1.609438,0.693147,...,2465,213,1,60664,0,3,8432,52,10835,0
2,1.098612,0.000000,0.693147,2.708050,6.643790,4.499810,1.609438,1.098612,5.505332,0.693147,...,738,0,0,143786,9,3,7344,0,0,0
3,0.000000,6.795706,0.000000,0.000000,8.387768,0.000000,0.000000,0.000000,0.000000,0.000000,...,1648,0,0,67107,0,3,18107,0,0,0
4,1.386294,-1.000000,0.000000,0.000000,1.098612,0.000000,1.386294,0.000000,0.000000,0.693147,...,556,0,0,21257,0,2,22439,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599995,0.000000,0.000000,4.465908,1.791759,7.864420,4.605170,1.386294,3.135494,3.135494,0.000000,...,812,213,2,60254,0,2,30065,1,6169,1
599996,0.693147,0.693147,1.098612,1.098612,7.071573,2.833213,0.693147,2.833213,2.833213,0.693147,...,1194,0,0,39464,0,5,10799,0,0,0
599997,0.000000,1.791759,0.693147,1.098612,8.349484,4.779123,1.945910,2.397895,3.713572,0.000000,...,2695,0,0,24566,9,3,22603,0,0,0
599998,0.000000,1.791759,4.290459,2.197225,7.864804,3.713572,0.693147,2.708050,2.708050,0.000000,...,1929,27,3,125582,0,3,7906,1,6920,0


In [18]:
# Attach the target column so the train/validation slices taken from
# total_data further down carry their labels.
total_data['label'] = data['label']

# xDeepFM

## Linear model 

### Dense feature

In [20]:
import tensorflow as tf

In [22]:
# One scalar Keras input per dense feature, named after its column.
dense_inputs = [Input([1], name=f) for f in dense_feats]

In [27]:
dense_inputs

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I1')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I2')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I3')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I4')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I5')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I6')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I7')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I8')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I9')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I10')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I11')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I12')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'I13')>]

In [23]:
# Place the scalar dense inputs side by side in one (None, n_dense) tensor,
# then let a single Dense unit learn one weight per dense feature: this is
# the first-order (linear) term for the dense features.
concat_dense_inputs = Concatenate(axis=1)(dense_inputs)
fst_order_dense_layer = Dense(1)(concat_dense_inputs)

In [24]:
concat_dense_inputs

<KerasTensor: shape=(None, 13) dtype=float32 (created by layer 'concatenate')>

### Sparse feature

In [25]:
# One scalar Keras input per sparse feature, carrying the label-encoded id.
sparse_inputs = [Input([1], name=f) for f in sparse_feats]

In [26]:
# First-order weights for the sparse features: every category id gets one
# scalar, looked up from a 1-dimensional embedding table.
sparse_1d_embed = []
for f, _input in zip(sparse_feats, sparse_inputs):
    # Table size = number of distinct encoded ids in the column.
    voc_size = total_data[f].nunique()
    weight_table = Embedding(voc_size, 1, embeddings_regularizer=tf.keras.regularizers.l2(0.5))
    # (None, 1, 1) -> (None, 1)
    sparse_1d_embed.append(Flatten()(weight_table(_input)))

In [27]:
fst_order_sparse_layer = Add()(sparse_1d_embed)

In [28]:
fst_order_sparse_layer

<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'add')>

### Sum the sparse and dense first-order terms

In [29]:
# Element-wise sum of the dense and sparse first-order terms, each of shape
# (None, 1), giving the linear part of the model.
linear_part = Add()([fst_order_dense_layer, fst_order_sparse_layer])

## Compressed Interaction Network

In [30]:
# Embedding dimension used for the D-dimensional sparse-feature embeddings.
D = 8

In [31]:
# D-dimensional embeddings of the sparse features; the same embeddings feed
# both the CIN and the DNN branches.
sparse_kd_embed = []
for f, _input in zip(sparse_feats, sparse_inputs):
    # Table size = number of distinct encoded ids in the column.
    voc_size = total_data[f].nunique()
    embed_table = Embedding(voc_size, D, embeddings_regularizer=tf.keras.regularizers.l2(0.7))
    sparse_kd_embed.append(embed_table(_input))  # (None, 1, D)

In [32]:
sparse_kd_embed

[<KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_26')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_27')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_28')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_29')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_30')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_31')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_32')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_33')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_34')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_35')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding_36')>,
 <KerasTensor: shape=(None, 1, 8) dtype=float32 (created by layer 'embedding

In [33]:
# Construct the feature map: stack the per-field (None, 1, D) embeddings along
# axis 1 into one (None, n_sparse_fields, D) tensor, which is the CIN input x0.
input_feature_map = Concatenate(axis=1)(sparse_kd_embed)

In [35]:
def compressed_interaction_net(x0, xl, D, n_filters):
    """
    One layer of the Compressed Interaction Network (CIN).

    Takes the outer product of xl and x0 separately for every embedding
    coordinate, then compresses the resulting interaction maps with a
    kernel-size-1 convolution.

    @param x0: original input feature map, shape (?, m, D)
    @param xl: output of the previous CIN layer (x0 itself for the first
               layer), shape (?, h, D)
    @param D: embedding dim
    @param n_filters: the number of filters of the compressed network, i.e. the
                      number of feature maps this layer produces
    @return: feature maps for the next layer, shape (?, n_filters, D)
    @raise ValueError: if the last dimension of x0 or xl is known and is not D
    """
    # x0 has m features, xl has h features.

    # Splitting into D pieces yields single-coordinate columns only when the
    # embedding axis really has length D. Any other divisible width would run
    # without error but compute something that is no longer a per-coordinate
    # outer product, so reject it up front.
    for name, tensor in (("x0", x0), ("xl", xl)):
        embed_dim = tensor.shape[-1]
        if embed_dim is not None and embed_dim != D:
            raise ValueError(
                "error shape! %s has embedding dim %s, expected D=%s"
                % (name, embed_dim, D)
            )

    # 1. split x0 and xl into D columns along the embedding dimension
    x0_cols = tf.split(x0, D, axis=-1)  # D tensors of shape ?, m, 1
    xl_cols = tf.split(xl, D, axis=-1)  # D tensors of shape ?, h, 1

    # 2. traverse the D columns: compute the outer product of the i-th column
    # of xl and x0, and collect the results in feature_maps
    feature_maps = []
    for x0_col, xl_col in zip(x0_cols, xl_cols):
        # transpose_b=True transposes x0_col, so (h, 1) x (1, m) -> (h, m)
        feature_map = tf.matmul(xl_col, x0_col, transpose_b=True)  # outer product ?, h, m
        feature_map = tf.expand_dims(feature_map, axis=-1)  # ?, h, m, 1
        feature_maps.append(feature_map)

    # 3. obtain the tensor with the dimension of h x m x D
    feature_maps = Concatenate(axis=-1)(feature_maps)  # ?, h, m, D

    # 4. the compress network
    x0_n_feats = x0.get_shape()[1]  # m
    xl_n_feats = xl.get_shape()[1]  # h
    reshaped_feature_maps = Reshape(target_shape=(x0_n_feats * xl_n_feats, D))(feature_maps)  # ?, h*m, D
    # Conv1D convolves over axis 1 and treats the last axis as channels, so
    # the h*m interaction terms are moved to the channel axis.
    transposed_feature_maps = tf.transpose(reshaped_feature_maps, [0, 2, 1])  # ?, D, h*m

    # Conv1D: use n_filters kernels of size 1, stride 1
    new_feature_maps = Conv1D(n_filters, kernel_size=1, strides=1)(transposed_feature_maps)  # ?, D, n_filters
    # put the embedding dimension back on the last axis
    new_feature_maps = tf.transpose(new_feature_maps, [0, 2, 1])  # ?, n_filters, D

    return new_feature_maps

In [36]:
def build_cin(x0, D=8, n_layers=3, n_filters=12):
    """
    Stack CIN layers on top of x0 and sum-pool the output of every layer.

    @param x0: the original input feature maps: ?, m, D
    @param D: feature embedding dimension
    @param n_layers: the number of CIN layers to stack
    @param n_filters: the number of feature maps produced by each CIN layer
    @return: the pooled outputs of all layers concatenated on the last axis
    """
    pooled_outputs = []
    current = x0
    for _ in range(n_layers):
        # every layer interacts the previous layer's output with the original x0
        current = compressed_interaction_net(x0, current, D, n_filters)
        # sum pooling over the last axis of the layer output
        pooled_outputs.append(Lambda(lambda x: K.sum(x, axis=-1))(current))

    # concatenate the output of pooling layers
    return Concatenate(axis=-1)(pooled_outputs)

In [37]:
# Pass the notebook's embedding size explicitly: build_cin's own default
# (D=8) would silently go out of sync with the embeddings if D were changed.
cin_layer = build_cin(input_feature_map, D=D)

In [38]:
cin_layer

<KerasTensor: shape=(None, 36) dtype=float32 (created by layer 'concatenate_5')>

## DNN Part

In [39]:
# DNN input: join the (None, 1, D) field embeddings along the last axis and
# flatten them into one vector per example.
embed_inputs = Flatten()(Concatenate(axis=-1)(sparse_kd_embed))

In [40]:
# Three fully connected ReLU layers of 128 units, each followed by dropout;
# the dropout rate decreases with depth (0.5 -> 0.3 -> 0.1).
fc_layer = embed_inputs
for drop_rate in (0.5, 0.3):
    fc_layer = Dropout(drop_rate)(Dense(128, activation='relu')(fc_layer))
fc_layer_output = Dropout(0.1)(Dense(128, activation='relu')(fc_layer))

In [41]:
fc_layer_output

<KerasTensor: shape=(None, 128) dtype=float32 (created by layer 'dropout_2')>

## Output Layer

In [42]:
linear_part

<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'add_1')>

In [43]:
cin_layer

<KerasTensor: shape=(None, 36) dtype=float32 (created by layer 'concatenate_5')>

In [44]:
fc_layer_output

<KerasTensor: shape=(None, 128) dtype=float32 (created by layer 'dropout_2')>

In [45]:
# Join the three branches on the last axis: linear (None, 1), CIN (None, 36)
# and DNN (None, 128).
concat_layer = Concatenate()([linear_part, cin_layer, fc_layer_output])

In [46]:
# A single sigmoid unit maps the combined branches to a value in (0, 1),
# trained below against the binary label.
output_layer = Dense(1, activation='sigmoid')(concat_layer)

In [None]:
output_layer

<tf.Tensor 'dense_4/Identity:0' shape=(None, 1) dtype=float32>

## compile model

In [47]:
# Inputs are ordered dense features first, then sparse features; the arrays
# passed to fit() must follow the same order.
model = Model(dense_inputs+sparse_inputs, output_layer)

In [None]:
# plot_model(model, "xdeepfm.png")

In [None]:
model.summary()

In [49]:
# binary_crossentropy is listed as a metric as well as the loss: the reported
# loss also includes the L2 penalties of the regularised embeddings, while the
# metric shows the plain cross-entropy.
model.compile(optimizer="adam", 
              loss="binary_crossentropy", 
              metrics=["binary_crossentropy", tf.keras.metrics.AUC(name='auc')])

## training

In [50]:
# .loc slices by index label and includes both endpoints, hence the -1:
# labels 0..499999 form the training set, labels 500000 onward the
# validation set. The split is positional, not shuffled.
train_data = total_data.loc[:500000-1]
valid_data = total_data.loc[500000:]

In [51]:
# One 1-D array per feature, in the same order as the model's Input layers.
train_dense_x = [train_data[f].values for f in dense_feats]
train_sparse_x = [train_data[f].values for f in sparse_feats]

In [52]:
# Target array wrapped in a one-element list (one entry for the model's
# single output).
train_label = [train_data['label'].values]

In [53]:
# Validation features, built the same way and in the same order as the
# training features.
val_dense_x = [valid_data[f].values for f in dense_feats]
val_sparse_x = [valid_data[f].values for f in sparse_feats]

In [54]:
# Validation target, wrapped in a one-element list like train_label.
val_label = [valid_data['label'].values]

In [56]:
# Feature lists are concatenated dense-first to match the order of the
# model's inputs; validation metrics are computed on the held-out slice
# after every epoch.
model.fit(train_dense_x+train_sparse_x, 
          train_label, epochs=5, batch_size=256,
          validation_data=(val_dense_x+val_sparse_x, val_label)
         )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f5242956040>