In [1]:
import os
import shutil
import sys
import glob
import pickle
import numpy as np
import pandas as pd
import random
import torch
from sklearn.model_selection import train_test_split

from pathlib import Path

In [2]:
SEED = 42
if SEED is not None:
    np.random.seed(SEED)
    random.seed(SEED)
    torch.manual_seed(SEED)

In [3]:
from deepctr_torch.inputs import SparseFeat, DenseFeat,get_feature_names

## Чтение данных

In [4]:
DATA_PATH = Path('data') / 'criteo-part'

TRAIN_PATH = str(DATA_PATH / 'train.csv')

In [5]:
train = pd.read_csv(TRAIN_PATH)

In [6]:
train.head()

Unnamed: 0,_c0,_c1,_c2,_c3,_c4,_c5,_c6,_c7,_c8,_c9,...,_c31,_c32,_c33,_c34,_c35,_c36,_c37,_c38,_c39,id
0,1,0.0,-1,,,1465.0,0.0,17.0,0.0,4.0,...,e5f8f18f,,,f3ddd519,,32c7478e,b34f3128,,,12
1,1,0.0,1,20.0,16.0,1548.0,93.0,42.0,32.0,912.0,...,1f868fdd,21ddcdc9,a458ea53,7eee76d1,,32c7478e,9af06ad9,9d93af03,cdfe5ab7,26
2,0,8.0,0,15.0,20.0,115.0,24.0,8.0,23.0,24.0,...,1304f63b,21ddcdc9,b1252a9d,07b2853e,,32c7478e,94bde4f2,010f6491,09b76f8d,39
3,1,88.0,319,,4.0,5.0,4.0,89.0,40.0,88.0,...,bbf70d82,,,16e2e3b3,,32c7478e,d859b4dd,,,41
4,0,0.0,53,,10.0,6550.0,98.0,34.0,11.0,349.0,...,fa0643ee,21ddcdc9,b1252a9d,0094bc78,,32c7478e,29ece3ed,001f3601,402185f3,85


In [7]:
train = train.rename(columns=lambda x: x.replace('_', '').upper())

In [8]:
train.head()

Unnamed: 0,C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,...,C31,C32,C33,C34,C35,C36,C37,C38,C39,ID
0,1,0.0,-1,,,1465.0,0.0,17.0,0.0,4.0,...,e5f8f18f,,,f3ddd519,,32c7478e,b34f3128,,,12
1,1,0.0,1,20.0,16.0,1548.0,93.0,42.0,32.0,912.0,...,1f868fdd,21ddcdc9,a458ea53,7eee76d1,,32c7478e,9af06ad9,9d93af03,cdfe5ab7,26
2,0,8.0,0,15.0,20.0,115.0,24.0,8.0,23.0,24.0,...,1304f63b,21ddcdc9,b1252a9d,07b2853e,,32c7478e,94bde4f2,010f6491,09b76f8d,39
3,1,88.0,319,,4.0,5.0,4.0,89.0,40.0,88.0,...,bbf70d82,,,16e2e3b3,,32c7478e,d859b4dd,,,41
4,0,0.0,53,,10.0,6550.0,98.0,34.0,11.0,349.0,...,fa0643ee,21ddcdc9,b1252a9d,0094bc78,,32c7478e,29ece3ed,001f3601,402185f3,85


In [9]:
num_columns = ['C{}'.format(i) for i in range(1, 14)]
cat_columns = ['C{}'.format(i) for i in range(14, 40)]
target = ['C0']
len(num_columns), len(cat_columns)

(13, 26)

In [10]:
X, y = train.drop(columns=['C0', 'ID']), train['C0']

# Data preprocessing

In [11]:
X[num_columns] = X[num_columns].fillna(0)
X[cat_columns] = X[cat_columns].fillna('-1')

In [12]:
from sklearn.preprocessing import MinMaxScaler
import category_encoders as ce

In [13]:
X_encoded = X.copy()

In [14]:
mms = MinMaxScaler(feature_range=(0, 1))

In [15]:
X_encoded[num_columns] = mms.fit_transform(X_encoded[num_columns])

In [16]:
encoder = ce.OrdinalEncoder(cols=cat_columns)

In [17]:
X_encoded = encoder.fit_transform(X_encoded)

  elif pd.api.types.is_categorical(cols):


In [18]:
X_train_encoded, X_valid_encoded, y_train, y_valid = train_test_split(X_encoded, y, stratify=y, random_state=SEED)

In [19]:
X_train_encoded.head()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,...,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39
597429,0.0,0.033123,0.000427,0.0,0.0005553482,0.000669,0.000202,0.004752,0.001966,0.0,...,5,213,2,4,7150,1,1,229,20,166
1768963,0.0,0.000136,0.000412,0.021665,0.0005553482,0.002609,0.000101,0.005741,0.00445,0.0,...,9,152,20,2,94823,1,4,56,12,44
1913948,0.0,0.000861,0.000137,0.007982,0.0008473158,0.000129,0.003737,0.001386,0.002949,0.0,...,1,232,15,3,14484,1,3,261,3,191
266546,0.0,0.000181,7.6e-05,0.007982,2.668111e-06,0.0,0.0,0.001782,0.000362,0.0,...,7,118,1,1,275,4,2,112,1,1
800178,0.000519,0.000181,3.1e-05,0.0,7.623174e-07,0.0,0.000303,0.000198,5.2e-05,0.222222,...,1,33,36,3,226291,1,1,4950,4,3822


In [20]:
sparse_features = [SparseFeat(feat, X_encoded[feat].nunique() + 1) for feat in cat_columns]
# or using hashing
# sparse_features = [SparseFeat(feat, vocabulary_size=1000, embedding_dim=4, use_hash=True)  for feat in cat_columns]
dense_features = [DenseFeat(feat, 1) for feat in num_columns]

In [21]:
fixlen_feature_columns = sparse_features + dense_features

In [22]:
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [23]:
train_model_input = {name:X_train_encoded[name] for name in feature_names}

In [24]:
test_model_input = {name:X_valid_encoded[name] for name in feature_names}

# Model training

In [25]:
from deepctr_torch.models import DeepFM, CCPM, PNN, WDL, MLR, NFM, AFM, DCN, DCNMix, xDeepFM, AutoInt, ONN, FiBiNET
from deepctr_torch.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.metrics import roc_auc_score, log_loss

In [26]:
def get_metrics(groups) -> pd.DataFrame:
    metrics =  pd.DataFrame.from_records(groups, columns=groups.keys(), index=list(all_metrics.values())[0].keys())
    metrics.index.name = 'metric'
    return metrics

def get_ate(groups, control_name) -> pd.DataFrame:
    """Get Average Treatment Effect
    groups - dictionary where keys - names of models, values - dicts of pairs <metric_name>, <metric_value>
    control_name - name of baseline model
    
    return pd.DataFrame (rows corresponds to metrics, cols corresponds to models and ATE with respect to control)
    """
    metrics = get_metrics(groups)
    return metrics.subtract(metrics[control_name], axis='index').drop(columns=control_name) * 100


all_metrics = {}

In [27]:
def test_model(model, train_model_input, test_model_input, y_train, y_valid, batch_size=4096 * 2):
    model_name = model.__class__.__name__
    
    model.compile("adam", "binary_crossentropy",
              metrics=["binary_crossentropy", "auc"])
    
    es = EarlyStopping(monitor='val_auc', min_delta=0.03, verbose=1, patience=1, mode='max')
    mdckpt = ModelCheckpoint(filepath = f'{model_name}.ckpt', monitor='val_auc', mode='max', save_best_only=True, save_weights_only=True)
    
    history = model.fit(train_model_input, y_train,
                        batch_size=batch_size, epochs=15, verbose=2, validation_split=0.2, callbacks=[es, mdckpt])
    
    model.load_state_dict(torch.load(f'{model_name}.ckpt'))
    
    y_valid_predicted = model.predict(test_model_input, batch_size=batch_size).squeeze()
    
    return {
        'roc-auc': roc_auc_score(y_valid, y_valid_predicted),
        'log-loss': log_loss(y_valid, y_valid_predicted)        
    }

In [28]:
device = 'cuda:0'

## CCPM (Convolutional Click Prediction Model)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/CCPM.png" alt="drawing" width="900"/>

In [29]:
ccpm_model = CCPM(linear_feature_columns, sparse_features, dnn_dropout=0.5, device=device)

In [30]:
all_metrics['CCPM'] = test_model(ccpm_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
44s - loss:  0.5213 - binary_crossentropy:  0.5212 - auc:  0.7058 - val_binary_crossentropy:  0.4903 - val_auc:  0.7505
Epoch 2/15
43s - loss:  0.4642 - binary_crossentropy:  0.4642 - auc:  0.7855 - val_binary_crossentropy:  0.4889 - val_auc:  0.7540
Epoch 00002: early stopping


## PNN (Product-based Neural Network)
<!-- ![pic](https://deepctr-torch.readthedocs.io/en/latest/_images/PNN.png) -->
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/PNN.png" alt="drawing" width="900"/>

In [31]:
pnn_model = PNN(dnn_feature_columns, dnn_dropout=0.5, device=device)

In [32]:
all_metrics['PNN'] = test_model(pnn_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
33s - loss:  0.5254 - binary_crossentropy:  0.5254 - auc:  0.6984 - val_binary_crossentropy:  0.4867 - val_auc:  0.7626
Epoch 2/15
33s - loss:  0.4251 - binary_crossentropy:  0.4251 - auc:  0.8303 - val_binary_crossentropy:  0.5072 - val_auc:  0.7432
Epoch 00002: early stopping


## Wide & Deep
<!-- ![pic](https://deepctr-torch.readthedocs.io/en/latest/_images/WDL.png) -->
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/WDL.png" alt="drawing" width="900"/>

In [33]:
wdl_model = WDL(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [34]:
all_metrics['WDL'] = test_model(wdl_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
37s - loss:  0.5074 - binary_crossentropy:  0.5074 - auc:  0.7259 - val_binary_crossentropy:  0.4793 - val_auc:  0.7673
Epoch 2/15
35s - loss:  0.4115 - binary_crossentropy:  0.4114 - auc:  0.8412 - val_binary_crossentropy:  0.5093 - val_auc:  0.7426
Epoch 00002: early stopping


## DeepFM
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/DeepFM.png" alt="drawing" width="900"/>

In [35]:
deep_fm_model = DeepFM(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [36]:
all_metrics['DeepFM'] = test_model(deep_fm_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
36s - loss:  0.5040 - binary_crossentropy:  0.5039 - auc:  0.7285 - val_binary_crossentropy:  0.4783 - val_auc:  0.7680
Epoch 2/15
36s - loss:  0.4145 - binary_crossentropy:  0.4145 - auc:  0.8380 - val_binary_crossentropy:  0.5050 - val_auc:  0.7463
Epoch 00002: early stopping


## MLR(Mixed Logistic Regression/Piece-wise Linear Model)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/MLR.png" alt="drawing" width="900"/>

In [37]:
mlr_model = MLR(linear_feature_columns, dnn_feature_columns, device=device)

In [38]:
all_metrics['MLR'] = test_model(mlr_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
55s - loss:  0.5399 - binary_crossentropy:  0.5398 - auc:  0.7000 - val_binary_crossentropy:  0.5079 - val_auc:  0.7343
Epoch 2/15
55s - loss:  0.4903 - binary_crossentropy:  0.4903 - auc:  0.7612 - val_binary_crossentropy:  0.4938 - val_auc:  0.7490
Epoch 00002: early stopping


## NFM (Neural Factorization Machine)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/NFM.png" alt="drawing" width="900"/>

In [39]:
nfm_model = NFM(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [40]:
all_metrics['NFM'] = test_model(nfm_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
36s - loss:  0.5149 - binary_crossentropy:  0.5148 - auc:  0.7137 - val_binary_crossentropy:  0.4853 - val_auc:  0.7604
Epoch 2/15
35s - loss:  0.4489 - binary_crossentropy:  0.4488 - auc:  0.8057 - val_binary_crossentropy:  0.4864 - val_auc:  0.7615
Epoch 00002: early stopping


## AFM (Attentional Factorization Machine)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/AFM.png" alt="drawing" width="900"/>

In [41]:
afm_model = AFM(linear_feature_columns, sparse_features, afm_dropout=0.5, device=device)

In [42]:
all_metrics['AFM'] = test_model(afm_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
39s - loss:  0.5387 - binary_crossentropy:  0.5387 - auc:  0.7013 - val_binary_crossentropy:  0.5057 - val_auc:  0.7365
Epoch 2/15
39s - loss:  0.4857 - binary_crossentropy:  0.4857 - auc:  0.7629 - val_binary_crossentropy:  0.4897 - val_auc:  0.7521
Epoch 00002: early stopping


## DCN (Deep & Cross Network)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/DCN.png" alt="drawing" width="900"/>

In [43]:
dcn_model = DCN(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [44]:
all_metrics['DCN'] = test_model(dcn_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch




Epoch 1/15
37s - loss:  0.5069 - binary_crossentropy:  0.5068 - auc:  0.7281 - val_binary_crossentropy:  0.4796 - val_auc:  0.7654




Epoch 2/15
37s - loss:  0.4240 - binary_crossentropy:  0.4240 - auc:  0.8299 - val_binary_crossentropy:  0.5102 - val_auc:  0.7417
Epoch 00002: early stopping




## DCN-Mix (Improved Deep & Cross Network with mix of experts and matrix kernel)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/DCN-Mix.png" alt="drawing" width="900"/>

In [45]:
dcn_mix_model = DCNMix(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [46]:
all_metrics['DCNMix'] = test_model(dcn_mix_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch




Epoch 1/15
38s - loss:  0.5059 - binary_crossentropy:  0.5058 - auc:  0.7301 - val_binary_crossentropy:  0.4776 - val_auc:  0.7683




Epoch 2/15
38s - loss:  0.3972 - binary_crossentropy:  0.3971 - auc:  0.8523 - val_binary_crossentropy:  0.5070 - val_auc:  0.7449
Epoch 00002: early stopping




## xDeepFM
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/xDeepFM.png" alt="drawing" width="900"/>

In [47]:
xdeep_fm_model = xDeepFM(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [48]:
all_metrics['xDeepFM'] = test_model(xdeep_fm_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
61s - loss:  0.5006 - binary_crossentropy:  0.5006 - auc:  0.7346 - val_binary_crossentropy:  0.4772 - val_auc:  0.7693
Epoch 2/15
61s - loss:  0.3923 - binary_crossentropy:  0.3922 - auc:  0.8571 - val_binary_crossentropy:  0.5171 - val_auc:  0.7424
Epoch 00002: early stopping


## AutoInt(Automatic Feature Interaction)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/AutoInt.png" alt="drawing" width="900"/>

In [49]:
auto_int_model = AutoInt(linear_feature_columns, dnn_feature_columns, dnn_dropout=0.5, device=device)

In [50]:
all_metrics['AutoInt'] = test_model(auto_int_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
41s - loss:  0.5083 - binary_crossentropy:  0.5083 - auc:  0.7274 - val_binary_crossentropy:  0.4789 - val_auc:  0.7670
Epoch 2/15
41s - loss:  0.4062 - binary_crossentropy:  0.4061 - auc:  0.8444 - val_binary_crossentropy:  0.5085 - val_auc:  0.7442
Epoch 00002: early stopping


## FiBiNET(Feature Importance and Bilinear feature Interaction NETwork)
<img src="https://deepctr-torch.readthedocs.io/en/latest/_images/FiBiNET.png" alt="drawing" width="900"/>

In [53]:
fibinet_interaction_model = FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', dnn_dropout=0.5, device=device)

In [54]:
all_metrics['FiBiNET-inter'] = test_model(fibinet_interaction_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
94s - loss:  0.5137 - binary_crossentropy:  0.5136 - auc:  0.7216 - val_binary_crossentropy:  0.4804 - val_auc:  0.7668
Epoch 2/15
93s - loss:  0.4161 - binary_crossentropy:  0.4161 - auc:  0.8393 - val_binary_crossentropy:  0.5001 - val_auc:  0.7519
Epoch 00002: early stopping


In [55]:
fibinet_all_model = FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='all', dnn_dropout=0.5, device=device)

In [56]:
all_metrics['FiBiNET-all'] = test_model(fibinet_all_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
85s - loss:  0.5123 - binary_crossentropy:  0.5122 - auc:  0.7232 - val_binary_crossentropy:  0.4798 - val_auc:  0.7673
Epoch 2/15
86s - loss:  0.4145 - binary_crossentropy:  0.4144 - auc:  0.8404 - val_binary_crossentropy:  0.5032 - val_auc:  0.7498
Epoch 00002: early stopping


In [57]:
fibinet_each_model = FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='each', dnn_dropout=0.5, device=device)

In [58]:
all_metrics['FiBiNET-each'] = test_model(fibinet_each_model, train_model_input, test_model_input, y_train.values, y_valid)

cuda:0
Train on 2198958 samples, validate on 549740 samples, 269 steps per epoch
Epoch 1/15
86s - loss:  0.5142 - binary_crossentropy:  0.5141 - auc:  0.7214 - val_binary_crossentropy:  0.4806 - val_auc:  0.7666
Epoch 2/15
86s - loss:  0.4170 - binary_crossentropy:  0.4170 - auc:  0.8384 - val_binary_crossentropy:  0.5028 - val_auc:  0.7506
Epoch 00002: early stopping


# Результаты

In [59]:
get_metrics(all_metrics)

Unnamed: 0_level_0,CCPM,PNN,WDL,DeepFM,MLR,NFM,AFM,DCN,DCNMix,xDeepFM,AutoInt,FiBiNET-inter,FiBiNET-all,FiBiNET-each
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
roc-auc,0.755444,0.764055,0.768638,0.769213,0.750845,0.762868,0.753727,0.76676,0.769508,0.770449,0.768322,0.768353,0.768803,0.768037
log-loss,0.487051,0.485082,0.477694,0.476714,0.492128,0.484571,0.487891,0.477922,0.476031,0.475585,0.477263,0.478585,0.478062,0.478823


In [60]:
get_ate(all_metrics, control_name='DeepFM')

Unnamed: 0_level_0,CCPM,PNN,WDL,MLR,NFM,AFM,DCN,DCNMix,xDeepFM,AutoInt,FiBiNET-inter,FiBiNET-all,FiBiNET-each
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
roc-auc,-1.376905,-0.515805,-0.057509,-1.836797,-0.634478,-1.548611,-0.245365,0.029485,0.123626,-0.089078,-0.086008,-0.040978,-0.117594
log-loss,1.033721,0.836858,0.098019,1.541409,0.785764,1.117735,0.120876,-0.068253,-0.112821,0.054898,0.187136,0.134855,0.210907


In [61]:
get_ate(all_metrics, control_name='FiBiNET-all')

Unnamed: 0_level_0,CCPM,PNN,WDL,DeepFM,MLR,NFM,AFM,DCN,DCNMix,xDeepFM,AutoInt,FiBiNET-inter,FiBiNET-each
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
roc-auc,-1.335889,-0.474789,-0.016493,0.041016,-1.795781,-0.593462,-1.507595,-0.204349,0.070501,0.164642,-0.048062,-0.045,-0.0766
log-loss,0.898888,0.702025,-0.036814,-0.134833,1.406576,0.650931,0.982902,-0.013957,-0.203086,-0.247654,-0.079935,0.0523,0.0761
