In [24]:
import pandas as pd
import tensorflow as tf
import datetime
import gc

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import log_loss, roc_auc_score

%run ./model/deepfm.ipynb
%run ./model/dcn.ipynb
%run ./model/mlr.ipynb
%run ./model/layers.ipynb

In [2]:
# Feature groups: 13 numeric columns I1..I13 and 26 categorical columns C1..C26.
NUMERIC_COLS = ['I%d' % i for i in range(1, 14)]
SPARSE_COLS = ['C%d' % i for i in range(1, 27)]

data = pd.read_csv('./data/sample.csv')

# Missing categoricals become the literal token '-1'; missing numerics become 0.
data[SPARSE_COLS] = data[SPARSE_COLS].fillna('-1')
data[NUMERIC_COLS] = data[NUMERIC_COLS].fillna(0)

# Standardise the numeric columns in place.
# NOTE(review): the scaler is fit on every row of `data`, i.e. before any
# train/validation hold-out happens — consider fitting on the training rows only.
ss = StandardScaler().fit(data[NUMERIC_COLS])
data[NUMERIC_COLS] = ss.transform(data[NUMERIC_COLS])

label = data['label'].values
dense_value = data[NUMERIC_COLS]
sparse_index = data[SPARSE_COLS].copy()

# Give every (column, value) pair its own integer id. Ids are handed out
# column by column, so `feat_num` is both the running offset for the next
# column and, once the loop finishes, the total number of distinct ids.
feat_dict = {}
feat_num = 0
for col in SPARSE_COLS:
    unique_val = data[col].unique()
    col_vocab = {val: feat_num + pos for pos, val in enumerate(unique_val)}
    feat_dict[col] = col_vocab
    sparse_index[col] = data[col].map(col_vocab)
    feat_num += len(unique_val)

In [26]:
# Build, compile and fit the model on the encoded sparse ids and scaled dense values.
# NOTE(review): the variable is named `deepfm` but the object built here is an
# MLR instance; the name is kept because other cells may refer to it.
# NOTE(review): the trailing 4 is an MLR constructor argument whose meaning is
# not visible here — TODO confirm against ./model/mlr.ipynb.
deepfm = MLR(len(SPARSE_COLS), feat_num, len(NUMERIC_COLS), 4)
deepfm.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])
# Alternative optimizer:
# deepfm.compile("adagrad", "binary_crossentropy", metrics=['binary_crossentropy'])

# Optional TensorBoard logging (disabled):
# log_dir = "/Users/zhanshanjun/jupyter-workspace/Recsys/Rank/logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

fit_options = dict(
    batch_size=32,
    epochs=1,
    verbose=2,
    validation_split=0.2,
)
# fit_options['callbacks'] = [tensorboard_callback]

history = deepfm.fit(x=[sparse_index, dense_value], y=label, **fit_options)





Train on 96000 samples, validate on 24000 samples
96000/96000 - 123s - loss: 0.4754 - binary_crossentropy: 0.4754 - val_loss: 0.4600 - val_binary_crossentropy: 0.4600


In [None]:
# fm   96000/96000 - 30s - loss: 0.4715 - binary_crossentropy: 0.4715 - val_loss: 0.4689 - val_binary_crossentropy: 0.4689

#ffm   96000/96000 - 2529s - loss: 0.4626 - binary_crossentropy: 0.4626 - val_loss: 0.4634 - val_binary_crossentropy: 0.4634

# dnn  96000/96000 - 33s - loss: 0.4605 - binary_crossentropy: 0.4605 - val_loss: 0.4538 - val_binary_crossentropy: 0.4538

# wdl    96000/96000 - 33s - loss: 0.4610 - binary_crossentropy: 0.4610 - val_loss: 0.4552 - val_binary_crossentropy: 0.4552

# deepfm   96000/96000 - 34s - loss: 0.4602 - binary_crossentropy: 0.4602 - val_loss: 0.4539 - val_binary_crossentropy: 0.4539

# ? (model not recorded)   96000/96000 - 35s - loss: 0.4688 - binary_crossentropy: 0.4688 - val_loss: 0.4615 - val_binary_crossentropy: 0.4615
# ? (model not recorded)   96000/96000 - 74s - loss: 0.4733 - binary_crossentropy: 0.4733 - val_loss: 0.4586 - val_binary_crossentropy: 0.4586
