In [1]:
import gc
import gzip
import itertools
import time
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import seaborn as sns
import torch
from pytorch_optimizer import MADGRAD
from category_encoders import CountEncoder
from deepctr_torch.inputs import DenseFeat, SparseFeat, get_feature_names
from deepctr_torch.models import AFM, DCN, DIFM, AutoInt, DeepFM, xDeepFM
from deepctr_torch.callbacks import EarlyStopping, ModelCheckpoint
from matplotlib import pyplot as plt
from sklearn import base
from sklearn.metrics import confusion_matrix, log_loss, roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from typing_extensions import Self

warnings.filterwarnings("ignore")


2024-05-13 16:00:54.358799: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-13 16:00:54.359066: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-13 16:00:54.362713: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-13 16:00:54.393230: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
NAN_INT = 7535805


class LabelEncoder(base.BaseEstimator):
    """Label Encoder that groups infrequent values into one label.

    Attributes:
        min_obs (int): minimum number of observation to assign a label.
        label_encoders (list of dict): label encoders for columns
        label_maxes (list of int): maximum of labels for columns
    """

    def __init__(self, min_obs: int = 10):
        """Initialize the OneHotEncoder class object.

        Args:
            min_obs (int): minimum number of observation to assign a label.
        """

        self.min_obs = min_obs
        self.is_fitted = False

    def __repr__(self):
        return ("LabelEncoder(min_obs={})").format(self.min_obs)

    def _get_label_encoder_and_max(self, x: pd.Series) -> tuple[dict, int]:
        """Return a mapping from values and its maximum of a column to integer labels.

        Args:
            x (pandas.Series): a categorical column to encode.

        Returns:
            (tuple):
                - (dict): mapping from values of features to integers
                - (int): maximum label
        """

        # NaN cannot be used as a key for dict. Impute it with a random
        # integer.
        label_count = x.fillna(NAN_INT).value_counts()
        n_uniq = label_count.shape[0]

        label_count = label_count[label_count >= self.min_obs]
        n_uniq_new = label_count.shape[0]

        # If every label appears more than min_obs, new label starts from 0.
        # Otherwise, new label starts from 1 and 0 is used for all old labels
        # that appear less than min_obs.
        offset = 0 if n_uniq == n_uniq_new else 1

        label_encoder = pd.Series(np.arange(n_uniq_new) + offset, index=label_count.index)
        max_label = label_encoder.max()
        label_encoder = label_encoder.to_dict()

        return label_encoder, max_label

    def _transform_col(self, x: pd.Series, i: int) -> pd.Series:
        """Encode one categorical column into labels.

        Args:
            x (pandas.Series): a categorical column to encode
            i (int): column index

        Returns:
            (pandas.Series): a column with labels.
        """
        return x.fillna(NAN_INT).map(self.label_encoders[i]).fillna(0).astype(int)

    def fit(self, X: pd.DataFrame, y: pd.Series | None = None) -> Self:
        self.label_encoders = [None] * X.shape[1]
        self.label_maxes = [None] * X.shape[1]

        for i, col in enumerate(X.columns):
            (
                self.label_encoders[i],
                self.label_maxes[i],
            ) = self._get_label_encoder_and_max(X[col])

        self.is_fitted = True
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Encode categorical columns into label encoded columns

        Args:
            X (pandas.DataFrame): categorical columns to encode

        Returns:
            (pandas.DataFrame): label encoded columns
        """

        assert self.is_fitted, "fit() or fit_transform() must be called before transform()."

        X = X.copy()
        for i, col in enumerate(X.columns):
            X.loc[:, col] = self._transform_col(X[col], i)

        return X

    def fit_transform(self, X: pd.DataFrame, y: pd.Series | None = None) -> pd.DataFrame:
        """Encode categorical columns into label encoded columns

        Args:
            X (pandas.DataFrame): categorical columns to encode

        Returns:
            (pandas.DataFrame): label encoded columns
        """

        self.label_encoders = [None] * X.shape[1]
        self.label_maxes = [None] * X.shape[1]

        X = X.copy()
        for i, col in enumerate(X.columns):
            (
                self.label_encoders[i],
                self.label_maxes[i],
            ) = self._get_label_encoder_and_max(X[col])

            X.loc[:, col] = X[col].fillna(NAN_INT).map(self.label_encoders[i]).fillna(0).astype(int)

        self.is_fitted = True
        return X

In [3]:
path = "../input/web-ctr-prediction/"
train = pd.read_parquet(path + "train_sample_0.3_seed1119.parquet")
test = pd.read_parquet(path + "test.parquet")

## DeepFM

In [4]:
sparse_features = train.select_dtypes("object").columns.to_list()[1:]

In [5]:
lbe = LabelEncoder(10)
train[sparse_features] = lbe.fit_transform(train[sparse_features])
test[sparse_features] = lbe.transform(test[sparse_features])

for col in sparse_features:
    train[col] = train[col].astype(int)
    test[col] = test[col].astype(int)

train = train.fillna(0)
test = test.fillna(0)

In [6]:
dense_features = train.select_dtypes("int64").columns.to_list() + train.select_dtypes("float64").columns.to_list()
dense_features = dense_features[1:]

mms = MinMaxScaler(feature_range=(0, 1))
train[dense_features] = mms.fit_transform(train[dense_features])
test[dense_features] = mms.transform(test[dense_features])

In [7]:
def reduce_mem_usage(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
    """
    Iterate through all the columns of a dataframe and modify the data type to reduce memory usage.
    """
    numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
    start_mem = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == "int":
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2

    if verbose:
        print(f"Mem. usage decreased to {end_mem:5.2f} Mb ({100 * (start_mem - end_mem) / start_mem:.1f}% reduction)")

    return df


train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

Mem. usage decreased to 1035.50 Mb (73.5% reduction)
Mem. usage decreased to 372.23 Mb (73.1% reduction)


In [8]:
dnn_feature_columns = [
    SparseFeat(feat, vocabulary_size=train[feat].nunique(), embedding_dim=4) for i, feat in enumerate(sparse_features)
] + [DenseFeat(feat, 1) for feat in dense_features]

linear_feature_columns = [
    SparseFeat(feat, vocabulary_size=train[feat].nunique(), embedding_dim=4) for i, feat in enumerate(sparse_features)
] + [DenseFeat(feat, 1) for feat in dense_features]

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1119)

X = train.drop(columns=["Click"])
y = train["Click"]

oof_preds = np.zeros((X.shape[0],))
nn_preds = np.zeros((test.shape[0],))
scores = []

for fold, (train_idx, valid_idx) in enumerate(iterable=kfold.split(X=X, y=y), start=1):
    train_fold, valid_fold = train.iloc[train_idx], train.iloc[valid_idx]

    train_model_input = {name: train_fold[name] for name in feature_names}
    valid_model_input = {name: valid_fold[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    model = DeepFM(
        dnn_feature_columns=dnn_feature_columns,
        linear_feature_columns=linear_feature_columns,
        device=device,
        dnn_activation="prelu",
        dnn_use_bn=True,
        l2_reg_linear=0,
        l2_reg_embedding=0,
    )

    model.compile(
        MADGRAD(model.parameters(), lr=0.0001),
        "binary_crossentropy",
        metrics=["binary_crossentropy", "auc"],
    )

    es = EarlyStopping(
        monitor="val_binary_crossentropy",
        min_delta=0,
        verbose=1,
        patience=2,
        mode="min",
    )

    history = model.fit(
        train_model_input,
        train_fold["Click"].values,
        batch_size=4096,
        epochs=100,
        verbose=1,
        validation_data=(valid_model_input, valid_fold["Click"].values),
        callbacks=[es],
    )
    val_ans = model.predict(valid_model_input, batch_size=64).flatten()
    oof_preds[valid_idx] = val_ans
    nn_preds += model.predict(test_model_input, batch_size=64).flatten() / 5

    print(f"Fold{fold} test LogLoss: {log_loss(valid_fold['Click'].values, val_ans):.4f}")
    print(f"Fold{fold} test AUC: {roc_auc_score(valid_fold['Click'].values, val_ans):.4f}")
    
    scores.append(roc_auc_score(valid_fold["Click"].values, val_ans))
    del train_fold, valid_fold, model
    gc.collect()


print(np.mean(scores))


cuda:0
Train on 9984415 samples, validate on 2496104 samples, 2438 steps per epoch


2438it [02:24, 16.86it/s]


Epoch 1/20
156s - loss:  0.6318 - binary_crossentropy:  0.6318 - auc:  0.6902 - val_binary_crossentropy:  0.6273 - val_auc:  0.7011


2438it [02:24, 16.93it/s]


Epoch 2/20
155s - loss:  0.6278 - binary_crossentropy:  0.6278 - auc:  0.6982 - val_binary_crossentropy:  0.6231 - val_auc:  0.7055


2438it [02:29, 16.28it/s]


Epoch 3/20
161s - loss:  0.6203 - binary_crossentropy:  0.6203 - auc:  0.7078 - val_binary_crossentropy:  0.6182 - val_auc:  0.7089


2438it [02:31, 16.10it/s]


Epoch 4/20
163s - loss:  0.6171 - binary_crossentropy:  0.6171 - auc:  0.7117 - val_binary_crossentropy:  0.6161 - val_auc:  0.7126


2438it [02:33, 15.92it/s]


Epoch 5/20
164s - loss:  0.6152 - binary_crossentropy:  0.6152 - auc:  0.7145 - val_binary_crossentropy:  0.6142 - val_auc:  0.7169


2438it [02:29, 16.35it/s]


Epoch 6/20
161s - loss:  0.6132 - binary_crossentropy:  0.6132 - auc:  0.7171 - val_binary_crossentropy:  0.6123 - val_auc:  0.7189


2438it [02:25, 16.72it/s]


Epoch 7/20
157s - loss:  0.6117 - binary_crossentropy:  0.6117 - auc:  0.7192 - val_binary_crossentropy:  0.6106 - val_auc:  0.7203


2438it [02:26, 16.69it/s]


Epoch 8/20
157s - loss:  0.6105 - binary_crossentropy:  0.6105 - auc:  0.7209 - val_binary_crossentropy:  0.6158 - val_auc:  0.7198


2438it [02:23, 16.96it/s]


Epoch 9/20
155s - loss:  0.6093 - binary_crossentropy:  0.6093 - auc:  0.7225 - val_binary_crossentropy:  0.6077 - val_auc:  0.7236


2438it [02:24, 16.92it/s]


Epoch 10/20
155s - loss:  0.6080 - binary_crossentropy:  0.6080 - auc:  0.7240 - val_binary_crossentropy:  0.6121 - val_auc:  0.7247


2438it [02:24, 16.93it/s]


Epoch 11/20
155s - loss:  0.6071 - binary_crossentropy:  0.6071 - auc:  0.7252 - val_binary_crossentropy:  0.6057 - val_auc:  0.7261


2438it [02:25, 16.79it/s]


Epoch 12/20
156s - loss:  0.6064 - binary_crossentropy:  0.6064 - auc:  0.7262 - val_binary_crossentropy:  0.6069 - val_auc:  0.7256


2438it [02:23, 16.93it/s]


Epoch 13/20
155s - loss:  0.6056 - binary_crossentropy:  0.6056 - auc:  0.7272 - val_binary_crossentropy:  0.6044 - val_auc:  0.7284


2438it [02:25, 16.80it/s]


Epoch 14/20
156s - loss:  0.6050 - binary_crossentropy:  0.6050 - auc:  0.7279 - val_binary_crossentropy:  0.6047 - val_auc:  0.7278


2438it [02:26, 16.60it/s]


Epoch 15/20
158s - loss:  0.6042 - binary_crossentropy:  0.6042 - auc:  0.7288 - val_binary_crossentropy:  0.6062 - val_auc:  0.7279
Epoch 00015: early stopping
Fold1 test LogLoss: 0.6062
Fold1 test AUC: 0.7279
cuda:0
Train on 9984415 samples, validate on 2496104 samples, 2438 steps per epoch


2438it [02:24, 16.89it/s]


Epoch 1/20
155s - loss:  0.6305 - binary_crossentropy:  0.6305 - auc:  0.6921 - val_binary_crossentropy:  0.6226 - val_auc:  0.7036


2438it [02:25, 16.77it/s]


Epoch 2/20
156s - loss:  0.6290 - binary_crossentropy:  0.6290 - auc:  0.6968 - val_binary_crossentropy:  0.6238 - val_auc:  0.7049


2438it [02:24, 16.83it/s]


Epoch 3/20
156s - loss:  0.6199 - binary_crossentropy:  0.6199 - auc:  0.7081 - val_binary_crossentropy:  0.6176 - val_auc:  0.7101


2438it [02:24, 16.82it/s]


Epoch 4/20
156s - loss:  0.6169 - binary_crossentropy:  0.6169 - auc:  0.7120 - val_binary_crossentropy:  0.6148 - val_auc:  0.7137


2438it [02:24, 16.84it/s]


Epoch 5/20
156s - loss:  0.6149 - binary_crossentropy:  0.6149 - auc:  0.7148 - val_binary_crossentropy:  0.6136 - val_auc:  0.7165


2438it [02:25, 16.73it/s]


Epoch 6/20
157s - loss:  0.6132 - binary_crossentropy:  0.6132 - auc:  0.7172 - val_binary_crossentropy:  0.6154 - val_auc:  0.7177


2438it [02:25, 16.72it/s]


Epoch 7/20
157s - loss:  0.6116 - binary_crossentropy:  0.6116 - auc:  0.7191 - val_binary_crossentropy:  0.6101 - val_auc:  0.7200


2438it [02:27, 16.54it/s]


Epoch 8/20
158s - loss:  0.6103 - binary_crossentropy:  0.6103 - auc:  0.7210 - val_binary_crossentropy:  0.6092 - val_auc:  0.7213


2438it [02:27, 16.48it/s]


Epoch 9/20
159s - loss:  0.6091 - binary_crossentropy:  0.6091 - auc:  0.7226 - val_binary_crossentropy:  0.6100 - val_auc:  0.7227


2438it [02:25, 16.79it/s]


Epoch 10/20
156s - loss:  0.6079 - binary_crossentropy:  0.6079 - auc:  0.7241 - val_binary_crossentropy:  0.6077 - val_auc:  0.7238


2438it [02:22, 17.08it/s]


Epoch 11/20
154s - loss:  0.6068 - binary_crossentropy:  0.6068 - auc:  0.7253 - val_binary_crossentropy:  0.6062 - val_auc:  0.7252


2438it [02:22, 17.15it/s]


Epoch 12/20
153s - loss:  0.6061 - binary_crossentropy:  0.6061 - auc:  0.7264 - val_binary_crossentropy:  0.6071 - val_auc:  0.7256


2438it [02:22, 17.06it/s]


Epoch 13/20
154s - loss:  0.6054 - binary_crossentropy:  0.6054 - auc:  0.7273 - val_binary_crossentropy:  0.6057 - val_auc:  0.7269


2438it [02:23, 16.97it/s]


Epoch 14/20
155s - loss:  0.6047 - binary_crossentropy:  0.6047 - auc:  0.7281 - val_binary_crossentropy:  0.6054 - val_auc:  0.7262


2438it [02:23, 16.95it/s]


Epoch 15/20
155s - loss:  0.6040 - binary_crossentropy:  0.6040 - auc:  0.7288 - val_binary_crossentropy:  0.6038 - val_auc:  0.7284


2438it [02:23, 16.99it/s]


Epoch 16/20
154s - loss:  0.6036 - binary_crossentropy:  0.6036 - auc:  0.7294 - val_binary_crossentropy:  0.6047 - val_auc:  0.7287


2438it [02:23, 16.97it/s]


Epoch 17/20
155s - loss:  0.6029 - binary_crossentropy:  0.6029 - auc:  0.7302 - val_binary_crossentropy:  0.6046 - val_auc:  0.7290
Epoch 00017: early stopping
Fold2 test LogLoss: 0.6046
Fold2 test AUC: 0.7290
cuda:0
Train on 9984415 samples, validate on 2496104 samples, 2438 steps per epoch


2438it [02:24, 16.84it/s]


Epoch 1/20
156s - loss:  0.6314 - binary_crossentropy:  0.6314 - auc:  0.6906 - val_binary_crossentropy:  0.6241 - val_auc:  0.7038


2438it [02:23, 16.98it/s]


Epoch 2/20
155s - loss:  0.6308 - binary_crossentropy:  0.6308 - auc:  0.6946 - val_binary_crossentropy:  0.6274 - val_auc:  0.7037


2438it [02:22, 17.07it/s]


Epoch 3/20
154s - loss:  0.6217 - binary_crossentropy:  0.6217 - auc:  0.7056 - val_binary_crossentropy:  0.6183 - val_auc:  0.7092


2438it [02:26, 16.61it/s]


Epoch 4/20
158s - loss:  0.6185 - binary_crossentropy:  0.6185 - auc:  0.7096 - val_binary_crossentropy:  0.6157 - val_auc:  0.7125


2438it [02:27, 16.52it/s]


Epoch 5/20
159s - loss:  0.6165 - binary_crossentropy:  0.6165 - auc:  0.7127 - val_binary_crossentropy:  0.6154 - val_auc:  0.7136


2438it [02:28, 16.40it/s]


Epoch 6/20
160s - loss:  0.6145 - binary_crossentropy:  0.6145 - auc:  0.7154 - val_binary_crossentropy:  0.6138 - val_auc:  0.7171


2438it [02:25, 16.79it/s]


Epoch 7/20
156s - loss:  0.6130 - binary_crossentropy:  0.6130 - auc:  0.7173 - val_binary_crossentropy:  0.6114 - val_auc:  0.7194


2438it [02:25, 16.71it/s]


Epoch 8/20
157s - loss:  0.6119 - binary_crossentropy:  0.6119 - auc:  0.7189 - val_binary_crossentropy:  0.6137 - val_auc:  0.7208


2438it [02:24, 16.85it/s]


Epoch 9/20
156s - loss:  0.6110 - binary_crossentropy:  0.6110 - auc:  0.7203 - val_binary_crossentropy:  0.6102 - val_auc:  0.7197


2438it [02:25, 16.72it/s]


Epoch 10/20
157s - loss:  0.6099 - binary_crossentropy:  0.6099 - auc:  0.7216 - val_binary_crossentropy:  0.6118 - val_auc:  0.7225


2438it [02:27, 16.55it/s]


Epoch 11/20
158s - loss:  0.6091 - binary_crossentropy:  0.6091 - auc:  0.7226 - val_binary_crossentropy:  0.6087 - val_auc:  0.7232


2438it [02:26, 16.65it/s]


Epoch 12/20
157s - loss:  0.6085 - binary_crossentropy:  0.6085 - auc:  0.7236 - val_binary_crossentropy:  0.6091 - val_auc:  0.7237


2438it [02:24, 16.90it/s]


Epoch 13/20
155s - loss:  0.6078 - binary_crossentropy:  0.6078 - auc:  0.7244 - val_binary_crossentropy:  0.6064 - val_auc:  0.7259


2438it [02:26, 16.63it/s]


Epoch 14/20
158s - loss:  0.6070 - binary_crossentropy:  0.6070 - auc:  0.7253 - val_binary_crossentropy:  0.6067 - val_auc:  0.7246


2438it [02:30, 16.17it/s]


Epoch 15/20
162s - loss:  0.6065 - binary_crossentropy:  0.6065 - auc:  0.7259 - val_binary_crossentropy:  0.6080 - val_auc:  0.7260
Epoch 00015: early stopping
Fold3 test LogLoss: 0.6080
Fold3 test AUC: 0.7260
cuda:0
Train on 9984415 samples, validate on 2496104 samples, 2438 steps per epoch


2438it [02:27, 16.55it/s]


Epoch 1/20
158s - loss:  0.6309 - binary_crossentropy:  0.6309 - auc:  0.6913 - val_binary_crossentropy:  0.6258 - val_auc:  0.7018


2438it [02:27, 16.57it/s]


Epoch 2/20
158s - loss:  0.6272 - binary_crossentropy:  0.6272 - auc:  0.6980 - val_binary_crossentropy:  0.6206 - val_auc:  0.7052


2438it [02:26, 16.59it/s]


Epoch 3/20
158s - loss:  0.6188 - binary_crossentropy:  0.6188 - auc:  0.7095 - val_binary_crossentropy:  0.6170 - val_auc:  0.7113


2438it [02:27, 16.55it/s]


Epoch 4/20
158s - loss:  0.6156 - binary_crossentropy:  0.6156 - auc:  0.7139 - val_binary_crossentropy:  0.6144 - val_auc:  0.7146


2438it [02:27, 16.58it/s]


Epoch 5/20
158s - loss:  0.6133 - binary_crossentropy:  0.6133 - auc:  0.7170 - val_binary_crossentropy:  0.6112 - val_auc:  0.7185


2438it [02:26, 16.61it/s]


Epoch 6/20
158s - loss:  0.6114 - binary_crossentropy:  0.6114 - auc:  0.7195 - val_binary_crossentropy:  0.6103 - val_auc:  0.7197


2438it [02:28, 16.42it/s]


Epoch 7/20
160s - loss:  0.6101 - binary_crossentropy:  0.6101 - auc:  0.7213 - val_binary_crossentropy:  0.6090 - val_auc:  0.7218


2438it [02:27, 16.50it/s]


Epoch 8/20
159s - loss:  0.6090 - binary_crossentropy:  0.6090 - auc:  0.7228 - val_binary_crossentropy:  0.6138 - val_auc:  0.7191


2438it [02:27, 16.55it/s]


Epoch 9/20
158s - loss:  0.6079 - binary_crossentropy:  0.6079 - auc:  0.7242 - val_binary_crossentropy:  0.6080 - val_auc:  0.7232


2438it [02:26, 16.60it/s]


Epoch 10/20
158s - loss:  0.6068 - binary_crossentropy:  0.6068 - auc:  0.7254 - val_binary_crossentropy:  0.6079 - val_auc:  0.7243


2438it [02:27, 16.54it/s]


Epoch 11/20
159s - loss:  0.6060 - binary_crossentropy:  0.6060 - auc:  0.7265 - val_binary_crossentropy:  0.6059 - val_auc:  0.7258


2438it [02:26, 16.63it/s]


Epoch 12/20
158s - loss:  0.6054 - binary_crossentropy:  0.6054 - auc:  0.7273 - val_binary_crossentropy:  0.6075 - val_auc:  0.7234


2438it [02:26, 16.61it/s]


Epoch 13/20
158s - loss:  0.6048 - binary_crossentropy:  0.6048 - auc:  0.7281 - val_binary_crossentropy:  0.6050 - val_auc:  0.7272


2438it [02:27, 16.53it/s]


Epoch 14/20
159s - loss:  0.6041 - binary_crossentropy:  0.6041 - auc:  0.7288 - val_binary_crossentropy:  0.6066 - val_auc:  0.7268


2438it [02:30, 16.16it/s]


Epoch 15/20
162s - loss:  0.6036 - binary_crossentropy:  0.6036 - auc:  0.7296 - val_binary_crossentropy:  0.6070 - val_auc:  0.7283
Epoch 00015: early stopping
Fold4 test LogLoss: 0.6070
Fold4 test AUC: 0.7283
cuda:0
Train on 9984416 samples, validate on 2496103 samples, 2438 steps per epoch


2438it [02:30, 16.25it/s]


Epoch 1/20
161s - loss:  0.6314 - binary_crossentropy:  0.6314 - auc:  0.6907 - val_binary_crossentropy:  0.6234 - val_auc:  0.7022


1589it [01:36, 19.22it/s]

In [None]:
del test

## Ensemble

In [None]:
submission = pd.read_csv(path + "sample_submission.csv")
submission["Click"] = nn_preds
submission.to_csv("../output/5fold-ctr-deepfm.csv", index=False)

In [None]:
submission.head()