# STACKING 2 - Linear, FM, AFM, CrossNet, CIN, DeepFM, xDeepFM

In [1]:
%load_ext autoreload
%autoreload 2

import os
import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append(os.path.abspath('..'))
# ---------------------------------
from time import sleep
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
import seaborn
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
# ---------------------------------
from tools import CV, Tuning, CVGetScore
from hyperopt import hp

In [2]:
# Load the training labels: the 'target' column of train.csv (indexed by 'id') as a NumPy array.
y_train = pd.read_csv('../data/train.csv', index_col='id')['target'].values
# Print the label array's shape as a quick sanity check.
print(y_train.shape)

(600000,)


In [3]:
def mk_mdf(mdict, rule_dict=None):
    """Load saved prediction arrays and average groups of their columns.

    Every array referenced by ``mdict`` is loaded with ``np.load`` and its
    columns are named ``f'{name}{i}'`` (``i`` = column index). Each rule in
    ``rule_dict`` then collapses a list of those column names into a single
    output column holding their row-wise mean.

    Parameters
    ----------
    mdict : dict[str, str]
        Maps a model name to the path of a 2-D ``.npy`` array.
    rule_dict : dict[str, list[str]] or None, optional
        Maps an output column name to the list of loaded column names to
        average. ``None`` (the default) is treated as "no rules".

    Returns
    -------
    pandas.DataFrame
        One column per key of ``rule_dict`` (in insertion order); empty when
        no rules are given.

    Raises
    ------
    KeyError
        If a rule references a column name that was not loaded.
    ValueError
        Raised by ``pd.concat`` when ``mdict`` is empty.
    IndexError
        If a loaded array is not at least 2-D (``shape[1]`` is required).
    """
    # Avoid a mutable default argument: a shared `{}` default is a classic trap.
    if rule_dict is None:
        rule_dict = {}

    frames = []
    for name, path in mdict.items():
        marr = np.load(path)
        col_names = [f'{name}{i}' for i in range(marr.shape[1])]
        frames.append(pd.DataFrame(data=marr, columns=col_names))
    df = pd.concat(frames, axis=1)

    # Row-wise mean over each rule's columns -> one blended feature per rule.
    mdf = {
        colname: np.mean(df[rules].values, axis=1)
        for colname, rules in rule_dict.items()
    }
    return pd.DataFrame(mdf)

In [4]:
! ls ../tmp/xdeepfm/

3973270008.csv	4293006264.csv	4293006264predict.npy  4293006264stacking1.npy


In [5]:
# Single source of truth: model name -> run id of its saved level-1 outputs.
# Each model's files live in ../tmp/<name>/ as <run_id>stacking1.npy (train-side
# file) and <run_id>predict.npy (test-side file). Deriving both path dicts from
# this one mapping keeps the train and test run ids from drifting apart.
run_ids = {
    'lr': '2432137157',
    'fm': '3366580204',
    'afm': '1372484291',
    'cross': '2072349059',
    'cin': '1960993645',
    'deepfm': '1615107092',
    'xdeepfm': '4293006264',
}

train_dict = {
    name: f'../tmp/{name}/{run_id}stacking1.npy'
    for name, run_id in run_ids.items()
}

test_dict = {
    name: f'../tmp/{name}/{run_id}predict.npy'
    for name, run_id in run_ids.items()
}

# Averaging rules: output feature name -> loaded columns whose row-wise mean
# forms that feature (columns are named '<model><column index>').
# NOTE(review): 'afm' is loaded above but has no rule here, so it contributes
# nothing to x_train / x_test — confirm this omission is intentional.
rule_dict = {
    'deepfm': [f'deepfm{i}' for i in range(10)],
    'cin':  [f'cin{i}' for i in range(5)],
    'fm':  [f'fm{i}' for i in range(5)],
    'cross1': ['cross0', 'cross1', 'cross3', 'cross4'],
    'cross2': ['cross2'],
    'lr': ['lr0', 'lr1', 'lr2', 'lr4'],
    'lr2': ['lr3'],
    'xdeepfm': [f'xdeepfm{i}' for i in range(5)]
}

# Level-2 feature matrices: one column per rule, in rule_dict order.
x_train = mk_mdf(train_dict, rule_dict).values
x_test = mk_mdf(test_dict, rule_dict).values

# Fail fast if the stacked train features do not line up with the labels.
assert x_train.shape[0] == y_train.shape[0], (
    f'x_train has {x_train.shape[0]} rows but y_train has {y_train.shape[0]}'
)

# CatBoost

In [6]:
from catboost import CatBoostClassifier

In [7]:
# Cross-validated CatBoost stacker on the level-2 features (x_train / x_test).
# `nflod` is handed to CV(model, nflod) below — presumably the number of CV
# folds (the name reads like a typo of "nfold"; kept because other cells may use it).
nflod = 40
# NOTE(review): `nmodel` is not referenced anywhere else in this cell.
nmodel = 1

# model params (constructor keyword arguments for CatBoostClassifier)
model_param = {'learning_rate': 0.01,
               'random_seed': 2333, 
               'loss_function': 'Logloss',
               'eval_metric': 'AUC',
               'iterations': 10000,   # author's note: 1000 (presumably a faster setting)
               'task_type': 'GPU',
               'early_stopping_rounds': 50}

# fit (extra keyword arguments forwarded through cv.fit's `fit_params`)
fit_param = {'silent': True}
# Work on a copy so `model_param` itself is left untouched.
model_params = deepcopy(model_param)

# cv
model = CatBoostClassifier(**model_params)
# CV comes from the project-local `tools` module; its internals are not visible here.
cv = CV(model, nflod)

# Returns an overall score and `pred_arr`; with output_oof_pred=True the latter
# is presumably the out-of-fold predictions — TODO confirm against tools.CV.
# fit_use_valid='eval_set' presumably makes CV pass each validation fold to the
# model as `eval_set` (needed for early stopping) — TODO confirm.
score, pred_arr = cv.fit(x=x_train,
                         y=y_train, 
                         metrics_func=roc_auc_score,
                         split_method=StratifiedKFold,
                         fit_params=fit_param,
                         eval_param=None,
                         use_proba=True, 
                         verbose=True,
                         fit_use_valid='eval_set',
                         output_oof_pred=True)

# Predict on the stacked test features with the fitted CV object.
pred = cv.predict(x=x_test)

print('score: ', score)
# Drop the references to the model and CV wrapper once predictions are taken.
del model, cv

folds 0 is done, score is 0.7816639022552737
folds 1 is done, score is 0.7912986967541183
folds 2 is done, score is 0.7913594238387135
folds 3 is done, score is 0.7875139009233086
folds 4 is done, score is 0.7917169074485347
folds 5 is done, score is 0.7908908993380368
folds 6 is done, score is 0.7901601150350329
folds 7 is done, score is 0.7900040180941592
folds 8 is done, score is 0.7885596832447226
folds 9 is done, score is 0.7842104088618197
folds 10 is done, score is 0.7886281363100179
folds 11 is done, score is 0.8004000772773329
folds 12 is done, score is 0.7870876867555018
folds 13 is done, score is 0.7910662162989508
folds 14 is done, score is 0.7861033909723624
folds 15 is done, score is 0.7899228587846123
folds 16 is done, score is 0.7908980557284773
folds 17 is done, score is 0.7926069141364007
folds 18 is done, score is 0.7952090945395982
folds 19 is done, score is 0.7951690041482526
folds 20 is done, score is 0.788779311406854
folds 21 is done, score is 0.7868541109344505

In [None]:
# NOTE: disappointing result — the fold AUCs above stay around 0.79.