In [1]:
import warnings
warnings.filterwarnings('ignore')

import os, time, gc, random
import datatable as dt
import numpy as np
import janestreet

import xgboost as xgb
from sklearn.metrics import roc_auc_score, roc_curve, log_loss
from sklearn.model_selection import GroupKFold
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from joblib import dump, load

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.modules.loss import _WeightedLoss
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.nn.functional as F

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Random seed
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, the
            ``PYTHONHASHSEED`` environment variable, NumPy and PyTorch
            (CPU generator and every CUDA device).
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every visible GPU, not only the current one;
    # it is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning can select different kernels from run to run, so it
    # is switched off alongside the deterministic flag.
    torch.backends.cudnn.benchmark = False
seed_everything(seed=42)

## Processing

In [3]:
%%time
print('Loading...')
train = dt.fread('/kaggle/working/input/train.csv').to_pandas()
print('Filling...')
features = [c for c in train.columns if 'feature' in c]
f_mean = train[features[1:]].mean()
train = train.loc[train.weight > 0].reset_index(drop = True)
train[features[1:]] = train[features[1:]].fillna(f_mean)
train['action'] = (train['resp'] > 0).astype('int')
print('Converting...')
f_mean = f_mean.values
np.save('f_mean.npy', f_mean)

Loading...
Filling...
Converting...
CPU times: user 17min 21s, sys: 25.9 s, total: 17min 47s
Wall time: 28.4 s


In [4]:
print('Modeling')
# 4 hidden Dense layers. (Author's open question: why were these sizes and
# dropout rates chosen?)
class Model(nn.Module):
    """Feed-forward binary classifier that returns one raw logit per row.

    The input goes through BatchNorm1d and Dropout, then four hidden blocks of
    Linear -> BatchNorm1d -> swish (``x * sigmoid(x)``) -> Dropout, and finally
    a Linear layer with a single output. No sigmoid is applied here; callers
    apply it themselves.

    Args:
        n_features: Width of the input. When ``None`` (the default) it falls
            back to ``len(features)``, the module-level feature list, so
            ``Model()`` behaves as before.
    """

    def __init__(self, n_features=None):
        super(Model, self).__init__()
        if n_features is None:
            n_features = len(features)

        self.batch_norm0 = nn.BatchNorm1d(n_features)
        self.dropout0 = nn.Dropout(0.10143786981358652)

        self.dense1 = nn.Linear(n_features, 384)
        self.batch_norm1 = nn.BatchNorm1d(384)
        self.dropout1 = nn.Dropout(0.19720339053599725)

        self.dense2 = nn.Linear(384, 896)
        self.batch_norm2 = nn.BatchNorm1d(896)
        self.dropout2 = nn.Dropout(0.2703017847244654)

        self.dense3 = nn.Linear(896, 896)
        self.batch_norm3 = nn.BatchNorm1d(896)
        self.dropout3 = nn.Dropout(0.23148340929571917)

        self.dense4 = nn.Linear(896, 394)
        self.batch_norm4 = nn.BatchNorm1d(394)
        self.dropout4 = nn.Dropout(0.2357768967777311)

        self.dense5 = nn.Linear(394, 1)

        # These activation modules are not used by forward(). They are kept
        # because PReLU owns a learnable weight: removing it would change the
        # state_dict keys and break loading of existing checkpoints.
        self.Relu = nn.ReLU(inplace=True)
        self.PReLU = nn.PReLU()
        self.LeakyReLU = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        # self.GeLU = nn.GELU()
        self.RReLU = nn.RReLU()

    @staticmethod
    def _swish(x):
        """Return ``x * sigmoid(x)`` using the non-deprecated torch.sigmoid."""
        return x * torch.sigmoid(x)

    def forward(self, x):
        """Map a ``(batch, n_features)`` float tensor to ``(batch, 1)`` logits."""
        x = self.batch_norm0(x)
        x = self.dropout0(x)

        x = self.dense1(x)
        x = self.batch_norm1(x)
        x = self._swish(x)
        x = self.dropout1(x)

        x = self.dense2(x)
        x = self.batch_norm2(x)
        x = self._swish(x)
        x = self.dropout2(x)

        x = self.dense3(x)
        x = self.batch_norm3(x)
        x = self._swish(x)
        x = self.dropout3(x)

        x = self.dense4(x)
        x = self.batch_norm4(x)
        x = self._swish(x)
        x = self.dropout4(x)

        x = self.dense5(x)

        return x

Modeling


In [5]:
class MarketDataset:
    """Map-style dataset built from a DataFrame of market rows.

    Stores the columns named in the module-level ``features`` list as a NumPy
    matrix, plus a column vector of binary labels that is 1 where ``resp`` is
    strictly positive and 0 otherwise.
    """

    def __init__(self, df):
        self.features = df[features].values
        is_positive = df['resp'] > 0
        self.label = is_positive.astype('int').values.reshape(-1, 1)

    def __len__(self):
        # One label row per sample.
        return self.label.shape[0]

    def __getitem__(self, idx):
        # Both entries are converted to float32 tensors on access.
        row = torch.tensor(self.features[idx], dtype=torch.float)
        target = torch.tensor(self.label[idx], dtype=torch.float)
        return {'features': row, 'label': target}

In [6]:
# Training
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; switched to training mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: Optional object with ``step()``, called after every
            optimizer step when truthy.
        loss_fn: Callable ``(outputs, label) -> scalar loss tensor``.
        dataloader: Iterable of dicts with ``'features'`` and ``'label'``
            tensors; must support ``len()``.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        inputs = batch['features'].to(device)
        targets = batch['label'].to(device)

        optimizer.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        optimizer.step()
        if scheduler:
            scheduler.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)

# Inference
def inference_fn(model, dataloader, device):
    """Return sigmoid probabilities for every row served by ``dataloader``.

    Args:
        model: Network producing logits; switched to evaluation mode.
        dataloader: Iterable of dicts with a ``'features'`` tensor.
        device: Device the feature tensors are moved to.

    Returns:
        1-D NumPy array with all batch predictions concatenated in order.
    """
    model.eval()
    chunks = []
    with torch.no_grad():
        for batch in dataloader:
            logits = model(batch['features'].to(device))
            chunks.append(torch.sigmoid(logits).detach().cpu().numpy())
    return np.concatenate(chunks).reshape(-1)

In [7]:
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled toward 0.5 by ``smoothing`` before the loss is computed:
    ``t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'``, ``'sum'``, or anything else (e.g. ``'none'``)
            to get the unreduced element-wise loss.
        smoothing: Smoothing factor in ``[0, 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The base class already stores `weight` (as a buffer) and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return smoothed targets; ``n_labels`` is accepted but not used."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss of ``inputs`` (logits) against ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Ask for the element-wise loss: with the functional's default
        # reduction ('mean') the result is already a scalar and the 'sum' /
        # 'none' settings of this class would silently return the mean.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')
        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()
        return loss

In [8]:
class EarlyStopping:
    """Track a validation score, checkpoint on improvement, flag when to stop.

    Call the instance once per epoch with the epoch's score. When the score is
    at least as good as the best seen so far the model's ``state_dict`` is
    saved and the patience counter resets; otherwise the counter increases and
    ``early_stop`` becomes True once it reaches ``patience``.

    Args:
        patience: Consecutive non-improving epochs tolerated before stopping.
        mode: ``"min"`` when a smaller score is better; any other value is
            treated as "larger is better".
        delta: Stored on the instance; the comparison in ``__call__`` does not
            currently apply it.
    """

    def __init__(self, patience=7, mode="max", delta=0.):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # Lowercase np.inf: the np.Inf alias was removed in NumPy 2.0.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Record ``epoch_score`` and save ``model`` to ``model_path`` on improvement."""

        # Internally a larger `score` is always better, so "min" mode negates.
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
        elif score < self.best_score: #  + self.delta
            self.counter += 1
            print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            # ema.apply_shadow()
            self.save_checkpoint(epoch_score, model, model_path)
            # ema.restore()
            self.counter = 0

    def save_checkpoint(self, epoch_score, model, model_path):
        """Write ``model.state_dict()`` to ``model_path`` unless the score is inf/NaN."""
        # np.isfinite rejects every NaN; a list membership test only matches
        # the np.nan singleton and lets computed NaNs through.
        if np.isfinite(epoch_score):
            print('Validation score improved ({} --> {}). Saving model!'.format(self.val_score, epoch_score))
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [9]:
def utility_score_bincount(date, weight, resp, action):
    """Compute the clipped utility score of a set of actions.

    For each date ``i`` the daily value ``Pi`` is the sum of
    ``weight * resp * action``. Then
    ``t = sum(Pi) / sqrt(sum(Pi**2)) * sqrt(250 / n_dates)`` and the score is
    ``clip(t, 0, 6) * sum(Pi)``.

    Args:
        date: 1-D array of non-negative integer day indices (bincount bins).
        weight: 1-D array of row weights.
        resp: 1-D array of row responses.
        action: 1-D array of 0/1 decisions, same length as the others.

    Returns:
        The utility score; ``0.0`` when every daily value is zero (for
        example when no action is taken), where the ratio would be 0/0.
    """
    count_i = len(np.unique(date))
    Pi = np.bincount(date, weight * resp * action)
    total = np.sum(Pi)
    norm = np.sqrt(np.sum(Pi ** 2))
    # All-zero daily values would make t = 0/0 = NaN and poison the score.
    if norm == 0:
        return 0.0
    t = total / norm * np.sqrt(250 / count_i)
    u = np.clip(t, 0, 6) * total
    return u

In [10]:
# Hyper-parameters for the cross-validated training run.
batch_size = 4096
label_smoothing = 1e-2
learning_rate = 1e-3

start_time = time.time()
# NOTE(review): `oof` is allocated here but never written in this cell.
oof = np.zeros(len(train['action']))
# Folds are grouped by `date`, so one day never appears in both train and validation.
gkf = GroupKFold(n_splits = 5)
# Use the first GPU when present; fall back to the CPU instead of crashing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for fold, (tr, te) in enumerate(gkf.split(train['action'].values, train['action'].values, train['date'].values)):
    train_set = MarketDataset(train.loc[tr])
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)
    # The validation slice and everything derived from it are the same for
    # every epoch, so they are built once per fold.
    valid_df = train.loc[te]
    valid_set = MarketDataset(valid_df)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=4)
    valid_label = (valid_df['resp'] > 0).astype('int').values.reshape(-1, 1)
    valid_date = valid_df.date.values
    valid_weight = valid_df.weight.values
    valid_resp = valid_df.resp.values

    torch.cuda.empty_cache()
    model = Model()
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = SmoothBCEwLogits(smoothing=label_smoothing)

    ckp_path = f'JSModel_{fold}.pth'

    # Stop a fold after 3 epochs without an AUC improvement; the best epoch
    # is the one kept in the checkpoint file.
    es = EarlyStopping(patience=3, mode="max")
    for epoch in range(10):
        train_loss = train_fn(model, optimizer, None, loss_fn, train_loader, device)
        valid_pred = inference_fn(model, valid_loader, device)
        auc_score = roc_auc_score(valid_label, valid_pred)
        logloss_score = log_loss(valid_label, valid_pred)
        # The utility score needs hard 0/1 actions, thresholded at 0.5.
        valid_pred = np.where(valid_pred >= 0.5, 1, 0).astype(int)
        u_score = utility_score_bincount(date=valid_date, weight=valid_weight, resp=valid_resp, action=valid_pred)

        print(f"FOLD{fold} EPOCH:{epoch:3}, train_loss:{train_loss:.5f}, u_score:{u_score:.5f}, auc:{auc_score:.5f}, logloss:{logloss_score:.5f}, "
              f"time: {(time.time() - start_time) / 60:.2f}min")

        es(auc_score, model, model_path=ckp_path)
        if es.early_stop:
            print("Early stopping")
            break

FOLD0 EPOCH:  0, train_loss:0.69331, u_score:1944.61904, auc:0.53405, logloss:0.69163, time: 0.36min
Validation score improved (-inf --> 0.5340498877324772). Saving model!
FOLD0 EPOCH:  1, train_loss:0.69091, u_score:1585.49528, auc:0.53132, logloss:0.69162, time: 0.62min
EarlyStopping counter: 1 out of 3
FOLD0 EPOCH:  2, train_loss:0.69025, u_score:1998.73372, auc:0.53089, logloss:0.69216, time: 0.86min
EarlyStopping counter: 2 out of 3
FOLD0 EPOCH:  3, train_loss:0.68956, u_score:1575.78973, auc:0.53250, logloss:0.69202, time: 1.11min
EarlyStopping counter: 3 out of 3
Early stopping
FOLD1 EPOCH:  0, train_loss:0.69357, u_score:198.16977, auc:0.52507, logloss:0.69251, time: 1.40min
Validation score improved (-inf --> 0.5250680495389416). Saving model!
FOLD1 EPOCH:  1, train_loss:0.69066, u_score:504.07485, auc:0.52809, logloss:0.69258, time: 1.65min
Validation score improved (0.5250680495389416 --> 0.5280923184354895). Saving model!
FOLD1 EPOCH:  2, train_loss:0.69005, u_score:572.391

In [11]:
# Rebuild one network per fold and restore the checkpoint saved for it.
# Use the first GPU when present; fall back to the CPU instead of crashing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
models = []
for i in range(5):
    torch.cuda.empty_cache()
    model = Model()
    model.to(device)
    model.eval()
    ckp_path = f'JSModel_{i}.pth'
    # map_location lets a checkpoint written on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(ckp_path, map_location=device))
    models.append(model)

In [12]:
# Reload the per-feature fill values written to 'f_mean.npy' by the
# preprocessing cell (means of every feature column except the first).
f_mean = np.load('./f_mean.npy')

## Submit

In [13]:
# Create the janestreet environment and its test iterator; the submission
# loop consumes (test_df, pred_df) pairs from `env_iter` and hands each
# filled-in `pred_df` back through `env.predict`.
# NOTE(review): presumably the iterator can only be consumed once per kernel
# session — confirm against the janestreet API before re-running this cell.
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:
# Probability threshold at or above which the averaged prediction becomes action = 1.
opt_th = 0.5
for (test_df, pred_df) in tqdm(env_iter):
    # Rows with zero weight are not scored by the models; they get action 0.
    if test_df['weight'].item() > 0:
        x_tt = test_df.loc[:, features].values
        # Fill NaNs in every feature except the first with the stored training
        # means: nan_to_num zeroes them, then the NaN mask adds the mean back.
        if np.isnan(x_tt[:, 1:].sum()):
            x_tt[:, 1:] = np.nan_to_num(x_tt[:, 1:]) + np.isnan(x_tt[:, 1:]) * f_mean
        # Build the input tensor once and share it across all fold models.
        x_tensor = torch.tensor(x_tt, dtype=torch.float).to(device)
        pred = 0.
        # Inference only: no_grad avoids building an autograd graph per row.
        with torch.no_grad():
            for clf in models:
                pred += clf(x_tensor).sigmoid().detach().cpu().numpy() / len(models)
        pred_df.action = np.where(pred >= opt_th, 1, 0).astype(int)
    else:
        pred_df.action = 0
    env.predict(pred_df)

0it [00:00, ?it/s]