In [1]:
import warnings
warnings.filterwarnings('ignore')

import os, time, gc, random
import datatable as dt
import numpy as np
import janestreet

import xgboost as xgb
from sklearn.metrics import roc_auc_score, roc_curve, log_loss
from sklearn.model_selection import GroupKFold
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from joblib import dump, load

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.modules.loss import _WeightedLoss
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.nn.functional as F

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is only read at interpreter start-up, so this affects
    # processes launched from here on, not the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one (no-op when CUDA is absent).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False


seed_everything(seed=42)

In [3]:
class MarketDataset:
    """Map-style dataset pairing feature rows with binarised targets.

    Args:
        df: Frame holding both the feature and the target columns.
        train_features: Column names used as model inputs.
        train_labels: Column names whose sign defines the targets; a value
            strictly greater than zero becomes 1, anything else 0.
    """

    def __init__(self, df, train_features, train_labels):
        feature_frame = df[train_features]
        target_frame = df[train_labels]
        self.features = feature_frame.values
        # Positive responses are the "1" class.
        is_positive = target_frame > 0
        self.label = is_positive.astype(int).values

    def __len__(self):
        """Number of samples."""
        return len(self.label)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict of float32 tensors."""
        sample = {}
        sample['features'] = torch.tensor(self.features[idx], dtype=torch.float)
        sample['label'] = torch.tensor(self.label[idx], dtype=torch.float)
        return sample

In [4]:
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is evaluated:
    ``t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'``, ``'sum'``; any other value (e.g. ``'none'``)
            returns the element-wise loss.
        smoothing: Smoothing strength in ``[0, 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The parent constructor already stores ``weight`` and ``reduction``.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return smoothed targets. ``n_labels`` is accepted but unused."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss between logits and 0/1 targets."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
                                           self.smoothing)
        # Ask for element-wise losses; the functional's default 'mean' would
        # collapse to a scalar and make the reduction handling below a no-op.
        loss = F.binary_cross_entropy_with_logits(
            inputs, targets, self.weight, reduction='none')
        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss

## Processing

In [5]:
%%time
train = dt.fread('/kaggle/working/input/train.csv').to_pandas()
train = train.query('date > 85').reset_index(drop=True)
train = train.loc[train.weight > 0].reset_index(drop = True)

features = [c for c in train.columns if 'feature' in c]
f_mean = train[features[1:]].mean()
train[features[1:]] = train[features[1:]].fillna(f_mean)
f_mean = f_mean.values

train['action'] = (train['resp'] > 0).astype('int')

resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp_4', 'resp']

CPU times: user 13min 52s, sys: 27.2 s, total: 14min 19s
Wall time: 24 s


In [6]:
class Model(nn.Module):
    """Fully connected network producing one logit per label.

    The input is batch-normalised and passed through dropout, followed by
    four hidden blocks of ``Linear -> BatchNorm1d -> x * sigmoid(x) ->
    Dropout(0.2)`` with widths 384, 896, 896 and 394, and a final linear
    layer. No sigmoid is applied to the output.

    Args:
        num_colunms: Number of input features per sample.
        num_labels: Number of logits produced per sample.
    """

    def __init__(self, num_colunms, num_labels):
        super().__init__()
        # Input normalisation.
        self.batch_norm0 = nn.BatchNorm1d(num_colunms)
        self.dropout0 = nn.Dropout(0.2)

        # Hidden block 1.
        self.dense1 = nn.Linear(num_colunms, 384)
        self.batch_norm1 = nn.BatchNorm1d(384)
        self.dropout1 = nn.Dropout(0.2)

        # Hidden block 2.
        self.dense2 = nn.Linear(384, 896)
        self.batch_norm2 = nn.BatchNorm1d(896)
        self.dropout2 = nn.Dropout(0.2)

        # Hidden block 3.
        self.dense3 = nn.Linear(896, 896)
        self.batch_norm3 = nn.BatchNorm1d(896)
        self.dropout3 = nn.Dropout(0.2)

        # Hidden block 4.
        self.dense4 = nn.Linear(896, 394)
        self.batch_norm4 = nn.BatchNorm1d(394)
        self.dropout4 = nn.Dropout(0.2)

        # Output head.
        self.dense5 = nn.Linear(394, num_labels)

        # Activation modules that forward() does not call; kept so the
        # module's registered sub-modules stay the same.
        self.Relu = nn.ReLU(inplace=True)
        self.PReLU = nn.PReLU()
        self.LeakyReLU = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        self.RReLU = nn.RReLU()

    def forward(self, x):
        """Map a ``(batch, num_colunms)`` tensor to ``(batch, num_labels)`` logits."""
        x = self.dropout0(self.batch_norm0(x))

        hidden_blocks = (
            (self.dense1, self.batch_norm1, self.dropout1),
            (self.dense2, self.batch_norm2, self.dropout2),
            (self.dense3, self.batch_norm3, self.dropout3),
            (self.dense4, self.batch_norm4, self.dropout4),
        )
        for linear, norm, drop in hidden_blocks:
            x = norm(linear(x))
            x = drop(x * torch.sigmoid(x))

        return self.dense5(x)

In [7]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: Optional LR scheduler stepped once per batch, or ``None``.
        loss_fn: Callable ``loss_fn(outputs, label)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'features'`` and ``'label'``
            tensors. It does not need to define ``__len__``.
        device: Device the batch tensors are moved to.

    Returns:
        float: Average of the per-batch loss values, or ``nan`` when the
        iterable yields no batches.
    """
    model.train()
    total_loss = 0.0
    n_batches = 0

    for data in dataloader:
        optimizer.zero_grad()
        features = data['features'].to(device)
        label = data['label'].to(device)
        outputs = model(features)
        loss = loss_fn(outputs, label)
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        total_loss += loss.item()
        n_batches += 1

    # Count batches while iterating instead of calling len(dataloader), so
    # empty loaders and length-less iterables do not raise.
    if n_batches == 0:
        return float('nan')
    return total_loss / n_batches

In [None]:
# Hyper-parameters.
epochs = 200
batch_size = 4096
label_smoothing = 1e-2
learning_rate = 1e-3

# Network dimensions follow the selected feature and response columns.
num_colunms = len(features)
num_labels = len(resp_cols)

# Shuffled mini-batches over the whole filtered training frame.
train_set = MarketDataset(train, features, resp_cols)
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

torch.cuda.empty_cache()
model = Model(num_colunms=num_colunms, num_labels=num_labels)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = SmoothBCEwLogits(smoothing=label_smoothing)

# Train without an LR scheduler, reporting wall-clock time and mean loss per epoch.
start = time.time()
for epoch in range(epochs):
    train_loss = train_fn(model, optimizer, None, loss_fn, train_loader, device)
    end = time.time()
    elapsed = end - start
    print('Epoch:{}, Time:{:.2f}s, Loss {}'.format(epoch, elapsed, train_loss))
    start = end

Epoch:0, Time:14.64s, Loss 0.6913047307170928
Epoch:1, Time:13.57s, Loss 0.6892876237009963
Epoch:2, Time:13.43s, Loss 0.6887070592492819
Epoch:3, Time:13.24s, Loss 0.6883050406662127
Epoch:4, Time:13.79s, Loss 0.6879889594080547
Epoch:5, Time:13.27s, Loss 0.6876410537709793
Epoch:6, Time:13.80s, Loss 0.6873247679322958
Epoch:7, Time:13.10s, Loss 0.686977336804072
Epoch:8, Time:13.17s, Loss 0.6867044504421452


## Submit

In [None]:
# Create the competition environment and obtain the iterator that the
# submission loop consumes (it yields (test_df, pred_df) pairs there).
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:
# Threshold on the median predicted probability above which action = 1.
th = 0.503
model.eval()
for (test_df, pred_df) in tqdm(env_iter):
    if test_df['weight'].item() > 0:
        x_tt = test_df.loc[:, features].values
        # Fill missing values in every column but the first with the
        # training means. np.where leaves present values untouched even if a
        # stored mean is NaN (the mask-times-mean form would yield 0 * nan).
        missing = np.isnan(x_tt[:, 1:])
        if missing.any():
            x_tt[:, 1:] = np.where(missing, f_mean, x_tt[:, 1:])
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            probs = model(torch.tensor(x_tt, dtype=torch.float).to(device)).sigmoid()
        # Median over all sigmoid outputs gives a single score for the row.
        pred = np.median(probs.cpu().numpy())
        pred_df.action = np.where(pred >= th, 1, 0).astype(int)
    else:
        # Rows with non-positive weight are never acted on.
        pred_df.action = 0
    env.predict(pred_df)