In [5]:
import os
import gc
import math
import pickle
from tqdm import tqdm

import pandas as pd
import numpy as np

import scipy.signal
from scipy.stats import beta
from scipy import stats
from sklearn.preprocessing import StandardScaler
from pykalman import KalmanFilter

import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Sampler

import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint


# Directory with the pickled arrays built from the past data
# (context.pkl, strat_corrs_EVAL.pkl, features_EVAL.pkl are read from here).
PAST_DATA_PATH = 'D:/ubiquant_cache'
# CSV holding the supplemental training rows that get appended to the past data.
SUPPLE_DATA_FILE = 'D:/data/ubiquant/supplemental_train.csv'
# Directory with the pickled scaler / PCA objects and the past-response regressor checkpoint.
PREPO_PATH = './preprocessing'



def kalman_filter(series, tc):
    """Run a scalar Kalman filter over ``series`` and return the filtered means.

    The filter uses a one-dimensional state with transition and observation
    coefficients of 1, an initial state of mean 0 / covariance 1 and an
    observation covariance of 1.

    Args:
        series: sequence of observations to filter.
        tc: transition (process) covariance; passed straight to ``KalmanFilter``.

    Returns:
        The filtered state means with singleton dimensions squeezed out.
    """
    settings = dict(
        transition_matrices=[1],
        observation_matrices=[1],
        initial_state_mean=0,
        initial_state_covariance=1,
        observation_covariance=1,
        transition_covariance=tc,
    )

    # filter() returns (state means, state covariances); only the means are used.
    filtered_means, _ = KalmanFilter(**settings).filter(series)

    return filtered_means.squeeze()

def butterworth_filter(series, f=0.1):
    """Apply a zero-phase 3rd-order Butterworth low-pass filter to ``series``.

    Args:
        series: 1-D sequence of samples.
        f: critical frequency handed to ``scipy.signal.butter``.

    Returns:
        The forward-backward filtered series, or ``series`` itself (unchanged)
        when it has fewer than 13 samples and is too short to be filtered.
    """
    min_samples = 13  # filtfilt's default padding needs more than 12 samples here

    if len(series) >= min_samples:
        numerator, denominator = scipy.signal.butter(3, f)
        return scipy.signal.filtfilt(numerator, denominator, series)

    return series

def clip_corrs(corrs, thr_min=0.1, thr_max=0.4):
    """Dead-zone and saturate correlation values while keeping their sign.

    Magnitudes below ``thr_min`` map to 0, magnitudes above ``thr_max`` are
    capped, and the remaining magnitude is shifted down by ``thr_min`` so the
    output lies in ``[-(thr_max - thr_min), thr_max - thr_min]``.

    Args:
        corrs: array of correlation values.
        thr_min: magnitude below which a value is treated as zero.
        thr_max: magnitude at which a value saturates.

    Returns:
        Array of the same shape as ``corrs``.
    """
    shifted_magnitude = np.clip(np.abs(corrs), thr_min, thr_max) - thr_min

    return np.sign(corrs) * shifted_magnitude

class PastResponseRegressor(LightningModule):
    """Small MLP with two ReLU hidden layers and a two-value linear output.

    The hidden layers have ``input_width // 2`` and ``input_width // 4`` units.
    """

    def __init__(self, input_width):
        """Build the layers for feature vectors of size ``input_width``."""

        super().__init__()

        half_width, quarter_width = input_width // 2, input_width // 4

        # Attribute names are kept as-is: checkpoints are keyed by them.
        self.dense1 = nn.Linear(input_width, half_width)
        self.dense2 = nn.Linear(half_width, quarter_width)

        self.linear = nn.Linear(quarter_width, 2)

    def forward(self, x):
        """Return the two raw outputs for each row of ``x``."""

        hidden = F.relu(self.dense1(x))
        hidden = F.relu(self.dense2(hidden))

        return self.linear(hidden)

class UbiquantDataSet(TensorDataset):
    """Dataset of aligned (features, context, strat_corrs) rows.

    The three containers are indexed together; the dataset length is the
    length of ``features``.
    """

    def __init__(self, features, context, strat_corrs):
        """Store the three row-aligned containers without copying them."""

        super().__init__()

        self.features, self.context, self.strat_corrs = features, context, strat_corrs

    def __len__(self):
        """Number of rows, taken from ``features``."""

        return len(self.features)

    def __getitem__(self, ndx):
        """Return the ``(features, context, strat_corrs)`` triple at ``ndx``."""

        sources = (self.features, self.context, self.strat_corrs)
        return tuple(source[ndx] for source in sources)

    def __del__(self):
        """Report when the dataset object is garbage collected."""

        print('Dataset removed')

class UbiquantSampler(Sampler):
    """Batch sampler that groups rows by whole days (time ids).

    Each batch holds every row belonging to ``days_per_batch`` randomly chosen
    distinct time ids (the last batch may hold fewer days). Time ids are read
    from column 0 of ``dataset.context``.
    """

    def __init__(self, dataset, days_per_batch):
        """Cache the per-row time ids and derive the number of batches."""

        self.time_ids = dataset.context[:, 0].detach().cpu().numpy()
        self.days_per_batch = days_per_batch

        distinct_days = len(np.unique(self.time_ids))
        self.num_batches = math.ceil(distinct_days / days_per_batch)

    def __len__(self):
        """Number of batches produced per epoch."""

        return self.num_batches

    def __iter__(self):
        """Yield one shuffled array of row indices per batch of days."""

        shuffled_days = np.random.permutation(np.unique(self.time_ids))
        step = self.days_per_batch

        for start in range(0, step * self.num_batches, step):

            # Slicing past the end is harmless, so the final (possibly shorter)
            # batch needs no special case.
            chosen_days = shuffled_days[start:start + step]

            rows = np.flatnonzero(np.isin(self.time_ids, chosen_days))

            np.random.shuffle(rows)
            yield rows

class UbiquantDataModule(pl.LightningDataModule):
    """Train/validation split by contiguous blocks of time ids.

    The time axis (column 0 of ``context``) is cut into blocks of
    ``floor((max_time_id + 1) / 20)`` consecutive ids. A block goes to the
    validation set when ``(block_index + shift) % jump == 0`` and to the
    training set otherwise.

    Args:
        features: tensor of model inputs, one row per sample.
        context: tensor whose column 0 holds the time id of each row.
        strat_corrs: tensor of per-row strat correlation targets.
        split: string whose last two characters are digits: ``jump`` then
            ``shift`` (e.g. ``'CV50'`` means jump=5, shift=0).
    """

    def __init__(self, features, context, strat_corrs, split):

        super(UbiquantDataModule, self).__init__()

        # The previous message here was copy-pasted from the data-stacking step
        # and did not describe what this constructor does.
        print('splitting data into train and validation blocks...')

        time_ids = context[:,0]

        jump = int(split[-2])
        shift = int(split[-1])

        block_size = torch.div(torch.max(time_ids)+1, 20, rounding_mode='floor')
        # With fewer than 20 time ids the block size floors to 0, which would
        # divide by zero below; fall back to one time id per block.
        block_size = torch.clamp(block_size, min=1)
        block_time_ids = torch.div(time_ids, block_size, rounding_mode='floor')

        is_train = torch.remainder(block_time_ids + shift, jump) != 0

        self.train_ds = UbiquantDataSet(features[is_train], context[is_train], strat_corrs[is_train])
        self.val_ds   = UbiquantDataSet(features[~is_train], context[~is_train], strat_corrs[~is_train])

        print('train dataset size:', len(self.train_ds))
        print('val dataset size:', len(self.val_ds))

    def train_dataloader(self):
        """Loader yielding batches made of all rows of 8 random training days."""

        return DataLoader(self.train_ds, shuffle=False, batch_sampler=UbiquantSampler(self.train_ds, 8))

    def val_dataloader(self):
        """Loader yielding batches made of all rows of 61 random validation days."""

        return DataLoader(self.val_ds, shuffle=False, batch_sampler=UbiquantSampler(self.val_ds, 61))


###############################################################################################################


class UbiquantMultiTask(LightningModule):
    """Multi-task MLP trained on noisy, blended targets.

    ``forward`` returns six values per row:

    * column 0: estimate of the total response (slow + fast),
    * column 1: estimate of the slow response component,
    * column 2: estimate of the fast response component,
    * columns 3-5: logits for the sign of three strat correlations.

    The response heads are trained with a per-day negative Pearson correlation
    loss, the strat-correlation heads with binary cross entropy.

    The batch ``context`` tensor is read as: column 0 = time id, columns 3/4 =
    slow target and its std, columns 5/6 = fast target and its std.
    """

    def __init__(self, input_width):
        """Build the network for feature vectors of size ``input_width``."""

        super(UbiquantMultiTask, self).__init__()

        hidden1 = int(input_width*1.2)
        hidden2 = int(input_width*0.6)

        self.batch_norm1 = nn.BatchNorm1d(input_width)
        self.batch_norm2 = nn.BatchNorm1d(hidden1)
        self.batch_norm3 = nn.BatchNorm1d(hidden2)

        self.dense1 = nn.Linear(input_width, hidden1)
        self.dense2 = nn.Linear(hidden1, hidden2)
        self.dense3 = nn.Linear(hidden2, hidden2)

        self.dropout1 = torch.nn.Dropout(p=0.5)
        self.dropout2 = torch.nn.Dropout(p=0.5)
        self.dropout3 = torch.nn.Dropout(p=0.5)

        self.linear = nn.Linear(hidden2, 6)

    def forward(self, x):
        """Apply three (batch norm -> linear -> ReLU -> dropout) stages and the output layer."""

        x = self.batch_norm1(x)
        x = self.dense1(x)
        x = torch.relu(x)
        x = self.dropout1(x)

        x = self.batch_norm2(x)
        x = self.dense2(x)
        x = torch.relu(x)
        x = self.dropout2(x)

        x = self.batch_norm3(x)
        x = self.dense3(x)
        x = torch.relu(x)
        x = self.dropout3(x)

        return self.linear(x)

    def configure_optimizers(self):
        """Adam (lr 3e-3) with a plateau scheduler driven by ``val_corr_resp``.

        The scheduler cuts the learning rate to a third after more than one
        epoch without improvement, never going below 1e-3.
        """

        optimizer = torch.optim.Adam(self.parameters(), lr=3e-3)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=1, factor=1/3, verbose=True, min_lr=1e-3)
        return [optimizer], {'scheduler': scheduler, 'monitor': 'val_corr_resp'}

    def training_step(self, train_batch, batch_idx):
        """Compute the combined training loss for one batch.

        Targets are first jittered with their own per-row std, then pairs of
        rows are blended (see ``blend``), then multiplicative noise is added
        before the per-day correlation losses and the sign cross entropies are
        evaluated.
        """

        x, context, strat_corrs = train_batch

        # Sample slow / fast targets around their stored means using the stored stds.
        noisy_targets_responses = torch.column_stack((  torch.normal(mean=context[:,3],std=context[:,4]),
                                                        torch.normal(mean=context[:,5],std=context[:,6])))

        noisy_targets_strat_corrs = torch.normal(mean=strat_corrs,std=1)

        time_ids = context[:,0].reshape(-1,1)

        x, noisy_targets_responses, noisy_targets_strat_corrs, time_ids = self.blend(   x,
                                                                                        noisy_targets_responses,
                                                                                        noisy_targets_strat_corrs,
                                                                                        time_ids,
                                                                                        ab=0.2)

        noisy_targets_slow = noisy_targets_responses[:,0].reshape(-1,1)
        noisy_targets_fast = noisy_targets_responses[:,1].reshape(-1,1)

        logits = self.forward(x)
        estimate_resp = logits[:,0].reshape(-1,1)
        estimate_slow = logits[:,1].reshape(-1,1)
        estimate_fast = logits[:,2].reshape(-1,1)
        estimate_strat_corrs = logits[:,3:]

        # Second noise layer: std proportional to the magnitude of each target.
        noisy_targets_slow = torch.normal(mean=noisy_targets_slow,std=0.1*torch.abs(noisy_targets_slow))
        noisy_targets_fast = torch.normal(mean=noisy_targets_fast,std=0.1*torch.abs(noisy_targets_fast))
        noisy_targets_resp = torch.normal(mean=noisy_targets_slow+noisy_targets_fast,std=0.1*torch.abs(noisy_targets_slow+noisy_targets_fast))
        noisy_targets_strat_corrs = torch.normal(mean=noisy_targets_strat_corrs,std=1*torch.abs(noisy_targets_strat_corrs))

        # Strat correlations are learned as binary "is positive" labels.
        noisy_stratcorr_0 = 1.*(noisy_targets_strat_corrs[:,0]>0)
        stratcorr_0_ce = F.binary_cross_entropy_with_logits(estimate_strat_corrs[:,0], noisy_stratcorr_0)

        noisy_stratcorr_1 = 1.*(noisy_targets_strat_corrs[:,1]>0)
        stratcorr_1_ce = F.binary_cross_entropy_with_logits(estimate_strat_corrs[:,1], noisy_stratcorr_1)

        noisy_stratcorr_2 = 1.*(noisy_targets_strat_corrs[:,2]>0)
        stratcorr_2_ce = F.binary_cross_entropy_with_logits(estimate_strat_corrs[:,2], noisy_stratcorr_2)

        noisy_corr_slow_loss, _ = self.day_metric_mean(estimate_slow, noisy_targets_slow, time_ids, clip_value=0.55, is_loss=True)
        noisy_corr_fast_loss, _ = self.day_metric_mean(estimate_fast, noisy_targets_fast, time_ids, clip_value=0.45, is_loss=True)
        noisy_corr_resp_loss, _ = self.day_metric_mean(estimate_resp, noisy_targets_resp, time_ids, clip_value=0.3, is_loss=True)

        loss_resp = noisy_corr_resp_loss + noisy_corr_slow_loss + noisy_corr_fast_loss
        loss_corr = stratcorr_0_ce + stratcorr_1_ce + stratcorr_2_ce

        return loss_resp + loss_corr

    def validation_step(self, val_batch, batch_idx):
        """Log the day-weighted Pearson correlation of the response head as ``val_corr_resp``."""

        x, context, _ = val_batch

        estimate_resp = self(x)[:,0].reshape(-1,1)
        targets_resp = (context[:,3] + context[:,5]).reshape(-1,1)
        time_ids = context[:,0].reshape(-1,1)

        self.log('val_corr_resp', self.day_metric_mean(estimate_resp, targets_resp, time_ids)[0].item())

    def blend(self, feat, resp, corr, tids, ab=0.4):
        """Mix consecutive pairs of rows with Beta(ab, ab) distributed weights.

        Row ``2k`` and row ``2k+1`` are combined as ``b*even + (1-b)*odd``; the
        blended row inherits the time id of whichever row has the larger
        weight. With an odd number of rows the last row is dropped. Inputs with
        fewer than two rows are returned unchanged.

        Args:
            feat, resp, corr, tids: 2-D tensors with one row per sample
                (``tids`` has a single column).
            ab: both shape parameters of the Beta distribution.

        Returns:
            Tuple ``(feat, resp, corr, tids)`` of blended tensors with half as
            many rows as the input.
        """

        if len(feat) < 2:
            return feat, resp, corr, tids

        if len(feat) % 2 > 0:
            feat, resp, corr, tids = feat[:-1], resp[:-1], corr[:-1], tids[:-1]

        # Create the weights on the same device as the data instead of assuming CUDA.
        b = torch.tensor(beta.rvs(ab, ab, size=len(feat)//2), device=feat.device, dtype=torch.float32).reshape(-1,1)

        blended_feat = b * feat[::2] + (1-b) * feat[1::2]
        blended_resp = b * resp[::2] + (1-b) * resp[1::2]
        blended_corr = b * corr[::2] + (1-b) * corr[1::2]
        blended_tids = torch.where(b > 0.5, tids[::2], tids[1::2])

        return blended_feat, blended_resp, blended_corr, blended_tids

    def day_metric_mean(self, estimates, targets, day_ids, clip_value=1, is_loss=False, method='pearson'):
        """Average a per-day metric over the days of a batch, weighted by day size.

        Args:
            estimates: model outputs, one per row.
            targets: target values aligned with ``estimates``.
            day_ids: day (time id) of each row, same shape as ``estimates``.
            clip_value: for the Pearson loss, the negated correlation is
                clipped to ``[-clip_value, 1]`` so days that already correlate
                better than ``clip_value`` stop contributing gradient.
            is_loss: if True return a quantity to minimise (negated, clipped
                correlation or MSE); otherwise the plain metric (correlation,
                or mean absolute error for ``method='mse'``).
            method: ``'pearson'`` or ``'mse'``.

        Returns:
            Tuple ``(weighted mean of the day metrics, minimum day metric)``.
            Both are zero tensors when no day has at least two rows.

        Raises:
            ValueError: if ``method`` is not ``'pearson'`` or ``'mse'``.
        """

        # Fail early: previously an unknown method only printed a message and
        # then reused an unbound or stale per-day value.
        if method not in ('pearson', 'mse'):
            raise ValueError('unknown loss function: {!r}'.format(method))

        device = estimates.device

        day_metrics = []
        day_weights = []

        for day_id in torch.unique(day_ids):

            idx = day_ids == day_id

            e = estimates[idx]
            t = targets[idx]

            # A correlation needs at least two points.
            if len(e) < 2:
                continue

            try:
                if method == 'pearson':
                    day_metric = self.pearson_corr(e,t)
                    if is_loss:
                        day_metric = torch.clip(-day_metric, -clip_value, 1)
                elif is_loss:
                    day_metric = F.mse_loss(e, t)
                else:
                    day_metric = F.l1_loss(e, t)
            except Exception as ex:
                # Best effort: skip a day whose metric cannot be computed.
                print('day_metric_mean error:',ex)
                continue

            day_metrics.append(day_metric)
            day_weights.append(torch.tensor(len(e), dtype=torch.float32, device=device))

        if len(day_metrics) == 0:

            zero = estimates.new_zeros(())
            return zero, zero

        day_metrics = torch.stack(day_metrics)
        day_weights = torch.stack(day_weights)

        return torch.sum(day_metrics * day_weights) / torch.sum(day_weights), torch.min(day_metrics)

    def pearson_corr(self, x, y):
        """Pearson correlation along dim 0, computed as the cosine of the centred inputs."""

        x_centered = x - x.mean(dim=0, keepdim=True)
        y_centered = y - y.mean(dim=0, keepdim=True)

        return F.cosine_similarity(x_centered, y_centered, dim=0, eps=1e-6)

In [2]:
# Preprocess the supplemental training file: build the context array (targets
# split into slow / fast components plus rolling statistics), scale and project
# the features, append per-day summary features, and compute per-strat
# correlation targets.
torch.manual_seed(0)
np.random.seed(0)

print('reading supplemental_train...')
original_features = pd.read_csv(SUPPLE_DATA_FILE, dtype=np.float32, usecols=np.array(range(300))+3).values
original_context  = pd.read_csv(SUPPLE_DATA_FILE, dtype=np.float32, usecols=[1,2,303]).values
gc.collect()

# context columns: 0 time id, 1 strat id, 2 target, 3-10 derived series (filled below)
context = np.empty((len(original_context),11), dtype=np.float32)
context[:] = np.nan
context[:,:3] = original_context

# to avoid clashing with past data
context[:,0] = context[:,0] + 1220
context[:,1] = context[:,1] + 3774

# These are views: writes to context[:, 0..2] would be visible through them.
time_ids  = context[:,0]
strat_ids = context[:,1]
targets = context[:,2]

print('computing series...')
for strat_id in tqdm(np.unique(strat_ids)):

    idx = strat_ids==strat_id
    strat_targets = targets[idx]

    context[idx,3] = np.diff(kalman_filter(strat_targets.cumsum(),0.2),prepend=0)  # slow signal
    context[idx,4] = pd.Series(context[idx,3]).rolling(window=5, min_periods=1).std().to_numpy(dtype=np.float32)  # slow signal rolling std

    context[idx,5] = strat_targets - context[idx,3]  # fast signal
    context[idx,6] = pd.Series(context[idx,5]).rolling(window=5, min_periods=1).std().to_numpy(dtype=np.float32)  # fast signal rolling std

    lagged = np.roll(np.diff(butterworth_filter(strat_targets.cumsum(),0.2),prepend=0),1)
    # np.roll wraps the last value round to the front; zero it for THIS strat.
    # (The previous code zeroed global row 0 instead, leaving the wrapped
    # value in place for every other strat.)
    lagged[0] = 0
    context[idx,7] = lagged  # filtered signal lagged
    context[idx,8] = pd.Series(context[idx,7]).rolling(window=5, min_periods=1).std().to_numpy(dtype=np.float32)  # filtered signal lagged rolling std

    context[idx,9] = pd.Series(context[idx,7]).rolling(window=5, min_periods=1).mean().to_numpy(dtype=np.float32)  # filtered signal lagged rolling mean
    context[idx,10] = pd.Series(context[idx,9]).rolling(window=5, min_periods=1).std().to_numpy(dtype=np.float32)  # filtered signal lagged rolling mean rolling std

# Rolling std of a single sample is NaN; replace NaNs with 0.
context = np.nan_to_num(context)

del original_context
gc.collect()

targets_slow = context[:,3]
targets_past = context[:,[7,9]]

print('scaling...')
with open(PREPO_PATH+'/scaler_EVAL.pkl','rb') as fh:
    scaler = pickle.load(fh)
features = scaler.transform(original_features)

print('PCA...')
with open(PREPO_PATH+'/pca_fea_EVAL.pkl','rb') as fh:
    pca_fea = pickle.load(fh)
features = pca_fea.transform(features).astype(np.float32)

print('loading past response regressor...')
regressor_path = PREPO_PATH+'/regressor_EVAL.ckpt'
regressor = PastResponseRegressor.load_from_checkpoint(regressor_path,input_width=features.shape[1])

print('computing past responses estimates...')
f = torch.from_numpy(features).to('cuda')
regressor.eval()
regressor.cuda()
# Inference only: no_grad avoids building an autograd graph over the whole dataset.
with torch.no_grad():
    estimate_targets_past = regressor(f).detach().cpu().numpy()

regressor.cpu()
del regressor
del f

# Coarsely quantise the leading principal components before correlating them.
degra = 3
red_num_pcs = 81
max_time_id = int(max(time_ids))+1
features_de = np.clip(np.round(features[:,:red_num_pcs] * degra), -2*degra, 2*degra)

# One row per possible time id; rows of absent days stay NaN and are dropped below.
time_index = np.array(range(max_time_id))
day_means = np.empty((max_time_id,original_features.shape[1]), dtype=np.float32);  day_means[:] = np.nan
pears_to_target = np.empty((max_time_id,(red_num_pcs+2)*2), dtype=np.float32);  pears_to_target[:] = np.nan
spear_to_target = np.empty((max_time_id,(red_num_pcs+2)*2), dtype=np.float32);  spear_to_target[:] = np.nan

print('computing day means and correlations to estimated past targets...')
for time_id in tqdm(np.unique(time_ids)):

    day_means[int(time_id)] = np.nan_to_num(np.mean(original_features[time_ids==time_id],axis=0), posinf=0., neginf=0.)

    features_daygroup_de  = features_de[time_ids==time_id]
    # Sign (as 0/1) of the two estimated past responses.
    targs_daygroup_de = 1.*(estimate_targets_past[time_ids==time_id]>0)

    # Keep the two columns of the correlation matrix that relate every variable
    # to the two binarised targets; fall back to zeros if the computation fails.
    try:
        pears = np.corrcoef(np.hstack((targs_daygroup_de,features_daygroup_de)).T)[:,:2]
        pears_to_target[int(time_id)] = np.nan_to_num(pears, posinf=0., neginf=0.).flatten()
    except Exception:
        pears_to_target[int(time_id)] = np.zeros((red_num_pcs+2)*2, dtype=np.float32)

    try:
        spear = stats.spearmanr(np.hstack((targs_daygroup_de,features_daygroup_de)))[0][:,:2].astype(np.float32)
        spear_to_target[int(time_id)] = np.nan_to_num(spear, posinf=0., neginf=0.).flatten()
    except Exception:
        spear_to_target[int(time_id)] = np.zeros((red_num_pcs+2)*2, dtype=np.float32)

del original_features
gc.collect()

# Drop the rows of time ids that never occurred.
idx = ~np.isnan(day_means).any(axis=1)
time_index = time_index[idx]
day_means = day_means[idx]
pears_to_target = pears_to_target[idx]
spear_to_target = spear_to_target[idx]

pears_to_target = clip_corrs(pears_to_target)
# Stored as the difference between the clipped Pearson and clipped Spearman values.
spear_to_target = pears_to_target - clip_corrs(spear_to_target)

with open(PREPO_PATH+'/pca_day_EVAL.pkl','rb') as fh:
    pca_day = pickle.load(fh)
with open(PREPO_PATH+'/pca_pears_EVAL.pkl','rb') as fh:
    pca_pears = pickle.load(fh)
with open(PREPO_PATH+'/pca_spear_EVAL.pkl','rb') as fh:
    pca_spear = pickle.load(fh)

print('computing day and corr PCAs...')
day_means = pca_day.transform(day_means).astype(np.float32)
pears_to_target = pca_pears.transform(pears_to_target).astype(np.float32)
spear_to_target = pca_spear.transform(spear_to_target).astype(np.float32)

daily_feats = np.hstack(( day_means, pears_to_target, spear_to_target ))
features_ext = np.empty((len(features),daily_feats.shape[1]), dtype=np.float32);  features_ext[:] = np.nan

print('extending features with day features...')
for i in tqdm(range(len(time_index))):

    # Broadcast the day-level feature row to every sample of that day.
    features_ext[time_ids==time_index[i]] = daily_feats[i]

features = np.hstack(( features,features_ext,estimate_targets_past ))

degra = 3
red_num_pcs = 3
features_de = np.clip(np.round(features[:,:red_num_pcs] * degra), -2*degra, 2*degra)

daily_strat_corrs = np.empty((int(max(strat_ids))+1,red_num_pcs), dtype=np.float32); daily_strat_corrs[:] = np.nan
daily_strat_index = np.array(range(len(daily_strat_corrs)))

print('computing strat correlations to target...')
for strat_id in tqdm(np.unique(strat_ids)):

    feats_strat_de = features_de[strat_ids==strat_id]
    targs_stratgroup_de = 1.*(targets_slow[strat_ids==strat_id]>0)

    # Correlation of each quantised component with the sign of the slow target.
    try:
        corrs = np.corrcoef(np.hstack((targs_stratgroup_de.reshape(-1,1),feats_strat_de)).T)[1:,0]
        corrs = np.nan_to_num(corrs, posinf=0., neginf=0.)
    except Exception:
        corrs = np.zeros(red_num_pcs, dtype=np.float32)

    daily_strat_corrs[int(strat_id)] = corrs
    daily_strat_index[int(strat_id)] = strat_id

# Drop the rows of strat ids that never occurred.
idx = ~np.isnan(daily_strat_corrs).any(axis=1)
daily_strat_corrs = daily_strat_corrs[idx]
daily_strat_index = daily_strat_index[idx]

signs = np.sign(daily_strat_corrs)
daily_strat_corrs = np.clip(np.abs(daily_strat_corrs),0.1,0.3)
daily_strat_corrs = signs * (daily_strat_corrs - 0.1)
daily_strat_corrs = StandardScaler().fit_transform(daily_strat_corrs)

strat_corrs = np.empty((len(features),daily_strat_corrs.shape[1]), dtype=np.float32);  strat_corrs[:] = np.nan

print('creating strat_corrs array...')
for i in tqdm(range(len(daily_strat_index))):

    # Broadcast the strat-level correlations to every sample of that strat.
    strat_corrs[strat_ids==daily_strat_index[i]] = daily_strat_corrs[i]

del features_de,day_means,pears_to_target,spear_to_target,daily_feats,daily_strat_corrs,daily_strat_index,time_index,idx,signs
gc.collect()

reading supplemental_train...
computing series...


100%|██████████| 4098/4098 [04:31<00:00, 15.10it/s]


scaling...
PCA...
loading past response regressor...
computing past responses estimates...
computing day means and correlations to estimated past targets...


  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
100%|██████████| 3638/3638 [01:15<00:00, 47.96it/s]


computing day and corr PCAs...
extending features with day features...


100%|██████████| 3638/3638 [00:13<00:00, 279.61it/s]


computing strat correlations to target...


  c /= stddev[:, None]
  c /= stddev[None, :]
100%|██████████| 4098/4098 [00:23<00:00, 175.16it/s]


creating strat_corrs array...


100%|██████████| 4098/4098 [00:13<00:00, 300.16it/s]


9

In [3]:
# Prepend the pickled past-data arrays to the supplemental arrays computed
# above and move each stacked result to the GPU as a float32 tensor.
print('stacking past and supplemental data and copying to GPU...')


def _stack_on_gpu(pickle_suffix, supplemental):
    """Load the past array at PAST_DATA_PATH + pickle_suffix, stack ``supplemental`` below it, return a CUDA tensor."""
    with open(PAST_DATA_PATH+pickle_suffix,'rb') as handle:
        return torch.tensor(np.vstack((pickle.load(handle),supplemental)), dtype=torch.float32, device='cuda')


context = _stack_on_gpu('/context.pkl', context)
strat_corrs = _stack_on_gpu('/strat_corrs_EVAL.pkl', strat_corrs)

# Release the replaced host arrays before loading the (largest) feature array.
gc.collect()

features = _stack_on_gpu('/features_EVAL.pkl', features)

gc.collect()

stacking past and supplemental data and copying to GPU...


0

In [6]:
# Train one UbiquantMultiTask model per cross-validation split and keep the
# checkpoint with the best validation correlation for each split.
print('training models...')

# Remove stale checkpoints first so the new ones are written under the exact
# file names below.
for split in np.arange(5):

    file_name = './models/multitask_CV5{}.ckpt'.format(split)

    if os.path.isfile(file_name):
        os.remove(file_name)

torch.manual_seed(0)
np.random.seed(0)

for split in np.arange(5):

    # 'CV5<k>': the data module reads the last two characters as jump=5, shift=k.
    split_name = 'CV5{}'.format(split)

    print()
    print('training model for split', split_name)

    print('loading data')
    data = UbiquantDataModule(features, context, strat_corrs, split_name)

    print('creating model instance')
    # Derive the input width from the data instead of hard-coding it, so the
    # model always matches the feature matrix built by the preprocessing step.
    model = UbiquantMultiTask(input_width=features.shape[1])

    monitor='val_corr_resp'
    mode='max'

    early_stop_callback = EarlyStopping(
        monitor=monitor,
        mode=mode,
        patience=2,
        min_delta=0.0001,
        verbose=True
    )

    checkpoint_callback = ModelCheckpoint(
        monitor=monitor,
        mode=mode,
        dirpath='./models',
        filename='multitask_{}'.format(split_name),
        save_top_k=1,
        verbose=True
    )

    # NOTE(review): `gpus` and `checkpoint_callback` are Trainer arguments of
    # the Lightning version this notebook was run with — confirm before upgrading.
    trainer = pl.Trainer(   logger=False,
                            gpus=1,
                            max_epochs=18,
                            checkpoint_callback=True,
                            callbacks=[early_stop_callback,checkpoint_callback]
                            )

    print('go!')
    trainer.fit(model, data)

    # Move the model and the optimizer state off the GPU so the memory can be
    # released before the next split starts.
    model.cpu()

    for optimizer_metrics in trainer.optimizers[0].state.values():
        for metric_name, metric in optimizer_metrics.items():
            if torch.is_tensor(metric):
                optimizer_metrics[metric_name] = metric.cpu()

    del early_stop_callback, checkpoint_callback, trainer, model, data

    gc.collect()
    torch.cuda.empty_cache()

training models...

training model for split CV50
loading data
stacking past and supplemental data and copying to GPU...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params
--------------------------------------------
0 | batch_norm1 | BatchNorm1d | 500   
1 | batch_norm2 | BatchNorm1d | 600   
2 | batch_norm3 | BatchNorm1d | 300   
3 | dense1      | Linear      | 75.3 K
4 | dense2      | Linear      | 45.1 K
5 | dense3      | Linear      | 22.7 K
6 | dropout1    | Dropout     | 0     
7 | dropout2    | Dropout     | 0     
8 | dropout3    | Dropout     | 0     
9 | linear      | Linear      | 906   
--------------------------------------------
145 K     Trainable params
0         Non-trainable params
145 K     Total params
0.582     Total estimated model params size (MB)


train dataset size: 3309165
val dataset size: 775206
creating model instance
go!
Epoch 0: 100%|██████████| 500/500 [01:06<00:00,  7.52it/s, loss=1.49] 

Epoch 0, global step 482: val_corr_resp reached 0.08519 (best 0.08519), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV50.ckpt" as top 1


Epoch 1: 100%|██████████| 500/500 [01:06<00:00,  7.51it/s, loss=1.44]

Epoch 1, global step 965: val_corr_resp reached 0.08835 (best 0.08835), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV50.ckpt" as top 1


Epoch 2: 100%|██████████| 500/500 [01:03<00:00,  7.91it/s, loss=1.52]

Epoch 2, global step 1448: val_corr_resp reached 0.09819 (best 0.09819), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV50.ckpt" as top 1


Epoch 3: 100%|██████████| 500/500 [01:06<00:00,  7.55it/s, loss=1.45]

Epoch 3, step 1931: val_corr_resp was not in top 1


Epoch 4: 100%|█████████▉| 498/500 [01:05<00:00,  7.62it/s, loss=1.51]Epoch     5: reducing learning rate of group 0 to 1.0000e-03.
Epoch 4: 100%|██████████| 500/500 [01:06<00:00,  7.55it/s, loss=1.51]

Epoch 4, step 2414: val_corr_resp was not in top 1


Epoch 4: 100%|██████████| 500/500 [01:06<00:00,  7.55it/s, loss=1.51]
Dataset removed
Dataset removed

training model for split CV51
loading data
stacking past and supplemental data and copying to GPU...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params
--------------------------------------------
0 | batch_norm1 | BatchNorm1d | 500   
1 | batch_norm2 | BatchNorm1d | 600   
2 | batch_norm3 | BatchNorm1d | 300   
3 | dense1      | Linear      | 75.3 K
4 | dense2      | Linear      | 45.1 K
5 | dense3      | Linear      | 22.7 K
6 | dropout1    | Dropout     | 0     
7 | dropout2    | Dropout     | 0     
8 | dropout3    | Dropout     | 0     
9 | linear      | Linear      | 906   
--------------------------------------------
145 K     Trainable params
0         Non-trainable params
145 K     Total params
0.582     Total estimated model params size (MB)


train dataset size: 3158641
val dataset size: 925730
creating model instance
go!
Epoch 0: 100%|██████████| 502/502 [01:05<00:00,  7.72it/s, loss=1.48] 

Epoch 0, global step 485: val_corr_resp reached 0.09577 (best 0.09577), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV51.ckpt" as top 1


Epoch 1: 100%|██████████| 502/502 [01:04<00:00,  7.78it/s, loss=1.45]

Epoch 1, step 971: val_corr_resp was not in top 1


Epoch 2: 100%|█████████▉| 501/502 [01:02<00:00,  8.07it/s, loss=1.54]Epoch     3: reducing learning rate of group 0 to 1.0000e-03.
Epoch 2: 100%|██████████| 502/502 [01:02<00:00,  7.97it/s, loss=1.54]

Epoch 2, step 1457: val_corr_resp was not in top 1


Epoch 2: 100%|██████████| 502/502 [01:02<00:00,  7.97it/s, loss=1.54]
Dataset removed
Dataset removed

training model for split CV52
loading data
stacking past and supplemental data and copying to GPU...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params
--------------------------------------------
0 | batch_norm1 | BatchNorm1d | 500   
1 | batch_norm2 | BatchNorm1d | 600   
2 | batch_norm3 | BatchNorm1d | 300   
3 | dense1      | Linear      | 75.3 K
4 | dense2      | Linear      | 45.1 K
5 | dense3      | Linear      | 22.7 K
6 | dropout1    | Dropout     | 0     
7 | dropout2    | Dropout     | 0     
8 | dropout3    | Dropout     | 0     
9 | linear      | Linear      | 906   
--------------------------------------------
145 K     Trainable params
0         Non-trainable params
145 K     Total params
0.582     Total estimated model params size (MB)


train dataset size: 3182246
val dataset size: 902125
creating model instance
go!
Epoch 0: 100%|██████████| 502/502 [01:04<00:00,  7.77it/s, loss=1.43] 

Epoch 0, global step 485: val_corr_resp reached 0.07020 (best 0.07020), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV52.ckpt" as top 1


Epoch 1: 100%|██████████| 502/502 [01:04<00:00,  7.74it/s, loss=1.56]

Epoch 1, global step 971: val_corr_resp reached 0.08896 (best 0.08896), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV52.ckpt" as top 1


Epoch 2: 100%|██████████| 502/502 [01:03<00:00,  7.93it/s, loss=1.47]

Epoch 2, global step 1457: val_corr_resp reached 0.09719 (best 0.09719), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV52.ckpt" as top 1


Epoch 3: 100%|██████████| 502/502 [01:02<00:00,  8.00it/s, loss=1.48]

Epoch 3, global step 1943: val_corr_resp reached 0.10005 (best 0.10005), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV52.ckpt" as top 1


Epoch 4: 100%|██████████| 502/502 [01:02<00:00,  7.98it/s, loss=1.45]

Epoch 4, step 2429: val_corr_resp was not in top 1


Epoch 5: 100%|██████████| 502/502 [01:03<00:00,  7.93it/s, loss=1.47]

Epoch 5, global step 2915: val_corr_resp reached 0.10119 (best 0.10119), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV52.ckpt" as top 1


Epoch 6: 100%|██████████| 502/502 [01:03<00:00,  7.93it/s, loss=1.55]

Epoch 6, global step 3401: val_corr_resp reached 0.10673 (best 0.10673), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV52.ckpt" as top 1


Epoch 7: 100%|██████████| 502/502 [01:03<00:00,  7.94it/s, loss=1.47]

Epoch 7, step 3887: val_corr_resp was not in top 1


Epoch 8: 100%|█████████▉| 501/502 [01:01<00:00,  8.11it/s, loss=1.49]Epoch     9: reducing learning rate of group 0 to 1.0000e-03.
Epoch 8: 100%|██████████| 502/502 [01:02<00:00,  8.02it/s, loss=1.49]

Epoch 8, step 4373: val_corr_resp was not in top 1


Epoch 8: 100%|██████████| 502/502 [01:02<00:00,  8.02it/s, loss=1.49]
Dataset removed
Dataset removed

training model for split CV53
loading data
stacking past and supplemental data and copying to GPU...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params
--------------------------------------------
0 | batch_norm1 | BatchNorm1d | 500   
1 | batch_norm2 | BatchNorm1d | 600   
2 | batch_norm3 | BatchNorm1d | 300   
3 | dense1      | Linear      | 75.3 K
4 | dense2      | Linear      | 45.1 K
5 | dense3      | Linear      | 22.7 K
6 | dropout1    | Dropout     | 0     
7 | dropout2    | Dropout     | 0     
8 | dropout3    | Dropout     | 0     
9 | linear      | Linear      | 906   
--------------------------------------------
145 K     Trainable params
0         Non-trainable params
145 K     Total params
0.582     Total estimated model params size (MB)


train dataset size: 3285340
val dataset size: 799031
creating model instance
go!
Epoch 0: 100%|██████████| 502/502 [01:02<00:00,  8.02it/s, loss=1.51] 

Epoch 0, global step 485: val_corr_resp reached 0.10088 (best 0.10088), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV53.ckpt" as top 1


Epoch 1: 100%|██████████| 502/502 [01:04<00:00,  7.79it/s, loss=1.52]

Epoch 1, global step 971: val_corr_resp reached 0.10693 (best 0.10693), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV53.ckpt" as top 1


Epoch 2: 100%|██████████| 502/502 [01:03<00:00,  7.88it/s, loss=1.42]

Epoch 2, global step 1457: val_corr_resp reached 0.10873 (best 0.10873), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV53.ckpt" as top 1


Epoch 3: 100%|██████████| 502/502 [01:03<00:00,  7.95it/s, loss=1.44]

Epoch 3, global step 1943: val_corr_resp reached 0.11141 (best 0.11141), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV53.ckpt" as top 1


Epoch 4: 100%|██████████| 502/502 [01:02<00:00,  7.98it/s, loss=1.56]

Epoch 4, global step 2429: val_corr_resp reached 0.11335 (best 0.11335), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV53.ckpt" as top 1


Epoch 5: 100%|██████████| 502/502 [01:06<00:00,  7.59it/s, loss=1.43]

Epoch 5, step 2915: val_corr_resp was not in top 1


Epoch 6: 100%|█████████▉| 500/502 [01:04<00:00,  7.76it/s, loss=1.41]Epoch     7: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6: 100%|██████████| 502/502 [01:05<00:00,  7.62it/s, loss=1.41]

Epoch 6, step 3401: val_corr_resp was not in top 1


Epoch 6: 100%|██████████| 502/502 [01:05<00:00,  7.62it/s, loss=1.41]
Dataset removed
Dataset removed

training model for split CV54
loading data
stacking past and supplemental data and copying to GPU...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params
--------------------------------------------
0 | batch_norm1 | BatchNorm1d | 500   
1 | batch_norm2 | BatchNorm1d | 600   
2 | batch_norm3 | BatchNorm1d | 300   
3 | dense1      | Linear      | 75.3 K
4 | dense2      | Linear      | 45.1 K
5 | dense3      | Linear      | 22.7 K
6 | dropout1    | Dropout     | 0     
7 | dropout2    | Dropout     | 0     
8 | dropout3    | Dropout     | 0     
9 | linear      | Linear      | 906   
--------------------------------------------
145 K     Trainable params
0         Non-trainable params
145 K     Total params
0.582     Total estimated model params size (MB)


train dataset size: 3402092
val dataset size: 682279
creating model instance
go!
Epoch 0: 100%|██████████| 503/503 [01:06<00:00,  7.58it/s, loss=1.54] 

Epoch 0, global step 486: val_corr_resp reached 0.10340 (best 0.10340), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV54.ckpt" as top 1


Epoch 1: 100%|██████████| 503/503 [01:06<00:00,  7.55it/s, loss=1.5] 

Epoch 1, global step 973: val_corr_resp reached 0.11135 (best 0.11135), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV54.ckpt" as top 1


Epoch 2: 100%|██████████| 503/503 [01:06<00:00,  7.53it/s, loss=1.51]

Epoch 2, step 1460: val_corr_resp was not in top 1


Epoch 3: 100%|██████████| 503/503 [01:07<00:00,  7.50it/s, loss=1.52]

Epoch 3, global step 1947: val_corr_resp reached 0.11533 (best 0.11533), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV54.ckpt" as top 1


Epoch 4: 100%|██████████| 503/503 [01:06<00:00,  7.54it/s, loss=1.52]

Epoch 4, step 2434: val_corr_resp was not in top 1


Epoch 5: 100%|██████████| 503/503 [01:06<00:00,  7.51it/s, loss=1.59]

Epoch 5, global step 2921: val_corr_resp reached 0.11565 (best 0.11565), saving model to "C:\Users\codef\OneDrive\machine learning\ubiquant\models\multitask_CV54.ckpt" as top 1


Epoch 6:  50%|█████     | 254/503 [00:32<00:31,  7.93it/s, loss=1.46]

In [None]:
# Load, for each of the five cross-validation splits (CV50..CV54), the fitted
# preprocessing objects and the two trained networks needed at inference time.
# After this cell, every list below holds one entry per split, in split order.
print('loading models and prepro...')

scalers = []
regressors = []
models = []

compressors_fea = []
compressors_day = []
compressors_pea = []
compressors_spe = []


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


for split in range(5):

    print('..for split',split)

    # Feature scaler fitted for this split.
    scalers.append(_load_pickle(PREPO_PATH+'/scaler_CV5{}.pkl'.format(split)))

    # Compressors for the row features, the per-day feature means and the
    # per-day Pearson / Spearman correlation vectors (file names say "pca").
    compressors_fea.append(_load_pickle(PREPO_PATH+'/pca_fea_CV5{}.pkl'.format(split)))
    compressors_day.append(_load_pickle(PREPO_PATH+'/pca_day_CV5{}.pkl'.format(split)))
    compressors_pea.append(_load_pickle(PREPO_PATH+'/pca_pears_CV5{}.pkl'.format(split)))
    compressors_spe.append(_load_pickle(PREPO_PATH+'/pca_spear_CV5{}.pkl'.format(split)))

    # Both networks are moved to CPU and put in eval mode for inference.
    # NOTE(review): input_width must equal the value used when the checkpoint
    # was trained -- confirm 210 / 250 against the training cells.
    regressor = PastResponseRegressor.load_from_checkpoint(PREPO_PATH+'/regressor_CV5{}.ckpt'.format(split), input_width=210)
    regressor.cpu()
    regressor.eval()
    regressors.append(regressor)

    model = UbiquantMultiTask.load_from_checkpoint('./models/multitask_CV5{}.ckpt'.format(split), input_width=250)
    model.cpu()
    model.eval()
    models.append(model)

In [None]:
# Local stand-in for the Kaggle test iterator: a one-element list holding the
# example test file with its 'time_id' column dropped.
iter_test = [pd.read_csv('D:/data/ubiquant/example_test.csv').drop(columns=['time_id'])]
# Empty submission frame with two separate columns, 'row_id' and 'target'
# (a single 'row_id,target' label would create just one column).
submission_df = pd.DataFrame(columns=['row_id', 'target'])
# USE BELOW CODE INSTEAD FOR KAGGLE COMPETITION
# import ubiquant
# env = ubiquant.make_env()
# iter_test = env.iter_test()

In [None]:
# Inference: for every test batch, run each split's preprocessing + model
# pipeline and average the first model output column over the splits.

# Compressed features are multiplied by `degra`, rounded and clipped to
# +/- 2*degra before the day-level correlations are computed.
degra = 3
# Number of leading compressed feature columns used for those correlations.
red_num_pcs = 81

print('inference starts...')

# One pipeline per loaded model (the loading cell appends one per split).
n_splits = len(models)

for test_df in iter_test:
# USE BELOW CODE INSTEAD FOR KAGGLE COMPETITION
# for (test_df, sample_prediction_df) in iter_test:

    if len(test_df) == 0:
        continue

    # Columns from index 2 onwards are used as the raw feature matrix.
    original_features = test_df.iloc[:,2:].to_numpy(dtype=np.float32)

    # Column-wise mean over all rows of this batch, kept as a 1 x n_features row.
    day_means = np.mean(original_features,axis=0).reshape(1,-1)

    # Accumulate the raw sum and count successful splits, so that a failing
    # split does not shrink the ensemble mean towards zero.
    output_sum = np.zeros(len(test_df))
    n_ok = 0

    for split in range(n_splits):

        try:

            features = scalers[split].transform(original_features)
            features = compressors_fea[split].transform(features).astype(np.float32)

            # First-stage regressor output (two columns per row); falls back
            # to zeros if the regressor fails.
            try:
                with torch.no_grad():
                    f = torch.from_numpy(features)
                    estimate_targets_past = regressors[split](f).detach().cpu().numpy()
                estimate_targets_past = np.nan_to_num(estimate_targets_past, posinf=0., neginf=0.)
            except Exception as ex:
                print('PastResponseRegressor error:',ex)
                estimate_targets_past = np.zeros((len(features),2), dtype=np.float32)

            # Coarsened features and binarised (positive / non-positive)
            # regressor estimates used for the batch-level correlations.
            features_daygroup_de = np.clip(np.round(features[:,:red_num_pcs] * degra), -2*degra, 2*degra)
            targs_daygroup_de = 1.*(estimate_targets_past>0)
            stacked = np.hstack((targs_daygroup_de,features_daygroup_de))

            # Correlation of every stacked column with the two estimate
            # columns, flattened to a single row of (red_num_pcs+2)*2 values.
            try:
                pears = np.corrcoef(stacked.T)[:,:2]
                pears = np.nan_to_num(pears, posinf=0., neginf=0.).flatten().reshape(1,-1)
            except Exception as ex:
                print('Pearson correlation error:',ex)
                pears = np.zeros((1,(red_num_pcs+2)*2), dtype=np.float32)

            try:
                spear = stats.spearmanr(stacked)[0][:,:2].astype(np.float32)
                spear = np.nan_to_num(spear, posinf=0., neginf=0.).flatten().reshape(1,-1)
            except Exception as ex:
                print('Spearman correlation error:',ex)
                spear = np.zeros((1,(red_num_pcs+2)*2), dtype=np.float32)

            # NOTE(review): the second feature is the difference between the
            # clipped Pearson and clipped Spearman vectors; presumably this
            # mirrors the training-time preprocessing -- confirm.
            pears = clip_corrs(pears)
            spear = pears - clip_corrs(spear)

            day_means_split = compressors_day[split].transform(day_means).astype(np.float32)
            pears = compressors_pea[split].transform(pears).astype(np.float32)
            spear = compressors_spe[split].transform(spear).astype(np.float32)

            # Broadcast the batch-level rows to every sample and append the
            # regressor estimates to form the final model input.
            n_rows = len(features)
            features = np.nan_to_num(np.hstack((features,
                                      np.repeat(day_means_split,n_rows,axis=0),
                                      np.repeat(pears,n_rows,axis=0),
                                      np.repeat(spear,n_rows,axis=0),
                                      estimate_targets_past)), posinf=0., neginf=0.)

            with torch.no_grad():
                model_feats = torch.from_numpy(features)
                model_output = models[split](model_feats).detach().numpy()
            model_output = np.nan_to_num(model_output, posinf=0., neginf=0.)

            # Only the first output column is used as the prediction.
            output_sum += model_output[:,0]
            n_ok += 1

        except Exception as ex:
            # Best effort: a failing split is skipped rather than aborting inference.
            print('Iteration error:',ex)

    # Mean over the splits that succeeded; stays all-zero if every split failed.
    output = output_sum / n_ok if n_ok else output_sum

    # Use plain arrays so rows pair up by position even if test_df carries a
    # non-default index.
    submission_df = pd.DataFrame({ 'row_id':test_df.iloc[:,0].to_numpy(), 'target':output })
    # USE BELOW CODE INSTEAD FOR KAGGLE COMPETITION
    # sample_prediction_df['target'] = output
    # env.predict(sample_prediction_df)

In [12]:
import numpy as np

# Scratch check: compare the first column of an all-ones (347, 45) matrix
# against an all-zeros vector of length 347.
original_context = np.ones((347, 45))
fake_targets = np.zeros(347)

# Print the number of rows when at least one element of the column differs.
if np.any(original_context[:, 0] != fake_targets):
    print(original_context.shape[0])

347
