In [1]:
import os
import gc
import random

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GroupKFold

import joblib

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.simplefilter('ignore')


In [2]:
def seed_everything(seed: int):  
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(42)

In [3]:
PATH = "./ubiquant-market-prediction"
PRECISION = "half"
n_features = 300
FEATURES = [f'f_{i}' for i in range(n_features)]

In [5]:
if PRECISION == "half":
    train = pd.read_pickle(f'{PATH}/train.pkl')
else:
    train = pd.read_csv(f'{PATH}/train.csv')
    _ = train.pop("row_id")
    
train.head()

Unnamed: 0,investment_id,time_id,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,...,f_291,f_292,f_293,f_294,f_295,f_296,f_297,f_298,f_299,target
0,1,0,0.932617,0.113708,-0.4021,0.378418,-0.203979,-0.413574,0.96582,1.230469,...,-1.095703,0.200073,0.819336,0.941406,-0.086792,-1.086914,-1.044922,-0.287598,0.321533,-0.300781
1,2,0,0.811035,-0.51416,0.742188,-0.616699,-0.194214,1.771484,1.427734,1.133789,...,0.912598,-0.734375,0.819336,0.941406,-0.387695,-1.086914,-0.929688,-0.974121,-0.343506,-0.231079
2,6,0,0.394043,0.615723,0.567871,-0.60791,0.068909,-1.083008,0.979492,-1.125977,...,0.912598,-0.551758,-1.220703,-1.060547,-0.219116,-1.086914,-0.612305,-0.113953,0.243652,0.568848
3,7,0,-2.34375,-0.011871,1.875,-0.606445,-0.586914,-0.815918,0.77832,0.299072,...,0.912598,-0.266357,-1.220703,0.941406,-0.608887,0.104919,-0.783203,1.151367,-0.773438,-1.064453
4,8,0,0.842285,-0.262939,2.330078,-0.583496,-0.618164,-0.742676,-0.946777,1.230469,...,0.912598,-0.741211,-1.220703,0.941406,-0.588379,0.104919,0.753418,1.345703,-0.737793,-0.531738


In [6]:
train["target_"] = train.groupby("investment_id")['target'].shift(2)
train["target_"] = train["target_"].fillna(train["target"])
# train["target"] = train["target_"]

In [6]:
class UMPDataset(Dataset):
    def __init__(self, df_data, mode='train'):
        self.mode = mode
        
        self.time_id = df_data['time_id'].values.astype(np.int32)
        self.investment_id = df_data['investment_id'].values.astype(np.int32)
        self.values = df_data[FEATURES].values
        if self.mode != 'test':
            self.targets = df_data['target'].values
            
        self.len = df_data.shape[0]
        
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        time_id = self.time_id[idx]
        investment_id = self.investment_id[idx]
        values = self.values[idx]
        if self.mode != 'test':
            targets = self.targets[idx]
            return time_id, investment_id, values, targets
        else:
            return time_id, investment_id, values

In [7]:
def swish(x):
    return x * torch.sigmoid(x)

# Residual block
class Residual1D(nn.Module):
    def __init__(self, in_dim, out_dim, activation='relu'):
        super(Residual1D, self).__init__()
        self.fc1 = nn.Linear(in_dim, out_dim)
        self.bn1 = nn.BatchNorm1d(out_dim)
        
        if activation == 'relu':
            self.activate = nn.ReLU(inplace=True)
        elif activation == 'swish':
            self.activate = swish
        else:
            raise NotImplementedError
            
        self.fc2 = nn.Linear(out_dim, out_dim)
        self.bn2 = nn.BatchNorm1d(out_dim)
        
    def forward(self, x):
        residual = x
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.activate(x)
        x = self.fc2(x)
        x = self.bn2(x)
        
        x += residual
        x = self.activate(x)
        return x


class SimpleMLP(nn.Module):
    def __init__(self, value_dim=300, emb_size=16, time_emb=1220, investment_emb=3774):
        super().__init__()
        self.time_emb = nn.Embedding(time_emb, emb_size)
        self.investment_emb = nn.Embedding(investment_emb, emb_size)
        
        self.bn0 = nn.BatchNorm1d(value_dim)
        self.head = nn.Linear(value_dim, 256)
        
        self.res1 = Residual1D(256, 256, 'relu')
        self.drop1 = nn.Dropout(0.5)
        self.trans1 = nn.Linear(256, 128)
        self.res2 = Residual1D(128, 128, 'relu')
        self.drop2 = nn.Dropout(0.5)
        self.trans2 = nn.Linear(128, 32)
        self.res3 = Residual1D(32, 32, 'relu')
        self.drop3 = nn.Dropout(0.5)

        self.tail = nn.Linear(32, 1)
        
    def forward(self, x_value):
#         time_emb = self.time_emb(x_time)
#         investment_emb = self.investment_emb(x_investment)
        
#         x = torch.cat([x_value, time_emb, investment_emb], 1)
        
#         x = torch.cat([x_value, investment_emb], 1)
        x = self.bn0(x_value)
        
        x = swish(self.head(x))
        x = self.drop1(self.res1(x))
        x = swish(self.trans1(x))
        x = self.drop2(self.res2(x))
        x = swish(self.trans2(x))
        x = self.drop3(self.res3(x))
        
        x = self.tail(x)
        
        return x

In [8]:
def SetL2Regularization(model, weight=0.01):
    params=[]
    for key, value in model.named_parameters():
        if "bias" in key:
            params += [{'params':value,'weight_decay':0.0}]
        else:
            params += [{'params':value,'weight_decay':weight}]
    return params

In [9]:
def train_one_fold(dataloaders, fold_id, split_m='time_id'):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = SimpleMLP().to(device)
    loss_fn = nn.MSELoss()
    params = SetL2Regularization(model, 0.001)
    optimizer = optim.Adam(params, lr=5e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                     factor=0.25, 
                                                     patience=3, 
                                                     mode='min')
    epochs = 15
    
    num_train_examples = len(dataloaders['train'])
    num_valid_examples = len(dataloaders['valid'])
    
    losses = []
    best_loss = np.inf
    best_score = -100000
    print("... Start Training ...")
    for e in range(epochs):
        # train
        model.train()
        train_loss = 0
        for i, (time_, investment_, value_, target_) in enumerate(tqdm(dataloaders['train'])):
            time_ = time_.to(device)
            investment_ = investment_.to(device)
            value_ = value_.to(device=device, dtype=torch.float)
            
            target_ = target_.unsqueeze(1).to(device, dtype=torch.float)

            y_pred = model(value_)
            loss = loss_fn(y_pred, target_)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_epoch_loss = train_loss / num_train_examples

        # valid
        model.eval()
        valid_preds = list()
        valid_loss = 0
        with torch.no_grad():
            for i, (time_, investment_, value_, target_) in enumerate(tqdm(dataloaders['valid'])):
                time_ = time_.to(device)
                investment_ = investment_.to(device)
                value_ = value_.to(device=device, dtype=torch.float)

                target_ = target_.unsqueeze(1).to(device, dtype=torch.float)
                
                y_pred = model(value_)
                
                val_loss = loss_fn(y_pred, target_)
                valid_loss += val_loss.item()
                valid_preds.extend(y_pred.detach().cpu().numpy().flatten())
                
        valid_epoch_loss = valid_loss / num_valid_examples

        # change lr
        scheduler.step(valid_epoch_loss)

        # oof
        oof = df_valid[['target']].copy()
        oof['pred'] = valid_preds
        
        score = oof['pred'].corr(oof['target'])
#         print(oof['pred'], oof['target'], y_pred)
        if score > best_score:
            print("... score ...")
            oof['best_pred'] = valid_preds
            best_preds = valid_preds.copy()
            best_score = score
        else:
            oof['best_pred'] = valid_preds

        # print score
        print(f"Epoch {e}, LR: {optimizer.param_groups[0]['lr']}")
        print(f"train loss: {train_epoch_loss:.8f}, valid loss {valid_epoch_loss:.8f}, pearson score: {score:.6f}")
        losses.append((train_epoch_loss, valid_epoch_loss))

        # save model
        if best_loss > valid_epoch_loss:
            torch.save(model.state_dict(), f'mlp_{split_m}_{fold_id}.pth')
            print(f'-- loss from {best_loss:.8f} to {valid_epoch_loss:.8f}, model saved')
            best_loss = valid_epoch_loss
#         print(oof)
        
    return losses, oof
    

In [10]:
oof_list = list()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
kfold = GroupKFold(n_splits=4)
for fold_id, (trn_idx, val_idx) in enumerate(kfold.split(train, train['target'], train['time_id'])):
    
    print(f'Training Fold: {fold_id}\n')
    
    df_train = train.iloc[trn_idx]
    df_valid = train.iloc[val_idx]
    
    train_set = UMPDataset(df_train, mode='train')
    valid_set = UMPDataset(df_valid, mode='valid')
    dataloaders = {
        'train': DataLoader(train_set, batch_size=2048, num_workers=4, pin_memory=True, shuffle=True),
        'valid': DataLoader(valid_set, batch_size=2048, num_workers=4, pin_memory=True, shuffle=False)
    }
    
    _, oof = train_one_fold(dataloaders, fold_id, split_m="time")
    print(oof.head())
    oof_list.append(oof)

Training Fold: 0

... Start Training ...


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 0, LR: 0.0005
train loss: 0.30956685, valid loss 0.20610948, pearson score: 0.877244
-- loss from inf to 0.20610948, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 1, LR: 0.0005
train loss: 0.23833852, valid loss 0.19424167, pearson score: 0.884713
-- loss from 0.20610948 to 0.19424167, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 2, LR: 0.0005
train loss: 0.22756358, valid loss 0.17921056, pearson score: 0.888403
-- loss from 0.19424167 to 0.17921056, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 3, LR: 0.0005
train loss: 0.22387008, valid loss 0.18201848, pearson score: 0.890278


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 4, LR: 0.0005
train loss: 0.21995970, valid loss 0.18042808, pearson score: 0.892715


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 5, LR: 0.0005
train loss: 0.21847531, valid loss 0.17703901, pearson score: 0.895348
-- loss from 0.17921056 to 0.17703901, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 6, LR: 0.0005
train loss: 0.21555840, valid loss 0.17671178, pearson score: 0.895777
-- loss from 0.17703901 to 0.17671178, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 7, LR: 0.0005
train loss: 0.21484672, valid loss 0.16581970, pearson score: 0.897213
-- loss from 0.17671178 to 0.16581970, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 8, LR: 0.0005
train loss: 0.21342176, valid loss 0.17107039, pearson score: 0.896465


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 9, LR: 0.0005
train loss: 0.21290955, valid loss 0.18148500, pearson score: 0.891588


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 10, LR: 0.0005
train loss: 0.21254109, valid loss 0.17225967, pearson score: 0.896499


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 11, LR: 0.000125
train loss: 0.21165323, valid loss 0.17825753, pearson score: 0.895430


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 12, LR: 0.000125
train loss: 0.19966613, valid loss 0.15742521, pearson score: 0.904022
-- loss from 0.16581970 to 0.15742521, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 13, LR: 0.000125
train loss: 0.19650046, valid loss 0.15752874, pearson score: 0.904300


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 14, LR: 0.000125
train loss: 0.19534952, valid loss 0.15735748, pearson score: 0.903742
-- loss from 0.15742521 to 0.15735748, model saved
        target      pred  best_pred
2272 -0.916992 -1.063049  -1.063049
2273 -0.472168 -0.291866  -0.291866
2274 -0.147949  0.017550   0.017550
2275 -0.372803 -1.124902  -1.124902
2276 -0.105713 -0.443645  -0.443645
Training Fold: 1

... Start Training ...


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 0, LR: 0.0005
train loss: 0.32609544, valid loss 0.23300901, pearson score: 0.872597
-- loss from inf to 0.23300901, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 1, LR: 0.0005
train loss: 0.23827618, valid loss 0.19779460, pearson score: 0.884688
-- loss from 0.23300901 to 0.19779460, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 2, LR: 0.0005
train loss: 0.22765567, valid loss 0.18866255, pearson score: 0.892657
-- loss from 0.19779460 to 0.18866255, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 3, LR: 0.0005
train loss: 0.22232280, valid loss 0.18382963, pearson score: 0.892852
-- loss from 0.18866255 to 0.18382963, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 4, LR: 0.0005
train loss: 0.21952171, valid loss 0.18585734, pearson score: 0.888651


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 5, LR: 0.0005
train loss: 0.21742956, valid loss 0.17705847, pearson score: 0.891105
-- loss from 0.18382963 to 0.17705847, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 6, LR: 0.0005
train loss: 0.21542562, valid loss 0.17696086, pearson score: 0.895967
-- loss from 0.17705847 to 0.17696086, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 7, LR: 0.0005
train loss: 0.21468135, valid loss 0.17343878, pearson score: 0.895947
-- loss from 0.17696086 to 0.17343878, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 8, LR: 0.0005
train loss: 0.21345063, valid loss 0.17428467, pearson score: 0.894901


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 9, LR: 0.0005
train loss: 0.21284706, valid loss 0.17240706, pearson score: 0.895804
-- loss from 0.17343878 to 0.17240706, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 10, LR: 0.0005
train loss: 0.21147813, valid loss 0.19513087, pearson score: 0.892821


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 11, LR: 0.0005
train loss: 0.21107254, valid loss 0.17374947, pearson score: 0.895932


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 12, LR: 0.0005
train loss: 0.21031129, valid loss 0.17993925, pearson score: 0.893857


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 13, LR: 0.000125
train loss: 0.20934953, valid loss 0.18956832, pearson score: 0.889516


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 14, LR: 0.000125
train loss: 0.19861882, valid loss 0.16132390, pearson score: 0.903794
-- loss from 0.17240706 to 0.16132390, model saved
         target      pred  best_pred
22734 -0.661133 -0.720631  -0.720631
22735 -0.659668 -0.707262  -0.707262
22736  1.889648  1.414647   1.414647
22737 -0.303711 -0.460086  -0.460086
22738 -0.548828 -0.432749  -0.432749
Training Fold: 2

... Start Training ...


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 0, LR: 0.0005
train loss: 0.31413209, valid loss 0.21221724, pearson score: 0.874094
-- loss from inf to 0.21221724, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 1, LR: 0.0005
train loss: 0.23783892, valid loss 0.19684964, pearson score: 0.883283
-- loss from 0.21221724 to 0.19684964, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 2, LR: 0.0005
train loss: 0.22799406, valid loss 0.18804574, pearson score: 0.884098
-- loss from 0.19684964 to 0.18804574, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 3, LR: 0.0005
train loss: 0.22327022, valid loss 0.18135725, pearson score: 0.889727
-- loss from 0.18804574 to 0.18135725, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 4, LR: 0.0005
train loss: 0.21909574, valid loss 0.18508110, pearson score: 0.889540


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 5, LR: 0.0005
train loss: 0.21748054, valid loss 0.18610919, pearson score: 0.885014


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 6, LR: 0.0005
train loss: 0.21569821, valid loss 0.18293298, pearson score: 0.890529


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 7, LR: 0.0005
train loss: 0.21382074, valid loss 0.17768625, pearson score: 0.891978
-- loss from 0.18135725 to 0.17768625, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 8, LR: 0.0005
train loss: 0.21277639, valid loss 0.17716255, pearson score: 0.891232
-- loss from 0.17768625 to 0.17716255, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 9, LR: 0.0005
train loss: 0.21183783, valid loss 0.17343764, pearson score: 0.895075
-- loss from 0.17716255 to 0.17343764, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 10, LR: 0.0005
train loss: 0.21126209, valid loss 0.17858136, pearson score: 0.890268


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 11, LR: 0.0005
train loss: 0.21044006, valid loss 0.17499358, pearson score: 0.894274


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 12, LR: 0.0005
train loss: 0.21036074, valid loss 0.18079281, pearson score: 0.892665


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 13, LR: 0.000125
train loss: 0.20941510, valid loss 0.17469259, pearson score: 0.893098


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 14, LR: 0.000125
train loss: 0.19845334, valid loss 0.16804005, pearson score: 0.900942
-- loss from 0.17343764 to 0.16804005, model saved
         target      pred  best_pred
18211 -1.381836 -1.257978  -1.257978
18212 -0.644531 -0.743505  -0.743505
18213 -0.864746 -0.687211  -0.687211
18214  0.318359  0.186217   0.186217
18215  0.019196  0.001116   0.001116
Training Fold: 3

... Start Training ...


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 0, LR: 0.0005
train loss: 0.32795634, valid loss 0.21897918, pearson score: 0.875668
-- loss from inf to 0.21897918, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 1, LR: 0.0005
train loss: 0.24023919, valid loss 0.19851246, pearson score: 0.882999
-- loss from 0.21897918 to 0.19851246, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 2, LR: 0.0005
train loss: 0.22857682, valid loss 0.18618821, pearson score: 0.886803
-- loss from 0.19851246 to 0.18618821, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 3, LR: 0.0005
train loss: 0.22395095, valid loss 0.18441831, pearson score: 0.890708
-- loss from 0.18618821 to 0.18441831, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 4, LR: 0.0005
train loss: 0.22099055, valid loss 0.17767731, pearson score: 0.888822
-- loss from 0.18441831 to 0.17767731, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 5, LR: 0.0005
train loss: 0.21781250, valid loss 0.17437187, pearson score: 0.892294
-- loss from 0.17767731 to 0.17437187, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 6, LR: 0.0005
train loss: 0.21632709, valid loss 0.17396457, pearson score: 0.892859
-- loss from 0.17437187 to 0.17396457, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 7, LR: 0.0005
train loss: 0.21448275, valid loss 0.17101244, pearson score: 0.895661
-- loss from 0.17396457 to 0.17101244, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 8, LR: 0.0005
train loss: 0.21405408, valid loss 0.17459220, pearson score: 0.894091


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 9, LR: 0.0005
train loss: 0.21236263, valid loss 0.18306138, pearson score: 0.893309


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 10, LR: 0.0005
train loss: 0.21093879, valid loss 0.18205640, pearson score: 0.891706


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

Epoch 11, LR: 0.000125
train loss: 0.21030306, valid loss 0.17331391, pearson score: 0.891768


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 12, LR: 0.000125
train loss: 0.19890889, valid loss 0.16292669, pearson score: 0.901685
-- loss from 0.17101244 to 0.16292669, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 13, LR: 0.000125
train loss: 0.19600319, valid loss 0.16153276, pearson score: 0.901985
-- loss from 0.16292669 to 0.16153276, model saved


  0%|          | 0/1151 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

... score ...
Epoch 14, LR: 0.000125
train loss: 0.19563610, valid loss 0.15990516, pearson score: 0.902657
-- loss from 0.16153276 to 0.15990516, model saved
     target      pred  best_pred
0 -0.300781 -0.056666  -0.056666
1 -0.231079 -0.144709  -0.144709
2  0.568848 -0.604352  -0.604352
3 -1.064453 -0.654827  -0.654827
4 -0.531738 -0.152915  -0.152915


In [14]:
oof = pd.concat(oof_list)
print('oof pearson score:', oof['pred'].corr(oof['target']))
print('oof pearson score:', oof['best_pred'].corr(oof['target']))

oof pearson score: 0.9013074423511269
oof pearson score: 0.9013074423511269


In [None]:
y = a x1 + b x2 + c 
a+b = 1



In [11]:
len(train['time_id'].unique())/244

4.963114754098361

In [12]:
c = train.groupby("investment_id")['target']

In [13]:
c

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7f3a2309e8e0>