In [10]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
# import lightgbm as lgb

import torch
from torch import nn
from torch.utils.data import DataLoader,TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
#  site-packages/scipy/sparse/linalg/eigen -> site-packages/scipy/sparse/linalg/eigenDeprecated
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [2]:
# !pip install -U pytorch_lightning
# !pip install --upgrade pip

Defaulting to user installation because normal site-packages is not writeable


In [2]:
df = pd.read_csv('preprocessed_df.csv')

target_name = 'Target_HBS'
meta_cols = ['val_gb_new', 'sil_2', 'WW', 'hf_score_final', 'num']

df.head()

Unnamed: 0,LS0000162,LH0000003,IE0400007,KC0300001,GRD_PD0801_000,CF0100120,LA3600001,BS0000664,AS0000144,L21213700,...,LRZ021213,Target_HBS,val_gb_new,sil_2,WW,hf_score_final,num,EH0002901_999999999,EH0614903_999999999,LHC000005_-999
0,-15200,403,2600,0,15,4,5,0,22,223,...,8300,0,2,1,1.0,656,25,1.0,0.0,0.0
1,-900,0,2400,0,12,3,4,0,22,1022,...,1700,0,2,1,1.0,679,26,0.0,0.0,0.0
2,0,0,6000,0,7,1,0,0,22,0,...,200,0,2,1,1.0,620,39,0.0,0.0,1.0
3,0,0,6000,0,7,1,0,0,22,0,...,200,0,2,1,1.0,620,40,0.0,0.0,1.0
4,-8200,0,2900,0,15,1,6,0,21,0,...,4700,0,0,1,1.0,654,47,1.0,0.0,0.0


In [3]:
dev_df = df[df.val_gb_new==0].reset_index(drop=True)
val_df = df[df.val_gb_new==1].reset_index(drop=True)
test_df = df[df.val_gb_new==2].reset_index(drop=True)

In [4]:
x_train = dev_df.drop(meta_cols, axis=1)
y_train = x_train.pop(target_name)

x_val = val_df.drop(meta_cols, axis=1)
y_val = x_val.pop(target_name)


x_test = test_df.drop(meta_cols, axis=1)
y_test = x_test.pop(target_name)

input_features = x_train.columns

x_train.head()


Unnamed: 0,LS0000162,LH0000003,IE0400007,KC0300001,GRD_PD0801_000,CF0100120,LA3600001,BS0000664,AS0000144,L21213700,...,CA0000101,SC1201005,C00000093,LC1200103,LC0000601,C00000151,LRZ021213,EH0002901_999999999,EH0614903_999999999,LHC000005_-999
0,-8200,0,2900,0,15,1,6,0,21,0,...,1,781,740,2,604,740,4700,1.0,0.0,0.0
1,-15000,0,3000,0,12,1,8,0,21,202,...,2,729,45,3,806,942,8900,0.0,0.0,0.0
2,-20000,0,2500,0,4,5,2,0,21,0,...,2,802,336,2,742,347,10600,0.0,0.0,0.0
3,-3100,0,2200,0,14,3,6,0,21,0,...,2,755,147,1,830,826,2900,1.0,0.0,0.0
4,-500,0,2100,0,11,3,12,0,21,0,...,1,781,395,1,458,395,7500,0.0,0.0,0.0


In [5]:
train_tensor_dset = TensorDataset(
    torch.tensor(x_train.values, dtype=torch.float),
    torch.tensor(y_train.values.reshape(-1,1), dtype=torch.float)
)

valid_tensor_dset = TensorDataset(
    torch.tensor(x_val.values, dtype=torch.float),
    torch.tensor(y_val.values.reshape(-1,1), dtype=torch.float)
)

test_tensor_dset = TensorDataset(
    torch.tensor(x_test.values, dtype=torch.float),
    torch.tensor(y_test.values.reshape(-1,1), dtype=torch.float) 
)

In [6]:
class SoftOrdering1DCNN(pl.LightningModule):

    def __init__(self, input_dim, output_dim, sign_size=32, cha_input=16, cha_hidden=32, 
                 K=2, dropout_input=0.2, dropout_hidden=0.2, dropout_output=0.2):
        super().__init__()

        hidden_size = sign_size*cha_input
        sign_size1 = sign_size
        sign_size2 = sign_size//2
        output_size = (sign_size//4) * cha_hidden

        self.hidden_size = hidden_size
        self.cha_input = cha_input
        self.cha_hidden = cha_hidden
        self.K = K
        self.sign_size1 = sign_size1
        self.sign_size2 = sign_size2
        self.output_size = output_size
        self.dropout_input = dropout_input
        self.dropout_hidden = dropout_hidden
        self.dropout_output = dropout_output

        self.batch_norm1 = nn.BatchNorm1d(input_dim)
        self.dropout1 = nn.Dropout(dropout_input)
        dense1 = nn.Linear(input_dim, hidden_size, bias=False)
        self.dense1 = nn.utils.weight_norm(dense1)

        # 1st conv layer
        self.batch_norm_c1 = nn.BatchNorm1d(cha_input)
        conv1 = conv1 = nn.Conv1d(
            cha_input, 
            cha_input*K, 
            kernel_size=5, 
            stride = 1, 
            padding=2,  
            groups=cha_input, 
            bias=False)
        self.conv1 = nn.utils.weight_norm(conv1, dim=None)

        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size = sign_size2)

        # 2nd conv layer
        self.batch_norm_c2 = nn.BatchNorm1d(cha_input*K)
        self.dropout_c2 = nn.Dropout(dropout_hidden)
        conv2 = nn.Conv1d(
            cha_input*K, 
            cha_hidden, 
            kernel_size=3, 
            stride=1, 
            padding=1, 
            bias=False)
        self.conv2 = nn.utils.weight_norm(conv2, dim=None)

        # 3rd conv layer
        self.batch_norm_c3 = nn.BatchNorm1d(cha_hidden)
        self.dropout_c3 = nn.Dropout(dropout_hidden)
        conv3 = nn.Conv1d(
            cha_hidden, 
            cha_hidden, 
            kernel_size=3, 
            stride=1, 
            padding=1, 
            bias=False)
        self.conv3 = nn.utils.weight_norm(conv3, dim=None)
        

        # 4th conv layer
        self.batch_norm_c4 = nn.BatchNorm1d(cha_hidden)
        conv4 = nn.Conv1d(
            cha_hidden, 
            cha_hidden, 
            kernel_size=5, 
            stride=1, 
            padding=2, 
            groups=cha_hidden, 
            bias=False)
        self.conv4 = nn.utils.weight_norm(conv4, dim=None)

        self.avg_po_c4 = nn.AvgPool1d(kernel_size=4, stride=2, padding=1)

        self.flt = nn.Flatten()

        self.batch_norm2 = nn.BatchNorm1d(output_size)
        self.dropout2 = nn.Dropout(dropout_output)
        dense2 = nn.Linear(output_size, output_dim, bias=False)
        self.dense2 = nn.utils.weight_norm(dense2)

        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = nn.functional.celu(self.dense1(x))

        x = x.reshape(x.shape[0], self.cha_input, self.sign_size1)

        x = self.batch_norm_c1(x)
        x = nn.functional.relu(self.conv1(x))

        x = self.ave_po_c1(x)

        x = self.batch_norm_c2(x)
        x = self.dropout_c2(x)
        x = nn.functional.relu(self.conv2(x))
        x_s = x

        x = self.batch_norm_c3(x)
        x = self.dropout_c3(x)
        x = nn.functional.relu(self.conv3(x))

        x = self.batch_norm_c4(x)
        x = self.conv4(x)
        x =  x + x_s
        x = nn.functional.relu(x)

        x = self.avg_po_c4(x)

        x = self.flt(x)

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.dense2(x)

        return x

    def training_step(self, batch, batch_idx):
        X, y = batch
        y_hat = self.forward(X)
        loss = self.loss(y_hat, y)
        self.log('train_loss', loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        X, y = batch
        y_hat = self.forward(X)
        loss = self.loss(y_hat, y)
        self.log('valid_loss', loss)
        
    def test_step(self, batch, batch_idx):
        X, y = batch
        y_logit = self.forward(X)
        y_probs = torch.sigmoid(y_logit).detach().cpu().numpy()
        loss = self.loss(y_logit, y)
        metric = roc_auc_score(y.cpu().numpy(), y_probs)
        self.log('test_loss', loss)
        self.log('test_metric', metric)
        
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=1e-2, momentum=0.9)
        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer, 
                mode="min", 
                factor=0.5, 
                patience=5, 
                min_lr=1e-5),
            'interval': 'epoch',
            'frequency': 1,
            'reduce_on_plateau': True,
            'monitor': 'valid_loss',
        }
        return [optimizer], [scheduler]

In [7]:
model = SoftOrdering1DCNN(
    input_dim=len(input_features), 
    output_dim=1, 
    sign_size=16, 
    cha_input=64, 
    cha_hidden=64, 
    K=2, 
    dropout_input=0.3, 
    dropout_hidden=0.3, 
    dropout_output=0.2
)

early_stop_callback = EarlyStopping(
   monitor='valid_loss',
   min_delta=.0,
   patience=21,
   verbose=True,
   mode='min'
)

trainer = pl.Trainer(accelerator='cpu', devices=1, callbacks=[early_stop_callback], min_epochs=10, max_epochs=200, gpus=1)

  rank_zero_deprecation(
  rank_zero_warn(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [8]:
trainer.fit(
    model, 
    DataLoader(train_tensor_dset, batch_size=2048, shuffle=True, num_workers=4),
    DataLoader(valid_tensor_dset, batch_size=2048, shuffle=False, num_workers=4)
)


   | Name          | Type              | Params
-----------------------------------------------------
0  | batch_norm1   | BatchNorm1d       | 112   
1  | dropout1      | Dropout           | 0     
2  | dense1        | Linear            | 58.4 K
3  | batch_norm_c1 | BatchNorm1d       | 128   
4  | conv1         | Conv1d            | 641   
5  | ave_po_c1     | AdaptiveAvgPool1d | 0     
6  | batch_norm_c2 | BatchNorm1d       | 256   
7  | dropout_c2    | Dropout           | 0     
8  | conv2         | Conv1d            | 24.6 K
9  | batch_norm_c3 | BatchNorm1d       | 128   
10 | dropout_c3    | Dropout           | 0     
11 | conv3         | Conv1d            | 12.3 K
12 | batch_norm_c4 | BatchNorm1d       | 128   
13 | conv4         | Conv1d            | 321   
14 | avg_po_c4     | AvgPool1d         | 0     
15 | flt           | Flatten           | 0     
16 | batch_norm2   | BatchNorm1d       | 512   
17 | dropout2      | Dropout           | 0     
18 | dense2        | Linear      

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved. New best score: 0.067


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.066


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.066


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.065


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.064


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.063


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric valid_loss did not improve in the last 21 records. Best score: 0.063. Signaling Trainer to stop.


In [9]:
# torch.tensor(x_val.values, dtype=torch.float).to('cuda')
# pred_val = model.forward(torch.Tensor(x_train.values[1:3,:]))
# pred_val
# torch.Tensor(x_train.values[1,:])
# x_train.values[1,:]

# x_train.values[1:3,:].shape
def model_predict_prob(model, dset, batch_size=2048):
    for idx, (x, y) in enumerate(DataLoader(dset, batch_size=batch_size, shuffle=False)):
        if idx == 0: 
            pred_dev = model.forward(x)
        else:
            pred_dev = torch.cat([pred_dev, model.forward(x)], dim = 0)
           
    pred_dev = pred_dev.squeeze(1).detach().numpy()
    pred_dev = np.exp(pred_dev)/(np.exp(pred_dev)+1)
    return pred_dev


In [12]:


pred_dev = model.forward(torch.Tensor(x_train.values))
pred_dev = pred_dev.squeeze(1).detach().numpy()
pred_dev = np.exp(pred_dev)/(np.exp(pred_dev)+1)


pred_val = model.to('cpu').forward(torch.Tensor(x_val.values))
pred_val = pred_val.squeeze(1).detach().numpy()
pred_val = np.exp(pred_val)/(np.exp(pred_val)+1)


pred_test = model.forward(torch.Tensor(x_test.values))
pred_test = pred_test.squeeze(1).detach().numpy()
pred_test = np.exp(pred_test)/(np.exp(pred_test)+1)

# pred_dev = model_predict_prob(model, train_tensor_dset)
# pred_val = model_predict_prob(model, valid_tensor_dset)
# pred_test = model_predict_prob(model, test_tensor_dset)


In [13]:
from scipy.stats import ks_2samp
from sklearn.metrics import make_scorer, roc_auc_score, log_loss
from sklearn.model_selection import GridSearchCV

def ks_stat(y, yhat):
    return ks_2samp(yhat[y==1], yhat[y!=1]).statistic

In [14]:
ks_dev = ks_stat(y_train, pred_dev)
ks_val = ks_stat(y_val, pred_val)
ks_test = ks_stat(y_test, pred_test)

In [16]:
print(ks_dev)
print(ks_val)
print(ks_test)

0.4749712932937272
0.32914394874808595
0.4930307929573152


In [70]:
class DNN(pl.LightningModule):

    def __init__(self, input_dim, output_dim, nn_depth, nn_width, dropout, momentum):
        super().__init__()

        self.bn_in = nn.BatchNorm1d(input_dim, momentum=momentum)
        self.dp_in = nn.Dropout(dropout)
        self.ln_in = nn.Linear(input_dim, nn_width, bias=False)

        self.bnorms = nn.ModuleList([nn.BatchNorm1d(nn_width, momentum=momentum) for i in range(nn_depth-1)])
        self.dropouts = nn.ModuleList([nn.Dropout(dropout) for i in range(nn_depth-1)])
        self.linears = nn.ModuleList([nn.Linear(nn_width, nn_width, bias=False) for i in range(nn_depth-1)])
        
        self.bn_out = nn.BatchNorm1d(nn_width, momentum=momentum)
        self.dp_out = nn.Dropout(dropout/2)
        self.ln_out = nn.Linear(nn_width, output_dim, bias=False)

        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, x):
        x = self.bn_in(x)
        x = self.dp_in(x)
        x = nn.functional.relu(self.ln_in(x))

        for bn_layer,dp_layer,ln_layer in zip(self.bnorms,self.dropouts,self.linears):
            x = bn_layer(x)
            x = dp_layer(x)
            x = ln_layer(x)
            x = nn.functional.relu(x)
            
        x = self.bn_out(x)
        x = self.dp_out(x)
        x = self.ln_out(x)
        return x

    def training_step(self, batch, batch_idx):
        X, y = batch
        y_hat = self.forward(X)
        loss = self.loss(y_hat, y)
        self.log('train_loss', loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        X, y = batch
        y_hat = self.forward(X)
        loss = self.loss(y_hat, y)
        self.log('valid_loss', loss)
        
    def test_step(self, batch, batch_idx):
        X, y = batch
        y_logit = self.forward(X)
        y_probs = torch.sigmoid(y_logit).detach().cpu().numpy()
        loss = self.loss(y_logit, y)
        metric = roc_auc_score(y.cpu().numpy(), y_probs)
        self.log('test_loss', loss)
        self.log('test_metric', metric)
        
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=1e-2, momentum=0.9)
        scheduler = {
            'scheduler': ReduceLROnPlateau(
                optimizer, 
                mode="min", 
                factor=0.5, 
                patience=5, 
                min_lr=1e-5),
            'interval': 'epoch',
            'frequency': 1,
            'reduce_on_plateau': True,
            'monitor': 'valid_loss',
        }
        return [optimizer], [scheduler]

In [71]:
model = DNN(
    input_dim=len(input_features), 
    output_dim=1, 
    nn_depth=3, 
    nn_width=256, 
    dropout=0.2, 
    momentum=0.1
)

early_stop_callback = EarlyStopping(
   monitor='valid_loss',
   min_delta=.0,
   patience=20,
   verbose=True,
   mode='min'
)

trainer = pl.Trainer(accelerator='gpu', devices=1, callbacks=[early_stop_callback], min_epochs=10, max_epochs=200, gpus=1)

  rank_zero_deprecation(
  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [72]:
trainer.fit(
    model, 
    DataLoader(train_tensor_dset, batch_size=2048, shuffle=True, num_workers=4),
    DataLoader(valid_tensor_dset, batch_size=2048, shuffle=False, num_workers=4)
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type              | Params
-----------------------------------------------
0 | bn_in    | BatchNorm1d       | 190   
1 | dp_in    | Dropout           | 0     
2 | ln_in    | Linear            | 24.3 K
3 | bnorms   | ModuleList        | 1.0 K 
4 | dropouts | ModuleList        | 0     
5 | linears  | ModuleList        | 131 K 
6 | bn_out   | BatchNorm1d       | 512   
7 | dp_out   | Dropout           | 0     
8 | ln_out   | Linear            | 256   
9 | loss     | BCEWithLogitsLoss | 0     
-----------------------------------------------
157 K     Trainable params
0         Non-trainable params
157 K     Total params
0.629     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved. New best score: 0.519


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.048 >= min_delta = 0.0. New best score: 0.471


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.028 >= min_delta = 0.0. New best score: 0.443


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.416


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.401


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.393


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.388


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.386


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.384


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.382


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.382


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.381


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.380


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.379


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.378


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.378


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.378


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.377


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.377


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.376


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.375


Validation: 0it [00:00, ?it/s]

Metric valid_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.375


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric valid_loss did not improve in the last 20 records. Best score: 0.375. Signaling Trainer to stop.


In [73]:
# pred_val = model.forward(valid_tensor_dset)
pred_dev = model.forward(torch.Tensor(x_train.values))
pred_dev = pred_dev.squeeze(1).detach().numpy()
pred_dev = np.exp(pred_dev)/(np.exp(pred_dev)+1)


pred_val = model.forward(torch.Tensor(x_val.values))
pred_val = pred_val.squeeze(1).detach().numpy()
pred_val = np.exp(pred_val)/(np.exp(pred_val)+1)


pred_test = model.forward(torch.Tensor(x_test.values))
pred_test = pred_test.squeeze(1).detach().numpy()
pred_test = np.exp(pred_test)/(np.exp(pred_test)+1)


In [74]:
ks_dev = ks_stat(y_train, pred_dev)
ks_val = ks_stat(y_val, pred_val)
ks_test = ks_stat(y_test, pred_test)

In [75]:
print(ks_dev)
print(ks_val)
print(ks_test)

0.5256799627731219
0.4410318471177934
0.3699879163752162


In [None]:
0.5634036978308838
0.48823018504250826
0.4254748562785518