In [1]:
import os
import gc
import warnings
os.environ['OPENBLAS_NUM_THREADS'] = '1'
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
# import bisect
import pickle
import torch
from trx_encoder import TrxEncoder
from ptls.nn import LongformerEncoder, TransformerSeqEncoder, LongformerSeqEncoder, TabFormerFeatureEncoder, TransformerEncoder, RnnSeqEncoder, PBLinear, PBL2Norm, PBLayerNorm, Head
from ptls.frames.bert import MLMPretrainModule
from ptls.frames.tabformer.tabformer_module import TabformerPretrainModule
from ptls.data_load.datasets import MemoryMapDataset
from ptls.data_load.iterable_processing import SeqLenFilter, FeatureFilter
from ptls.frames.supervised import SeqToTargetDataset, SequenceToTarget
from ptls.frames import PtlsDataModule
import torchmetrics
from functools import partial
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer, seed_everything
import pytorch_lightning as pl
import logging
from longformer import MLMCPCPretrainModule
from ptls.nn import PBLinear, PBL2Norm, PBLayerNorm, PBDropout
from ptls.frames.supervised.seq_to_target_dataset import SeqToTargetIterableDataset
from ptls.data_load.datasets import inference_data_loader
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import math

In [2]:
!nvidia-smi

Sun Mar 26 22:14:36 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  Off |
|  0%   44C    P5    38W / 450W |    662MiB / 24564MiB |      8%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
def sigmoid(x):
    return 1 / (1 + math.exp(-x))


def get_dataset(data):
    return SeqToTargetDataset(
        MemoryMapDataset(
            data=data,
            i_filters=[
                FeatureFilter(keep_feature_names='target'),
            ],
        ),
        target_col_name='target',
    )


In [4]:
EMB_SIZE = 128
PB_LINEAR = 32
HIDDEN_SIZE = 64
BATCH_SIZE = 32
NUM_EPOCHS = 4
LR = 0.01

STEP_SIZE = 5
GAMMA = 0.9
N_SPLITS = 5
WORD_SIZE = 300
BIDIRECT = True
RNN_TYPE = 'gru' #possible: 'gru', 'lstm'
USE_TEXTS = True

DATA_PATH = 'data_nn_wt.pickle'
TARGET_PATH = "public_train.pqt"
PREPROCESSOR_PATH = 'preprocessor_nn.p'

target = pd.read_parquet(TARGET_PATH)
sb = pd.read_csv("sample_submission.csv")
target = target[(target['is_male'] != 'NA') & ~(target['is_male'].isna())] 

In [5]:
%%time
print("loading data...")
with open(DATA_PATH, 'rb') as handle:
    data = pickle.load(handle)#[:1000]
with open('words_embs.npy', 'rb') as f:
    embs = np.load(f)
print("loading data complete")

loading data...
loading data complete
CPU times: user 3min 8s, sys: 12.9 s, total: 3min 21s
Wall time: 3min 21s


In [6]:
import pickle
with open('cold_users.pickle', 'rb') as handle:
    cold_users = pickle.load(handle)
    
target = target[~target.user_id.isin(cold_users)]
print(len(target))

257535


In [7]:
train_user_ids = target["user_id"].unique()
test_user_ids = sb["user_id"].unique()

dataset_train_all = [e for e in data if e["user_id"] in train_user_ids]#[:100]
dataset_test = [e for e in data if e["user_id"] in test_user_ids]#[:10]
dataset_test = sorted(dataset_test, key=lambda x: x['user_id'])

df_target = target
df_target.set_index('user_id', inplace=True)
df_target.rename(columns={"is_male": "target"}, inplace=True)
for el in dataset_train_all:
    el['target'] = float(df_target['target'][el['user_id']])
    
del data
gc.collect()

2293

In [8]:
numeric_values={}
numeric_values['price'] = 'log'
trx_encoder_params = dict(
    # embeddings_noise=0.005,
    numeric_values=numeric_values,
    embeddings={
        'region_name': {'in': 1000, 'out': 2},
        'city_name': {'in': 10000, 'out': 3},
        'cpe_manufacturer_name': {'in': 1000, 'out': 2},
        'cpe_model_name': {'in': 1000, 'out': 2},
        'url_host': {'in': 170000, 'out': EMB_SIZE},
        'cpe_type_cd': {'in': 10, 'out': 2},
        'cpe_model_os_type': {'in': 100, 'out': 2},
        'part_of_day': {'in': 10, 'out': 2},
        'urls_topics': {'in': 300, 'out': 8},
        'request_cnt': {'in': 100, 'out': 1},
    },
)
if USE_TEXTS:
    trx_encoder_params["text_value"] = {"in": len(embs), "out": WORD_SIZE, "weight": embs, "name": "text"}
trx_encoder = TrxEncoder(**trx_encoder_params)

In [9]:
skf = StratifiedKFold(n_splits=N_SPLITS)
oof_preds = np.zeros(len(dataset_train_all))
test_preds = np.zeros(len(dataset_test))

for i, (train_index, valid_index) in enumerate(skf.split(dataset_train_all, [e['target'] for e in dataset_train_all])):
    print(f"Fold {i+1}:")
    dataset_train, dataset_valid = [dataset_train_all[i] for i in train_index], [dataset_train_all[i] for i in valid_index]
    finetune_dm = PtlsDataModule(
        train_data=get_dataset(dataset_train),
        valid_data=get_dataset(dataset_valid),
        test_data=get_dataset(dataset_test),
        train_num_workers=1,
        valid_num_workers=1,
        test_num_workers=1,
        train_batch_size=BATCH_SIZE,
        test_batch_size=BATCH_SIZE,
        valid_batch_size=BATCH_SIZE,
    )
    seed_everything(42, workers=True)
    downstream_model = SequenceToTarget(
        seq_encoder=RnnSeqEncoder(
            trx_encoder=torch.nn.Sequential(
                torch.nn.Sequential(
                    TrxEncoder(**trx_encoder_params),
                    PBLinear(trx_encoder.output_size + WORD_SIZE*int(USE_TEXTS == True), PB_LINEAR),
                    PBL2Norm(),
                ),
                PBLayerNorm(PB_LINEAR),
            ),
            input_size=PB_LINEAR,
            hidden_size=HIDDEN_SIZE,
            bidir=BIDIRECT,
            type=RNN_TYPE,
        ),
        head=torch.nn.Sequential(
                torch.nn.Linear(HIDDEN_SIZE*(1 + int(BIDIRECT == True)), 1),
                #torch.nn.Sigmoid(),
                torch.nn.Flatten(start_dim=0),
        ),
        loss=torch.nn.BCEWithLogitsLoss(),
        metric_list=torchmetrics.AUROC(num_classes=2, task='binary'),
        pretrained_lr=0.005,
        optimizer_partial=partial(torch.optim.Adam, lr=LR),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=1, gamma=0.1),
    )

    trainer_ft = pl.Trainer(
        max_epochs=NUM_EPOCHS,
        devices=1, accelerator="gpu",
        enable_progress_bar=True,
        gradient_clip_val=1000,
        gradient_clip_algorithm="norm",
        callbacks=[
            pl.callbacks.LearningRateMonitor(logging_interval='step'),
                    pl.callbacks.ModelCheckpoint(monitor="val_BinaryAUROC",
                                         mode="max",
                                         save_top_k=1),
        ]
    )
    print(f'logger.version = {trainer_ft.logger.version}')
    trainer_ft.fit(downstream_model, finetune_dm)
    print(trainer_ft.logged_metrics)

    trainer_ft.test(dataloaders=finetune_dm.val_dataloader(), verbose=True)

    valid_dl = inference_data_loader(dataset_valid, num_workers=1, batch_size= BATCH_SIZE)
    valid_preds = np.array([float(e) for e in torch.hstack(trainer_ft.predict(downstream_model, valid_dl))])
    oof_preds[valid_index] = valid_preds

    test_dl = inference_data_loader(dataset_test, num_workers=1, batch_size= BATCH_SIZE)
    test_preds += np.array([float(e) for e in torch.hstack(trainer_ft.predict(downstream_model, test_dl))])/N_SPLITS

Fold 1:


Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


logger.version = 47


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder     | 43.5 M
1 | head          | Sequential        | 65    
2 | loss          | BCEWithLogitsLoss | 0     
3 | train_metrics | ModuleDict        | 0     
4 | valid_metrics | ModuleDict        | 0     
5 | test_metrics  | ModuleDict        | 0     
----------------------------------------------------
21.8 M    Trainable params
21.6 M    Non-trainable params
43.5 M    Total params
173.871   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_47/checkpoints/epoch=3-step=25756.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_47/checkpoints/epoch=3-step=25756.ckpt


{'loss': tensor(0.5231), 'seq_len': tensor(398.1667), 'y': tensor(0.4167), 'val_loss': tensor(0.4380), 'val_BinaryAUROC': tensor(0.8794), 'train_BinaryAUROC': tensor(0.9015)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    test_BinaryAUROC        0.8793988823890686
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

Fold 2:


Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder     | 43.5 M
1 | head          | Sequential        | 65    
2 | loss          | BCEWithLogitsLoss | 0     
3 | train_metrics | ModuleDict        | 0     
4 | valid_metrics | ModuleDict        | 0     
5 | test_metrics  | ModuleDict        | 0     
----------------------------------

logger.version = 48


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_48/checkpoints/epoch=1-step=12878.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_48/checkpoints/epoch=1-step=12878.ckpt


{'loss': tensor(0.6352), 'seq_len': tensor(292.5000), 'y': tensor(0.6667), 'val_loss': tensor(0.4362), 'val_BinaryAUROC': tensor(0.8804), 'train_BinaryAUROC': tensor(0.9015)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    test_BinaryAUROC        0.8805657029151917
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

Fold 3:


Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder     | 43.5 M
1 | head          | Sequential        | 65    
2 | loss          | BCEWithLogitsLoss | 0     
3 | train_metrics | ModuleDict        | 0     
4 | valid_metrics | ModuleDict        | 0     
5 | test_metrics  | ModuleDict        | 0     
----------------------------------

logger.version = 49


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_49/checkpoints/epoch=1-step=12878.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_49/checkpoints/epoch=1-step=12878.ckpt


{'loss': tensor(0.6385), 'seq_len': tensor(312.1667), 'y': tensor(0.6667), 'val_loss': tensor(0.4402), 'val_BinaryAUROC': tensor(0.8780), 'train_BinaryAUROC': tensor(0.9021)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    test_BinaryAUROC         0.878201425075531
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

Fold 4:


Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder     | 43.5 M
1 | head          | Sequential        | 65    
2 | loss          | BCEWithLogitsLoss | 0     
3 | train_metrics | ModuleDict        | 0     
4 | valid_metrics | ModuleDict        | 0     
5 | test_metrics  | ModuleDict        | 0     
----------------------------------

logger.version = 50


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_50/checkpoints/epoch=3-step=25756.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_50/checkpoints/epoch=3-step=25756.ckpt


{'loss': tensor(0.5929), 'seq_len': tensor(366.5833), 'y': tensor(0.6667), 'val_loss': tensor(0.4398), 'val_BinaryAUROC': tensor(0.8785), 'train_BinaryAUROC': tensor(0.9024)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    test_BinaryAUROC        0.8785447478294373
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

Fold 5:


Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder     | 43.5 M
1 | head          | Sequential        | 65    
2 | loss          | BCEWithLogitsLoss | 0     
3 | train_metrics | ModuleDict        | 0     
4 | valid_metrics | ModuleDict        | 0     
5 | test_metrics  | ModuleDict        | 0     
----------------------------------

logger.version = 51


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_51/checkpoints/epoch=0-step=6439.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_51/checkpoints/epoch=0-step=6439.ckpt


{'loss': tensor(nan), 'seq_len': tensor(306.1667), 'y': tensor(0.6667), 'val_loss': tensor(nan), 'val_BinaryAUROC': tensor(0.5044), 'train_BinaryAUROC': tensor(0.4995)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    test_BinaryAUROC        0.5043914914131165
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6439it [00:00, ?it/s]

In [10]:
oof_preds1 = [sigmoid(e) for e in oof_preds]

In [11]:
print(f"oof roc auc: {roc_auc_score([e['target'] for e in dataset_train_all], oof_preds1)}") #876

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
pred_test_nn = pd.DataFrame()
pred_test_nn["is_male"] = test_preds
pred_test_nn['is_male'] = pred_test_nn['is_male'].apply(lambda x: sigmoid(x))
pred_test_nn["user_id"] = [e["user_id"] for e in dataset_test]
pred_test_nn.sort_values("user_id", inplace=True)
pred_test_nn.to_csv("ismale_rnn_test.csv", index=False)

In [None]:
pred_oof_nn = pd.DataFrame()
pred_oof_nn["is_male"] = oof_preds1
pred_oof_nn["user_id"] = [e["user_id"] for e in dataset_train_all]
pred_oof_nn.sort_values("user_id", inplace=True)
pred_oof_nn.to_csv("ismale_rnn_oof.csv", index=False)