In [1]:
import os
import gc
import warnings
os.environ['OPENBLAS_NUM_THREADS'] = '1'
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
# import bisect
import pickle
import torch
from trx_encoder import TrxEncoder
from ptls.nn import LongformerEncoder, TransformerSeqEncoder, LongformerSeqEncoder, TabFormerFeatureEncoder, TransformerEncoder, RnnSeqEncoder, PBLinear, PBL2Norm, PBLayerNorm, Head
from ptls.frames.bert import MLMPretrainModule
from ptls.frames.tabformer.tabformer_module import TabformerPretrainModule
from ptls.data_load.datasets import MemoryMapDataset
from ptls.data_load.iterable_processing import SeqLenFilter, FeatureFilter
from ptls.frames.supervised import SeqToTargetDataset, SequenceToTarget
from ptls.frames import PtlsDataModule
import torchmetrics
from functools import partial
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer, seed_everything
import pytorch_lightning as pl
import logging
from longformer import MLMCPCPretrainModule
from ptls.nn import PBLinear, PBL2Norm, PBLayerNorm, PBDropout
from ptls.frames.supervised.seq_to_target_dataset import SeqToTargetIterableDataset
from ptls.data_load.datasets import inference_data_loader
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import math

In [2]:
# !pip install git+https://github.com/livington/pytorch-lifestream.git@main

In [3]:
from ptls.data_load.iterable_processing_dataset import IterableProcessingDataset
from ptls.data_load.augmentations.seq_len_limit import SeqLenLimit


class ISeqLenLimit(IterableProcessingDataset):
    """Iterable-pipeline adapter around the ``SeqLenLimit`` augmentation.

    Every record handed to :meth:`process` is passed through a ``SeqLenLimit``
    instance that is built once from the constructor arguments.
    """

    def __init__(self, max_seq_len, strategy='tail'):
        """Create the wrapped ``SeqLenLimit``.

        Args:
            max_seq_len: forwarded as the first argument of ``SeqLenLimit``.
            strategy: forwarded as the second argument of ``SeqLenLimit``
                (defaults to ``'tail'``).
        """
        super().__init__()

        limiter = SeqLenLimit(max_seq_len, strategy)
        self.proc = limiter

    def process(self, features):
        """Return ``features`` after applying the wrapped limiter to it."""
        limiter = self.proc
        return limiter(features)

In [4]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar ``x``.

    The computation is split by sign so that ``math.exp`` is only ever called
    with a non-positive argument: the naive formula raises ``OverflowError``
    for large negative ``x`` (``math.exp(-x)`` overflows), whereas this form
    underflows gracefully to ``0.0``.

    Args:
        x: real number.

    Returns:
        float in the closed interval ``[0.0, 1.0]``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x use the algebraically equal form e**x / (1 + e**x).
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


def get_dataset(data, max_seq_len=1024):
    """Wrap a list of per-user records into a supervised ``SeqToTargetDataset``.

    The records are first placed in a ``MemoryMapDataset`` with two iterable
    filters: a ``FeatureFilter`` configured with ``keep_feature_names='target'``
    and an ``ISeqLenLimit`` that limits each sequence to ``max_seq_len`` events
    (default strategy of ``ISeqLenLimit``, i.e. ``'tail'``).

    Args:
        data: iterable of record dicts accepted by ``MemoryMapDataset``.
        max_seq_len: sequence length limit passed to ``ISeqLenLimit``.
            Defaults to 1024, the value previously hard-coded here
            (other values tried by the author: 512, 256, 8).

    Returns:
        ``SeqToTargetDataset`` reading the label from the ``'target'`` field.
    """
    limited_records = MemoryMapDataset(
        data=data,
        i_filters=[
            FeatureFilter(keep_feature_names='target'),
            ISeqLenLimit(max_seq_len),
        ],
    )
    return SeqToTargetDataset(
        limited_records,
        target_col_name='target',
    )

In [5]:
# --- Model / training hyper-parameters -------------------------------------
# Output width of the 'url_host' embedding in the TrxEncoder configuration.
EMB_SIZE = 128
# Output width of the PBLinear projection; also the RNN input size.
PB_LINEAR = 32
# Hidden size of the RNN sequence encoder.
HIDDEN_SIZE = 64
# Batch size shared by the train/valid/test loaders and the inference loaders.
BATCH_SIZE = 32
# `max_epochs` given to the Lightning Trainer for every fold.
NUM_EPOCHS = 4
# Learning rate given to the Adam optimizer partial.
LR = 0.01

# NOTE(review): STEP_SIZE and GAMMA are not referenced in the visible cells;
# the StepLR scheduler in the training cell hard-codes step_size=1, gamma=0.1.
STEP_SIZE = 5
GAMMA = 0.9
# Number of StratifiedKFold splits (also the divisor used when averaging test predictions).
N_SPLITS = 5
# Output width of the text embedding ("text_value" entry of the TrxEncoder config).
WORD_SIZE = 300
# Whether the RNN is bidirectional; doubles the input width of the classification head.
BIDIRECT = True
RNN_TYPE = 'lstm' # RNN cell type; possible values: 'gru', 'lstm'
# When true, the pre-computed word embeddings are added to the TrxEncoder config.
USE_TEXTS = True

# --- Input files -------------------------------------------------------------
# Pickled list of per-user records loaded in the data-loading cell.
DATA_PATH = 'data_nn_wt.pickle'
# NOTE(review): TARGET_PATH and PREPROCESSOR_PATH are not referenced in the
# visible cells (labels are read from "tr.csv" instead) — confirm they are still needed.
TARGET_PATH = "public_train.pqt"
PREPROCESSOR_PATH = 'preprocessor_nn.p'

In [6]:
# Training labels: only the user id and the age column are read from the CSV,
# and every age value is shifted up by one before it is used as the label.
target = pd.read_csv("tr.csv", usecols=['user_id', 'age']).assign(
    age=lambda frame: frame['age'] + 1
)
# Submission template; its user ids define the set of users to predict for.
sb = pd.read_csv("sample_submission.csv")

In [7]:
%%time
print("loading data...")
with open(DATA_PATH, 'rb') as handle:
    data = pickle.load(handle)#[:1000]
with open('words_embs.npy', 'rb') as f:
    embs = np.load(f)
print("loading data complete")

loading data...
loading data complete
CPU times: user 3min 10s, sys: 13.5 s, total: 3min 24s
Wall time: 3min 24s


In [8]:
import pickle

# Read the collection of "cold" user ids that must be excluded from training.
# NOTE: pickle.load executes arbitrary code from the file — only use trusted files.
with open('cold_users.pickle', 'rb') as cold_users_file:
    cold_users = pickle.load(cold_users_file)

# Keep only the label rows whose user is not listed as cold.
is_cold_user = target['user_id'].isin(cold_users)
target = target[~is_cold_user]
print(len(target))

263065


In [9]:
# Split the loaded records into labelled training users and submission users,
# then attach the integer label to every training record.
train_user_ids = target["user_id"].unique()
test_user_ids = sb["user_id"].unique()

# `x in ndarray` is a full linear scan, which made the comprehensions below
# quadratic in the number of users. Sets give O(1) membership instead.
# Assumes user ids are hashable scalars (they are later used as index labels
# and sort keys) — TODO confirm.
train_user_id_set = set(train_user_ids.tolist())
test_user_id_set = set(test_user_ids.tolist())

dataset_train_all = [e for e in data if e["user_id"] in train_user_id_set]
# Test records are kept sorted by user id so that prediction rows have a stable order.
dataset_test = sorted(
    (e for e in data if e["user_id"] in test_user_id_set),
    key=lambda x: x['user_id'],
)

# Build the lookup frame without mutating `target`: the previous in-place
# set_index/rename changed `target` through an alias, so re-running this cell
# failed on `target["user_id"]`.
df_target = target.set_index('user_id').rename(columns={"age": "target"})
# A plain dict avoids a pandas label lookup per record.
target_by_user = df_target['target'].to_dict()
for el in dataset_train_all:
    el['target'] = int(target_by_user[el['user_id']])

# The full record list is no longer needed; release it before training.
del data
gc.collect()

1925

In [10]:
# Numeric event features and the scaling name applied to each of them.
numeric_values = {'price': 'log'}

# Keyword arguments for TrxEncoder. Each embedding entry gives an 'in' and an
# 'out' size (presumably vocabulary size and embedding width — confirm against
# the local trx_encoder module).
trx_encoder_params = {
    # embeddings_noise=0.005,
    'numeric_values': numeric_values,
    'embeddings': {
        'region_name': {'in': 1000, 'out': 2},
        'city_name': {'in': 10000, 'out': 3},
        'cpe_manufacturer_name': {'in': 1000, 'out': 2},
        'cpe_model_name': {'in': 1000, 'out': 2},
        'url_host': {'in': 200000, 'out': EMB_SIZE},
        'cpe_type_cd': {'in': 10, 'out': 2},
        'cpe_model_os_type': {'in': 100, 'out': 2},
        'part_of_day': {'in': 10, 'out': 2},
        'urls_topics': {'in': 300, 'out': 8},
        'request_cnt': {'in': 100, 'out': 1},
    },
}
if USE_TEXTS:
    # Optional text feature backed by the pre-computed embedding matrix `embs`.
    trx_encoder_params.update(
        text_value={"in": len(embs), "out": WORD_SIZE, "weight": embs, "name": "text"}
    )
# Reference encoder instance; the training cell reads its `output_size`.
trx_encoder = TrxEncoder(**trx_encoder_params)

In [11]:
# Stratified K-fold training of the RNN classifier.
# For every fold: train, evaluate on the held-out part, store out-of-fold
# probabilities for the held-out users and add this fold's share of the test
# probabilities to the running average.

# Number of target classes: sizes the head, the metric and both prediction buffers.
N_CLASSES = 7

skf = StratifiedKFold(n_splits=N_SPLITS, random_state=45, shuffle=True)
oof_preds = np.zeros((len(dataset_train_all), N_CLASSES))
test_preds = np.zeros((len(dataset_test), N_CLASSES))

fold_labels = [e['target'] for e in dataset_train_all]
for fold, (train_index, valid_index) in enumerate(skf.split(dataset_train_all, fold_labels)):
    print(f"Fold {fold+1}:")
    dataset_train = [dataset_train_all[idx] for idx in train_index]
    dataset_valid = [dataset_train_all[idx] for idx in valid_index]
    finetune_dm = PtlsDataModule(
        train_data=get_dataset(dataset_train),
        valid_data=get_dataset(dataset_valid),
        test_data=get_dataset(dataset_test),
        train_num_workers=1,
        valid_num_workers=1,
        test_num_workers=1,
        train_batch_size=BATCH_SIZE,
        test_batch_size=BATCH_SIZE,
        valid_batch_size=BATCH_SIZE,
    )
    # A different seed per fold, set before the model is built so that weight
    # initialisation is reproducible.
    seed_everything(42 + fold, workers=True)
    downstream_model = SequenceToTarget(
        seq_encoder=RnnSeqEncoder(
            trx_encoder=torch.nn.Sequential(
                torch.nn.Sequential(
                    TrxEncoder(**trx_encoder_params),
                    # Input width = reference encoder output size, plus the
                    # text embedding width when texts are enabled.
                    PBLinear(trx_encoder.output_size + WORD_SIZE * int(bool(USE_TEXTS)), PB_LINEAR),
                    PBL2Norm(),
                ),
                PBLayerNorm(PB_LINEAR),
            ),
            input_size=PB_LINEAR,
            hidden_size=HIDDEN_SIZE,
            bidir=BIDIRECT,
            type=RNN_TYPE,
        ),
        head=torch.nn.Sequential(
            # A bidirectional RNN doubles the width fed to the head.
            torch.nn.Linear(HIDDEN_SIZE * (1 + int(bool(BIDIRECT))), N_CLASSES),
        ),
        loss=torch.nn.CrossEntropyLoss(),
        metric_list=torchmetrics.F1Score(num_classes=N_CLASSES, task='multiclass', average='weighted'),
        pretrained_lr=0.005,
        optimizer_partial=partial(torch.optim.Adam, lr=LR),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=1, gamma=0.1),
    )

    trainer_ft = pl.Trainer(
        max_epochs=NUM_EPOCHS,
        devices=1, accelerator="gpu",
        enable_progress_bar=True,
        gradient_clip_val=1000,
        gradient_clip_algorithm="norm",
        callbacks=[
            pl.callbacks.LearningRateMonitor(logging_interval='step'),
            # Keep only the checkpoint with the best validation F1.
            pl.callbacks.ModelCheckpoint(
                monitor="val_MulticlassF1Score",
                mode="max",
                save_top_k=1,
            ),
        ]
    )
    print(f'logger.version = {trainer_ft.logger.version}')
    trainer_ft.fit(downstream_model, finetune_dm)
    print(trainer_ft.logged_metrics)

    # Called without a model: the run log shows Lightning restoring the saved
    # checkpoint here, so the predict calls below presumably run with those
    # restored weights — TODO confirm for the installed Lightning version.
    trainer_ft.test(dataloaders=finetune_dm.val_dataloader(), verbose=True)

    # NOTE(review): get_dataset limits training/validation sequences with
    # ISeqLenLimit, while inference_data_loader is called without an explicit
    # length limit — confirm its default matches the training setup.
    valid_dl = inference_data_loader(dataset_valid, num_workers=1, batch_size=BATCH_SIZE)
    valid_logits = torch.vstack(trainer_ft.predict(downstream_model, valid_dl))
    valid_preds = torch.softmax(valid_logits, dim=1)
    oof_preds[valid_index] = valid_preds.detach().cpu().numpy()

    test_dl = inference_data_loader(dataset_test, num_workers=1, batch_size=BATCH_SIZE)
    test_logits = torch.vstack(trainer_ft.predict(downstream_model, test_dl))
    # Average the per-fold *probabilities*. Dividing the logits by N_SPLITS
    # before the softmax (as was done previously) flattens the distribution
    # and leaves the accumulated rows summing to N_SPLITS instead of 1.
    test_preds += torch.softmax(test_logits, dim=1).detach().cpu().numpy() / N_SPLITS

Fold 1:


Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


logger.version = 89


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder    | 47.3 M
1 | head          | Sequential       | 903   
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | ModuleDict       | 0     
4 | valid_metrics | ModuleDict       | 0     
5 | test_metrics  | ModuleDict       | 0     
---------------------------------------------------
25.7 M    Trainable params
21.6 M    Non-trainable params
47.3 M    Total params
189.385   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_89/checkpoints/epoch=3-step=26308.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_89/checkpoints/epoch=3-step=26308.ckpt


{'loss': tensor(1.3688), 'seq_len': tensor(545.2500), 'y': tensor(2.7000), 'val_loss': tensor(1.2688), 'val_MulticlassF1Score': tensor(0.4443), 'train_MulticlassF1Score': tensor(0.4629)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 test_MulticlassF1Score     0.4442833662033081
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

Fold 2:


Global seed set to 43
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder    | 47.3 M
1 | head          | Sequential       | 903   
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | ModuleDict       | 0     
4 | valid_metrics | ModuleDict       | 0     
5 | test_metrics  | ModuleDict       | 0     
------------------------------------------

logger.version = 90


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_90/checkpoints/epoch=3-step=26308.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_90/checkpoints/epoch=3-step=26308.ckpt


{'loss': tensor(1.3054), 'seq_len': tensor(468.3500), 'y': tensor(3.), 'val_loss': tensor(1.2649), 'val_MulticlassF1Score': tensor(0.4437), 'train_MulticlassF1Score': tensor(0.4642)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 test_MulticlassF1Score     0.4437177777290344
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

Fold 3:


Global seed set to 44
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder    | 47.3 M
1 | head          | Sequential       | 903   
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | ModuleDict       | 0     
4 | valid_metrics | ModuleDict       | 0     
5 | test_metrics  | ModuleDict       | 0     
------------------------------------------

logger.version = 91


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_91/checkpoints/epoch=3-step=26308.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_91/checkpoints/epoch=3-step=26308.ckpt


{'loss': tensor(1.2527), 'seq_len': tensor(574.0500), 'y': tensor(2.5500), 'val_loss': tensor(1.2653), 'val_MulticlassF1Score': tensor(0.4447), 'train_MulticlassF1Score': tensor(0.4634)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 test_MulticlassF1Score      0.444654256105423
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

Fold 4:


Global seed set to 45
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder    | 47.3 M
1 | head          | Sequential       | 903   
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | ModuleDict       | 0     
4 | valid_metrics | ModuleDict       | 0     
5 | test_metrics  | ModuleDict       | 0     
------------------------------------------

logger.version = 92


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_92/checkpoints/epoch=2-step=19731.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_92/checkpoints/epoch=2-step=19731.ckpt


{'loss': tensor(0.8137), 'seq_len': tensor(481.2000), 'y': tensor(2.7000), 'val_loss': tensor(1.2637), 'val_MulticlassF1Score': tensor(0.4428), 'train_MulticlassF1Score': tensor(0.4649)}


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 test_MulticlassF1Score     0.44288069009780884
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

Fold 5:


Global seed set to 46
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | seq_encoder   | RnnSeqEncoder    | 47.3 M
1 | head          | Sequential       | 903   
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | ModuleDict       | 0     
4 | valid_metrics | ModuleDict       | 0     
5 | test_metrics  | ModuleDict       | 0     
------------------------------------------

logger.version = 93


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /app/lightning_logs/version_93/checkpoints/epoch=2-step=19731.ckpt


{'loss': tensor(1.0334), 'seq_len': tensor(441.9500), 'y': tensor(2.5500), 'val_loss': tensor(1.2687), 'val_MulticlassF1Score': tensor(0.4395), 'train_MulticlassF1Score': tensor(0.4613)}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /app/lightning_logs/version_93/checkpoints/epoch=2-step=19731.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 test_MulticlassF1Score     0.4395180940628052
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 6577it [00:00, ?it/s]

In [12]:
# Assemble the averaged test probabilities into a frame (one column per class),
# attach the user ids in the same order as `dataset_test`, and write it to CSV.
test_prob_columns = [f'age_prob_1024_lstm_{class_idx}' for class_idx in range(7)]
pred_test_nn = pd.DataFrame(test_preds[:, :7], columns=test_prob_columns)
pred_test_nn["user_id"] = [record["user_id"] for record in dataset_test]
pred_test_nn = pred_test_nn.sort_values("user_id")
pred_test_nn.to_csv("age_lstm_test_1024.csv", index=False)

In [13]:
# Assemble the out-of-fold probabilities into a frame (one column per class),
# attach the user ids in the same order as `dataset_train_all`, and write it to CSV.
oof_prob_columns = [f'age_prob_1024_lstm_{class_idx}' for class_idx in range(7)]
pred_oof_nn = pd.DataFrame(oof_preds[:, :7], columns=oof_prob_columns)
pred_oof_nn["user_id"] = [record["user_id"] for record in dataset_train_all]
pred_oof_nn = pred_oof_nn.sort_values("user_id")
pred_oof_nn.to_csv("age_lstm_oof_1024.csv", index=False)

In [14]:
# Display the out-of-fold prediction frame for a quick visual check.
# NOTE(review): the saved output below shows columns named `age_prob_<k>` and an
# `Unnamed: 0` column, which does not match the `age_prob_1024_lstm_<k>` names
# built in the previous cell — the output looks stale; re-run to refresh.
pred_oof_nn

Unnamed: 0,age_prob_0,age_prob_1,age_prob_2,age_prob_3,age_prob_4,age_prob_5,age_prob_6,user_id
0,5.514933e-09,0.007125,0.097371,0.308670,0.339761,0.204677,0.042396,0
1,4.139704e-08,0.012657,0.214487,0.447223,0.245477,0.072052,0.008103,1
2,3.310023e-08,0.251905,0.515394,0.183775,0.032996,0.011937,0.003993,2
3,7.131284e-09,0.113768,0.434225,0.314108,0.097475,0.033316,0.007108,3
4,3.896784e-09,0.157247,0.606553,0.208856,0.021494,0.004887,0.000963,4
...,...,...,...,...,...,...,...,...
263060,4.639892e-05,0.041465,0.198957,0.257304,0.230839,0.197619,0.073770,415282
263061,3.924040e-06,0.094044,0.358405,0.279598,0.157236,0.091486,0.019227,415283
263062,2.594526e-06,0.077804,0.304284,0.306541,0.178612,0.106338,0.026418,415295
263063,1.308965e-05,0.152782,0.360145,0.237567,0.136960,0.090494,0.022038,415300
