In [1]:
#Imports
import numpy as np
import pandas as pd
from os.path import join
from datetime import datetime
from pathlib import Path

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning import loggers as pl_loggers

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything, LightningModule
from pytorch_lightning.callbacks import ModelCheckpoint

from transformers import BertModel
from utils.data import RelevantDataset

#Notebook Parameters
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
    
bert_id = "google/bert_uncased_L-2_H-128_A-2"

noetbook_datetime = datetime.now()

seed_everything(1337)

Global seed set to 1337


1337

# Training

## Set Architecture

In [2]:
from utils.architectures import RelevantModuleV02 as RelevantModule


## Load Datasets

In [3]:
train_ds = RelevantDataset(dataset="train", target_mode="isrelevant", device=device)
validation_ds = RelevantDataset(dataset="val", device = device, target_mode="isrelevant", dimensions = train_ds.dimensions)

## Hyperparameters

In [4]:
start_lr = 1e-4
batch_size = 16

In [5]:
# model
model = RelevantModule(
    bert = BertModel.from_pretrained(bert_id).to(device),
    input_size = sum(train_ds.dimensions[0][1]), 
    output_size = train_ds.dimensions[1],
    start_lr=start_lr
)


#monitoring
architecture_name = model.__class__.__name__
logdir = join("logs", architecture_name, noetbook_datetime.strftime("%Y-%m-%dT%H-%M-%S"))
print(f"Logging to {logdir}")
Path(logdir).mkdir(parents=True, exist_ok=True)

tb_logger = pl_loggers.TensorBoardLogger(logdir, name="", version="")

checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath=logdir,
    verbose=True,
    save_last=True,
    save_top_k=-1, #save all
    mode="min",
    filename='-{epoch:02d}-{val_loss:.2f}'
)

#Dataloaders
train_dl = DataLoader(train_ds,batch_size  = batch_size, shuffle=True)
validation_dl = DataLoader(train_ds, batch_size  = 64, shuffle=False)

# training
trainer = Trainer(
    gpus=1, 
    precision=16, 
    logger=tb_logger, 
    callbacks=[checkpoint_callback]
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


Logging to logs\RelevantModuleV02\2021-04-19T23-12-14


## Executing run

In [None]:
trainer.fit(model, train_dl, validation_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type       | Params
-------------------------------------------------
0 | bert              | BertModel  | 4.4 M 
1 | linear_after_bert | Linear     | 33.0 K
2 | feed_forward      | Sequential | 713 K 
-------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.528    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 16373: val_loss reached 0.25173 (best 0.25173), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=00-val_loss=0.25.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 32747: val_loss reached 0.22714 (best 0.22714), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=01-val_loss=0.23.ckpt" as top 2


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 49121: val_loss reached 0.20449 (best 0.20449), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=02-val_loss=0.20.ckpt" as top 3


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 65495: val_loss reached 0.19907 (best 0.19907), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=03-val_loss=0.20.ckpt" as top 4


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 81869: val_loss reached 0.16567 (best 0.16567), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=04-val_loss=0.17.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 98243: val_loss reached 0.14733 (best 0.14733), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=05-val_loss=0.15.ckpt" as top 6


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 114617: val_loss reached 0.12431 (best 0.12431), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=06-val_loss=0.12.ckpt" as top 7


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 130991: val_loss reached 0.11833 (best 0.11833), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=07-val_loss=0.12.ckpt" as top 8


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 147365: val_loss reached 0.09893 (best 0.09893), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=08-val_loss=0.10.ckpt" as top 9


Validating: 0it [00:00, ?it/s]

Epoch 9, global step 163739: val_loss reached 0.08671 (best 0.08671), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=09-val_loss=0.09.ckpt" as top 10


Validating: 0it [00:00, ?it/s]

Epoch 10, global step 180113: val_loss reached 0.08256 (best 0.08256), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=10-val_loss=0.08.ckpt" as top 11


Validating: 0it [00:00, ?it/s]

Epoch 11, global step 196487: val_loss reached 0.06901 (best 0.06901), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=11-val_loss=0.07.ckpt" as top 12


Validating: 0it [00:00, ?it/s]

Epoch 12, global step 212861: val_loss reached 0.06440 (best 0.06440), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=12-val_loss=0.06.ckpt" as top 13


Validating: 0it [00:00, ?it/s]

Epoch 13, global step 229235: val_loss reached 0.06254 (best 0.06254), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=13-val_loss=0.06.ckpt" as top 14


Validating: 0it [00:00, ?it/s]

Epoch 14, global step 245609: val_loss reached 0.05437 (best 0.05437), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=14-val_loss=0.05.ckpt" as top 15


Validating: 0it [00:00, ?it/s]

Epoch 15, global step 261983: val_loss reached 0.05144 (best 0.05144), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=15-val_loss=0.05.ckpt" as top 16


Validating: 0it [00:00, ?it/s]

Epoch 16, global step 278357: val_loss reached 0.05004 (best 0.05004), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=16-val_loss=0.05.ckpt" as top 17


Validating: 0it [00:00, ?it/s]

Epoch 17, global step 294731: val_loss reached 0.04643 (best 0.04643), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=17-val_loss=0.05.ckpt" as top 18


Validating: 0it [00:00, ?it/s]

Epoch 18, global step 311105: val_loss reached 0.04608 (best 0.04608), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=18-val_loss=0.05.ckpt" as top 19


Validating: 0it [00:00, ?it/s]

Epoch 19, global step 327479: val_loss reached 0.04558 (best 0.04558), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=19-val_loss=0.05.ckpt" as top 20


Validating: 0it [00:00, ?it/s]

Epoch 20, global step 343853: val_loss reached 0.04159 (best 0.04159), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=20-val_loss=0.04.ckpt" as top 21


Validating: 0it [00:00, ?it/s]

Epoch 21, global step 360227: val_loss reached 0.04091 (best 0.04091), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=21-val_loss=0.04.ckpt" as top 22


Validating: 0it [00:00, ?it/s]

Epoch 22, global step 376601: val_loss reached 0.04080 (best 0.04080), saving model to "E:\CHEERS_Challenge\CHEERS_challenge_round_1\logs\RelevantModuleV02\2021-04-19T23-12-14\-epoch=22-val_loss=0.04.ckpt" as top 23


Validating: 0it [00:00, ?it/s]

# Notes

Visit: https://pytorch-lightning.readthedocs.io/en/latest/common/weights_loading.html#restoring-training-state

In [None]:
#import utils.data
#utils.data.preprocess("data_round_1")