In [3]:
#Imports
import numpy as np
import pandas as pd
from os.path import join
from datetime import datetime
from pathlib import Path

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning import loggers as pl_loggers

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything, LightningModule
from pytorch_lightning.callbacks import ModelCheckpoint

from transformers import BertModel
from utils.data import RelevantDataset
import importlib

# Notebook parameters

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Id of the pretrained BERT checkpoint to load.
# The id encodes the model size: L-2 = 2 transformer layers, H-128 = hidden size 128,
# A-2 = 2 attention heads.
bert_id = "google/bert_uncased_L-2_H-128_A-2"
# Alternative: 12 layers, 768 hidden, 12 heads, 110M parameters; trained on lower-cased English text.
#bert_id = "bert-base-uncased"

# Start time of this notebook run; the monitoring cell formats it into the log directory name.
# NOTE: the name is misspelled ("noetbook") but is referenced by later cells, so it is kept as-is.
noetbook_datetime = datetime.now()

# Seed the random number generators so that runs are repeatable.
seed_everything(1337)

Global seed set to 1337


1337

# Training

## Set Architecture

In [4]:
from utils.architectures import RelevantModuleV03 as RelevantModule


## Load Datasets

In [5]:
# Training split. Its `dimensions` are passed on to the validation split below.
train_ds = RelevantDataset(
    dataset="train",
    target_mode="isrelevant",
    device=device,
    move_to_tensor=False,
)

# Validation split, constructed with the dimensions taken from the training split.
validation_ds = RelevantDataset(
    dataset="val",
    target_mode="isrelevant",
    device=device,
    dimensions=train_ds.dimensions,
    move_to_tensor=False,
)

## Hyperparameters

In [6]:
# Learning rate handed to the model as `start_lr`.
start_lr: float = 1e-4
# Number of samples per batch for the training DataLoader.
batch_size: int = 64

In [8]:
# Model: a pretrained BERT encoder wrapped by the selected RelevantModule architecture.
# Input size, output size and the prior are all read from the training dataset.
model = RelevantModule(
    bert=BertModel.from_pretrained(bert_id).to(device),
    input_size=sum(train_ds.dimensions[0][1]),
    output_size=train_ds.dimensions[1],
    start_lr=start_lr,
    prior=train_ds.prior,
)


# Monitoring: one log directory per architecture and notebook start time.
architecture_name = model.__class__.__name__
logdir = join("logs", architecture_name, noetbook_datetime.strftime("%Y-%m-%dT%H-%M-%S"))
print(f"Logging to {logdir}")
Path(logdir).mkdir(parents=True, exist_ok=True)

# Empty name/version so that no per-experiment sub-directory is created below logdir.
tb_logger = pl_loggers.TensorBoardLogger(logdir, name="", version="")

# Checkpoints are written into the same directory as the TensorBoard logs.
# NOTE(review): `f1_support` is assumed to be an F1-type score logged by the module,
# i.e. higher is better, hence mode="max" (the previous "min" would have tracked the
# worst epoch as the best checkpoint). Confirm against the module's logging code.
checkpoint_callback = ModelCheckpoint(
    monitor='f1_support',
    dirpath=logdir,
    verbose=True,
    save_last=True,
    save_top_k=-1,  # keep every checkpoint, not only the best ones
    mode="max",
    filename='-{epoch:02d}-{val_loss:.2f}'
)

# Dataloaders: both use the batch size from the hyperparameter cell;
# only the training data is shuffled.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4)
validation_dl = DataLoader(validation_ds, batch_size=batch_size, shuffle=False, num_workers=4)

# Trainer: follow the device chosen in the parameter cell instead of assuming a GPU.
# Requesting a GPU or 16-bit precision on a machine without CUDA would fail at construction.
use_gpu = device == "cuda"
trainer = Trainer(
    gpus=1 if use_gpu else 0,
    precision=16 if use_gpu else 32,
    logger=tb_logger,
    callbacks=[checkpoint_callback],
)

Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


Logging to logs/RelevantModuleV03/2021-06-13T10-58-32


## Executing run

In [None]:
# Start training: fit the model on train_dl, with validation_dl passed as the validation data.
trainer.fit(model, train_dl, validation_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type       | Params
-------------------------------------------------
0 | bert              | BertModel  | 4.4 M 
1 | linear_after_bert | Linear     | 33.0 K
2 | feed_forward      | Sequential | 714 K 
-------------------------------------------------
5.1 M     Trainable params
2         Non-trainable params
5.1 M     Total params
20.532    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
Global seed set to 1337


Training: 0it [00:00, ?it/s]

# Notes

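To resume an interrupted run from a checkpoint, see the PyTorch Lightning documentation on restoring training state: https://pytorch-lightning.readthedocs.io/en/latest/common/weights_loading.html#restoring-training-state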

In [None]:
#import utils.data
#utils.data.preprocess("data_round_1")

In [None]:
from torch.nn import functional as F

In [None]:
# Scratch check: one-hot encode the class indices [1, 0] into two classes.
class_indices = torch.tensor([1, 0])
F.one_hot(class_indices, num_classes=2)

In [None]:
import sklearn.metrics as sm
import numpy as np

In [None]:
# Scratch check: macro-averaged precision/recall/F-score on two random binary label vectors.
# The labels are drawn in the same order as before (true labels first, then predictions).
y_true = np.random.randint(0, 2, size=10)
y_pred = np.random.randint(0, 2, size=10)
sm.precision_recall_fscore_support(y_true, y_pred, average="macro")