In [1]:
%%capture
! pip install pytorch-lightning
! pip install transformers==4.3.0
! pip install sentencepiece

In [2]:
# Mount Google Drive at /content/drive so that the project directory stored on Drive
# can be reached from this Colab runtime (the next cell changes into it).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd drive/MyDrive/graphite/keyword_category_prediction_api

/content/drive/MyDrive/graphite/keyword_category_prediction_api


In [4]:
# Data reading and preprocessing
from modeling.bert_base_multilingual.cased.preprocessing import get_train_test_data, get_categories, add_category_columns
# Dataset
from modeling.bert_base_multilingual.cased.text_dataset import KeywordDataset
from modeling.bert_base_multilingual.cased.data_module import KeywordDataModule
# Model
from modeling.bert_base_multilingual.cased.model import KeywordCategorizer
# Transformer imports
from transformers import BertTokenizer
# Logging and saving
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
# Metrics
from modeling.bert_base_multilingual.cased.metrics import mean_auc_roc, mean_avg_precision
# General
import torch
from tqdm.auto import tqdm

# Data

In [5]:
# Get data: train and test splits come from the project's preprocessing helper.
pd_train, pd_test = get_train_test_data()

# Get categories: built from both splits and later used to derive the label columns.
categories_dict = get_categories(pd_train, pd_test)

# Add category columns and fill them
pd_train = add_category_columns(pd_train, categories_dict)
pd_test = add_category_columns(pd_test, categories_dict)

# Temporary sampling: keep only a small fraction of each split to speed up experimentation.
# A fixed random_state makes the subset (and every number derived from it) reproducible
# across kernel restarts; without it each run trains and evaluates on different rows.
# Assumes both splits are pandas DataFrames — TODO confirm against get_train_test_data.
SAMPLE_FRACTION = .01
RANDOM_SEED = 42
pd_train = pd_train.sample(round(pd_train.shape[0] * SAMPLE_FRACTION), random_state=RANDOM_SEED)
pd_test = pd_test.sample(round(pd_test.shape[0] * SAMPLE_FRACTION), random_state=RANDOM_SEED)

# Global variables and parameters

In [6]:
# Name of the pretrained checkpoint; passed to both the tokenizer and the model.
MODEL_NAME = 'bert-base-multilingual-cased'
# One label column per category key.
LABEL_COLUMNS = list(categories_dict.keys())
# Passed to the data module / dataset together with the tokenizer
# (presumably the maximum tokenized sequence length — verify in KeywordDataset).
MAX_TOKEN_COUNT = 40
N_EPOCHS = 5
BATCH_SIZE = 52

# Optimizer scheduler
# Ceiling division: when the final partial batch is kept, an epoch runs
# ceil(len / batch_size) optimizer steps. Floor division undercounts by one step per epoch
# and yields 0 steps when the sampled training set is smaller than one batch.
# NOTE(review): assumes the data module does not drop the last batch — confirm in KeywordDataModule.
STEPS_PER_EPOCH = (len(pd_train) + BATCH_SIZE - 1) // BATCH_SIZE
TOTAL_TRAINING_STEPS = STEPS_PER_EPOCH * N_EPOCHS
# The first fifth of all training steps is handed to the model as warm-up steps.
WARMUP_STEPS = TOTAL_TRAINING_STEPS // 5

# Dataset

In [7]:
# Bundle both splits, the pretrained tokenizer for MODEL_NAME and the batching
# parameters into the project's data module, which is later handed to the Trainer.
data_module = KeywordDataModule(
    pd_train,
    pd_test,
    BertTokenizer.from_pretrained(MODEL_NAME),
    LABEL_COLUMNS,
    BATCH_SIZE,
    MAX_TOKEN_COUNT,
)

Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]

# Model

In [8]:
# Instantiate the categorizer with one output per label column; the step counts
# computed in the parameters cell are forwarded for its scheduler.
model = KeywordCategorizer(
    len(LABEL_COLUMNS),
    LABEL_COLUMNS,
    TOTAL_TRAINING_STEPS,
    WARMUP_STEPS,
    MODEL_NAME,
)

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/714M [00:00<?, ?B/s]

# Training

### Checkpoints and early stopping

In [9]:
# Early stopping watches the same metric as the checkpoint callback, with patience=2.
early_stopping_callback = EarlyStopping(patience=2, monitor='val_loss')

# Training logs are written under lightning_logs/ with the run name "keyword-categories".
logger = TensorBoardLogger("lightning_logs", name="keyword-categories")

# Keep only the single best checkpoint (lowest val_loss) as checkpoints/best-checkpoint.
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    dirpath="checkpoints",
    filename="best-checkpoint",
    verbose=True,
)

### Initialize trainer - Requires GPU

In [10]:
# Build the Trainer with the logger and callbacks defined above.
# - `gpus` is derived from CUDA availability, so the cell runs unchanged on CPU-only
#   runtimes instead of requiring the line to be commented out by hand.
# - `checkpoint_callback=True` was dropped: it is the default, the ModelCheckpoint
#   instance is already passed via `callbacks`, and the flag is deprecated in later releases.
trainer = pl.Trainer(
    logger=logger,
    callbacks=[checkpoint_callback, early_stopping_callback],
    max_epochs=N_EPOCHS,
    gpus=1 if torch.cuda.is_available() else 0,
    progress_bar_refresh_rate=10
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


### Start training

In [11]:
# Run training; the data module is passed by keyword to make explicit that it
# (not a raw dataloader) provides the data for the run.
trainer.fit(model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type      | Params
-----------------------------------------
0 | bert       | BertModel | 177 M 
1 | classifier | Linear    | 16.9 K
2 | criterion  | BCELoss   | 0     
-----------------------------------------
177 M     Trainable params
0         Non-trainable params
177 M     Total params
711.481   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  stream(template_mgs % msg_args)
  thresholds = tensor(reversed(thresholds[sl]))


Training: -1it [00:00, ?it/s]

  f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"


Validating: 0it [00:00, ?it/s]

Epoch 0, global step 134: val_loss reached 0.31163 (best 0.31163), saving model to "/content/drive/MyDrive/graphite/keyword_category_prediction_api/checkpoints/best-checkpoint.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 269: val_loss reached 0.29729 (best 0.29729), saving model to "/content/drive/MyDrive/graphite/keyword_category_prediction_api/checkpoints/best-checkpoint.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 404: val_loss reached 0.28158 (best 0.28158), saving model to "/content/drive/MyDrive/graphite/keyword_category_prediction_api/checkpoints/best-checkpoint.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 539: val_loss reached 0.27009 (best 0.27009), saving model to "/content/drive/MyDrive/graphite/keyword_category_prediction_api/checkpoints/best-checkpoint.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 674: val_loss reached 0.26658 (best 0.26658), saving model to "/content/drive/MyDrive/graphite/keyword_category_prediction_api/checkpoints/best-checkpoint.ckpt" as top 1


# Testing

In [None]:
# Run the test loop on the Trainer that was just fitted. No model or dataloaders are
# passed, so the Trainer falls back to what it holds from `fit`.
# NOTE(review): which weights are evaluated (best checkpoint vs. last epoch) depends on
# the installed pytorch-lightning version — confirm.
trainer.test()

# Tensorboard

In [None]:
# Load the TensorBoard notebook extension and open it on "lightning_logs",
# the directory given to TensorBoardLogger in the training section.
%load_ext tensorboard
%tensorboard --logdir lightning_logs 

# Evaluation

In [None]:
%cd drive/MyDrive/graphite/keyword_category_prediction_api

In [None]:
# Restore the categorizer from the checkpoint file written by the checkpoint callback.
trained_model = KeywordCategorizer.load_from_checkpoint(
    'checkpoints/best-checkpoint.ckpt',
    label_columns=LABEL_COLUMNS,
    n_classes=len(LABEL_COLUMNS),
)

# Inference only from here on: evaluation mode, frozen model.
trained_model.eval()
trained_model.freeze()

# Prefer the GPU when one is present, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
trained_model = trained_model.to(device)

### Get evaluation dataset

In [None]:
# Wrap the test split in the project's dataset class, tokenized with the same
# pretrained tokenizer and token limit used for training.
test_dataset = KeywordDataset(
    pd_test,
    BertTokenizer.from_pretrained(MODEL_NAME),
    LABEL_COLUMNS,
    MAX_TOKEN_COUNT,
)

### Start evaluation

In [None]:
# Score the test set one example at a time, collecting model outputs and integer labels.
collected_predictions, collected_labels = [], []

for sample in tqdm(test_dataset):
    # Add a leading batch dimension of size 1 and move the inputs next to the model.
    input_ids = sample["input_ids"].unsqueeze(dim=0).to(device)
    attention_mask = sample["attention_mask"].unsqueeze(dim=0).to(device)

    # The model returns a pair; only the second element (the prediction) is kept.
    _, sample_prediction = trained_model(input_ids, attention_mask)

    collected_predictions.append(sample_prediction.flatten())
    collected_labels.append(sample["labels"].int())

# Stack the per-example tensors into one tensor each and bring them back to the CPU.
predictions = torch.stack(collected_predictions).detach().cpu()
labels = torch.stack(collected_labels).detach().cpu()

### Compute AUC ROC metrics

In [None]:
# AUC ROC: the project helper returns the mean together with a per-category breakdown.
mean_aucroc, auc_roc_class = mean_auc_roc(
    predictions,
    labels,
    LABEL_COLUMNS,
)

print(
    "Mean AUC ROC:", mean_aucroc,
    "\n\nAUC ROC per category:", auc_roc_class,
)

### Compute Mean Average Precision metrics

In [None]:
# Average precision: the project helper returns the mean together with a per-category breakdown.
mean_avg_prec, avg_prec_class = mean_avg_precision(
    predictions,
    labels,
    LABEL_COLUMNS,
)

print(
    "Mean Average Precision:", mean_avg_prec,
    "\n\nAverage Precision per category:", avg_prec_class,
)