<img src="./imgs/title.JPG" width="600">

In [None]:
import nemo
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.data.datasets import BertTextClassificationDataset
from nemo.collections.nlp.nm.data_layers.text_classification_datalayer import BertTextClassificationDataLayer
from nemo.collections.nlp.nm.trainables import SequenceClassifier

from nemo.backends.pytorch.common import CrossEntropyLossNM
from nemo.utils.lr_policies import get_lr_policy
from nemo.collections.nlp.callbacks.text_classification_callback import eval_iter_callback, eval_epochs_done_callback
from nemo import logging

import os
import json
import math
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import torch

<img src="imgs/nemo_intro.JPG" width="600">

#### NVIDIA NeMo is a Python toolkit for building, training, and fine-tuning GPU-accelerated conversational AI models using a simple interface.
Source: https://developer.nvidia.com/nvidia-nemo

# Settings

In [None]:
# Encoder to fine-tune. It must be a key of both lookup tables below.
# Available choices: 'BERT', 'BERT-large', 'Megatron-BERT'
ENCODER = 'BERT'

# Checkpoint directory per encoder. Keep the trailing slash: file names are
# appended to these strings directly.
path_pool = {
    'BERT': './checkpoint/bert/',
    'BERT-large': './checkpoint/bert_large/',
    'Megatron-BERT': './checkpoint/megatron_bert/',
}
# Pretrained model name per encoder.
name_pool = {
    'BERT': 'bert-base-uncased',
    'BERT-large': 'bert-large-uncased',
    'Megatron-BERT': 'megatron-bert-uncased',
}

# --- Model, data and output locations ---
PRETRAINED_MODEL_NAME = name_pool[ENCODER]
PRETRAINED_MODEL_CHECKPOINT = f'{path_pool[ENCODER]}bert.pt'
PRETRAINED_MODEL_CONFIG = f'{path_pool[ENCODER]}bert_config.json'
TRAIN_DATA_PATH = './data/train.tsv'
TEST_DATA_PATH = './data/test.tsv'
WORK_DIR = 'output/'

# --- Model settings ---
AMP_OPTIMIZATION_LEVEL = 'O2'
MAX_SEQ_LEN = 256
DROPOUT_RATE = 0.3
# Batch sizes picked for an RTX 2080 Ti: only the base 'BERT' encoder uses 32,
# every other encoder falls back to 8.
BATCH_SIZE = 32 if ENCODER == 'BERT' else 8

# --- Optimisation schedule ---
n_epochs = 3
lr = 3e-5
lr_warmup_proportion = 0.1
weight_decay = 0.01

# Components

<img src="imgs/components.JPG" width="600">

In [None]:
# Factory for this run: logs go to WORK_DIR (no timestamp suffix), a
# TensorBoard writer is created, and AMP_OPTIMIZATION_LEVEL is applied.
nf = nemo.core.NeuralModuleFactory(
    log_dir=WORK_DIR,
    create_tb_writer=True,
    add_time_to_log_dir=False,
    optimization_level=AMP_OPTIMIZATION_LEVEL,
)

# Pretrained encoder, built from the configured name, config file and checkpoint.
model = nemo_nlp.nm.trainables.get_pretrained_lm_model(
    pretrained_model_name=PRETRAINED_MODEL_NAME,
    config=PRETRAINED_MODEL_CONFIG,
    checkpoint=PRETRAINED_MODEL_CHECKPOINT,
)

# Tokenizer for the same pretrained model name; input is lower-cased.
tokenizer = nemo_nlp.data.tokenizers.get_tokenizer(
    tokenizer_name='nemobert',
    pretrained_model_name=PRETRAINED_MODEL_NAME,
    do_lower_case=True,
)

# Two-class classification head sized to the encoder's hidden size.
# log_softmax is off, so the head's raw outputs are passed to the loss below.
classifier = SequenceClassifier(
    hidden_size=model.hidden_size,
    num_classes=2,
    num_layers=2,
    dropout=DROPOUT_RATE,
    log_softmax=False,
)
loss_func = CrossEntropyLossNM()

# Arguments shared by the training and evaluation data layers.
_shared_layer_args = dict(
    tokenizer=tokenizer,
    max_seq_length=MAX_SEQ_LEN,
    batch_size=BATCH_SIZE,
)
# Training data: shuffled, with caching enabled.
train_data_layer = BertTextClassificationDataLayer(
    input_file=TRAIN_DATA_PATH,
    shuffle=True,
    use_cache=True,
    **_shared_layer_args,
)
# Evaluation data: fixed order, no caching.
eval_data_layer = BertTextClassificationDataLayer(
    input_file=TEST_DATA_PATH,
    shuffle=False,
    use_cache=False,
    **_shared_layer_args,
)

In [None]:
# Show the encoder's num_weights and hidden_size as a quick sanity check.
encoder_summary = (model.num_weights, model.hidden_size)
encoder_summary

# Create the graph

<img src="imgs/create_graph.JPG" width="600">

In [None]:
# --- Training branch: data layer -> encoder -> classifier -> loss ---
train_data = train_data_layer()
train_hidden_states = model(
    input_ids=train_data.input_ids,
    token_type_ids=train_data.input_type_ids,
    attention_mask=train_data.input_mask,
)
train_logits = classifier(hidden_states=train_hidden_states)
loss = loss_func(logits=train_logits, labels=train_data.labels)

# Number of optimisation steps in one pass over the training data.
# If you're training on multiple GPUs, this should be
# len(train_data_layer) // (batch_size * batches_per_step * num_gpus)
#
# Clamped to at least 1: with fewer than BATCH_SIZE training examples the
# floor division yields 0, and that 0 would be handed on as the callbacks'
# step frequency and as the LR schedule's total step count.
train_steps_per_epoch = max(1, len(train_data_layer) // BATCH_SIZE)

# --- Evaluation branch: same encoder and classifier, no loss ---
eval_data = eval_data_layer()
eval_hidden_states = model(
    input_ids=eval_data.input_ids,
    token_type_ids=eval_data.input_type_ids,
    attention_mask=eval_data.input_mask,
)
eval_logits = classifier(hidden_states=eval_hidden_states)

In [None]:
def _log_train_loss(tensors):
    """Log the first evaluated tensor (the loss) with three decimals."""
    return logging.info("Loss: {:.3f}".format(tensors[0].item()))


def _loss_for_tensorboard(tensors):
    """Return the [name, value] pairs to write to TensorBoard."""
    return [["loss", tensors[0]]]


def _on_eval_batch(tensors, global_vars):
    """Forward one evaluation batch to NeMo's text-classification helper."""
    return eval_iter_callback(tensors, global_vars, eval_data_layer)


def _on_eval_done(global_vars):
    """Finish an evaluation pass; the helper is given '<work_dir>/graphs'."""
    return eval_epochs_done_callback(global_vars, f'{nf.work_dir}/graphs')


# Report the training loss once per epoch (every train_steps_per_epoch steps).
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss],
    print_func=_log_train_loss,
    get_tb_values=_loss_for_tensorboard,
    step_freq=train_steps_per_epoch,
    tb_writer=nf.tb_writer,
)

# Evaluate on the test set at the same once-per-epoch cadence.
eval_callback = nemo.core.EvaluatorCallback(
    eval_tensors=[eval_logits, eval_data.labels],
    user_iter_callback=_on_eval_batch,
    user_epochs_done_callback=_on_eval_done,
    tb_writer=nf.tb_writer,
    eval_step=train_steps_per_epoch,
)

# Training

In [None]:
# The learning-rate schedule covers the whole run: n_epochs passes of
# train_steps_per_epoch steps each, with lr_warmup_proportion as warmup ratio.
total_train_steps = n_epochs * train_steps_per_epoch
lr_policy_fn = get_lr_policy(
    "WarmupAnnealing",
    total_steps=total_train_steps,
    warmup_ratio=lr_warmup_proportion,
)

# Optimiser hyper-parameters, taken from the settings cell.
optimization_params = {
    "num_epochs": n_epochs,
    "lr": lr,
    "weight_decay": weight_decay,
}

# Optimise the loss with both callbacks attached.
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback, eval_callback],
    lr_policy=lr_policy_fn,
    optimizer="adam_w",
    optimization_params=optimization_params,
)

### Save the model

In [None]:
# Save the encoder and the classification head to separate files.
# Both paths are relative to the current working directory.
encoder_archive = 'model.nemo'
classifier_archive = 'classifier.nemo'

model.save_to(encoder_archive)
classifier.save_to(classifier_archive)