%pip install transformers, datasets, torch

In [2]:
import torch
import torch.nn as nn
from ag_news_dataloader import construct_ag_news_dataloaders
from ag_news_trainer import infer, train
from roberta_classification_model import RobertaClsModel
from torch import cuda
from transformers import RobertaForSequenceClassification, RobertaTokenizer

Setup the dataloaders

In [3]:
roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
train_dataloader, val_dataloader, test_dataloader = construct_ag_news_dataloaders(
    batch_size=32, train_split_ratio=0.8, tokenizer=roberta_tokenizer
)

Found cached dataset ag_news (/Users/david/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)
100%|██████████| 2/2 [00:00<00:00, 38.45it/s]
Found cached dataset ag_news (/Users/david/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)
100%|██████████| 2/2 [00:00<00:00, 381.63it/s]
                                                                 

Training data example encoding: tensor([    0,  3084,     4,   504,  9720,  3557,     6,  5750,   312,     4,
          501, 23475,  1641,  1604,  2075,    13, 18817,  1314,     8,    80,
         5020,     6,     8, 21127,  1547,   355, 23672,  1314,  6404,     7,
          483,   440,     4,   504,  9720,   375,  5750,   331,  3557,    12,
         1570,    15,   378,     4,     2,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,    

Setup the different variables we'd like for training

In [6]:
device = "cuda" if cuda.is_available() else "cpu"
print(f"Detected Device {device}")
# We'll provide two options. First we create our own model on top of the vanilla RoBERTa model. The second is to use
# HuggingFace's RobertaForSequenceClassification class, which essentially does the same thing.
use_hf_sequence_classification = True
roberta_classifier_model = (
    RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=4)
    if use_hf_sequence_classification
    else RobertaClsModel()
)
loss_function = nn.CrossEntropyLoss()
n_training_epochs = 1

Detected Device cpu


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Train the model on the training dataset

In [7]:
print("Begin Model Training...")
train(roberta_classifier_model, train_dataloader, val_dataloader, loss_function, device, n_training_epochs)
print("Training Complete")

Begin Model Training...
Starting Epoch 0


  0%|          | 0/3000 [00:37<?, ?it/s]


TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not SequenceClassifierOutput

Save the final model to disk

In [None]:
print("Saving model...")
output_model_file = "./roberta_ag_news.bin"
torch.save(roberta_classifier_model, output_model_file)
print("Model saved.")

Load model back up and perform inference on the test set

In [None]:
print("Loading model...")
roberta_classifier_model = torch.load(output_model_file)
print("Model loaded.")

print("Evaluating model on test set...")
infer(roberta_classifier_model, loss_function, test_dataloader, device)
print("Model evaluated.")