%pip install transformers, datasets, torch

In [None]:
import torch
import torch.nn as nn
from ag_news_dataloader import construct_ag_news_dataloaders
from ag_news_trainer import infer, train
from roberta_classification_model import RobertaClsModel
from torch import cuda
from transformers import RobertaForSequenceClassification, RobertaTokenizer

Set up the dataloaders

In [None]:
# Tokenizer for the pretrained "roberta-base" checkpoint; it is handed to the
# dataloader builder below.
roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Build the train / validation / test dataloaders (batch size 32, train split ratio 0.8).
train_dataloader, val_dataloader, test_dataloader = construct_ag_news_dataloaders(
    tokenizer=roberta_tokenizer,
    train_split_ratio=0.8,
    batch_size=32,
)

Set up the different variables we'd like for training

In [None]:
# Run on the GPU when PyTorch reports one is available; otherwise fall back to the CPU.
if cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Detected Device {device}")

# Two model options are provided:
#   * HuggingFace's RobertaForSequenceClassification (selected when the flag is True), or
#   * our own classifier built on top of the vanilla RoBERTa model (RobertaClsModel).
# Both essentially do the same thing.
use_hf_sequence_classification = True
if use_hf_sequence_classification:
    roberta_classifier_model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=4)
else:
    roberta_classifier_model = RobertaClsModel()

# Loss used for training and evaluation, and the number of passes over the training data.
loss_function = nn.CrossEntropyLoss()
n_training_epochs = 1

Train the model on the training dataset

In [None]:
print("Begin Model Training...")
# Arguments, in order: model, training dataloader, validation dataloader,
# loss function, device string, number of epochs.
train(
    roberta_classifier_model,
    train_dataloader,
    val_dataloader,
    loss_function,
    device,
    n_training_epochs,
)
print("Training Complete")

Save the final model to disk

In [None]:
# Destination path for the serialized model; the load cell reads from the same variable.
output_model_file = "./roberta_ag_news.bin"

print("Saving model...")
# Writes the whole model object (not just its state dict) to disk.
torch.save(obj=roberta_classifier_model, f=output_model_file)
print("Model saved.")

Load model back up and perform inference on the test set

In [None]:
print("Loading model...")
# The checkpoint holds the entire pickled model object (it was written with
# torch.save(model, path)), not just a state dict. Recent PyTorch releases default
# torch.load to weights_only=True, which refuses to unpickle arbitrary model classes,
# so the full-object load has to be requested explicitly.
# NOTE: full unpickling can execute arbitrary code; only use it on a file this
# notebook wrote itself, never on an untrusted download.
# map_location places the restored tensors on the device selected earlier, even if
# the file was written from a different device.
roberta_classifier_model = torch.load(output_model_file, map_location=device, weights_only=False)
print("Model loaded.")

print("Evaluating model on test set...")
# Run the reloaded model over the held-out test dataloader using the same loss function.
infer(roberta_classifier_model, loss_function, test_dataloader, device)
print("Model evaluated.")