In [None]:
# Install the third-party packages this notebook depends on. The %pip magic installs into the environment of the
# running kernel. No versions are pinned here, so whatever release is current at install time gets used.
%pip install transformers datasets torch

In [None]:
import torch
import torch.nn as nn
from ag_news_trainer import infer, train
from custom_dataloaders import construct_dataloaders
from roberta_classification_model import RobertaClsModel
from torch import cuda
from transformers import AutoModelForSequenceClassification, AutoTokenizer

Choose your pre-trained model.

In [None]:
# Name of the pre-trained checkpoint handed to the tokenizer and model loaders.
# "facebook/opt-125m" is an OPT alternative you can try instead.
hf_model_name = "roberta-base"
# True selects Hugging Face's AutoModelForSequenceClassification; False selects the local RobertaClsModel.
# NOTE(review): confirm no later cell reassigns this flag before the model is built.
use_hf_sequence_classification = True

Choose your dataset. Make sure that the number of classes in your model matches the number of different labels in that dataset.

In [None]:
# Exactly one dataset_name / dataset_num_labels pair should be active. Assigning both pairs in sequence means the
# last one silently wins. AG News is the active choice because the dataloader and model cells further down are
# hard-coded for it ("ag_news", 4 labels); this keeps the saved checkpoint's file name consistent with the data
# the model was actually trained on.
dataset_name = "ag_news"
dataset_num_labels = 4

# Alternative: SST-2 (binary sentiment). To use it, uncomment the two lines below, comment out the pair above, and
# update the dataset name passed to construct_dataloaders and the num_labels passed to the model to match.
# dataset_name = "SetFit/sst2"
# dataset_num_labels = 2

Set up the dataloaders.

In [None]:
# Load the tokenizer that belongs to the chosen checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
# Build the train / validation / test dataloaders. The dataset is hard-coded to AG News here.
# NOTE(review): train_split_ratio presumably controls the train/validation split — confirm in custom_dataloaders.
train_dataloader, val_dataloader, test_dataloader = construct_dataloaders(
    dataset_name="ag_news",
    tokenizer=tokenizer,
    batch_size=8,
    train_split_ratio=0.8,
)

Set up the different variables we'd like for training.

In [None]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = "cuda" if cuda.is_available() else "cpu"
print(f"Detected Device {device}")
# We provide two options. The first is our own model built on top of the vanilla RoBERTa model. The second is
# HuggingFace's AutoModel class, which essentially does the same thing for RoBERTa, but with support for additional
# base models such as OPT and GPT-J.
# The choice is driven by `use_hf_sequence_classification` from the model-selection cell near the top of the
# notebook. It is deliberately NOT reassigned here, so that the setting made there actually takes effect.
classifier_model = (
    # num_labels=4 matches the AG News dataset that the dataloader cell loads.
    AutoModelForSequenceClassification.from_pretrained(hf_model_name, num_labels=4)
    if use_hf_sequence_classification
    else RobertaClsModel()
)
loss_function = nn.CrossEntropyLoss()
# Training budget handed to train(); see ag_news_trainer for how epochs and steps interact.
n_training_epochs = 1
n_training_steps = 300

Train the model on the training dataset

In [None]:
print("Begin Model Training...")
# Positional arguments, in order: model, train loader, validation loader, loss, device, epoch count, step count.
train(classifier_model, train_dataloader, val_dataloader, loss_function, device, n_training_epochs, n_training_steps)
print("Training Complete")

Save the final model to disk

In [None]:
print("Saving model...")
# Hub-style names can contain an organisation prefix ("org/name"); keep only the part after the last "/" so the
# names are safe to embed in a file name.
hf_model_name_formatted = hf_model_name.rpartition("/")[2]
dataset_name_formatted = dataset_name.rpartition("/")[2]
output_model_file = f"./{hf_model_name_formatted}_{dataset_name_formatted}.bin"
# Serialises the whole model object (not just a state_dict) to the path above.
torch.save(classifier_model, output_model_file)
print("Model saved to", output_model_file)

Load the model back up and perform inference on the test set

In [None]:
print("Loading model...")
# The checkpoint was written with torch.save(classifier_model, ...), i.e. the entire model object was pickled rather
# than just a state_dict. Recent PyTorch releases default torch.load() to weights_only=True, which refuses to
# rebuild arbitrary objects such as this model, so opt out explicitly.
# SECURITY: full unpickling can execute arbitrary code. This is acceptable here only because the file was produced
# by this notebook; never load a checkpoint from an untrusted source this way.
classifier_model = torch.load(output_model_file, weights_only=False)
print("Model loaded.")

print("Evaluating model on test set...")
# infer() yields the accuracy first and the loss second, as unpacked below.
test_accuracy, test_loss = infer(classifier_model, loss_function, test_dataloader, device)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}%")
print("Model evaluated.")