## Imports

In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate

In [None]:
import evaluate
import numpy as np
import pandas as pd
from datasets import ClassLabel, Dataset, Features, Value, load_dataset
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    EvalPrediction,
    Trainer,
    TrainingArguments,
    set_seed,
)

## Define the paths and experiment parameters

In [None]:
# Root folder of the project.
# NOTE(review): this is a Google Drive location under /content/drive; no mount
# call is visible in this notebook, so the drive is assumed to be mounted already.
project_path = "/content/drive/MyDrive/VA_Project"
# Folder holding the train / validation / test CSV files.
data_path = f"{project_path}/data/original_dataset"
# Where the fine-tuned model is written (also used as the Trainer output_dir).
model_save_path = f"{project_path}/models/roberta_original_data/saved_models"
# Directory handed to TrainingArguments as logging_dir.
log_directory = f"{project_path}/models/roberta_original_data/logs"
# Target folder for plots (not referenced by the cells shown in this notebook).
plots_save_path = f"{project_path}/models/roberta_original_data/plots"

In [None]:
# Parameters
# Checkpoint name used for both the tokenizer and the classification model.
model_name = 'roberta-large'
# Learning rate passed to TrainingArguments.
learning_rate = 4.00e-05
# Intended warmup fraction (presumably of the total training steps — confirm).
warmup_proportion = 0.1
# Used as the per-device batch size for both training and evaluation.
train_batch_size = 32
num_train_epochs = 5
gradient_accumulation_steps = 1
# Number of output classes of the classification head.
num_labels = 151

## Load the train, validation and test datasets into a DatasetDict

In [None]:
# Load datasets
# Map each split name to the CSV file that backs it.
data_files = {
    "train": f"{data_path}/original_train_data.csv",
    "validation": f"{data_path}/original_val_data.csv",
    "test": f"{data_path}/original_test_data.csv",
}

# Label vocabulary: sorted unique values of the training CSV's "label" column.
# The position of a name in this list becomes its integer class id.
class_names = sorted(pd.read_csv(data_files["train"])["label"].unique())

# The model head is sized from `num_labels`, so fail early (with a clear
# message) if the data does not contain exactly that many classes.
if len(class_names) != num_labels:
    raise ValueError(
        f"Expected {num_labels} classes but found {len(class_names)} in the training data"
    )

# Use the configured `num_labels` instead of repeating the literal class count.
dataset_features = Features({
    'query': Value('string'),
    'label': ClassLabel(num_classes=num_labels, names=class_names),
})

# skiprows=1 drops each CSV's header row because the column names are supplied
# explicitly through `column_names`.
dataset = load_dataset(
    "csv",
    data_files=data_files,
    skiprows=1,
    column_names=['query', 'label'],
    features=dataset_features,
    download_mode="force_redownload",
)

In [None]:
# Show the splits with their columns and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['query', 'label'],
        num_rows: 15100
    })
    validation: Dataset({
        features: ['query', 'label'],
        num_rows: 3100
    })
    test: Dataset({
        features: ['query', 'label'],
        num_rows: 5500
    })
})

In [None]:
# Peek at the first training example (query text and integer label id).
dataset['train'][0]

{'query': 'what products are on my shopping list', 'label': 112}

In [None]:
# Inspect the column schema, including the ClassLabel name list.
dataset['train'].features

{'query': Value(dtype='string', id=None),
 'label': ClassLabel(names=['accept_reservations', 'account_blocked', 'alarm', 'application_status', 'apr', 'are_you_a_bot', 'balance', 'bill_balance', 'bill_due', 'book_flight', 'book_hotel', 'calculator', 'calendar', 'calendar_update', 'calories', 'cancel', 'cancel_reservation', 'car_rental', 'card_declined', 'carry_on', 'change_accent', 'change_ai_name', 'change_language', 'change_speed', 'change_user_name', 'change_volume', 'confirm_reservation', 'cook_time', 'credit_limit', 'credit_limit_change', 'credit_score', 'current_location', 'damaged_card', 'date', 'definition', 'direct_deposit', 'directions', 'distance', 'do_you_have_pets', 'exchange_rate', 'expiration_date', 'find_phone', 'flight_status', 'flip_coin', 'food_last', 'freeze_account', 'fun_fact', 'gas', 'gas_type', 'goodbye', 'greeting', 'how_busy', 'how_old_are_you', 'improve_credit_score', 'income', 'ingredient_substitution', 'ingredients_list', 'insurance', 'insurance_change', 'in

## Tokenize the dataset

In [None]:
# Turn off the `datasets` cache for this session.
# `set_caching_enabled` is a deprecated API, so prefer `disable_caching` and
# only fall back to the old setter when the installed release lacks it.
try:
    from datasets import disable_caching
except ImportError:
    from datasets import set_caching_enabled
    set_caching_enabled(False)
else:
    disable_caching()

In [None]:
# Tokenizer belonging to the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_function(examples):
    """Tokenize the ``query`` column of a batch of rows.

    Args:
        examples: A batch from ``Dataset.map(..., batched=True)``; only its
            ``"query"`` entry is read.

    Returns:
        The tokenizer output for the batch, padded and truncated, requested
        as PyTorch tensors.
    """
    queries = examples["query"]
    encoded = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
    return encoded


# Apply the tokenizer to every split, one batch of rows at a time.
encoded_dataset = dataset.map(tokenize_function, batched=True)

In [None]:
# Inspect the first encoded training row: its column names, then the full record.
example = encoded_dataset['train'][0]
print(example.keys())
print(example)

dict_keys(['query', 'label', 'input_ids', 'attention_mask'])
{'query': 'what products are on my shopping list', 'label': 112, 'input_ids': [0, 12196, 785, 32, 15, 127, 3482, 889, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}


In [None]:
# Decode the token ids back to text to check the special and padding tokens.
tokenizer.decode(example['input_ids'])

'<s>what products are on my shopping list</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

## Set up the model and Training Arguments

In [None]:
# The classification head is not part of the pretrained checkpoint and is
# initialised randomly, so seed the RNGs *before* building the model to make
# runs repeatable. 42 matches the default `seed` of TrainingArguments.
set_seed(42)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classif

In [None]:
args = TrainingArguments(
    output_dir = model_save_path,
    logging_dir = log_directory,
    # Log and evaluate once per epoch; write no intermediate checkpoints.
    logging_strategy='epoch',
    evaluation_strategy = "epoch",
    save_strategy = "no",
    learning_rate=learning_rate,
    # Apply the warmup fraction configured in the parameters cell; it was
    # declared there but previously never handed to the trainer.
    warmup_ratio=warmup_proportion,
    per_device_train_batch_size=train_batch_size,
    # Evaluation reuses the training batch size.
    per_device_eval_batch_size=train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=num_train_epochs,
)

## Define the compute metrics function to evaluate the model with the desired metrics

In [None]:
def multi_label_metrics(predictions, labels):
    """Compute accuracy and weighted precision / recall / F1.

    Despite the name, each example gets exactly one predicted class: the
    index of the largest value along axis 1 of ``predictions``.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        labels: Ground-truth class ids, one per example.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    y_true = labels
    y_pred = predictions.argmax(axis=1)

    # zero_division=0 scores a class with no predicted (or no true) samples as
    # 0 — the same value sklearn uses by default — without emitting the
    # undefined-metric warning on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted", zero_division=0
    )
    accuracy = accuracy_score(y_true, y_pred)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

def compute_metrics(p: EvalPrediction):
    """Adapter between the Trainer's evaluation output and the metric helper.

    When ``p.predictions`` is a tuple, only its first element is scored;
    otherwise the predictions are used as they are.
    """
    scores = p.predictions
    if isinstance(scores, tuple):
        scores = scores[0]
    return multi_label_metrics(predictions=scores, labels=p.label_ids)

## Train the model

In [None]:
# Assemble the Trainer: model + training arguments, the encoded train and
# validation splits, the tokenizer, and the metric callback.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune the model; validation metrics are reported after every epoch.
trainer.train()

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6787,0.393161,0.916774,0.918715,0.916774,0.908644
2,0.1576,0.239302,0.953871,0.957469,0.953871,0.951619
3,0.0597,0.19977,0.963871,0.96498,0.963871,0.962835
4,0.0271,0.187592,0.97,0.971687,0.97,0.968765
5,0.0137,0.194149,0.968387,0.970142,0.968387,0.967065


  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=2360, training_loss=0.3873694405717365, metrics={'train_runtime': 1853.6761, 'train_samples_per_second': 40.73, 'train_steps_per_second': 1.273, 'total_flos': 5038743479243544.0, 'train_loss': 0.3873694405717365, 'epoch': 5.0})

## Evaluate the model on the validation set

In [None]:
# No dataset argument: evaluates on the eval_dataset given to the Trainer (the validation split).
trainer.evaluate()

{'eval_loss': 0.19414904713630676,
 'eval_accuracy': 0.9683870967741935,
 'eval_precision': 0.9701420962836774,
 'eval_recall': 0.9683870967741935,
 'eval_f1': 0.9670651652213134,
 'eval_runtime': 19.6594,
 'eval_samples_per_second': 157.685,
 'eval_steps_per_second': 4.934,
 'epoch': 5.0}

## Evaluate the model on the test set

In [None]:
# Run inference on the test split; the result's `.predictions` and `.label_ids` are used in the next cell.
test_eval = trainer.predict(encoded_dataset["test"])

In [None]:
# Predicted class id = index of the highest score for each test example.
test_preds = test_eval.predictions.argmax(-1)
test_labels = test_eval.label_ids

# Pass the full id range explicitly so row i is always named class_names[i].
# Without `labels`, sklearn infers the label set from the data and raises if
# any class is absent from both the true and the predicted ids.
# zero_division=0 keeps sklearn's default value for undefined metrics but
# silences the accompanying warning.
report = classification_report(
    test_labels,
    test_preds,
    labels=list(range(len(class_names))),
    target_names=class_names,
    zero_division=0,
)

print("Classification report:")
print(report)

Classification report:
                           precision    recall  f1-score   support

      accept_reservations       0.88      1.00      0.94        30
          account_blocked       0.94      0.97      0.95        30
                    alarm       0.91      1.00      0.95        30
       application_status       0.94      1.00      0.97        30
                      apr       0.88      0.73      0.80        30
            are_you_a_bot       0.94      1.00      0.97        30
                  balance       0.86      1.00      0.92        30
             bill_balance       0.88      0.97      0.92        30
                 bill_due       0.88      0.97      0.92        30
              book_flight       0.93      0.87      0.90        30
               book_hotel       0.88      1.00      0.94        30
               calculator       0.80      0.93      0.86        30
                 calendar       0.96      0.87      0.91        30
          calendar_update       0.91  

In [None]:
# Persist the fine-tuned model. Training ran with save_strategy = "no", so no
# checkpoints were written before this call.
trainer.save_model(model_save_path)