## Imports

In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate

In [2]:
import pandas as pd
import numpy as np
import evaluate
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EvalPrediction
from datasets import Dataset, load_dataset, Features, ClassLabel, Value
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from matplotlib import pyplot as plt

## Define the paths and experiment parameters

In [3]:
project_path = "/content/drive/MyDrive/VA_Project"
data_path = f"{project_path}/data/augmented_dataset"
model_save_path = f"{project_path}/models/roberta_augmented_data/saved_models"
log_directory = f"{project_path}/models/roberta_augmented_data/logs"
plots_save_path = f"{project_path}/models/roberta_augmented_data/plots"

In [4]:
# Parameters
model_name = 'roberta-large'
learning_rate = 4.00e-05
warmup_proportion = 0.1
train_batch_size = 32
num_train_epochs = 5
gradient_accumulation_steps = 1
num_labels = 151

## Load the train, val and test datasets into a Dataset object

In [5]:
# Load datasets
data_files = {"train": f"{data_path}/augmented_train_data.csv", "validation": f"{data_path}/original_val_data.csv", "test": f"{data_path}/original_test_data.csv"}
class_names = sorted(pd.read_csv(f"{data_path}/augmented_train_data.csv")["label"].unique())
dataset_features = Features({'query': Value('string'), 'label': ClassLabel(num_classes=151, names=class_names)})

dataset = load_dataset("csv", data_files=data_files, skiprows=1, column_names=['query', 'label'], features=dataset_features)



  0%|          | 0/3 [00:00<?, ?it/s]

In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['query', 'label'],
        num_rows: 30200
    })
    validation: Dataset({
        features: ['query', 'label'],
        num_rows: 3100
    })
    test: Dataset({
        features: ['query', 'label'],
        num_rows: 5500
    })
})

In [7]:
dataset['train'][0]

{'query': 'can you check my reservations for mortons under david winters',
 'label': 26}

In [8]:
dataset['train'].features

{'query': Value(dtype='string', id=None),
 'label': ClassLabel(names=['accept_reservations', 'account_blocked', 'alarm', 'application_status', 'apr', 'are_you_a_bot', 'balance', 'bill_balance', 'bill_due', 'book_flight', 'book_hotel', 'calculator', 'calendar', 'calendar_update', 'calories', 'cancel', 'cancel_reservation', 'car_rental', 'card_declined', 'carry_on', 'change_accent', 'change_ai_name', 'change_language', 'change_speed', 'change_user_name', 'change_volume', 'confirm_reservation', 'cook_time', 'credit_limit', 'credit_limit_change', 'credit_score', 'current_location', 'damaged_card', 'date', 'definition', 'direct_deposit', 'directions', 'distance', 'do_you_have_pets', 'exchange_rate', 'expiration_date', 'find_phone', 'flight_status', 'flip_coin', 'food_last', 'freeze_account', 'fun_fact', 'gas', 'gas_type', 'goodbye', 'greeting', 'how_busy', 'how_old_are_you', 'improve_credit_score', 'income', 'ingredient_substitution', 'ingredients_list', 'insurance', 'insurance_change', 'in

## Tokenize the dataset

In [None]:
def tokenize_function(examples):
    return tokenizer(examples["query"], padding=True, truncation=True, return_tensors="pt")

tokenizer = AutoTokenizer.from_pretrained(model_name)

encoded_dataset = dataset.map(tokenize_function, batched=True)

In [10]:
example = encoded_dataset['train'][0]
print(example.keys())
print(example)

dict_keys(['query', 'label', 'input_ids', 'attention_mask'])
{'query': 'can you check my reservations for mortons under david winters', 'label': 26, 'input_ids': [0, 7424, 47, 1649, 127, 13747, 13, 18631, 1790, 223, 44009, 31000, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}


In [11]:
tokenizer.decode(example['input_ids'])

'<s>can you check my reservations for mortons under david winters</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

## Set up the model and Training Arguments

In [12]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifie

In [13]:
args = TrainingArguments(
    output_dir = model_save_path,
    logging_dir = log_directory,
    logging_strategy='epoch',
    evaluation_strategy = "epoch",
    save_strategy = "no",
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=num_train_epochs,
)

## Define the compute metrics function to evaluate the model with the desired metrics

In [14]:
def multi_label_metrics(predictions, labels):
    y_true = labels
    y_pred = predictions.argmax(axis=1)

    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    accuracy = accuracy_score(y_true, y_pred)
    
    metrics = {'accuracy': accuracy,
               'precision': precision,
               'recall': recall,
               'f1': f1
               }

    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    result = multi_label_metrics(predictions=preds, labels=p.label_ids)

    return result

## Train the model

In [15]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [16]:
trainer.train()

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0062,0.249344,0.953548,0.955628,0.953548,0.952038
2,0.1028,0.204775,0.962581,0.965107,0.962581,0.961174
3,0.0517,0.210052,0.966129,0.968725,0.966129,0.965392
4,0.0306,0.201167,0.97129,0.972883,0.97129,0.970688
5,0.0177,0.197308,0.97,0.971479,0.97,0.969375


TrainOutput(global_step=4720, training_loss=0.24181082895246603, metrics={'train_runtime': 3587.0749, 'train_samples_per_second': 42.096, 'train_steps_per_second': 1.316, 'total_flos': 1.0691734948124112e+16, 'train_loss': 0.24181082895246603, 'epoch': 5.0})

## Evaluate the model on the validation set

In [17]:
trainer.evaluate()

{'eval_loss': 0.19730795919895172,
 'eval_accuracy': 0.97,
 'eval_precision': 0.971478964603685,
 'eval_recall': 0.97,
 'eval_f1': 0.9693751636718027,
 'eval_runtime': 18.3952,
 'eval_samples_per_second': 168.522,
 'eval_steps_per_second': 5.273,
 'epoch': 5.0}

## Evaluate the model on the test set

In [18]:
test_eval = trainer.predict(encoded_dataset["test"])

In [19]:
test_preds = test_eval.predictions.argmax(-1)
test_labels = test_eval.label_ids

report = classification_report(test_labels, test_preds, target_names=class_names)

print("Classification report:")
print(report)

Classification report:
                           precision    recall  f1-score   support

      accept_reservations       0.94      1.00      0.97        30
          account_blocked       0.85      0.93      0.89        30
                    alarm       0.97      1.00      0.98        30
       application_status       0.91      1.00      0.95        30
                      apr       0.88      0.77      0.82        30
            are_you_a_bot       0.91      1.00      0.95        30
                  balance       0.78      0.97      0.87        30
             bill_balance       0.93      0.93      0.93        30
                 bill_due       0.88      0.93      0.90        30
              book_flight       0.96      0.87      0.91        30
               book_hotel       0.88      1.00      0.94        30
               calculator       0.80      0.93      0.86        30
                 calendar       0.93      0.87      0.90        30
          calendar_update       0.93  

In [20]:
trainer.save_model(model_save_path)