## Model training

In [None]:
import pickle

import torch
import torch.nn as nn
from transformers import TrainingArguments, Trainer
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM,
    AutoConfig,
    BertModel,
)
from transformers.modeling_outputs import SequenceClassifierOutput

### Loading the data


In [None]:
def _load_pickled(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Read the three pickled dataset splits; the paths are relative to the
# current working directory.
train_dataset = _load_pickled('train_dataset_tokenized.pkl')
val_dataset = _load_pickled('val_data_tokenized.pkl')
test_dataset = _load_pickled('test_data_tokenized.pkl')

### Setting up the training arguments

In [None]:
# Trainer configuration, gathered in one mapping so each hyper-parameter sits
# on its own line and the set can be inspected before it is handed over.
_training_options = {
    "output_dir": "./output",
    # Evaluate and checkpoint on the same schedule (once per epoch).
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "learning_rate": 3e-5,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "num_train_epochs": 20,
    "warmup_ratio": 0.1,
    "weight_decay": 0.001,
    # Reload the best checkpoint at the end, ranked by the "accuracy" metric.
    "load_best_model_at_end": True,
    "metric_for_best_model": "accuracy",
    "save_total_limit": 1,
}
args = TrainingArguments(**_training_options)

# Name of the pretrained checkpoint used to initialise the BERT encoder.
pre_trained_BERTmodel = 'bert-large-uncased'

### Modifying BERT for our classification task

In [None]:
class BertModelWithCustomLossFunction(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    The loss is computed inside ``forward`` (cross-entropy over the logits)
    so the model can be handed to ``Trainer`` directly.

    Args:
        num_labels: Number of target classes. When ``None`` (the default) it
            is derived from the number of distinct values in the
            module-level ``df_train["intent"]`` column.
        model_name: Name or path of the pretrained checkpoint. When ``None``
            (the default) the module-level ``pre_trained_BERTmodel`` is used.
    """

    def __init__(self, num_labels=None, model_name=None):
        super().__init__()
        # Fall back to the notebook globals only when the caller did not
        # supply explicit values, so the class also works outside this file.
        if num_labels is None:
            num_labels = len(df_train["intent"].unique())
        if model_name is None:
            model_name = pre_trained_BERTmodel

        self.num_labels = num_labels
        self.bert = BertModel.from_pretrained(
            model_name, num_labels=self.num_labels
        )
        self.dropout = nn.Dropout(0.1)
        # Size the head from the loaded encoder's config instead of
        # hard-coding 1024, so checkpoints with another hidden size work too.
        self.classifier = nn.Linear(
            self.bert.config.hidden_size, self.num_labels
        )

    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        """Run the encoder and the classification head.

        Args:
            input_ids: Token ids passed to the BERT encoder.
            attention_mask: Attention mask passed to the BERT encoder.
            labels: Optional class indices. When given, a cross-entropy loss
                is computed against the logits.
            token_type_ids: Optional segment ids forwarded to the encoder;
                ``None`` leaves the encoder's own default in place.

        Returns:
            SequenceClassifierOutput carrying ``loss`` (``None`` when no
            labels were given), ``logits``, and the encoder's
            ``hidden_states`` / ``attentions``.
        """
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # Classify from the encoder's pooled output.
        pooled = self.dropout(outputs.pooler_output)
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Flatten both operands so labels shaped (batch, 1) are accepted
            # as well as labels shaped (batch,).
            loss = loss_fct(
                logits.view(-1, self.num_labels), labels.view(-1)
            )

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

### Setting up metrics for accuracy, precision, recall, and F1

In [None]:
def compute_metrics(pred):
    """Score a set of predictions against their reference labels.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the predicted class is the
            argmax over the last axis).

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1``; the last three are computed with ``average='weighted'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    # The fourth element returned by the helper is not reported.
    weighted = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    scores.update(zip(('precision', 'recall', 'f1'), weighted[:3]))
    return scores

### Training the model

In [None]:
# Fine-tune a fresh model on every entry of `train_dataset` and keep track of
# the highest test accuracy seen so far.
# NOTE(review): this assumes `train_dataset` is an iterable of datasets (one
# training run per element) - confirm against the code that pickled it.
best_accuracy = 0
for train_data in train_dataset:
    # Start each run from a newly initialised model so runs do not share weights.
    BERT_model = BertModelWithCustomLossFunction()
    trainer = Trainer(
        model=BERT_model,
        args=args,
        train_dataset=train_data,
        # The validation split is loaded as `val_dataset`; `eval_dataset` is
        # not defined anywhere in this file.
        eval_dataset=val_dataset,
        tokenizer=BERT_tokenizer,
        compute_metrics=compute_metrics,
    )
    trainer.train()

    # Metrics returned by `predict` are prefixed with "test_".
    evaluation_metrics = trainer.predict(test_dataset)
    accuracy = evaluation_metrics.metrics['test_accuracy']
    best_accuracy = max(accuracy, best_accuracy)
    print(f"Best Test Accuracy for this training dataset: {accuracy}")

    # Hand cached GPU memory back to the driver before the next run.
    torch.cuda.empty_cache()