In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import DataCollatorWithPadding, get_scheduler
from datasets import load_dataset
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Directory containing the ADEPT train/validation/test JSON split files
# (a relative path, resolved against the notebook's working directory).
adept_data_path = "../datasets/adept/train-dev-test-split"

In [5]:
# Echo the configured dataset directory as the cell output.
adept_data_path

'../datasets/adept/train-dev-test-split'

In [6]:
# File names of the three dataset splits inside the dataset directory.
train_split, validation_split, test_split = "train.json", "val.json", "test.json"

In [7]:
# Map every split name to "<dataset directory>/<split file name>".
data_files = {
    split_name: f"{adept_data_path}/{file_name}"
    for split_name, file_name in (
        ("train", train_split),
        ("validation", validation_split),
        ("test", test_split),
    )
}

In [8]:
# Load the split files with the generic "json" loader; the keys of `data_files`
# ("train", "validation", "test") become the split names of the result.
adept_dataset = load_dataset("json", data_files=data_files)
# Display the dataset summary (splits, column names, row counts).
adept_dataset

DatasetDict({
    train: Dataset({
        features: ['sentence2', 'label', 'idx', 'sentence1', 'modifier', 'noun'],
        num_rows: 12892
    })
    validation: Dataset({
        features: ['sentence2', 'label', 'idx', 'sentence1', 'modifier', 'noun'],
        num_rows: 1611
    })
    test: Dataset({
        features: ['sentence2', 'label', 'idx', 'sentence1', 'modifier', 'noun'],
        num_rows: 1612
    })
})

In [9]:
# Short alias -> Hugging Face Hub checkpoint id for the candidate models.
# NOTE(review): no later cell visible here reads this mapping, and the model
# that is finally fine-tuned ('microsoft/deberta-base') is not one of these
# entries - confirm whether this is still needed.
models_dict = dict(
    BERT="bert-base-uncased",
    ROBERTA="grammarly/detexd-roberta-base",
    DEBERTA="sileod/deberta-v3-base-tasksource-nli",
)

In [10]:
# NOTE(review): despite its name this literal is a one-element *set*
# ({"learning_rate"}), not a dict - the key has no value attached. No later
# cell visible here reads it; it looks like an unfinished placeholder.
# Confirm, then either complete it as a mapping or remove the cell.
params_dict = {
    "learning_rate"
}

In [11]:
# tokenized_dataset = tokenized_dataset.remove_columns(['sentence1', 'sentence2', 'idx'])
# tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
# tokenized_dataset = tokenized_dataset.with_format("torch")
# tokenized_dataset

In [12]:
import evaluate

In [13]:
# Load the ROC AUC metric in its "multiclass" configuration; it is consumed by
# `compute_metrics` during evaluation.
roc_auc = evaluate.load("roc_auc", "multiclass")

In [14]:
def compute_metrics(eval_preds):
    """Compute the macro-averaged one-vs-one ROC AUC for an evaluation pass.

    Parameters
    ----------
    eval_preds
        A ``(logits, labels)`` pair. ``logits`` holds the raw class scores
        with the class axis last, either as a NumPy array or as a tuple whose
        first element is that array; ``labels`` holds the reference class ids.

    Returns
    -------
    dict
        Whatever ``roc_auc.compute`` returns for the softmax probabilities
        (the training log in this notebook reports it under ``roc_auc``).
    """
    logits, labels = eval_preds
    # Models that return extra outputs hand the predictions over as a tuple;
    # this assumes the logits are its first entry (the usual convention).
    if isinstance(logits, tuple):
        logits = logits[0]
    # Cast to float32 so half-precision logits do not break the CPU softmax.
    logits_tensor = torch.from_numpy(np.asarray(logits)).float()
    # ROC AUC needs per-class probabilities, not raw logits.
    probabilities = torch.nn.functional.softmax(logits_tensor, dim=-1)
    return roc_auc.compute(
        prediction_scores=probabilities.numpy(),
        references=labels,
        multi_class='ovo',
        average="macro",
    )

In [15]:
import optuna
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, DataCollatorWithPadding
from datasets import load_dataset
from sklearn.metrics import roc_auc_score

In [17]:
# Hyperparameters used for the final fine-tuning run, hard-coded here.
# NOTE(review): presumably copied from a hyperparameter search that is not
# part of the visible cells - confirm the provenance.
best_params = dict(
    learning_rate=3.660515504756857e-05,
    num_train_epochs=3,
    model_name='microsoft/deberta-base',
)
# Checkpoint id of the model to fine-tune.
best_model_name = best_params.get("model_name")

In [18]:

# Fine-tune the best model with the best hyperparameters.

# Seed torch before the model is built: the classification head is freshly
# initialised, and the Trainer's own seeding only runs once the model already
# exists. 42 matches the default `seed` of TrainingArguments.
torch.manual_seed(42)

# num_labels=5: presumably the number of label classes in the dataset - confirm.
final_model = AutoModelForSequenceClassification.from_pretrained(
    best_model_name, num_labels=5, ignore_mismatched_sizes=True
)
final_tokenizer = AutoTokenizer.from_pretrained(best_model_name)
# Pads each batch dynamically to its longest sequence at collation time.
final_data_collator = DataCollatorWithPadding(tokenizer=final_tokenizer)

# Tokenise in batches rather than one example per call; the produced columns
# are the same, only faster to compute.
# NOTE(review): only `sentence2` is fed to the model and `sentence1` is
# dropped below - confirm that ignoring the first sentence is intentional.
final_tokenized_dataset = adept_dataset.map(
    lambda batch: final_tokenizer(batch['sentence2'], truncation=True),
    batched=True,
)
# Keep only the tokenizer outputs and the label; the Trainer expects the
# target column to be called "labels".
final_tokenized_dataset = final_tokenized_dataset.remove_columns(['sentence1', 'sentence2', 'idx', 'modifier', 'noun'])
final_tokenized_dataset = final_tokenized_dataset.rename_column("label", "labels")
final_tokenized_dataset = final_tokenized_dataset.with_format("torch")

final_trainer = Trainer(
    model=final_model,
    args=TrainingArguments(
        # best_model_name contains a "/", so this resolves to a nested
        # directory ("./final_output_microsoft/deberta-base").
        output_dir=f"./final_output_{best_model_name}",
        learning_rate=best_params["learning_rate"],
        num_train_epochs=best_params["num_train_epochs"],
        # Run validation (and `compute_metrics`) at the end of every epoch.
        evaluation_strategy="epoch",
        # add other training arguments
    ),
    data_collator=final_data_collator,
    train_dataset=final_tokenized_dataset["train"],
    eval_dataset=final_tokenized_dataset["validation"],
    compute_metrics=compute_metrics
)

final_trainer.train()

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Roc Auc
1,0.8776,0.888738,0.699447
2,0.7806,0.888433,0.727887
3,0.5858,1.040725,0.710056


TrainOutput(global_step=4836, training_loss=0.7740176495093762, metrics={'train_runtime': 842.6723, 'train_samples_per_second': 45.897, 'train_steps_per_second': 5.739, 'total_flos': 324596153516232.0, 'train_loss': 0.7740176495093762, 'epoch': 3.0})

In [19]:
# Score the fine-tuned model on the held-out test split.
# NOTE(review): the TrainingArguments in this notebook do not configure
# best-checkpoint selection, so this presumably evaluates the weights from the
# final epoch rather than the best validation epoch - confirm.
test_results = final_trainer.evaluate(final_tokenized_dataset['test'])

In [20]:
# Show the test-set metrics dictionary as the cell output.
test_results

{'eval_loss': 0.939876914024353,
 'eval_roc_auc': 0.7260289074261056,
 'eval_runtime': 5.2499,
 'eval_samples_per_second': 307.055,
 'eval_steps_per_second': 38.477,
 'epoch': 3.0}