In [None]:
import pandas as pd
import numpy as np
import evaluate
import torch
import gc
import os

from transformers import (
    AutoAdapterModel, 
    AutoTokenizer, 
    PfeifferConfig,
    TrainingArguments, 
    AdapterTrainer,
    AutoConfig, 
    TrainerCallback, 
    EarlyStoppingCallback
)
from datasets import (
    Dataset,
    DatasetDict
)

In [None]:
# Folders (relative to this notebook) holding the processed CSV files and
# receiving the training checkpoints.
DATA_PATH = "../../data/processed"
MODEL_PATH = "../../models"

# Hugging Face identifiers of the base models to train an adapter for.
models_name = [
    "bert-base-cased", "roberta-base", "distilbert-base-cased",
    "microsoft/deberta-base", "facebook/FairBERTa",
]

# Task names; each one is also the prefix of its `<task>_train.csv` /
# `<task>_test.csv` files.
tasks = [
    "buzzfeed", "politifact", "twittercovidq2", "clef22",
    "propaganda", "webis", "pheme", "basil",
    "shadesoftruth", "fingerprints", "clickbait",
]

# Sub-folders of DATA_PATH to iterate over.
dimensions = ["merged"]

In [None]:
class AdapterDropTrainerCallback(TrainerCallback):
    """Trainer callback that randomly skips the adapter in the lowest layers.

    At the start of every training step a random number ``k`` is drawn and the
    first active adapter is re-activated with ``skip_layers=[0, ..., k-1]``.
    As soon as the step is over the skipping is switched off again, so that
    any evaluation the trainer runs afterwards uses the adapter in every layer
    and yields results that are comparable across epochs.
    """

    # Layer count assumed when the model config does not expose
    # `num_hidden_layers`; 12 reproduces the formerly hard-coded
    # `np.random.randint(0, 11)`.
    DEFAULT_NUM_LAYERS = 12

    @staticmethod
    def _set_skip_layers(model, skip_layers):
        """Re-activate the model's first active adapter with `skip_layers`."""
        model.set_active_adapters(model.active_adapters[0], skip_layers=skip_layers)

    def on_step_begin(self, args, state, control, **kwargs):
        """Draw a fresh random prefix of layers to skip for this step."""
        model = kwargs['model']
        num_layers = getattr(model.config, "num_hidden_layers", None) or self.DEFAULT_NUM_LAYERS
        # Exclusive upper bound of the draw. With `num_layers - 1` at most
        # layers 0 .. num_layers-3 are skipped, so the adapter always stays
        # active in the top layers (for 12 layers this is randint(0, 11), as
        # before). A fixed bound of 11 would skip every adapter layer of a
        # shallower model in many steps.
        upper_bound = max(num_layers - 1, 1)
        skip_layers = list(range(np.random.randint(0, upper_bound)))
        self._set_skip_layers(model, skip_layers)

    def on_step_end(self, args, state, control, **kwargs):
        """Deactivate layer skipping once the training step has finished.

        The trainer's `on_evaluate` event is fired only *after* an evaluation
        phase, so resetting there is too late: the evaluation would still run
        with the last randomly chosen `skip_layers`. Resetting at the end of
        each step guarantees a full adapter for whatever evaluation follows.
        """
        self._set_skip_layers(kwargs['model'], None)

    def on_evaluate(self, args, state, control, **kwargs):
        """Keep layer skipping disabled after an evaluation phase.

        Redundant with `on_step_end` during training; kept so the model is
        also left without skipped layers after a stand-alone evaluation.
        """
        self._set_skip_layers(kwargs['model'], None)

In [None]:
# The metrics do not depend on the model, task or dimension, so they are
# loaded once instead of once per run.
f1_metric = evaluate.load("f1")
accuracy_metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Return macro-F1 and accuracy for a `(logits, labels)` evaluation pair."""
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    results = {}
    results.update(f1_metric.compute(predictions=preds, references=labels, average="macro"))
    results.update(accuracy_metric.compute(predictions=preds, references=labels))
    return results


# One entry per finished run: run identifiers plus the test-set metrics, so
# the results survive the loop instead of being discarded.
test_results = []

for model_name in models_name:
    for task in tasks:
        for dimension in dimensions:

            CONFIG = {
                "task_name": task,
                "model_name": model_name,
                "max_length": 128,
                "batch_size": 32,
                "epochs": 20,
                "seed" : 0,
                "learning_rate": 5e-4,
            }

            # ---- Data -------------------------------------------------------
            dataset_path = os.path.join(DATA_PATH, dimension)

            # The original `text` column is dropped and `perturbed_text` takes
            # its place as the model input.
            train_df = (
                pd.read_csv(os.path.join(dataset_path, f"{CONFIG['task_name']}_train.csv"))
                .drop(columns=['text'])
                .rename(columns={'perturbed_text': 'text'})
            )
            test_df = (
                pd.read_csv(os.path.join(dataset_path, f"{CONFIG['task_name']}_test.csv"))
                .drop(columns=['text'])
                .rename(columns={'perturbed_text': 'text'})
            )

            # Seed the split: without it the train/validation partition
            # changes from run to run, so different models would be selected
            # and early-stopped on different validation sets.
            train_valid = Dataset.from_pandas(train_df).train_test_split(
                test_size=0.2, seed=CONFIG["seed"]
            )
            test = Dataset.from_pandas(test_df)

            dataset = DatasetDict(
                {
                    "train": train_valid["train"],
                    "valid": train_valid["test"],
                    "test": test,
                }
            )
            dataset = dataset.class_encode_column("labels")

            labels = dataset["train"].features["labels"].names
            num_labels = len(labels)
            id2label = {idx: label for idx, label in enumerate(labels)}
            label2id = {label: idx for idx, label in enumerate(labels)}

            # Fail loudly if a split encodes a label with a different id than
            # the training split; training or scoring on such data would be
            # silently wrong.
            for split_name, split in dataset.items():
                for idx, name in enumerate(split.features["labels"].names):
                    if label2id.get(name, idx) != idx:
                        raise ValueError(
                            f"Label '{name}' has id {idx} in split '{split_name}' "
                            f"but id {label2id[name]} in the training split "
                            f"(task '{task}', dimension '{dimension}')."
                        )

            # ---- Tokenization -----------------------------------------------
            tokenizer = AutoTokenizer.from_pretrained(CONFIG["model_name"])

            def tokenize_function(examples):
                """Tokenize a batch of texts, padded/truncated to `max_length`."""
                return tokenizer(
                    examples["text"], padding="max_length", truncation=True, max_length=CONFIG["max_length"]
                )

            tokenized_datasets = dataset.map(tokenize_function, batched=True)

            train_dataset = tokenized_datasets["train"]
            valid_dataset = tokenized_datasets["valid"]
            test_dataset = tokenized_datasets["test"]

            # ---- Model ------------------------------------------------------
            def get_model():
                """Build the base model with a fresh task adapter and classification head."""
                config = AutoConfig.from_pretrained(
                    CONFIG["model_name"],
                    num_labels=num_labels,
                    id2label=id2label,
                    label2id=label2id,
                )
                task_model = AutoAdapterModel.from_pretrained(
                    CONFIG["model_name"],
                    config=config
                )
                adapter_config = PfeifferConfig()
                task_model.add_adapter(CONFIG["task_name"], config=adapter_config)
                task_model.train_adapter(CONFIG["task_name"])
                task_model.add_classification_head(
                    CONFIG["task_name"],
                    num_labels=len(id2label),
                    id2label=id2label,
                )
                task_model.set_active_adapters(CONFIG["task_name"])

                return task_model

            # ---- Training ---------------------------------------------------
            output_dir = os.path.join(MODEL_PATH, CONFIG['model_name'], dimension, task)
            metric_for_best_model = "accuracy"
            strategy = "epoch"
            weight_decay = 0.01
            load_best_model_at_end = True
            save_total_limit = 1
            early_stopping_patience = 5
            overwrite_output_dir = True

            training_args = TrainingArguments(
                output_dir=output_dir,
                overwrite_output_dir=overwrite_output_dir,
                evaluation_strategy=strategy,
                logging_strategy=strategy,
                save_strategy=strategy,
                learning_rate=CONFIG["learning_rate"],
                per_device_train_batch_size=CONFIG["batch_size"],
                per_device_eval_batch_size=CONFIG["batch_size"],
                num_train_epochs=CONFIG["epochs"],
                weight_decay=weight_decay,
                load_best_model_at_end=load_best_model_at_end,
                metric_for_best_model=metric_for_best_model,
                save_total_limit=save_total_limit,
                seed=CONFIG["seed"]
            )

            trainer = AdapterTrainer(
                model_init=get_model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=valid_dataset,
                compute_metrics=compute_metrics,
                callbacks = [
                    EarlyStoppingCallback(early_stopping_patience=early_stopping_patience),
                    AdapterDropTrainerCallback()
                ]
            )

            trainer.train()
            test_metrics = trainer.evaluate(test_dataset, metric_key_prefix="test")
            test_results.append(
                {"model": model_name, "task": task, "dimension": dimension, **test_metrics}
            )

            # ---- Cleanup ----------------------------------------------------
            # Drop the reference to the trainer (and with it the model) before
            # collecting; otherwise nothing can be freed here and the model of
            # this run would still be alive while the next one is created.
            del trainer
            gc.collect()
            torch.cuda.empty_cache()