# Fine-tuning DistilBERT on the TweetEval dataset

In [None]:
# Clone the project repository into ./BERT-finetuning (relative to the current
# working directory).
# NOTE(review): presumably this repository provides `requirements.txt` and the
# `helper` package used by later cells; if so, the working directory must be
# the cloned folder before they run -- confirm.
!git clone https://github.com/aXhyra/BERT-finetuning

In [None]:
# Install the Python dependencies listed in requirements.txt, then enable
# Git LFS for the current user.
# NOTE(review): `requirements.txt` is resolved against the current working
# directory; if it lives inside the cloned BERT-finetuning folder, a
# `%cd BERT-finetuning` is needed first -- confirm.
!pip install -r requirements.txt
!git lfs install

In [1]:
from transformers import TrainingArguments
import os
import numpy as np

from helper import LoginHelper, Engine, Dataset
from helper.engine import retrieve_hyperparameter_config
from helper.dataset import compute_metrics

## HuggingFace and wandb login

In [None]:
# Export the credentials as environment variables before logging in.
# Replace both placeholders with real tokens.
# NOTE(review): presumably LoginHelper reads `hf_token` / `wandb_token` from
# the environment -- confirm against the helper package.
os.environ.update(
    {
        "hf_token": "YOUR_HUGGINGFACE_TOKEN",
        "wandb_token": "YOUR_WANDB_TOKEN",
    }
)
LoginHelper(
    wandb_project_name="BERT-finetuning",
    wandb_save_models=True,
    tokenizer_parallelism=False,
)

### Run hyperparameter search and train a model per seed on the best parameters

In [None]:
# For every TweetEval task: run a hyperparameter search with DistilBERT, then
# train one model per seed and delete the local output directory afterwards.
import shutil

tasks = ["irony", "emotion", "hate", "sentiment"]
seeds = [42, 31415, 1234567]

batch_size = 16
metric_name = "f1"

for task in tasks:
    name = f"{task}_hyperparameter_search"
    # Baseline arguments handed to the Engine; nothing is checkpointed or
    # pushed to the Hub at this stage (save_strategy="no", push_to_hub=False).
    args = TrainingArguments(
        name,
        evaluation_strategy="epoch",
        save_strategy="no",
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=4,
        weight_decay=0.01,
        load_best_model_at_end=False,
        push_to_hub=False,
        metric_for_best_model=metric_name,
        report_to="wandb",
        save_total_limit=1,
        run_name=name,
    )

    dataset = Dataset(task, "distilbert-base-uncased")
    engine = Engine(dataset, args)

    # NOTE(review): the argument is presumably the number of search trials.
    engine.hyperparameter_search(10)

    # The name intentionally carries no seed suffix: the retraining cell looks
    # the configuration up under "<author>/<task>_trained".
    name = f"{task}_trained"
    for seed in seeds:
        print(f"\n\n [+] Training model: {name}")
        # Pass the loop's seed so that each run really differs; the original
        # hard-coded seed=0 and trained the same model three times.
        engine.train(4, seed=seed, opt_name=name)
        # Remove the local output directory; a missing directory is not an
        # error (same semantics as `rm -rf`, but portable and shell-free).
        shutil.rmtree(name, ignore_errors=True)


### Retrieve configurations from best trained models and train them on three seeds

In [None]:
# For every task, fetch the hyperparameters stored with the best trained model
# and retrain it once per seed, pushing each run to the Hub.
seeds = [42, 31415, 1234567]
# Maps task name -> Hub account that owns "<task>_trained".
model_repo = {"irony": "aXhyra", "emotion": "aXhyra", "hate": "aXhyra", "sentiment": "aXhyra"}

for model_name, author in model_repo.items():
    # Use the task name from the loop (the original passed the literal string
    # "model_name", which is not one of the tasks).
    dataset = Dataset(model_name, "distilbert-base-uncased")
    model = f"{author}/{model_name}_trained"
    # NOTE(review): metric_name is retrieved but metric_for_best_model below
    # stays hard-coded to "f1" -- confirm which one is intended.
    lr, batch_size, metric_name = retrieve_hyperparameter_config(model)
    for seed in seeds:
        name = f"{model_name}_trained_{seed}"
        args = TrainingArguments(
            name,
            seed=seed,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=lr,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=4,
            weight_decay=0.01,
            load_best_model_at_end=True,
            push_to_hub=True,
            metric_for_best_model="f1",
            report_to="wandb",
            save_total_limit=1,
            run_name=name,
        )
        # Build and train inside the seed loop so that every seed gets its own
        # run; previously only the last seed's arguments were ever trained.
        engine = Engine(dataset, args)
        engine.train(4, seed=seed, opt_name=model, use_given_args=True)

## Evaluate trained models on test set

In [None]:
# Score every per-seed model on its task's test split and summarise the
# results per task as mean, standard deviation and best value.
tasks = ["irony", "emotion", "hate", "sentiment"]
seeds = [42, 31415, 1234567]

mean_test_eval = {}
std_test_eval = {}
best_test_eval = {}

for task in tasks:
    # Irony is scored with F1; every other task with recall.
    metric = "f1" if task == "irony" else "recall"
    # One score per seed, read from the "eval_<metric>" entry of the result.
    scores = [
        Engine.test_eval(
            f"aXhyra/{task}_trained_{seed}",
            task,
            compute_metrics(metric),
        )[f"eval_{metric}"]
        for seed in seeds
    ]
    mean_test_eval[task] = np.mean(scores)
    std_test_eval[task] = np.std(scores)
    best_test_eval[task] = max(scores)

for summary in (mean_test_eval, std_test_eval, best_test_eval):
    print(summary)

In [None]:
# Quick sanity check on a single sentence, using whichever `engine` an earlier
# cell created last.
sample_text = "I love this movie"
engine.predict(sample_text)

'positive'