In [1]:
import optuna
import torch
import numpy as np
import pandas as pd
from transformers import (
    DebertaV2Tokenizer,
    DebertaV2Config,
    DebertaV2ForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from sklearn.utils.class_weight import compute_class_weight
from datasets import Dataset
from sklearn.metrics import f1_score


  from .autonotebook import tqdm as notebook_tqdm
2025-03-04 18:34:06.996894: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741113247.021744  563638 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741113247.029354  563638 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-04 18:34:07.055625: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def _read_split(csv_path):
    """Read one CSV split and cast its "text" column to str and "label" column to int."""
    return pd.read_csv(csv_path).astype({"text": str, "label": int})


# Augmented training split and the validation split, with normalised column dtypes.
train_df = _read_split("Data/augmented_train_split.csv")
val_df = _read_split("Data/val_split.csv")

In [3]:
# Hub checkpoint shared by the configuration, model and tokenizer below.
MODEL_NAME = "microsoft/deberta-v3-small"

# Start from the checkpoint's configuration and override only the label count
# (two classes) and the hidden-layer dropout.
_config_overrides = {"num_labels": 2, "hidden_dropout_prob": 0.2}
config = DebertaV2Config.from_pretrained(MODEL_NAME, **_config_overrides)

# Module-level classifier built from that configuration.
# NOTE: objective() further down loads its own model for every trial and does
# not use this instance.
model = DebertaV2ForSequenceClassification.from_pretrained(MODEL_NAME, config=config)

# Tokenizer matching the checkpoint.
tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)

# Wrap both pandas frames as Hugging Face datasets (train first, then validation).
train_dataset, val_dataset = (
    Dataset.from_pandas(frame) for frame in (train_df, val_df)
)

def tokenize_function(examples):
    """Run the module-level tokenizer over ``examples["text"]``.

    Sequences are padded to ``max_length`` and truncated, with ``max_length=512``.
    Returns whatever the tokenizer call returns.
    """
    tokenizer_options = {"padding": "max_length", "truncation": True, "max_length": 512}
    return tokenizer(examples["text"], **tokenizer_options)

# ✅ **Tokenize & Remove "text" column**
# Both splits are tokenized in batches; the raw "text" column is dropped from the result.
train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
val_dataset = val_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# ✅ **Compute Class Weights**
# sklearn "balanced" weights computed from the training labels, then passed
# through log1p before being used as the loss weights.
labels = train_df["label"].values
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(labels), y=labels)
# Place the weights on the GPU only when one is available; a hard-coded "cuda"
# raises on machines without CUDA.
log_class_weights = torch.tensor(np.log1p(class_weights), dtype=torch.float32).to(
    "cuda" if torch.cuda.is_available() else "cpu"
)


class WeightedTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy.

    The per-class weights are read from the module-level ``log_class_weights``
    tensor.
    """

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute the class-weighted cross-entropy loss for one batch.

        Args:
            model: Model used for the forward pass.
            inputs: Batch mapping. Must contain "labels"; every other entry is
                forwarded to ``model`` as a keyword argument.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            **kwargs: Accepted and ignored, so extra keyword arguments passed
                by the caller do not raise.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is true.

        Raises:
            KeyError: If ``inputs`` has no "labels" entry.
        """
        labels = inputs["labels"]
        # Labels are withheld from the forward pass; the loss is computed here instead.
        outputs = model(**{k: v for k, v in inputs.items() if k != "labels"})
        logits = outputs.logits

        # CrossEntropyLoss needs its weight tensor on the same device as the
        # logits. Moving it here (a no-op when the devices already match) removes
        # the dependency on wherever the global tensor happened to be created.
        class_weights = log_class_weights.to(logits.device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))

        return (loss, outputs) if return_outputs else loss


# ✅ **Define Evaluation Metrics**
def compute_metrics(pred):
    """Score a prediction object with F1.

    The predicted class is the arg-max over axis 1 of ``pred.predictions``; it is
    compared against ``pred.label_ids`` using ``f1_score`` with its default settings.

    Returns:
        dict: ``{"f1": <score>}``.
    """
    predicted_classes = np.argmax(pred.predictions, axis=1)
    score = f1_score(pred.label_ids, predicted_classes)
    return {"f1": score}


# ✅ **Define Objective Function for Optuna Tuning**
def objective(trial):
    """Optuna objective: fine-tune once with sampled hyperparameters and return validation F1.

    Args:
        trial: Optuna trial used to sample "learning_rate" and "dropout" and
            whose ``number`` names this trial's checkpoint directory.

    Returns:
        The "eval_f1" entry of the final evaluation results, or 0.0 when that
        key is absent.
    """
    # Sample hyperparameters
    learning_rate = trial.suggest_categorical("learning_rate", [1e-6, 5e-6, 1e-5])
    dropout = trial.suggest_categorical("dropout", [0.2, 0.4])

    # The sampled dropout is applied to both the hidden layers and the attention probabilities.
    config = DebertaV2Config.from_pretrained(
        MODEL_NAME,
        num_labels=2,
        hidden_dropout_prob=dropout,
        attention_probs_dropout_prob=dropout,
    )

    def _model_init():
        """Build a fresh classifier from the checkpoint with this trial's config."""
        return DebertaV2ForSequenceClassification.from_pretrained(MODEL_NAME, config=config)

    training_args = TrainingArguments(
        # One checkpoint directory per trial: a shared folder would mix
        # checkpoints from different hyperparameter settings and let the
        # save_total_limit rotation act on another trial's files.
        # This uses more disk than a single shared folder.
        output_dir=f"./results/trial_{trial.number}",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        num_train_epochs=5,  # Fewer epochs to speed up tuning
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        # Mixed precision only when a CUDA device is present; a hard-coded True
        # is rejected on CPU-only machines.
        fp16=torch.cuda.is_available(),
        learning_rate=learning_rate,
        weight_decay=0.01,
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_f1",  # Select the best checkpoint by F1
        greater_is_better=True,  # Higher F1 is better
        logging_dir="./logs",
        logging_steps=10,
        report_to="none",
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
    )

    # model_init (instead of a ready-made model) lets the Trainer set its seed
    # before the randomly initialised classifier/pooler weights are created, so
    # every trial starts from the same head and differs only in its hyperparameters.
    trainer = WeightedTrainer(
        model_init=_model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2, early_stopping_threshold=0.0)],
    )

    trainer.train()

    # With load_best_model_at_end=True this evaluation is expected to score the
    # best checkpoint rather than the last epoch.
    eval_results = trainer.evaluate()

    # .get() keeps a missing metric from raising; 0.0 is then reported to Optuna.
    return eval_results.get("eval_f1", 0.0)


# ✅ **Run Optuna Optimization**
# Exhaustive grid over the same choices objective() samples from; keep the two
# in sync. The default sampler does not enumerate combinations, so six trials
# could repeat some settings and miss others. GridSampler visits each
# combination of the grid once.
search_space = {
    "learning_rate": [1e-6, 5e-6, 1e-5],
    "dropout": [0.2, 0.4],
}
n_combinations = 1
for choices in search_space.values():
    n_combinations *= len(choices)

study = optuna.create_study(
    direction="maximize",  # Maximize F1-score
    sampler=optuna.samplers.GridSampler(search_space),
)
study.optimize(objective, n_trials=n_combinations)  # One trial per grid point (3×2 = 6)

# ✅ **Get Best Hyperparameters**
best_params = study.best_params
print("\n✅ Best Hyperparameters Found:")
print(f"Best Learning Rate: {best_params['learning_rate']}")
print(f"Best Dropout: {best_params['dropout']}")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 12057/12057 [00:06<00:00, 1880.71 examples/s]
Map: 100%|██████████| 1675/1675 [00:00<00:00, 1998.65 examples/s]
[I 2025-03-04 18:34:23,068] A new study created in memory with name: no-name-1f1bbfc0-e62b-4694-8097-cac33249884e
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[W 2025-03-04 18:35:21,837] Trial 0 failed with parameters: {'learning_rate': 5e-06, 'dropout': 0.4} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/vol/bitbucket/lf524/nlp_cw/.venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_563638/466949655.py", line 106, in objective
    trainer.train()
  File "/vol/bitbucket/lf524/nlp_cw/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2241, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/vol/bitbucket/lf524/nlp_cw/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2581, in _inner_training_loop
    _grad_norm = self.accelerator.clip_grad_norm_(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/vol/bitbucket/lf524/nlp_cw/.venv/lib/python3.12/site-packages/accelerate/accelerator.py", line 2480, in clip

KeyboardInterrupt: 