In [None]:
# Step 0: Mount Google Drive
# Later cells read and write checkpoints under /content/drive/MyDrive, so the
# Drive mount has to happen before anything else runs.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [None]:
# Step 1: Install dependencies and import libraries
!pip install -q transformers
!pip install -q torchinfo
!pip install -q datasets
!pip install -q evaluate
!pip install -q optuna
!pip install -q wandb

import wandb
wandb.login(key="Put wandb API key here")

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import optuna
import os


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m481.3/491.2 kB[0m [31m17.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/183.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpeng_zhao[0m ([33mpeng_zhao-university-of-california-berkeley[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Step 2: Load and preprocess the dataset (CDs_and_Vinyl)
# NOTE(review): trust_remote_code=True executes the loading script shipped with
# the dataset repository; keep it only while that repository is trusted.
dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_CDs_and_Vinyl", trust_remote_code=True)

# Shuffle the dataset and filter out rating == 3
# (3-star reviews are dropped because the labels built later are binary:
# rating > 3 versus everything else.)
shuffled_dataset = dataset["full"].shuffle(seed=42)
# input_columns hands the predicate only the "rating" value, so the filter does
# not have to materialise every column of every row; the kept rows are the same.
subset_dataset = shuffled_dataset.filter(lambda rating: rating != 3, input_columns=["rating"])

# Initialize the DeBERTa tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")

# Define the tokenization and formatting function
def tokenize_and_format(examples):
    """Tokenize a batch of reviews and attach binary sentiment labels.

    Args:
        examples: A batch exposing a "text" sequence and a "rating" sequence
            (the function is applied through ``map(..., batched=True)``).

    Returns:
        The tokenizer output for the batch with an added "labels" entry:
        1 where the rating is above 3, 0 otherwise.
    """
    # Every review is truncated or padded to exactly 256 tokens.
    encoded = tokenizer(
        examples["text"],
        max_length=256,
        padding="max_length",
        truncation=True,
    )
    sentiment = []
    for stars in examples["rating"]:
        sentiment.append(1 if stars > 3 else 0)
    encoded["labels"] = sentiment
    return encoded

# Tokenize and save checkpoint
# Tokenization runs over batches; the result is persisted to Drive so a later
# session can reload it instead of repeating this step.
tokenized_checkpoint_dir = "/content/drive/MyDrive/FP/Checkpoints/tokenized_CDs_checkpoint"
tokenized_dataset = subset_dataset.map(tokenize_and_format, batched=True)
tokenized_dataset.save_to_disk(tokenized_checkpoint_dir)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/30.3k [00:00<?, ?B/s]

Amazon-Reviews-2023.py:   0%|          | 0.00/39.6k [00:00<?, ?B/s]

CDs_and_Vinyl.jsonl:   0%|          | 0.00/3.29G [00:00<?, ?B/s]

Generating full split: 0 examples [00:00, ? examples/s]

Filter:   0%|          | 0/4827273 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Map:   0%|          | 0/4543582 [00:00<?, ? examples/s]

Saving the dataset (0/20 shards):   0%|          | 0/4543582 [00:00<?, ? examples/s]

In [None]:
# Step 2': Load from checkpoint if needed
# Reloads the tokenized dataset written by Step 2. The split and format calls
# below are needed by the later cells in every case, because they are what
# defines `tokenized_datasets`.
from datasets import load_from_disk

tokenized_checkpoint_path = "/content/drive/MyDrive/FP/Checkpoints/tokenized_CDs_checkpoint"
tokenized_dataset = load_from_disk(tokenized_checkpoint_path)

# 80/20 train/test partition; the fixed seed keeps the partition repeatable.
tokenized_datasets = tokenized_dataset.train_test_split(seed=42, test_size=0.2)
tokenized_datasets.set_format(type="torch")


Loading dataset from disk:   0%|          | 0/20 [00:00<?, ?it/s]

In [None]:
# Step 3: Define model, metrics, and hyperparameter search space
def model_init():
    """Build a DeBERTa-v3-small sequence classifier with two output labels.

    Handed to ``Trainer`` as ``model_init``, so each call loads a new model
    from the pretrained "microsoft/deberta-v3-small" checkpoint.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/deberta-v3-small",
        num_labels=2,
    )
    return classifier

def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # zero_division=0 scores a never-predicted class as 0 (the value the
    # default setting also uses) without emitting an undefined-metric warning
    # on every evaluation where the model predicts a single class.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, predictions)
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

def hp_space(trial):
    """Describe the hyperparameters sampled for each search trial.

    Args:
        trial: Object providing ``suggest_float`` and ``suggest_categorical``.

    Returns:
        dict with a "learning_rate" drawn on a log scale between 1e-6 and
        1e-4, and a "per_device_train_batch_size" chosen from 8 or 16.
    """
    sampled_lr = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
    sampled_batch_size = trial.suggest_categorical("per_device_train_batch_size", [8, 16])
    return {
        "learning_rate": sampled_lr,
        "per_device_train_batch_size": sampled_batch_size,
    }

# Set training arguments for hyperparameter tuning
# learning_rate and per_device_train_batch_size below are only starting values:
# hp_space samples both of them for every trial.
tuning_arg_values = dict(
    # Where checkpoints go and how often evaluation/saving happens.
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Optimisation settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    num_train_epochs=5,
    gradient_accumulation_steps=1,
    # Batching, precision and data loading.
    per_device_train_batch_size=256,
    per_device_eval_batch_size=128,
    fp16=True,
    dataloader_num_workers=8,
    # Experiment tracking.
    report_to=["wandb"],
    run_name="deberta_amazon_reviews_tune",
    logging_steps=50,
    logging_first_step=True,
)
training_args = TrainingArguments(**tuning_arg_values)

# Print total samples
total_train, total_eval = (len(tokenized_datasets[split]) for split in ("train", "test"))
print("Total training samples:", total_train)
print("Total evaluation samples:", total_eval)

# Use partial subset for hyperparameter search
# 0.2% of each split, taken from the front of a seeded shuffle so the same rows
# are selected on every run.
tuning_train_rows = range(int(0.002 * total_train))
tuning_eval_rows = range(int(0.002 * total_eval))
train_subset = tokenized_datasets["train"].shuffle(seed=42).select(tuning_train_rows)
eval_subset = tokenized_datasets["test"].shuffle(seed=42).select(tuning_eval_rows)

# Initialize the Trainer
# model_init (rather than a model instance) lets the Trainer build a new model
# for each hyperparameter-search trial.
tuning_early_stopping = EarlyStoppingCallback(early_stopping_patience=2)
trainer = Trainer(
    args=training_args,
    model_init=model_init,
    compute_metrics=compute_metrics,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    callbacks=[tuning_early_stopping],
)

# Run Optuna hyperparameter search
def f1_objective(metrics):
    """Return the weighted F1 of an evaluation as the value Optuna maximizes.

    Args:
        metrics: Evaluation metrics keyed with the "eval_" prefix.

    Returns:
        The value stored under "eval_f1".
    """
    return metrics["eval_f1"]


# Without compute_objective the search maximizes the sum of every reported
# metric (accuracy + F1 + precision + recall). With weighted averaging, recall
# equals accuracy, so that sum counts accuracy twice. Optimizing a single,
# explicitly named metric makes the selection criterion clear.
best_run = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    n_trials=10,
    hp_space=hp_space,
    compute_objective=f1_objective,
)

print("Best run hyperparameters:", best_run.hyperparameters)




Total training samples: 3634865
Total evaluation samples: 908717


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/286M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-04-03 04:13:27,858] A new study created in memory with name: no-name-cf9ac0c8-456c-4b02-8329-cd121da05c16


model.safetensors:   0%|          | 0.00/286M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1441,0.177541,0.952119,0.940916,0.949909,0.952119
2,0.1262,0.107393,0.962576,0.963781,0.965515,0.962576
3,0.0717,0.126953,0.971381,0.970164,0.969851,0.971381
4,0.0208,0.168301,0.966979,0.965574,0.965034,0.966979


[I 2025-04-03 04:18:55,383] Trial 0 finished with value: 3.864564888906435 and parameters: {'learning_rate': 9.687004414446243e-05, 'per_device_train_batch_size': 16}. Best is trial 0 with value: 3.864564888906435.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁▅█▆
eval/f1,▁▆█▇
eval/loss,█▁▃▇
eval/precision,▁▆█▆
eval/recall,▁▅█▆
eval/runtime,█▁▁▁
eval/samples_per_second,▁███
eval/steps_per_second,▁███
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/accuracy,0.96698
eval/f1,0.96557
eval/loss,0.1683
eval/precision,0.96503
eval/recall,0.96698
eval/runtime,4.6214
eval/samples_per_second,393.168
eval/steps_per_second,3.246
total_flos,1925879640551424.0
train/epoch,4.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0926,0.100131,0.971932,0.971539,0.971247,0.971932
2,0.1026,0.135321,0.964777,0.966885,0.97096,0.964777
3,0.096,0.119439,0.974684,0.974062,0.973756,0.974684


[I 2025-04-03 04:24:16,005] Trial 1 finished with value: 3.8971850556511907 and parameters: {'learning_rate': 1.2297283363089677e-05, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 3.8971850556511907.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▆▁█
eval/f1,▆▁█
eval/loss,▁█▅
eval/precision,▂▁█
eval/recall,▆▁█
eval/runtime,▁▅█
eval/samples_per_second,█▄▁
eval/steps_per_second,█▄▁
train/epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97468
eval/f1,0.97406
eval/loss,0.11944
eval/precision,0.97376
eval/recall,0.97468
eval/runtime,4.6539
eval/samples_per_second,390.429
eval/steps_per_second,3.223
total_flos,1444409730413568.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2596,0.272411,0.933957,0.902063,0.872276,0.933957
2,0.1726,0.154663,0.952119,0.948533,0.947112,0.952119
3,0.1413,0.243044,0.955421,0.951843,0.950854,0.955421
4,0.0634,0.261913,0.951018,0.948349,0.946839,0.951018


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-03 04:31:25,125] Trial 2 finished with value: 3.7972247894531597 and parameters: {'learning_rate': 6.689328988720986e-05, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 3.8971850556511907.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁▇█▇
eval/f1,▁███
eval/loss,█▁▆▇
eval/precision,▁███
eval/recall,▁▇█▇
eval/runtime,▃▁█▄
eval/samples_per_second,▆█▁▅
eval/steps_per_second,▆█▁▅
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███

0,1
eval/accuracy,0.95102
eval/f1,0.94835
eval/loss,0.26191
eval/precision,0.94684
eval/recall,0.95102
eval/runtime,4.6585
eval/samples_per_second,390.044
eval/steps_per_second,3.22
total_flos,1925879640551424.0
train/epoch,4.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1363,0.122713,0.961475,0.95947,0.958671,0.961475
2,0.1081,0.151974,0.957072,0.96,0.965382,0.957072
3,0.1019,0.141254,0.966428,0.965232,0.964643,0.966428


[I 2025-04-03 04:36:50,167] Trial 3 finished with value: 3.8627312287336415 and parameters: {'learning_rate': 3.0935926137566615e-05, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 3.8971850556511907.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▄▁█
eval/f1,▁▂█
eval/loss,▁█▅
eval/precision,▁█▇
eval/recall,▄▁█
eval/runtime,█▅▁
eval/samples_per_second,▁▄█
eval/steps_per_second,▁▄█
train/epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/global_step,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/accuracy,0.96643
eval/f1,0.96523
eval/loss,0.14125
eval/precision,0.96464
eval/recall,0.96643
eval/runtime,4.6588
eval/samples_per_second,390.018
eval/steps_per_second,3.22
total_flos,1444409730413568.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1324,0.122821,0.962576,0.963657,0.965159,0.962576
2,0.1068,0.121496,0.961475,0.962457,0.963762,0.961475
3,0.0939,0.138523,0.974133,0.973328,0.973025,0.974133
4,0.0149,0.151416,0.970281,0.970936,0.971849,0.970281


[I 2025-04-03 04:44:00,815] Trial 4 finished with value: 3.8833463720905703 and parameters: {'learning_rate': 1.3804741314326077e-05, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 3.8971850556511907.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▂▁█▆
eval/f1,▂▁█▆
eval/loss,▁▁▅█
eval/precision,▂▁█▇
eval/recall,▂▁█▆
eval/runtime,▁█▆▇
eval/samples_per_second,█▁▃▂
eval/steps_per_second,█▁▃▂
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇██

0,1
eval/accuracy,0.97028
eval/f1,0.97094
eval/loss,0.15142
eval/precision,0.97185
eval/recall,0.97028
eval/runtime,4.6807
eval/samples_per_second,388.19
eval/steps_per_second,3.205
total_flos,1925879640551424.0
train/epoch,4.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1244,0.098443,0.967529,0.967075,0.966717,0.967529
2,0.0844,0.109826,0.96973,0.970655,0.972083,0.96973
3,0.0702,0.111513,0.975784,0.976058,0.976402,0.975784


[I 2025-04-03 04:49:25,535] Trial 5 finished with value: 3.904028995479929 and parameters: {'learning_rate': 6.855780050461059e-06, 'per_device_train_batch_size': 8}. Best is trial 5 with value: 3.904028995479929.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁▃█
eval/f1,▁▄█
eval/loss,▁▇█
eval/precision,▁▅█
eval/recall,▁▃█
eval/runtime,▁▇█
eval/samples_per_second,█▂▁
eval/steps_per_second,█▁▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/accuracy,0.97578
eval/f1,0.97606
eval/loss,0.11151
eval/precision,0.9764
eval/recall,0.97578
eval/runtime,4.6908
eval/samples_per_second,387.358
eval/steps_per_second,3.198
total_flos,1444409730413568.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1002,0.097612,0.971381,0.970297,0.969932,0.971381
2,0.0868,0.119197,0.96863,0.97009,0.972742,0.96863
3,0.058,0.113649,0.977986,0.977812,0.977673,0.977986


[I 2025-04-03 04:54:50,367] Trial 6 finished with value: 3.911456029074672 and parameters: {'learning_rate': 1.2026872819004038e-05, 'per_device_train_batch_size': 8}. Best is trial 6 with value: 3.911456029074672.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▃▁█
eval/f1,▁▁█
eval/loss,▁█▆
eval/precision,▁▄█
eval/recall,▃▁█
eval/runtime,█▅▁
eval/samples_per_second,▁▄█
eval/steps_per_second,▁▄█
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████

0,1
eval/accuracy,0.97799
eval/f1,0.97781
eval/loss,0.11365
eval/precision,0.97767
eval/recall,0.97799
eval/runtime,4.6724
eval/samples_per_second,388.877
eval/steps_per_second,3.21
total_flos,1444409730413568.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1544,0.129363,0.933957,0.902063,0.872276,0.933957


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-03 04:56:09,383] Trial 7 pruned. 
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▂▃▃▄▅▆▆▇██
train/global_step,▁▂▃▃▄▅▆▆▇██

0,1
eval/accuracy,0.93396
eval/f1,0.90206
eval/loss,0.12936
eval/precision,0.87228
eval/recall,0.93396
eval/runtime,4.6871
eval/samples_per_second,387.66
eval/steps_per_second,3.2
train/epoch,1.0
train/global_step,455.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1473,0.127159,0.952119,0.945383,0.945895,0.952119
2,0.1248,0.109847,0.959274,0.96098,0.963571,0.959274


[I 2025-04-03 04:58:52,493] Trial 8 pruned. 
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/f1,▁█
eval/loss,█▁
eval/precision,▁█
eval/recall,▁█
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▃▄▄▄▄▅▅▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▄▄▅▅▆▆▆▇▇███

0,1
eval/accuracy,0.95927
eval/f1,0.96098
eval/loss,0.10985
eval/precision,0.96357
eval/recall,0.95927
eval/runtime,4.6964
eval/samples_per_second,386.893
eval/steps_per_second,3.194
train/epoch,2.0
train/global_step,910.0


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1392,0.104566,0.959824,0.959094,0.958515,0.959824
2,0.1236,0.109165,0.959824,0.962831,0.968914,0.959824


[I 2025-04-03 05:01:32,592] Trial 9 pruned. 


Best run hyperparameters: {'learning_rate': 1.2026872819004038e-05, 'per_device_train_batch_size': 8}


In [None]:
# Step 4: Final training with best hyperparameters
# warmup_steps and fp16 mirror the tuning configuration: the learning rate was
# selected under that warmup schedule and precision, so the final run keeps
# them instead of silently falling back to no warmup and full precision.
# NOTE(review): tuning used num_train_epochs=5 while this run uses 3, which
# also changes the learning-rate decay schedule — confirm 3 is intended.
updated_training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=best_run.hyperparameters["learning_rate"],
    per_device_train_batch_size=best_run.hyperparameters["per_device_train_batch_size"],
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    warmup_steps=500,
    fp16=True,
    load_best_model_at_end=True,
    report_to=["wandb"],
    run_name="deberta_amazon_reviews_final",
    logging_steps=50
)

# Reload subset for final training
# Same seeded shuffle and 0.2% cut that the tuning subsets use.
final_train_rows = range(int(0.002 * total_train))
final_eval_rows = range(int(0.002 * total_eval))
train_subset_final = tokenized_datasets["train"].shuffle(seed=42).select(final_train_rows)
eval_subset_final = tokenized_datasets["test"].shuffle(seed=42).select(final_eval_rows)

# Train final model
final_early_stopping = EarlyStoppingCallback(early_stopping_patience=2)
final_trainer = Trainer(
    args=updated_training_args,
    model_init=model_init,
    compute_metrics=compute_metrics,
    train_dataset=train_subset_final,
    eval_dataset=eval_subset_final,
    callbacks=[final_early_stopping],
)

train_result = final_trainer.train()


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.117,0.134048,0.964227,0.963113,0.962454,0.964227
2,0.0696,0.140895,0.964777,0.966471,0.969401,0.964777
3,0.0549,0.138558,0.96863,0.968569,0.96851,0.96863


In [None]:
# Save final model
# NOTE(review): final_trainer was built without a tokenizer, so presumably only
# the model is written here — confirm, and save the tokenizer separately if the
# checkpoint has to be self-contained.
final_model_dir = "/content/drive/MyDrive/FP/Checkpoints/final_checkpoint_CDs_deberta"
final_trainer.save_model(final_model_dir)