In [None]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# --- Runtime configuration ---------------------------------------------------
# Turn off Weights & Biases reporting and NCCL's peer-to-peer / InfiniBand
# transports. NCCL_DEBUG=INFO makes NCCL print its initialisation log lines in
# the cell output; remove it once the multi-GPU setup is known to work.
os.environ["WANDB_DISABLED"] = "true"
os.environ["NCCL_P2P_DISABLE"] = "1"
os.environ["NCCL_IB_DISABLE"] = "1"
os.environ["NCCL_DEBUG"] = "INFO"

MODEL_NAME = "microsoft/deberta-base"
SEED = 42

# The classifier/pooler weights are not in the pretrained checkpoint and are
# randomly initialised when the model is built, so seed torch *before* loading
# the model to make that initialisation repeatable across kernel restarts.
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# --- Data ----------------------------------------------------------------------
dataset = load_dataset("dair-ai/emotion")
label_names = dataset["train"].features["label"].names
label_map = {label: i for i, label in enumerate(label_names)}  # label name -> class id
id2label = {i: label for label, i in label_map.items()}  # class id -> label name
num_labels = len(label_map)


# --- Model ---------------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Storing both mappings in the model config means a checkpoint written with
# save_pretrained() carries its own label names instead of generic placeholders.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label_map,
).to(device)

def tokenize_function(examples):
    """Tokenize a batch of examples for sequence classification.

    Intended to be passed to ``Dataset.map(..., batched=True)``.

    Args:
        examples: A batch from the dataset; only ``examples["text"]`` is read.

    Returns:
        The tokenizer output for the batch, with every sequence truncated or
        padded to exactly 128 tokens so rows can be batched without a
        padding-aware collator.
    """
    # No ``return_tensors`` here: the result is written into the dataset's
    # columnar storage, so building torch tensors first is wasted work.
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )

# Tokenize every split once, in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

OUTPUT_DIR = "deberta-models/"
# The recorded run shows validation loss bottoming out around epoch 5 and
# rising afterwards, so a long schedule only overfits; 10 epochs matches the
# logged loss table.
NUM_EPOCHS = 10

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    evaluation_strategy="epoch",
    save_strategy="epoch",  # must match the evaluation schedule for best-model loading
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="logs/",
    report_to=[],
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    # Restore the checkpoint with the lowest validation loss once training
    # finishes, instead of keeping whatever the final epoch produced.
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    # One checkpoint is written per epoch; cap how many are kept on disk.
    save_total_limit=2,
)

# No compute_metrics is supplied, so per-epoch evaluation reports loss only.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)

# Train, then persist the model and tokenizer side by side so the directory
# can be reloaded with from_pretrained(). With load_best_model_at_end the
# weights saved here are the best-validation ones rather than the last epoch's.
trainer.train()
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

  from .autonotebook import tqdm as notebook_tqdm
2025-03-25 12:06:21.561651: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-25 12:06:21.573379: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742904381.587856  893500 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742904381.592196  893500 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-25 12:06:21.607106: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorF

Using device: cuda


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 2000/2000 [00:00<00:00, 10487.83 examples/s]


02aa490fc410:893500:893500 [0] NCCL INFO cudaDriverVersion 12020
02aa490fc410:893500:893500 [0] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
02aa490fc410:893500:893500 [0] NCCL INFO NET/Plugin: No plugin found (libnccl-net.so)
02aa490fc410:893500:893500 [0] NCCL INFO NET/Plugin: Plugin load returned 2 : libnccl-net.so: cannot open shared object file: No such file or directory : when loading libnccl-net.so
02aa490fc410:893500:893500 [0] NCCL INFO NET/Plugin: Using internal network plugin.
NCCL version 2.21.5+cuda12.4
02aa490fc410:893500:893831 [0] NCCL INFO NCCL_IB_DISABLE set by environment to 1.
02aa490fc410:893500:893831 [0] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
02aa490fc410:893500:893831 [0] NCCL INFO Using non-device net plugin version 0
02aa490fc410:893500:893831 [0] NCCL INFO Using network Socket
02aa490fc410:893500:893833 [2] NCCL INFO Using non-device net plugin version 0
02aa490fc410:893500:893833 [2] NCCL INFO Using network Socket
02aa490fc410:893500:893832 [1]



Epoch,Training Loss,Validation Loss
1,No log,0.837698
2,No log,0.244179
3,No log,0.157147
4,0.576300,0.139141
5,0.576300,0.109388
6,0.576300,0.162981
7,0.576300,0.14182
8,0.099200,0.14869
9,0.099200,0.205494
10,0.099200,0.271662




('deberta-models/tokenizer_config.json',
 'deberta-models/special_tokens_map.json',
 'deberta-models/vocab.json',
 'deberta-models/merges.txt',
 'deberta-models/added_tokens.json',
 'deberta-models/tokenizer.json')

In [None]:
import numpy as np
from sklearn.metrics import f1_score


# Held-out split used for the final score.
test_dataset = tokenized_datasets["test"]

# Gold class ids for that split.
labels = test_dataset["label"]

# Run the trained model over the split and take the top-scoring class per row.
predictions = trainer.predict(test_dataset)
logits = predictions.predictions
preds = np.argmax(logits, axis=-1)

# Macro averaging weights every class equally, regardless of its frequency.
f1_macro = f1_score(labels, preds, average="macro")
print(f"F1 Macro Score: {f1_macro:.4f}")




F1 Macro Score: 0.8920


In [None]:
import torch
from sklearn.metrics import f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification


# Reload the fine-tuned model and tokenizer from the directory written by the
# training cell.
model_path = "deberta-models/"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Small hand-written, out-of-domain (personal finance) probe set. Every gold
# label must be one of the classes the model was trained on; a label outside
# that set can never be predicted and would silently drag the score down.
new_data = [
    {"text": "I’m scared my credit score will never recover.", "label": "fear"},
    {"text": "It’s unfair how high the debt settlement fees are!", "label": "anger"},
    {"text": "My low credit score makes me feel hopeless.", "label": "sadness"},
    {"text": "I’m so happy my credit score is improving!", "label": "joy"},
    {"text": "What if my credit score drops even more?", "label": "fear"},
    {"text": "Why does debt settlement take so long?", "label": "anger"},
    {"text": "I feel defeated seeing my credit card debt.", "label": "sadness"},
    {"text": "It’s great that I finally paid off my debt!", "label": "joy"},
    {"text": "Lenders charging extra interest disgusts me.", "label": "anger"},
    {"text": "I’m terrified of my debt going to collections.", "label": "fear"},
    {"text": "Credit card fees are way too high!", "label": "anger"},
    {"text": "I feel lost trying to fix my bad credit.", "label": "sadness"},
    {"text": "I’m relieved my loan got approved!", "label": "joy"},
    # Was labelled "disgust", which is not a class of the training dataset;
    # "anger" is the closest class and matches how the other
    # "disgusts me" sentence above is labelled.
    {"text": "I hate that my interest rate keeps rising.", "label": "anger"},
    {"text": "I am scared and What if my loan application gets rejected?", "label": "fear"},
    {"text": "Why does fixing credit take so long?", "label": "anger"}
]

# Split the probe set into model inputs and gold label names.
texts = [item["text"] for item in new_data]
labels = [item["label"] for item in new_data]

# Class id -> label name. Note this relies on `dataset` from the training cell
# still being in the kernel. It is deliberately not called `label_map`, which
# the training cell uses for the opposite direction (name -> id).
id_to_label = dict(enumerate(dataset["train"].features["label"].names))

# Fail loudly if a gold label is not something the model can output.
unknown_labels = sorted(set(labels) - set(id_to_label.values()))
if unknown_labels:
    raise ValueError(
        f"Gold labels not in the model's label set {sorted(id_to_label.values())}: {unknown_labels}"
    )

# Tokenize as one padded batch and place the tensors on the model's device.
inputs = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors="pt")
inputs = {key: value.to(model.device) for key, value in inputs.items()}

model.eval()  # disable dropout for inference
with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=1)  # highest-scoring class id per sentence

predicted_labels = [id_to_label[class_id] for class_id in predictions.tolist()]

print(predicted_labels)
# Weighted F1: per-class F1 averaged by each class's number of gold examples.
f1 = f1_score(labels, predicted_labels, average="weighted")
print(f"F1 Score: {f1:.4f}")

['fear', 'anger', 'sadness', 'joy', 'joy', 'fear', 'sadness', 'joy', 'anger', 'fear', 'anger', 'sadness', 'joy', 'anger', 'fear', 'surprise']
F1 Score: 0.7440
