In [None]:
!pip install --quiet --upgrade transformers accelerate

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import torch
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
import transformers
print(transformers.__version__)

import os
os.environ["WANDB_DISABLED"] = "true"

from collections import Counter
import pandas as pd
import math


ds = load_dataset("go_emotions", "simplified")
label_names = ds["train"].features["labels"].feature.names
num_labels = len(label_names)


train_data = ds["train"].to_pandas()


def create_multi_hot_labels(labels_list, num_labels):
    vec = [0.0] * num_labels
    for i in labels_list:
        vec[i] = 1.0
    return vec


train_data['multi_hot_labels'] = train_data['labels'].apply(lambda x: create_multi_hot_labels(x, num_labels))

for i, emotion in enumerate(label_names):
    train_data[emotion] = train_data['labels'].apply(lambda x: 1 if i in x else 0)

label_cols = label_names
emotion_counts = train_data[label_cols].sum().sort_values()

print("📊 Original Emotion Counts:")
print(emotion_counts)


rare_emotions = emotion_counts.head(10).index.tolist()
print("🔍 Rare emotions:", rare_emotions)

target_count = emotion_counts.median()
print(f"🎯 Target count: {target_count}")


augmented_df = train_data.copy()

# Go through each rare emotion and duplicate rows containing that emotion
for emotion in rare_emotions:
    current_count = emotion_counts[emotion]
    multiplier = math.floor(target_count / current_count) - 1


    emotion_rows = train_data[train_data[emotion] == 1].copy()


    for _ in range(multiplier):
        augmented_df = pd.concat([augmented_df, emotion_rows], ignore_index=True)

    print(f"✅ Duplicated rows with {emotion} x{multiplier} → Added {len(emotion_rows) * multiplier} rows")

# Shuffle dataset
augmented_df = augmented_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Check new counts
new_counts = augmented_df[label_cols].sum().sort_values()
print("\n📈 New Emotion Counts:")
print(new_counts)

print(f"\n📊 Dataset size: {len(train_data)} → {len(augmented_df)} (+{len(augmented_df) - len(train_data)} rows)")

# Keep only necessary columns and convert multi_hot_labels back to the format expected by training
augmented_train_data = augmented_df[['text', 'multi_hot_labels']].copy()
augmented_train_data = augmented_train_data.rename(columns={'multi_hot_labels': 'labels'})

augmented_train_ds = Dataset.from_pandas(augmented_train_data)

# 80/10/10 SPLIT
split_ds = augmented_train_ds.train_test_split(test_size=0.2, seed=42)
train_ds = split_ds["train"]
temp_ds = split_ds["test"]

val_test_split = temp_ds.train_test_split(test_size=0.5, seed=42)
val_ds = val_test_split["train"]
test_ds = val_test_split["test"]

print(f"\n📊 Dataset splits:")
print(f"Train: {len(train_ds)} ({len(train_ds)/len(augmented_train_ds)*100:.1f}%)")
print(f"Val:   {len(val_ds)} ({len(val_ds)/len(augmented_train_ds)*100:.1f}%)")
print(f"Test:  {len(test_ds)} ({len(test_ds)/len(augmented_train_ds)*100:.1f}%)")


checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)

train_ds = train_ds.map(tokenize, batched=True)
val_ds = val_ds.map(tokenize, batched=True)
test_ds = test_ds.map(tokenize, batched=True)


columns = ["input_ids", "attention_mask", "labels"]
train_ds.set_format(type="torch", columns=columns)
val_ds.set_format(type="torch", columns=columns)
test_ds.set_format(type="torch", columns=columns)


model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    problem_type="multi_label_classification"
)


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.tensor(logits))
    y_pred = (probs > 0.5).int().numpy()
    y_true = labels
    acc = (y_pred == y_true).mean()
    f1_micro = f1_score(y_true, y_pred, average="micro", zero_division=0)
    f1_macro = f1_score(y_true, y_pred, average="macro", zero_division=0)
    return {
        "accuracy": acc,
        "f1_micro": f1_micro,
        "f1_macro": f1_macro,
    }

4.55.0


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

simplified/train-00000-of-00001.parquet:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

simplified/validation-00000-of-00001.par(…):   0%|          | 0.00/350k [00:00<?, ?B/s]

simplified/test-00000-of-00001.parquet:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

📊 Original Emotion Counts:
grief                77
pride               111
relief              153
nervousness         164
embarrassment       303
remorse             545
fear                596
desire              641
disgust             793
excitement          853
surprise           1060
caring             1087
realization        1110
disappointment     1269
sadness            1326
confusion          1368
joy                1452
anger              1567
optimism           1581
disapproval        2022
love               2086
curiosity          2191
amusement          2328
annoyance          2470
gratitude          2662
approval           2939
admiration         4130
neutral           14219
dtype: int64
🔍 Rare emotions: ['grief', 'pride', 'relief', 'nervousness', 'embarrassment', 'remorse', 'fear', 'desire', 'disgust', 'excitement']
🎯 Target count: 1297.5
✅ Duplicated rows with grief x15 → Added 1155 rows
✅ Duplicated rows with pride x10 → Added 1110 rows
✅ Duplicated rows with relief x

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/41653 [00:00<?, ? examples/s]

Map:   0%|          | 0/5207 [00:00<?, ? examples/s]

Map:   0%|          | 0/5207 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/fine_tuned_model",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1, #MAKE IT 3
    weight_decay=0.01,
    logging_dir='/content/logs',
    logging_steps=100,
    save_steps=500,
    eval_steps=500,
    save_total_limit=1,
    do_eval=True,
    do_train=True,
    report_to="none"
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


print("\n🚀 Starting training...")
trainer.train()


print("\n📊 Validation Results:")
val_results = trainer.evaluate(eval_dataset=val_ds)
print(val_results)


print("\n🎯 Final Test Results:")
test_results = trainer.evaluate(eval_dataset=test_ds)
print(test_results)


trainer.save_model()
print("\n💾 Model saved!")

  trainer = Trainer(



🚀 Starting training...


Step,Training Loss
100,0.1263
200,0.1153
300,0.0986
400,0.0947
500,0.0886
600,0.1076
700,0.1039
800,0.0999
900,0.0996
1000,0.0957



📊 Validation Results:


{'eval_loss': 0.08287400752305984, 'eval_accuracy': 0.9713092265905786, 'eval_f1_micro': 0.6006682577565633, 'eval_f1_macro': 0.5508386244220841, 'eval_runtime': 38.5754, 'eval_samples_per_second': 134.982, 'eval_steps_per_second': 8.451, 'epoch': 1.0}

🎯 Final Test Results:
{'eval_loss': 0.08137676864862442, 'eval_accuracy': 0.9716521715273396, 'eval_f1_micro': 0.6015617468427649, 'eval_f1_macro': 0.5475534538039282, 'eval_runtime': 37.4865, 'eval_samples_per_second': 138.903, 'eval_steps_per_second': 8.696, 'epoch': 1.0}

💾 Model saved!
