In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, DatasetDict
from transformers import XLMRobertaTokenizer, XLMRobertaModel, Trainer, TrainingArguments, PreTrainedModel
import torch
from torch import nn
import numpy as np
from sklearn.metrics import classification_report

# Load Data
df = pd.read_csv('/content/final_cleaned_emotion_categories (3).csv')
df = df.dropna(subset=['text', 'sentiment', 'emotion', 'moral_foundation'])

# Encode Labels
label_encoders = {}
for col in ['sentiment', 'emotion', 'moral_foundation']:
    le = LabelEncoder()
    df[col + '_label'] = le.fit_transform(df[col])
    label_encoders[col] = le

# Train-Test Split
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[['sentiment_label', 'emotion_label', 'moral_foundation_label']])

# Huggingface Dataset
ds = DatasetDict({
    "train": Dataset.from_pandas(train_df[['text', 'sentiment_label', 'emotion_label', 'moral_foundation_label']]),
    "validation": Dataset.from_pandas(val_df[['text', 'sentiment_label', 'emotion_label', 'moral_foundation_label']]),
})

# Tokenizer
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")

def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

ds = ds.map(tokenize_function, batched=True)
ds = ds.rename_column("sentiment_label", "label_sentiment")
ds = ds.rename_column("emotion_label", "label_emotion")
ds = ds.rename_column("moral_foundation_label", "label_moral")
ds.set_format("torch", columns=["input_ids", "attention_mask", "label_sentiment", "label_emotion", "label_moral"])

# Model Definition
class MultiHeadModel(nn.Module):
    def __init__(self, base_model_name, num_sentiment, num_emotion, num_moral):
        super().__init__()
        self.base = XLMRobertaModel.from_pretrained(base_model_name)
        hidden_size = self.base.config.hidden_size
        self.dropout = nn.Dropout(0.2)
        self.sentiment = nn.Linear(hidden_size, num_sentiment)
        self.emotion = nn.Linear(hidden_size, num_emotion)
        self.moral = nn.Linear(hidden_size, num_moral)

    def forward(self, input_ids, attention_mask, labels=None, **kwargs):
        outputs = self.base(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]
        pooled_output = self.dropout(pooled_output)
        logits_sentiment = self.sentiment(pooled_output)
        logits_emotion = self.emotion(pooled_output)
        logits_moral = self.moral(pooled_output)
        return {
            "logits_sentiment": logits_sentiment,
            "logits_emotion": logits_emotion,
            "logits_moral": logits_moral
        }

# Custom Trainer
class MultiHeadTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels_sentiment = inputs.pop("label_sentiment")
        labels_emotion = inputs.pop("label_emotion")
        labels_moral = inputs.pop("label_moral")
        outputs = model(**inputs)
        loss_fn = nn.CrossEntropyLoss()
        loss_sentiment = loss_fn(outputs["logits_sentiment"], labels_sentiment)
        loss_emotion = loss_fn(outputs["logits_emotion"], labels_emotion)
        loss_moral = loss_fn(outputs["logits_moral"], labels_moral)
        loss = loss_sentiment + loss_emotion + loss_moral
        return (loss, outputs) if return_outputs else loss

# Model Init
model = MultiHeadModel(
    base_model_name="xlm-roberta-base",
    num_sentiment=len(label_encoders['sentiment'].classes_),
    num_emotion=len(label_encoders['emotion'].classes_),
    num_moral=len(label_encoders['moral_foundation'].classes_)
)

# Training Args
training_args = TrainingArguments(
    output_dir="./multihead_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=True,
)

trainer = MultiHeadTrainer(
    model=model,
    args=training_args,
    train_dataset=ds["train"],
    eval_dataset=ds["validation"],
    tokenizer=tokenizer
)

# Train
trainer.train()


In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"  # disable external logging

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
from transformers import (
    XLMRobertaTokenizer,
    XLMRobertaModel,
    Trainer,
    TrainingArguments
)
import torch
from torch import nn

# 1) Load the CSV
df = pd.read_csv('/content/final_cleaned_emotion_categories (3).csv')

# Drop any rows missing text or labels
df = df.dropna(subset=['Details', 'Sentiment', 'Emotion', 'Moral_Foundation']).reset_index(drop=True)

# 2) Encode each target label to integers
label_encoders = {}
for col in ['Sentiment', 'Emotion', 'Moral_Foundation']:
    le = LabelEncoder()
    df[col + '_id'] = le.fit_transform(df[col])
    label_encoders[col] = le

# 3) Train/Validation split (stratify on all three to preserve distributions)
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df[['Sentiment_id', 'Emotion_id', 'Moral_Foundation_id']]
)

# 4) Convert to HuggingFace DatasetDict
def to_hf_dataset(pandas_df):
    return Dataset.from_pandas(
        pandas_df[['Details', 'Sentiment_id', 'Emotion_id', 'Moral_Foundation_id']]
    )

hf_datasets = DatasetDict({
    'train': to_hf_dataset(train_df),
    'validation': to_hf_dataset(val_df)
})

# 5) Tokenization
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

def tokenize_batch(batch):
    return tokenizer(
        batch['Details'],
        padding='max_length',
        truncation=True,
        max_length=128
    )

hf_datasets = hf_datasets.map(tokenize_batch, batched=True)
hf_datasets = hf_datasets.rename_column('Sentiment_id', 'label_sentiment')
hf_datasets = hf_datasets.rename_column('Emotion_id', 'label_emotion')
hf_datasets = hf_datasets.rename_column('Moral_Foundation_id', 'label_moral')
hf_datasets.set_format(
    type='torch',
    columns=['input_ids', 'attention_mask', 'label_sentiment', 'label_emotion', 'label_moral']
)

# 6) Define a multi-head classification model
class MultiHeadXLMRoberta(nn.Module):
    def __init__(self, base_model, num_sent, num_emo, num_moral):
        super().__init__()
        self.backbone = XLMRobertaModel.from_pretrained(base_model)
        hidden_size = self.backbone.config.hidden_size
        self.dropout = nn.Dropout(0.2)
        self.fc_sentiment = nn.Linear(hidden_size, num_sent)
        self.fc_emotion   = nn.Linear(hidden_size, num_emo)
        self.fc_moral     = nn.Linear(hidden_size, num_moral)

    def forward(self, input_ids, attention_mask, **kwargs):
        outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        pooled = outputs.last_hidden_state[:, 0]            # [CLS] token
        pooled = self.dropout(pooled)
        return {
            'logits_sentiment': self.fc_sentiment(pooled),
            'logits_emotion':    self.fc_emotion(pooled),
            'logits_moral':      self.fc_moral(pooled)
        }

# 7) Custom Trainer to sum 3 losses
class MultiHeadTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels_sent = inputs.pop('label_sentiment')
        labels_emo  = inputs.pop('label_emotion')
        labels_moral = inputs.pop('label_moral')
        outputs = model(**inputs)
        loss_fn = nn.CrossEntropyLoss()
        loss = (
            loss_fn(outputs['logits_sentiment'], labels_sent) +
            loss_fn(outputs['logits_emotion'],    labels_emo)  +
            loss_fn(outputs['logits_moral'],      labels_moral)
        )
        return (loss, outputs) if return_outputs else loss

# Instantiate model
model = MultiHeadXLMRoberta(
    base_model='xlm-roberta-base',
    num_sent=len(label_encoders['Sentiment'].classes_),
    num_emo =len(label_encoders['Emotion'].classes_),
    num_moral=len(label_encoders['Moral_Foundation'].classes_)
)

# 8) Training arguments
training_args = TrainingArguments(
    output_dir='./model_output',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=50
)

trainer = MultiHeadTrainer(
    model=model,
    args=training_args,
    train_dataset=hf_datasets['train'],
    eval_dataset= hf_datasets['validation'],
    tokenizer=tokenizer
)

# 9) Train!
trainer.train()

# 10) Evaluate & Save
metrics = trainer.evaluate()
print("Evaluation metrics:", metrics)
trainer.save_model('./final_multihead_model')

# 11) To use on new sentences:
#    tokens = tokenizer(["Your new text"], padding="max_length", truncation=True, max_length=128, return_tensors="pt")
#    outputs = model(**tokens)
#    preds_sent = outputs['logits_sentiment'].argmax(dim=1)
#    decoded_sent = label_encoders['Sentiment'].inverse_transform(preds_sent.cpu().numpy())



ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"  # disable Weights & Biases

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
from transformers import (
    XLMRobertaTokenizer,
    XLMRobertaModel,
    Trainer,
    TrainingArguments
)
import torch
from torch import nn

# 1) Load & clean data
df = pd.read_csv('/content/final_cleaned_emotion_categories (3).csv')
df = df.dropna(subset=['Details', 'Sentiment', 'Emotion', 'Moral_Foundation']).reset_index(drop=True)

# 2) Encode labels to integer IDs
label_encoders = {}
for col in ['Sentiment', 'Emotion', 'Moral_Foundation']:
    le = LabelEncoder()
    df[col + '_id'] = le.fit_transform(df[col])
    label_encoders[col] = le

# 3) Drop rare (Sentiment, Emotion, Moral) combinations (<2 examples)
combo_counts = (
    df
    .groupby(['Sentiment_id','Emotion_id','Moral_Foundation_id'])
    .size()
    .reset_index(name='count')
)
valid_combos = combo_counts[combo_counts['count'] >= 2][
    ['Sentiment_id','Emotion_id','Moral_Foundation_id']
]
df = df.merge(
    valid_combos.assign(keep=1),
    on=['Sentiment_id','Emotion_id','Moral_Foundation_id'],
    how='left'
)
df = df[df['keep']==1].drop(columns='keep').reset_index(drop=True)
print(f"Filtered dataset size: {len(df):,} examples")

# 4) Stratified train/validation split on all three IDs
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df[['Sentiment_id','Emotion_id','Moral_Foundation_id']]
)
print(f"Train: {len(train_df):,} | Validation: {len(val_df):,}")

# 5) Convert to HuggingFace Dataset
def make_hf_dataset(pandas_df):
    return Dataset.from_pandas(
        pandas_df[['Details','Sentiment_id','Emotion_id','Moral_Foundation_id']]
    )

hf_datasets = DatasetDict({
    'train':      make_hf_dataset(train_df),
    'validation': make_hf_dataset(val_df)
})

# 6) Tokenize
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

def tokenize_fn(batch):
    return tokenizer(
        batch['Details'],
        padding='max_length',
        truncation=True,
        max_length=128
    )

hf_datasets = hf_datasets.map(tokenize_fn, batched=True)
hf_datasets = hf_datasets.rename_column('Sentiment_id', 'label_sentiment')
hf_datasets = hf_datasets.rename_column('Emotion_id',   'label_emotion')
hf_datasets = hf_datasets.rename_column('Moral_Foundation_id', 'label_moral')
hf_datasets.set_format(
    type='torch',
    columns=['input_ids','attention_mask','label_sentiment','label_emotion','label_moral']
)

# 7) Multi-head model definition
class MultiHeadXLMRoberta(nn.Module):
    def __init__(self, base, n_sent, n_emo, n_moral):
        super().__init__()
        self.backbone = XLMRobertaModel.from_pretrained(base)
        hidden = self.backbone.config.hidden_size
        self.dropout = nn.Dropout(0.2)
        self.fc_sent = nn.Linear(hidden, n_sent)
        self.fc_emo  = nn.Linear(hidden, n_emo)
        self.fc_moral= nn.Linear(hidden, n_moral)

    def forward(self, input_ids, attention_mask, **kwargs):
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        pooled = out.last_hidden_state[:,0]  # [CLS]
        pooled = self.dropout(pooled)
        return {
            'logits_sentiment': self.fc_sent(pooled),
            'logits_emotion':    self.fc_emo(pooled),
            'logits_moral':      self.fc_moral(pooled),
        }

# 8) Custom Trainer to sum three cross‑entropy losses
class MultiHeadTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        sent = inputs.pop('label_sentiment')
        emo  = inputs.pop('label_emotion')
        moral= inputs.pop('label_moral')
        outputs = model(**inputs)
        loss_fn = nn.CrossEntropyLoss()
        loss  = loss_fn(outputs['logits_sentiment'], sent)
        loss += loss_fn(outputs['logits_emotion'],    emo)
        loss += loss_fn(outputs['logits_moral'],      moral)
        return (loss, outputs) if return_outputs else loss

# 9) Instantiate model
model = MultiHeadXLMRoberta(
    base='xlm-roberta-base',
    n_sent=len(label_encoders['Sentiment'].classes_),
    n_emo =len(label_encoders['Emotion'].classes_),
    n_moral=len(label_encoders['Moral_Foundation'].classes_)
)

# 10) Training arguments
training_args = TrainingArguments(
    output_dir='./multihead_output',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=50
)

trainer = MultiHeadTrainer(
    model=model,
    args=training_args,
    train_dataset=hf_datasets['train'],
    eval_dataset= hf_datasets['validation'],
    tokenizer=tokenizer
)

# 11) Train!
trainer.train()

# 12) Evaluate & Save
metrics = trainer.evaluate()
print("Eval metrics:", metrics)
trainer.save_model('./final_multihead_model')

# ----- Inference example -----
# tokens = tokenizer(
#     ["Your new text here"],
#     padding='max_length',
#     truncation=True,
#     max_length=128,
#     return_tensors='pt'
# )
# out = model(**tokens)
# pred_ids = {
#     'sentiment': out['logits_sentiment'].argmax(dim=1).cpu().numpy(),
#     'emotion':   out['logits_emotion'].argmax(dim=1).cpu().numpy(),
#     'moral':     out['logits_moral'].argmax(dim=1).cpu().numpy(),
# }
# decoded = {
#     key: label_encoders[label].inverse_transform(pred_ids[key])
#     for key, label in zip(pred_ids, ['Sentiment','Emotion','Moral_Foundation'])
# }
# print(decoded)


Filtered dataset size: 975 examples
Train: 780 | Validation: 195


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Map:   0%|          | 0/780 [00:00<?, ? examples/s]

Map:   0%|          | 0/195 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'

In [None]:
!pip uninstall -y transformers huggingface-hub peft accelerate
!pip cache purge

# Install specific compatible versions
# transformers 4.40.1 is known to work well with peft 0.10.0
!pip install --no-cache-dir --force-reinstall transformers==4.40.1 huggingface-hub==0.23.0 peft==0.10.0 datasets accelerate==0.30.1

# Kill the current process to ensure the new libraries are loaded
import os
os.kill(os.getpid(), 9)

Found existing installation: transformers 4.52.4
Uninstalling transformers-4.52.4:
  Successfully uninstalled transformers-4.52.4
Found existing installation: huggingface-hub 0.33.0
Uninstalling huggingface-hub-0.33.0:
  Successfully uninstalled huggingface-hub-0.33.0
Found existing installation: peft 0.15.2
Uninstalling peft-0.15.2:
  Successfully uninstalled peft-0.15.2
Found existing installation: accelerate 1.7.0
Uninstalling accelerate-1.7.0:
  Successfully uninstalled accelerate-1.7.0
[0mFiles removed: 0
Collecting transformers==4.40.1
  Downloading transformers-4.40.1-py3-none-any.whl.metadata (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.0/138.0 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub==0.23.0
  Downloading huggingface_hub-0.23.0-py3-none-any.whl.metadata (12 kB)
Collecting peft==0.10.0
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl

In [None]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU device:", torch.cuda.get_device_name(0))


CUDA available: True
GPU device: Tesla T4


In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"  # disable Weights & Biases

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
from transformers import (
    XLMRobertaTokenizer,
    XLMRobertaModel,
    Trainer,
    TrainingArguments
)
import torch
from torch import nn

# 1) Load & clean data
df = pd.read_csv('/content/final_cleaned_emotion_categories (3).csv')
df = df.dropna(subset=['Details', 'Sentiment', 'Emotion', 'Moral_Foundation']).reset_index(drop=True)

# 2) Encode labels
label_encoders = {}
for col in ['Sentiment', 'Emotion', 'Moral_Foundation']:
    le = LabelEncoder()
    df[col + '_id'] = le.fit_transform(df[col])
    label_encoders[col] = le
with open('/content/label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)

print("✅ Saved label_encoders.pkl")

# 3) Drop rare triplets
combo_counts = (
    df
    .groupby(['Sentiment_id','Emotion_id','Moral_Foundation_id'])
    .size()
    .reset_index(name='count')
)
valid_combos = combo_counts[combo_counts['count'] >= 2][
    ['Sentiment_id','Emotion_id','Moral_Foundation_id']
]
df = df.merge(
    valid_combos.assign(keep=1),
    on=['Sentiment_id','Emotion_id','Moral_Foundation_id'],
    how='left'
)
df = df[df['keep']==1].drop(columns='keep').reset_index(drop=True)
print(f"Filtered dataset size: {len(df):,} examples")

# 4) Stratified split on all three
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df[['Sentiment_id','Emotion_id','Moral_Foundation_id']]
)
print(f"Train: {len(train_df):,} | Val: {len(val_df):,}")

# 5) HF DatasetDict
def make_hf(ds):
    return Dataset.from_pandas(
        ds[['Details','Sentiment_id','Emotion_id','Moral_Foundation_id']]
    )

hf_datasets = DatasetDict({
    'train':      make_hf(train_df),
    'validation': make_hf(val_df)
})

# 6) Tokenize
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
def tokenize_fn(batch):
    return tokenizer(batch['Details'], padding='max_length', truncation=True, max_length=128)

hf_datasets = hf_datasets.map(tokenize_fn, batched=True)
hf_datasets = hf_datasets.rename_column('Sentiment_id','label_sentiment')
hf_datasets = hf_datasets.rename_column('Emotion_id','label_emotion')
hf_datasets = hf_datasets.rename_column('Moral_Foundation_id','label_moral')
hf_datasets.set_format('torch', ['input_ids','attention_mask','label_sentiment','label_emotion','label_moral'])

# 7) Model
class MultiHeadXLMRoberta(nn.Module):
    def __init__(self, base, n_sent, n_emo, n_moral):
        super().__init__()
        self.backbone = XLMRobertaModel.from_pretrained(base)
        hidden = self.backbone.config.hidden_size
        self.dropout = nn.Dropout(0.2)
        self.fc_sent = nn.Linear(hidden, n_sent)
        self.fc_emo  = nn.Linear(hidden, n_emo)
        self.fc_moral= nn.Linear(hidden, n_moral)

    def forward(self, input_ids, attention_mask, label_sentiment=None, label_emotion=None, label_moral=None, **kwargs):
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        pooled = out.last_hidden_state[:,0]
        pooled = self.dropout(pooled)
        return {
            'logits_sentiment': self.fc_sent(pooled),
            'logits_emotion':    self.fc_emo(pooled),
            'logits_moral':      self.fc_moral(pooled),
            'label_sentiment': label_sentiment,
            'label_emotion': label_emotion,
            'label_moral': label_moral,
        }

# 8) Custom Trainer
class MultiHeadTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        # Pop labels before passing inputs to the model's forward method
        labels_sentiment = inputs.pop('label_sentiment')
        labels_emotion = inputs.pop('label_emotion')
        labels_moral = inputs.pop('label_moral')

        outputs = model(**inputs)

        loss_fn = nn.CrossEntropyLoss()
        loss  = loss_fn(outputs['logits_sentiment'], labels_sentiment)
        loss += loss_fn(outputs['logits_emotion'],    labels_emotion)
        loss += loss_fn(outputs['logits_moral'],      labels_moral)

        return (loss, outputs) if return_outputs else loss


# 9) Instantiate
model = MultiHeadXLMRoberta(
    base='xlm-roberta-base',
    n_sent=len(label_encoders['Sentiment'].classes_),
    n_emo =len(label_encoders['Emotion'].classes_),
    n_moral=len(label_encoders['Moral_Foundation'].classes_)
)

# 10) TrainingArguments
batch_size = 8
steps_per_epoch = len(hf_datasets['train']) // batch_size

training_args = TrainingArguments(
    output_dir='./multihead_output',
    num_train_epochs=3,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,

    evaluation_strategy="steps", # Explicitly set evaluation strategy
    save_strategy="steps",       # Explicitly set save strategy
    eval_steps=steps_per_epoch,       # one evaluation per epoch
    save_steps=steps_per_epoch,       # one save per epoch
    save_total_limit=2,
    load_best_model_at_end=True,

    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=steps_per_epoch // 2,  # log twice per epoch
    fp16=True,
)

trainer = MultiHeadTrainer(
    model=model,
    args=training_args,
    train_dataset=hf_datasets['train'],
    eval_dataset=hf_datasets['validation'],
    tokenizer=tokenizer
)

# 11) Train
trainer.train()

# 12) Evaluate & Save
metrics = trainer.evaluate()
print("Eval metrics:", metrics)
trainer.save_model('./final_multihead_model')
# after training…
#trainer.save_model('./final_multihead_model')      # saves model weights + config.json
tokenizer.save_pretrained('./final_multihead_model')  # saves tokenizer files


✅ Saved label_encoders.pkl
Filtered dataset size: 975 examples
Train: 780 | Val: 195




Map:   0%|          | 0/780 [00:00<?, ? examples/s]

Map:   0%|          | 0/195 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss
97,5.0449,5.14666
194,5.1282,4.898461
291,5.0349,4.894531


Eval metrics: {'eval_loss': 4.89453125, 'eval_runtime': 0.5628, 'eval_samples_per_second': 346.454, 'eval_steps_per_second': 44.417, 'epoch': 3.0}


('./final_multihead_model/tokenizer_config.json',
 './final_multihead_model/special_tokens_map.json',
 './final_multihead_model/sentencepiece.bpe.model',
 './final_multihead_model/added_tokens.json')

In [None]:
import shutil
from google.colab import files

# 1) Create a ZIP archive of your model folder
shutil.make_archive('model_archive', 'zip', './final_multihead_model')

# 2) Download the ZIP to your local machine
files.download('model_archive.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
import shutil
import os

# 1) Mount your Drive
drive.mount('/content/drive')

# 2) Define paths
MODEL_DIR = './final_multihead_model'     # or wherever your model is
DRIVE_TARGET = '/content/drive/MyDrive/final_multihead_model'    # target folder
DRIVE_ZIP    = '/content/drive/MyDrive/final_multihead_model.zip'  # target zip

# 3) (Option A) Copy the folder directly
if os.path.isdir(MODEL_DIR):
    # Remove existing folder in Drive if desired:
    # shutil.rmtree(DRIVE_TARGET, ignore_errors=True)
    shutil.copytree(MODEL_DIR, DRIVE_TARGET)
    print(f"✅ Model folder copied to Drive at: {DRIVE_TARGET}")
else:
    print(f"❌ Model folder not found: {MODEL_DIR}")

# 4) (Option B) Or create a ZIP and copy that
shutil.make_archive('/content/final_multihead_model', 'zip', MODEL_DIR)
shutil.move('/content/final_multihead_model.zip', DRIVE_ZIP)
print(f"✅ Model ZIP saved to Drive at: {DRIVE_ZIP}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Model folder copied to Drive at: /content/drive/MyDrive/final_multihead_model
✅ Model ZIP saved to Drive at: /content/drive/MyDrive/final_multihead_model.zip


In [None]:
from transformers import XLMRobertaConfig, XLMRobertaTokenizer

MODEL_DIR = "/content/final_multihead_model"

# 1a) Download and save the config.json
config = XLMRobertaConfig.from_pretrained("xlm-roberta-base")
config.save_pretrained(MODEL_DIR)

# 1b) (Optional but recommended) save tokenizer files here too
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
tokenizer.save_pretrained(MODEL_DIR)

# Verify
!ls -lh $MODEL_DIR




total 1.1G
-rw-r--r-- 1 root root  640 Jun 18 16:32 config.json
-rw-r--r-- 1 root root 1.1G Jun 18 16:22 model.safetensors
-rw-r--r-- 1 root root 4.9M Jun 18 16:32 sentencepiece.bpe.model
-rw-r--r-- 1 root root  280 Jun 18 16:32 special_tokens_map.json
-rw-r--r-- 1 root root 1.2K Jun 18 16:32 tokenizer_config.json
-rw-r--r-- 1 root root 5.3K Jun 18 16:22 training_args.bin


In [None]:
!pip install safetensors

import torch
from safetensors.torch import load_file





In [None]:
import torch
from safetensors.torch import load_file

# 1) Load safetensors into memory
state_dict = load_file("/content/final_multihead_model/model.safetensors", device="cpu")

# 2) Save as the standard PyTorch bin
torch.save(state_dict, "/content/final_multihead_model/pytorch_model.bin")
print("✅ Written pytorch_model.bin")

# 3) Now you can do:
model.load_state_dict(torch.load("/content/final_multihead_model/pytorch_model.bin"), strict=False)


✅ Written pytorch_model.bin


_IncompatibleKeys(missing_keys=['fc_s.weight', 'fc_s.bias', 'fc_e.weight', 'fc_e.bias', 'fc_m.weight', 'fc_m.bias'], unexpected_keys=['fc_emo.bias', 'fc_emo.weight', 'fc_moral.bias', 'fc_moral.weight', 'fc_sent.bias', 'fc_sent.weight'])

In [None]:
# 1) Imports
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import XLMRobertaTokenizer
import pickle

# 2) Define your exact Multi‐Head class (same as in training)
import torch.nn as nn
from transformers import XLMRobertaModel

class MultiHeadXLMRoberta(nn.Module):
    def __init__(self, base, n_sent, n_emo, n_moral):
        super().__init__()
        self.backbone = XLMRobertaModel.from_pretrained(base)
        hid = self.backbone.config.hidden_size
        self.drop = nn.Dropout(0.2)
        self.fc_s = nn.Linear(hid, n_sent)
        self.fc_e = nn.Linear(hid, n_emo)
        self.fc_m = nn.Linear(hid, n_moral)
    def forward(self, input_ids, attention_mask):
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        cls = out.last_hidden_state[:, 0]
        cls = self.drop(cls)
        return {
            'logits_sentiment': self.fc_s(cls),
            'logits_emotion':    self.fc_e(cls),
            'logits_moral':      self.fc_m(cls),
        }

# 3) Load your data
df = pd.read_csv('/content/all_ebglish_newspaper.csv')
#    → replace with the path to the CSV you want to label
texts = df['Details'].fillna("").tolist()

# 4) Load tokenizer & model
MODEL_DIR = '/content/final_multihead_model'  # or wherever you unzipped
tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_DIR)

# 5) Load LabelEncoders (that you created & pickle-dumped in training)
with open('/content/label_encoders.pkl','rb') as f:
    label_encoders = pickle.load(f)
# label_encoders is a dict: {'Sentiment': LabelEncoder(), ...}

# 6) Rebuild model architecture and load weights
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = MultiHeadXLMRoberta(
    base=MODEL_DIR,
    n_sent=len(label_encoders['Sentiment'].classes_),
    n_emo =len(label_encoders['Emotion'].classes_),
    n_moral=len(label_encoders['Moral_Foundation'].classes_)
)
model.load_state_dict(
    torch.load(f"{MODEL_DIR}/pytorch_model.bin", map_location=device),
    strict=False
)
model.to(device)
model.eval()

# 7) Tokenize & batch
enc = tokenizer(
    texts,
    padding='max_length',
    truncation=True,
    max_length=128,
    return_tensors='pt'
)
dataset = TensorDataset(enc['input_ids'], enc['attention_mask'])
loader  = DataLoader(dataset, batch_size=16)

# 8) Inference loop
all_s, all_e, all_m = [], [], []
with torch.no_grad():
    for input_ids, attention_mask in loader:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        out = model(input_ids=input_ids, attention_mask=attention_mask)
        preds_s = out['logits_sentiment'].argmax(dim=-1).cpu().numpy()
        preds_e = out['logits_emotion'].argmax(dim=-1).cpu().numpy()
        preds_m = out['logits_moral'].argmax(dim=-1).cpu().numpy()
        all_s.extend(preds_s)
        all_e.extend(preds_e)
        all_m.extend(preds_m)

# 9) Decode back to strings
df['pred_sentiment']      = label_encoders['Sentiment'].inverse_transform(all_s)
df['pred_emotion']        = label_encoders['Emotion'].inverse_transform(all_e)
df['pred_moral_foundation'] = label_encoders['Moral_Foundation'].inverse_transform(all_m)

# 10) Save the newly labeled file
df.to_csv('/content/labeled_with_model.csv', index=False)
print("✅ Done! Labeled CSV saved to /content/labeled_with_model.csv")


Some weights of XLMRobertaModel were not initialized from the model checkpoint at /content/final_multihead_model and are newly initialized: ['embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.weight', 'encoder.layer.0.output.dense.bias

✅ Done! Labeled CSV saved to /content/labeled_with_model.csv


In [None]:
af=pd.read_csv("/content/labeled_with_model.csv")
af.groupby(["pred_emotion"]).size()

Unnamed: 0_level_0,0
pred_emotion,Unnamed: 1_level_1
Respect,17697


In [None]:
af.groupby(["pred_sentiment"]).size()

NameError: name 'af' is not defined

In [None]:
af.groupby(["pred_moral_foundation"]).size()

Unnamed: 0_level_0,0
pred_moral_foundation,Unnamed: 1_level_1
Care/Harm,421
Liberty/Oppression,9545
Loyalty/Betrayal,7731
