In [60]:
import pandas as pd
from transformers import AutoTokenizer,DataCollatorWithPadding,Trainer,EarlyStoppingCallback
from transformers import AutoModelForSequenceClassification, TrainingArguments,SchedulerType
from transformers import trainer_utils
from transformers.trainer_utils import IntervalStrategy as SaveStrategy
from datasets import load_dataset
from datasets import Dataset, DatasetDict
import torch
from torch.serialization import add_safe_globals
import evaluate
import numpy as np
from sklearn.metrics import accuracy_score, log_loss, classification_report
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
from sklearn.model_selection import train_test_split
from typing import Dict, List, Tuple, Any
import matplotlib.pyplot as plt
import seaborn as sns
import re
import random
import math
import ipywidgets
import os



In [61]:
from datasets.utils.logging import enable_progress_bar
enable_progress_bar()

In [62]:
import sys, torch
print("Python:", sys.version)
print("PyTorch:", torch.__version__)
import transformers; print("Transformers:", transformers.__version__, transformers.__file__)
import subprocess; subprocess.run([sys.executable, "-m", "pip", "show", "transformers"])

Python: 3.10.12 (main, May 27 2025, 17:12:29) [GCC 11.4.0]
PyTorch: 2.7.0
Transformers: 4.55.0 /home/ubuntu/.local/lib/python3.10/site-packages/transformers/__init__.py
Name: transformers
Version: 4.55.0
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /home/ubuntu/.local/lib/python3.10/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: 


CompletedProcess(args=['/usr/bin/python3', '-m', 'pip', 'show', 'transformers'], returncode=0)

In [63]:
print("PyTorch version:", torch.__version__)
print("Transformers version:", transformers.__version__)

PyTorch version: 2.7.0
Transformers version: 4.55.0


In [64]:
import importlib
import finetuning_eval_func
importlib.reload(finetuning_eval_func)
from finetuning_eval_func import process_csv_social_bias, conf_matrix


/usr/bin/python3


In [65]:
train_df, val_df, test_df, label2id, id2label = process_csv_social_bias('./')

Train size: 29790
Validation size: 3724
Test size: 3724


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = df['bias_type'].map(label2id)


In [66]:
print("CUDA available:", torch.cuda.is_available())
print("GPU name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")
os.environ["TRANSFORMERS_NO_TF"] = "1"

CUDA available: True
GPU name: NVIDIA RTX A6000


In [67]:
print(torch.version.cuda)         # CUDA version PyTorch 
print(torch.backends.cudnn.version())  # cuDNN version
print(torch.cuda.is_available())

12.8
90800
True


In [68]:
def tokenize_function(examples):
    # use truncation only-collator does padding dynamically
    return tokenizer(examples["post"], truncation=True)

### Loading the training arguments from the best cross entropy loss Hatebert model from sweep1_htebert.ipynb

In [69]:
def compute_class_weights(train_df, label2id):
    # If train_df['label'] are strings, map them to ids 
    y = train_df['label']
    if not np.issubdtype(y.dtype, np.number):
        y = y.map(label2id).values
    else:
        y = y.values

    classes = np.array(sorted(label2id.values()))
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=y)
    return torch.tensor(weights, dtype=torch.float)

class WeightedTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights  # torch tensor, move to device in compute loss

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits  # [batch, num_labels] for sequence classification

        # Ensure weights are on the same device as logits
        if self.class_weights is not None:
            class_weights = self.class_weights.to(logits.device)
            loss_fct = CrossEntropyLoss(weight=class_weights)
        else:
            loss_fct = CrossEntropyLoss()

        # Flattening to be sure, logits in (batch_size,num-labels) and labels (batch_size, )
        loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [70]:
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1_macro": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
    }

In [71]:
print("torch.version.cuda:", torch.version.cuda)       # None on CPU build
print("torch.cuda.is_available():", torch.cuda.is_available())  # False
print("CUDA_VISIBLE_DEVICES:", os.environ.get("CUDA_VISIBLE_DEVICES"))

torch.version.cuda: 12.8
torch.cuda.is_available(): True
CUDA_VISIBLE_DEVICES: None


In [72]:
# Device: CUDA if available, else CPU (explicitly avoid Metal)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

def set_seed(seed=42):
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)


Using device: cuda


In [73]:
#Loading from checkpoing and inheriting parameters
CKPT_DIR = "checkpoint-2236"  # .../checkpoint-2500
DATA_DIR = "./"              # dir that contains social_bias.csv

torch.set_float32_matmul_precision("high")   # A6000-friendly
torch.backends.cuda.matmul.allow_tf32 = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
except AttributeError:
    use_bf16 = False
use_fp16 = torch.cuda.is_available() and not use_bf16


# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained(CKPT_DIR, use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained(CKPT_DIR).to(device)
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    pad_to_multiple_of=8  # nvidia Tensor Cores optimized for matrixes multiple of 8 -set to None if CPU-only
)

# --- STRICT hyperparam recovery (no defaults) ----
def load_training_args_best_effort(ckpt_dir):
    args_bin  = os.path.join(ckpt_dir, "training_args.bin")
    args_json = os.path.join(ckpt_dir, "training_args.json")
    hp = None

    if os.path.exists(args_bin):
        try:
            hp = torch.load(args_bin, map_location="cpu", weights_only=False)  # No safe_globals
            print("Loaded training_args from BIN.")
        except Exception as e:
            print("BIN load failed:", repr(e))

    if hp is None and os.path.exists(args_json):
        try:
            with open(args_json, "r") as f:
                hp = json.load(f)
            print("Loaded training_args from JSON.")
        except Exception as e:
            print("JSON load failed:", repr(e))

    # Normalize to dict; fill only what exists; tolerate missing keys
    def getv(obj, k):
        return obj.get(k, None) if isinstance(obj, dict) else getattr(obj, k, None)

    d = {}
    if hp is not None:
        d["learning_rate"]  = getv(hp, "learning_rate")
        d["weight_decay"]   = getv(hp, "weight_decay")
        # Some runs store warmup as steps, not ratio
        wr = getv(hp, "warmup_ratio")
        ws = getv(hp, "warmup_steps")
        d["warmup_ratio"]   = wr if wr is not None else (0.0 if ws else None)  # we’ll set our own anyway
        lst = getv(hp, "lr_scheduler_type")
        d["lr_scheduler_type"] = str(lst) if lst is not None else None
        # Nice-to-haves:
        for k in ["adam_beta1","adam_beta2","adam_epsilon","max_grad_norm",
                  "per_device_train_batch_size","per_device_eval_batch_size",
                  "gradient_accumulation_steps","eval_steps","logging_steps","save_steps"]:
            v = getv(hp, k)
            if v is not None:
                d[k] = v
    return {k:v for k,v in d.items() if v is not None}

saved_hp = load_training_args_best_effort(CKPT_DIR)
print("Recovered HPs (non-strict):", saved_hp)




Loaded training_args from BIN.
Recovered HPs (non-strict): {'learning_rate': 1.7011977627094936e-05, 'weight_decay': 0.01, 'warmup_ratio': 0.0, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'gradient_accumulation_steps': 1, 'logging_steps': 500, 'save_steps': 500}


In [74]:
# pick the text column your tokenize_function expects
TEXT_COL = "text" if "text" in train_df.columns else "post"

# ensure label is clean ints (if not done)
for df_ in (train_df, val_df, test_df):
    assert "label" in df_.columns and df_["label"].notna().all(), "Labels must be present and non-NaN"
    df_["label"] = df_["label"].astype(int)

# build datasets (avoid index column)
train_dataset = Dataset.from_pandas(train_df[[TEXT_COL, "label"]], preserve_index=False)
val_dataset   = Dataset.from_pandas(val_df[[TEXT_COL, "label"]],   preserve_index=False)
test_dataset  = Dataset.from_pandas(test_df[[TEXT_COL, "label"]],  preserve_index=False)

# tokenize (tokenize_function should use batch[TEXT_COL])
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset   = val_dataset.map(tokenize_function, batched=True)
test_dataset  = test_dataset.map(tokenize_function, batched=True)

# keep only what Trainer/model need (reassign!)
cols_to_keep = {"input_ids", "attention_mask", "label"}
train_dataset = train_dataset.remove_columns([c for c in train_dataset.column_names if c not in cols_to_keep])
val_dataset   = val_dataset.remove_columns([c for c in val_dataset.column_names   if c not in cols_to_keep])
test_dataset  = test_dataset.remove_columns([c for c in test_dataset.column_names if c not in cols_to_keep])

# correct column name 
for d in (train_dataset, val_dataset, test_dataset):
    if "label" in d.column_names and "labels" not in d.column_names:
        d = d.rename_column("label", "labels")


Map:   0%|          | 0/29790 [00:00<?, ? examples/s]

Map:   0%|          | 0/3724 [00:00<?, ? examples/s]

Map:   0%|          | 0/3724 [00:00<?, ? examples/s]

In [None]:
# precision choice (A6000 supports bf16)
# Use the recovered hyperparams
OUTPUT_DIR = "./hatebert_fresh_linear_try2"  # changed folder from prev run

PER_DEV_TRAIN_BS = saved_hp.get("per_device_train_batch_size", 8)
PER_DEV_EVAL_BS  = saved_hp.get("per_device_eval_batch_size", 8)
GRAD_ACCUM       = saved_hp.get("gradient_accumulation_steps", 1)

NEW_LR        = saved_hp.get("learning_rate", 1.7011977627094936e-05)
WARMUP_RATIO  = saved_hp.get("warmup_ratio", 0.0)
SCHEDULER_TYPE = SchedulerType.LINEAR
LOG_STEPS     = saved_hp.get("logging_steps", 500)
SAVE_STEPS    = saved_hp.get("save_steps", 500)

steps_per_epoch = math.ceil(len(train_dataset) / (PER_DEV_TRAIN_BS * max(1, GRAD_ACCUM)))
eval_save_steps = SAVE_STEPS  # keep exactly as original (or use custom steps_per_epoch logic)

args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    overwrite_output_dir=True,

    per_device_train_batch_size=PER_DEV_TRAIN_BS,
    per_device_eval_batch_size=PER_DEV_EVAL_BS,
    gradient_accumulation_steps=GRAD_ACCUM,

    num_train_epochs=3,                 # my choice
    learning_rate=NEW_LR,
    weight_decay=saved_hp.get("weight_decay", 0.01),
    warmup_ratio=WARMUP_RATIO,
    lr_scheduler_type=SCHEDULER_TYPE,
    adam_beta1=saved_hp.get("adam_beta1", 0.9),
    adam_beta2=saved_hp.get("adam_beta2", 0.999),
    adam_epsilon=saved_hp.get("adam_epsilon", 1e-8),
    max_grad_norm=saved_hp.get("max_grad_norm", 1.0),

    eval_strategy="steps",
    logging_strategy="steps",
    logging_steps=LOG_STEPS,
    save_steps=SAVE_STEPS,
    save_total_limit=12,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,

    bf16=use_bf16,
    fp16=not use_bf16 and torch.cuda.is_available(),
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=6)],
)




  trainer = Trainer(


In [78]:
# Train (set resume=True for optimizer/scheduler state restored from CKPT_DIR)


resume = False
trainer.train(resume_from_checkpoint=CKPT_DIR if resume else None)

# Validation
val_metrics = trainer.evaluate()
print("Validation:", val_metrics)

# Test (metrics only)
test_metrics = trainer.evaluate(eval_dataset=test_dataset)
print("Test:", test_metrics)


# See which checkpoint was best we are looking at F1 Macro (since load_best_model_at_end=True)
print("Best checkpoint:", trainer.state.best_model_checkpoint)

# Save best model + tokenizer
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

Step,Training Loss,Validation Loss,Accuracy,F1 Macro
500,0.4916,0.976831,0.728786,0.469537
1000,0.6705,0.832447,0.723416,0.497234
1500,0.6429,0.810982,0.723147,0.498839
2000,0.5914,0.797871,0.741676,0.498611
2500,0.5906,0.820657,0.748389,0.502603
3000,0.6143,0.780047,0.73362,0.520379
3500,0.6008,0.751781,0.727444,0.513104
4000,0.5203,0.876658,0.736842,0.519432
4500,0.4349,0.908754,0.73174,0.521054
5000,0.4452,0.92772,0.727981,0.507205


Validation: {'eval_loss': 0.9087539911270142, 'eval_accuracy': 0.7317400644468314, 'eval_f1_macro': 0.5210544696997942, 'eval_runtime': 5.7546, 'eval_samples_per_second': 647.138, 'eval_steps_per_second': 80.979, 'epoch': 2.0139634801288935}
Test: {'eval_loss': 0.8874104619026184, 'eval_accuracy': 0.7363050483351236, 'eval_f1_macro': 0.5171527831822613, 'eval_runtime': 6.4741, 'eval_samples_per_second': 575.216, 'eval_steps_per_second': 71.979, 'epoch': 2.0139634801288935}
Best checkpoint: ./hatebert_fresh_linear_try2/checkpoint-4500


('./hatebert_fresh_linear_try2/tokenizer_config.json',
 './hatebert_fresh_linear_try2/special_tokens_map.json',
 './hatebert_fresh_linear_try2/vocab.txt',
 './hatebert_fresh_linear_try2/added_tokens.json',
 './hatebert_fresh_linear_try2/tokenizer.json')

In [79]:
## evaluating THE best model at step 2500 in test data set for accuracy

In [80]:
from dataclasses import replace as dc_replace

eval_args = dc_replace(
    args,
    output_dir="./eval_best_tmp",
    load_best_model_at_end=False,   # <- turn off
    save_strategy="no",             # <- match eval
    eval_strategy="no",
    logging_strategy="no",
    report_to="none",
)

In [None]:

BEST_CKPT = "./hatebert_fresh_linear_try2/checkpoint-2500"  # <- your best checkpoint
model_best = AutoModelForSequenceClassification.from_pretrained(BEST_CKPT).to(device)


trainer_best = Trainer(
    model=model_best,
    args=eval_args,
    eval_dataset=test_dataset,          # not required here, but fine
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# 1) Standard HF metrics on TEST
test_metrics = trainer_best.evaluate(eval_dataset=test_dataset)
print("TEST (HF):", test_metrics)

# 2) Full classification report + confusion matrix
pred = trainer_best.predict(test_dataset)
y_true = pred.label_ids
y_pred = pred.predictions.argmax(axis=-1)

# Label names (robust to dict with int or str keys, or a list)
id2label = model_best.config.id2label
if isinstance(id2label, dict):
    id2label_norm = { (int(k) if not isinstance(k, int) else k): v
                      for k, v in id2label.items() }
    target_names = [id2label_norm[i] for i in range(model_best.config.num_labels)]
else:
    target_names = list(id2label)

# Optional pretty rename
target_names = ["Neutral" if isinstance(n, str) and n.lower() == "none" else n
                for n in target_names]

# Optional: display 'None' as 'Neutral' for readability
target_names = ["Neutral" if n.lower() == "none" else n for n in target_names]



  trainer_best = Trainer(


TEST (HF): {'eval_loss': 0.7733328938484192, 'eval_model_preparation_time': 0.0037, 'eval_accuracy': 0.7548335123523093, 'eval_f1_macro': 0.5231976620479473, 'eval_runtime': 5.6129, 'eval_samples_per_second': 663.474, 'eval_steps_per_second': 83.023}


In [90]:
print("\nClassification report (TEST):")
print(classification_report(y_true, y_pred, target_names=target_names, digits=6))





Classification report (TEST):
              precision    recall  f1-score   support

     Neutral   0.830012  0.904215  0.865526      2349
        body   0.318182  0.159091  0.212121        44
     culture   0.613014  0.724696  0.664193       247
    disabled   0.586957  0.720000  0.646707        75
      gender   0.572581  0.401130  0.471761       354
        race   0.684211  0.603093  0.641096       388
      social   0.387097  0.375000  0.380952        64
      victim   0.439252  0.231527  0.303226       203

    accuracy                       0.754834      3724
   macro avg   0.553913  0.514844  0.523198      3724
weighted avg   0.736102  0.754834  0.740251      3724



In [96]:
# Evaluate best model on test set
test_metrics = trainer.evaluate(eval_dataset=test_dataset)

# Print all metrics
print(test_metrics)

# Access just the accuracy
print("Overall test accuracy:", test_metrics["eval_accuracy"])

{'eval_loss': 0.8874104619026184, 'eval_accuracy': 0.7363050483351236, 'eval_f1_macro': 0.5171527831822613, 'eval_runtime': 6.5817, 'eval_samples_per_second': 565.81, 'eval_steps_per_second': 70.802, 'epoch': 2.0139634801288935}
Overall test accuracy: 0.7363050483351236


In [97]:
# accuracy
print("Overall test accuracy:", test_metrics["eval_accuracy"])

Overall test accuracy: 0.7363050483351236


In [91]:
SAVE_DIR = "./eval_exports"
os.makedirs(SAVE_DIR, exist_ok=True)

In [92]:
report_dict = classification_report(
    y_true, y_pred, target_names=target_names, output_dict=True, zero_division=0
)
df_report = pd.DataFrame(report_dict).transpose()
# (optional) reorder columns
cols = ["precision", "recall", "f1-score", "support"]
df_report = df_report[cols]
df_report.to_csv(f"{SAVE_DIR}/test_classification_report.csv", index=True)

In [93]:
id2label_norm = ({int(k): v for k, v in model_best.config.id2label.items()}
                 if isinstance(model_best.config.id2label, dict)
                 else {i: lbl for i, lbl in enumerate(model_best.config.id2label)})

df_eval = pd.DataFrame({
    "actual_bias_type":   [id2label_norm[i] for i in y_true],
    "predicted_bias_type":[id2label_norm[i] for i in y_pred],
})

cm_df = conf_matrix(df_eval, id2label_norm, label2id)   # returns a DataFrame
print(cm_df)

          None  body  culture  disabled  gender  race  social  victim
None      2124     6       22        27      75    47      21      27
body        22     7        0         2       6     1       0       6
culture     23     0      179         1       0    30       5       9
disabled    16     1        1        54       1     1       0       1
gender     168     5        6         1     142    14       4      14
race        93     0       37         4      12   234       7       1
social      22     0        6         1       4     5      24       2
victim      91     3       41         2       8    10       1      47


In [94]:
cm_df.to_csv(f"{SAVE_DIR}/test_confusion_matrix_counts.csv")

In [95]:

# If you already ran: pred = trainer_best.predict(test_dataset)
y_true = pred.label_ids
logits  = pred.predictions
y_pred  = logits.argmax(axis=1)

# Label names from the model (works for dict or list)
id2label_cfg = model_best.config.id2label
id2label = ({int(k): v for k, v in id2label_cfg.items()}
            if isinstance(id2label_cfg, dict)
            else {i: v for i, v in enumerate(id2label_cfg)})
names = [id2label[i] for i in range(len(id2label))]
names = ["Neutral" if str(n).lower() == "none" else n for n in names]

# Softmax probabilities
probs = torch.from_numpy(logits).softmax(dim=1).numpy()

# Top-1 and second-best probs
top2_sorted = np.sort(probs, axis=1)[:, -2:]
pred_prob   = top2_sorted[:, -1]
second_prob = top2_sorted[:, -2]

# Build raw dataframe
df_raw = pd.DataFrame({
    "true_label_id": y_true,
    "true_label":    [names[i] for i in y_true],
    "pred_label_id": y_pred,
    "pred_label":    [names[i] for i in y_pred],
    "pred_prob":     pred_prob,
    "second_prob":   second_prob,
    "confidence_margin": pred_prob - second_prob,
    "is_error":      (y_true != y_pred),
})

# (Optional) attach original text if present in your dataset
for col in ["text", "content", "post"]:
    if col in test_dataset.column_names:
        df_raw[col] = test_dataset[col]
        break
if "__index_level_0__" in test_dataset.column_names:
    df_raw["orig_index"] = test_dataset["__index_level_0__"]

probs_df = pd.DataFrame(probs, columns=names)
df_raw_full = pd.concat([df_raw, probs_df], axis=1)


df_raw_full.to_csv(f"{SAVE_DIR}/test_raw_predictions_with_probs.csv", index=False)


