In [None]:
# =============================
# 02. DistilBERT Experiments (Fixed)
# =============================

import os
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import joblib
import json

# ===================================
# Paths
# ===================================
# Locations of the evaluation CSV and of the fine-tuned model directory,
# both relative to the notebook's working directory.
DATA_PATH = "../data/processed/jigsaw_multilevel_features.csv"
MODEL_PATH = "../models/saved/bert"

# ===================================
# Load Data
# ===================================
df = pd.read_csv(DATA_PATH)
# pandas reads empty CSV cells as NaN (a float), which is not valid tokenizer
# input. Replace missing texts with "" and coerce everything to str so every
# row can be encoded while X stays index-aligned with y.
X = df["text"].fillna("").astype(str).tolist()
y = df["label"].tolist()
# ===================================
# Load Fine-Tuned DistilBERT Model
# ===================================
# Tokenizer files are read from the model root directory; the weights are read
# from the "checkpoint-17500" sub-directory.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(os.path.join(MODEL_PATH, "checkpoint-17500"))
model.eval()  # evaluation mode (e.g. dropout layers become no-ops)

# Pick the best available backend: CUDA first, then Apple MPS, then CPU.
# On machines without CUDA this resolves exactly as before (MPS or CPU).
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)

# ===================================
# Load Labels from Model
# ===================================
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only load mapping files that came from a trusted training run.
label2id = joblib.load(os.path.join(MODEL_PATH, "label2id.pkl"))
id2label = joblib.load(os.path.join(MODEL_PATH, "id2label.pkl"))

# Class names listed in integer-id order: index k holds id2label[k].
num_labels = len(id2label)
class_names = [id2label[idx] for idx in range(num_labels)]

# ===================================
# Tokenize and Predict in Batches
# ===================================
batch_size = 16
preds = []
true_labels = []

# Walk the dataset in consecutive slices of batch_size rows; the last slice
# may be shorter.
for start in range(0, len(X), batch_size):
    stop = start + batch_size
    texts_chunk = X[start:stop]
    labels_chunk = y[start:stop]

    # Encode the slice (truncated to at most 256 tokens, padded within the
    # batch) and move the resulting tensors to the model's device.
    encoded = tokenizer(
        texts_chunk,
        truncation=True,
        padding=True,
        max_length=256,
        return_tensors="pt",
    ).to(device)

    # Forward pass without gradient tracking; the predicted class is the
    # arg-max over the logits along dim 1.
    with torch.no_grad():
        batch_logits = model(**encoded).logits
        batch_preds = torch.argmax(batch_logits, dim=1).cpu().numpy()

    preds.extend(batch_preds)
    true_labels.extend(labels_chunk)

# ===================================
# Decode Predictions
# ===================================
# Map each predicted class id back to its label name.
y_pred = [id2label[p] for p in preds]

# ===================================
# Classification Report
# ===================================
print("=== DistilBERT Evaluation ===")
# labels= is passed explicitly so each row is paired with the right name:
# without it sklearn orders rows by the sorted unique labels found in the data,
# which need not match the id order of class_names, and it raises if a class
# never appears. This also keeps the report consistent with the confusion
# matrix, which already uses labels=class_names.
print(classification_report(true_labels, y_pred, labels=class_names, target_names=class_names, digits=4))

# ===================================
# Confusion Matrix
# ===================================
# Count matrix with rows/columns in class_names order.
cm = confusion_matrix(true_labels, y_pred, labels=class_names)

# Draw the counts as an annotated heat map on a dedicated figure/axes pair.
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("DistilBERT Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
fig.tight_layout()
plt.show()

# ===================================
# Per-Class F1 Visualization
# ===================================
# Dict-form report. labels= is passed explicitly so that the entry keyed by
# each name in class_names really holds that class's scores (without it,
# sklearn pairs target_names with the sorted unique labels found in the data,
# and raises if a class never appears).
report = classification_report(true_labels, y_pred, labels=class_names, target_names=class_names, output_dict=True)
f1_scores = [report[label]['f1-score'] for label in class_names]

# One row per class, in class_names order, for plotting.
df_f1 = pd.DataFrame({
    "class": class_names,
    "f1_score": f1_scores
})

plt.figure(figsize=(6, 4))
sns.barplot(data=df_f1, x="class", y="f1_score", color="green")
plt.title("DistilBERT - Per-Class F1 Score")
plt.ylabel("F1 Score")
plt.ylim(0, 1)  # fixed 0-1 axis so the bars are read on the full F1 scale
plt.tight_layout()
plt.show()

# ===================================
# Save Metrics
# ===================================
results_dir = "../results/metrics"
os.makedirs(results_dir, exist_ok=True)  # no error if the directory already exists

# Accuracy is the fraction of positions where the true and predicted labels
# agree; macro F1 is the unweighted mean of the per-class F1 scores.
matches = np.array(true_labels) == np.array(y_pred)
results = {
    "model": "DistilBERT",
    "accuracy": np.mean(matches),
    "macro_f1": np.mean(f1_scores)
}

# Build the output path once and reuse it for writing and for the message.
metrics_path = os.path.join(results_dir, "phase2_distilBert_metrics.json")
with open(metrics_path, "w") as out_file:
    json.dump(results, out_file, indent=4)

print(f"\nMetrics saved to {metrics_path}")



UnpicklingError: Weights only load failed. In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
Please file an issue with the following so that we can make `weights_only=True` compatible with your use case: WeightsUnpickler error: 

Unsupported operand 149

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.