## Setup — imports, config, paths (CPU-only, cache-based)

In [None]:
# ============================================================
# 0) Setup — imports, config, paths (CPU-only, cache-based)
# ============================================================

import os
import json
import warnings
from pathlib import Path

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

# ---- Environment flags intended to keep downstream libraries on CPU and
# ---- to switch off W&B logging (not strictly needed here, but safe) ----
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "",
    "ACCELERATE_USE_CPU": "1",
    "HF_ACCELERATE_USE_CPU": "1",
    "USE_TORCH_MPS": "0",
    "WANDB_DISABLED": "true",
})

# ======= Config you can edit =======
LANG = "eng"          # e.g. "eng", "ben", "hin", ...

# Label orders (must match training notebooks): the position of a label in
# these lists is the column index used for every probability/prediction matrix.
T2_LABELS = ["gender/sexual", "political", "religious", "racial/ethnic", "other"]
T3_LABELS = [
    "vilification",
    "extreme_language",
    "stereotype",
    "invalidation",
    "lack_of_empathy",
    "dehumanization",
]

# Language tag used in the submission file names (pred_<lang_fname>.csv).
lang_fname = LANG

# Per-model roots: calibration artifacts and cached probabilities.
XLMR_ART_ROOT = Path("artifacts", "xlmr", LANG)
DEBERTA_ART_ROOT = Path("artifacts", "deberta", LANG)

XLMR_CACHE_ROOT = Path("cache", "xlmr", LANG)
DEBERTA_CACHE_ROOT = Path("cache", "deberta", LANG)

# Ensemble submission root, with one folder per subtask created up front.
SUB_ROOT_ENS = Path("submissions", "ensemble")
for sub in ("subtask_1", "subtask_2", "subtask_3"):
    SUB_ROOT_ENS.joinpath(sub).mkdir(parents=True, exist_ok=True)

print(f"Using LANG = {LANG}")
print(f"XLMR cache: {XLMR_CACHE_ROOT}")
print(f"DeBERTa cache: {DEBERTA_CACHE_ROOT}")
print(f"Ensemble submissions root: {SUB_ROOT_ENS}")

Using LANG = eng
XLMR cache: cache/xlmr/eng
DeBERTa cache: cache/deberta/eng
Ensemble submissions root: submissions/ensemble


## Helpers

In [2]:
# ============================================================
# 1) Helpers
# ============================================================

def macro_f1(y_true, y_pred):
    """Return the macro-averaged F1 score of `y_pred` against `y_true`.

    Thin wrapper around scikit-learn's `f1_score`, called with
    `average="macro"` and `zero_division=0`. In this notebook it is used
    with both 1-D binary label vectors and 2-D multi-label 0/1 matrices.
    """
    score = f1_score(y_true, y_pred, average="macro", zero_division=0)
    return score

def load_json(path: Path):
    """Read the JSON document stored at `path` and return the decoded object.

    Args:
        path: Location of the JSON file (a `Path`; anything `open()` accepts
            also works).

    Returns:
        Whatever `json.load` produces for the file (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin UTF-8 explicitly: without it open() falls back to the platform's
    # locale encoding, which can mis-decode non-ASCII JSON on some systems.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

## Subtask 1 — Polarization (binary), ensemble on TRAIN+DEV

In [3]:
# ============================================================
# 2) Subtask 1 — Polarization (binary), ensemble on TRAIN+DEV
# ============================================================
# Soft-voting ensemble of the two models: the positive-class probabilities
# are averaged and thresholded at the mean of the per-model thresholds.
# TRAIN macro-F1 is printed for each model and for the ensemble; the DEV
# predictions are written out as the subtask 1 submission CSV.

print("\n==================== Subtask 1 (polarization) ====================")

# 2.1 Load cached calibrated probabilities for TRAIN
t1_train_x = pd.read_csv(XLMR_CACHE_ROOT / "t1_train_probs.csv")   # id, prob_pos, label
t1_train_d = pd.read_csv(DEBERTA_CACHE_ROOT / "t1_train_probs.csv")

# validate="one_to_one" makes pandas raise on duplicated ids instead of
# silently multiplying rows in the join.
t1_train = t1_train_x.merge(
    t1_train_d, on="id", suffixes=("_xlmr", "_deberta"), validate="one_to_one"
)
print("T1 train merged shape:", t1_train.shape)

# The merge is an inner join, so ids present in only one cache would vanish
# without a trace; fail loudly instead.
if not (len(t1_train) == len(t1_train_x) == len(t1_train_d)):
    raise ValueError(
        "T1 train caches do not cover the same ids: "
        f"xlmr={len(t1_train_x)}, deberta={len(t1_train_d)}, merged={len(t1_train)}"
    )

# Both caches carry the gold label; confirm they agree before trusting one side.
n_label_mismatch_t1 = int((t1_train["label_xlmr"] != t1_train["label_deberta"]).sum())
if n_label_mismatch_t1:
    raise ValueError(
        f"T1 train labels differ between the XLMR and DeBERTa caches "
        f"for {n_label_mismatch_t1} rows"
    )

y_train_t1 = t1_train["label_xlmr"].astype(int).values

p_x_t1 = t1_train["prob_pos_xlmr"].values
p_d_t1 = t1_train["prob_pos_deberta"].values

# 2.2 Load per-model calibration thresholds
cal_x_t1 = load_json(XLMR_ART_ROOT / "calib_t1_native.json")
cal_d_t1 = load_json(DEBERTA_ART_ROOT / "calib_t1_native.json")

thr_x_t1 = float(cal_x_t1["threshold"])
thr_d_t1 = float(cal_d_t1["threshold"])
thr_ens_t1 = (thr_x_t1 + thr_d_t1) / 2.0

print(f"XLMR T1 threshold={thr_x_t1:.3f}")
print(f"DeBERTa T1 threshold={thr_d_t1:.3f}")
print(f"Ensemble T1 threshold (avg)={thr_ens_t1:.3f}")

# 2.3 TRAIN F1: XLMR, DeBERTa, Ensemble
pred_x_train_t1 = (p_x_t1 >= thr_x_t1).astype(int)
pred_d_train_t1 = (p_d_t1 >= thr_d_t1).astype(int)
p_ens_train_t1 = 0.5 * (p_x_t1 + p_d_t1)
pred_ens_train_t1 = (p_ens_train_t1 >= thr_ens_t1).astype(int)

print("T1 Macro-F1 (TRAIN) XLMR:   ", macro_f1(y_train_t1, pred_x_train_t1))
print("T1 Macro-F1 (TRAIN) DeBERTa:", macro_f1(y_train_t1, pred_d_train_t1))
print("T1 Macro-F1 (TRAIN) ENS:    ", macro_f1(y_train_t1, pred_ens_train_t1))

# 2.4 DEV: load cached probs, ensemble, write submission
t1_dev_x = pd.read_csv(XLMR_CACHE_ROOT / "t1_dev_probs.csv")   # id, prob_pos
t1_dev_d = pd.read_csv(DEBERTA_CACHE_ROOT / "t1_dev_probs.csv")

t1_dev = t1_dev_x.merge(
    t1_dev_d, on="id", suffixes=("_xlmr", "_deberta"), validate="one_to_one"
)
print("T1 dev merged shape:", t1_dev.shape)

# A dropped DEV row would mean an incomplete submission file.
if not (len(t1_dev) == len(t1_dev_x) == len(t1_dev_d)):
    raise ValueError(
        "T1 dev caches do not cover the same ids: "
        f"xlmr={len(t1_dev_x)}, deberta={len(t1_dev_d)}, merged={len(t1_dev)}"
    )

p_x_dev_t1 = t1_dev["prob_pos_xlmr"].values
p_d_dev_t1 = t1_dev["prob_pos_deberta"].values
p_ens_dev_t1 = 0.5 * (p_x_dev_t1 + p_d_dev_t1)

pred_ens_dev_t1 = (p_ens_dev_t1 >= thr_ens_t1).astype(int)

sub1 = pd.DataFrame({
    "id": t1_dev["id"].astype(str),
    "polarization": pred_ens_dev_t1,
})
sub1_path = SUB_ROOT_ENS / "subtask_1" / f"pred_{lang_fname}.csv"
sub1.to_csv(sub1_path, index=False)
print("Subtask 1 ensemble submission written to:", sub1_path)


T1 train merged shape: (3222, 5)
XLMR T1 threshold=0.450
DeBERTa T1 threshold=0.350
Ensemble T1 threshold (avg)=0.400
T1 Macro-F1 (TRAIN) XLMR:    0.8370522654243957
T1 Macro-F1 (TRAIN) DeBERTa: 0.9552514501844848
T1 Macro-F1 (TRAIN) ENS:     0.935225374944574
T1 dev merged shape: (160, 3)
Subtask 1 ensemble submission written to: submissions/ensemble/subtask_1/pred_eng.csv


## Subtask 2 — Type classification (multi-label: 5)

In [4]:
# ============================================================
# 3) Subtask 2 — Type classification (multi-label: 5)
# ============================================================
# Soft-voting ensemble per label: the two models' probabilities are averaged
# and each label is thresholded at the mean of the two per-model thresholds.
# Matrix columns follow T2_LABELS order throughout; only the final submission
# frame is re-ordered to the Codabench column order.

print("\n==================== Subtask 2 (type multi-label) ====================")

# 3.1 TRAIN: load cached calibrated probs
t2_train_x = pd.read_csv(XLMR_CACHE_ROOT / "t2_train_probs.csv")
t2_train_d = pd.read_csv(DEBERTA_CACHE_ROOT / "t2_train_probs.csv")

# validate="one_to_one" makes pandas raise on duplicated ids instead of
# silently multiplying rows in the join.
t2_train = t2_train_x.merge(
    t2_train_d, on="id", suffixes=("_xlmr", "_deberta"), validate="one_to_one"
)
print("T2 train merged shape:", t2_train.shape)

# The merge is an inner join, so ids present in only one cache would vanish
# without a trace; fail loudly instead.
if not (len(t2_train) == len(t2_train_x) == len(t2_train_d)):
    raise ValueError(
        "T2 train caches do not cover the same ids: "
        f"xlmr={len(t2_train_x)}, deberta={len(t2_train_d)}, merged={len(t2_train)}"
    )

# true labels (take from xlmr side, after confirming the deberta side agrees)
Y2_true_train = t2_train[[f"label_{lab}_xlmr" for lab in T2_LABELS]].values.astype(int)
Y2_true_train_d = t2_train[[f"label_{lab}_deberta" for lab in T2_LABELS]].values.astype(int)
if not np.array_equal(Y2_true_train, Y2_true_train_d):
    raise ValueError("T2 train labels differ between the XLMR and DeBERTa caches")

# model probabilities, shape (n_rows, len(T2_LABELS))
P2_x_train = np.stack(
    [t2_train[f"prob_{lab}_xlmr"].values for lab in T2_LABELS],
    axis=1,
)
P2_d_train = np.stack(
    [t2_train[f"prob_{lab}_deberta"].values for lab in T2_LABELS],
    axis=1,
)

# 3.2 Load per-model calibration thresholds
cal_x_t2 = load_json(XLMR_ART_ROOT / "calib_t2_native.json")
cal_d_t2 = load_json(DEBERTA_ART_ROOT / "calib_t2_native.json")

thr_x_map_t2 = {lab: float(cal_x_t2["thresholds"][lab]) for lab in T2_LABELS}
thr_d_map_t2 = {lab: float(cal_d_t2["thresholds"][lab]) for lab in T2_LABELS}
thr_ens_map_t2 = {
    lab: 0.5 * (thr_x_map_t2[lab] + thr_d_map_t2[lab]) for lab in T2_LABELS
}

print("T2 thresholds XLMR:", thr_x_map_t2)
print("T2 thresholds DeBERTa:", thr_d_map_t2)
print("T2 thresholds ENS (avg):", thr_ens_map_t2)

# Threshold vectors in T2_LABELS order, so one broadcast comparison applies
# each label's own threshold to its column.
thr_x_vec_t2 = np.array([thr_x_map_t2[lab] for lab in T2_LABELS])
thr_d_vec_t2 = np.array([thr_d_map_t2[lab] for lab in T2_LABELS])
thr_ens_vec_t2 = np.array([thr_ens_map_t2[lab] for lab in T2_LABELS])

# 3.3 TRAIN F1: XLMR, DeBERTa, Ensemble
P2_x_pred = (P2_x_train >= thr_x_vec_t2).astype(int)
P2_d_pred = (P2_d_train >= thr_d_vec_t2).astype(int)
P2_ens_pred = (0.5 * (P2_x_train + P2_d_train) >= thr_ens_vec_t2).astype(int)

print("T2 Macro-F1 (TRAIN) XLMR:   ", macro_f1(Y2_true_train, P2_x_pred))
print("T2 Macro-F1 (TRAIN) DeBERTa:", macro_f1(Y2_true_train, P2_d_pred))
print("T2 Macro-F1 (TRAIN) ENS:    ", macro_f1(Y2_true_train, P2_ens_pred))

# 3.4 DEV: load cached probs, ensemble, submission
t2_dev_x = pd.read_csv(XLMR_CACHE_ROOT / "t2_dev_probs.csv")
t2_dev_d = pd.read_csv(DEBERTA_CACHE_ROOT / "t2_dev_probs.csv")

t2_dev = t2_dev_x.merge(
    t2_dev_d, on="id", suffixes=("_xlmr", "_deberta"), validate="one_to_one"
)
print("T2 dev merged shape:", t2_dev.shape)

# A dropped DEV row would mean an incomplete submission file.
if not (len(t2_dev) == len(t2_dev_x) == len(t2_dev_d)):
    raise ValueError(
        "T2 dev caches do not cover the same ids: "
        f"xlmr={len(t2_dev_x)}, deberta={len(t2_dev_d)}, merged={len(t2_dev)}"
    )

P2_x_dev = np.stack(
    [t2_dev[f"prob_{lab}_xlmr"].values for lab in T2_LABELS],
    axis=1,
)
P2_d_dev = np.stack(
    [t2_dev[f"prob_{lab}_deberta"].values for lab in T2_LABELS],
    axis=1,
)
P2_ens_dev = 0.5 * (P2_x_dev + P2_d_dev)

P2_ens_pred_dev = (P2_ens_dev >= thr_ens_vec_t2).astype(int)

# Build submission in Codabench column order:
# id,political,racial/ethnic,religious,gender/sexual,other
idx2 = {lab: i for i, lab in enumerate(T2_LABELS)}
t2_submission_cols = ["political", "racial/ethnic", "religious", "gender/sexual", "other"]

sub2 = pd.DataFrame({"id": t2_dev["id"].astype(str)})
for lab in t2_submission_cols:
    # idx2 maps the label name back to its column in the T2_LABELS-ordered matrix
    sub2[lab] = P2_ens_pred_dev[:, idx2[lab]]

sub2_path = SUB_ROOT_ENS / "subtask_2" / f"pred_{lang_fname}.csv"
sub2.to_csv(sub2_path, index=False)
print("Subtask 2 ensemble submission written to:", sub2_path)


T2 train merged shape: (3222, 21)
T2 thresholds XLMR: {'gender/sexual': 0.85, 'political': 0.35, 'religious': 0.95, 'racial/ethnic': 0.9, 'other': 0.7}
T2 thresholds DeBERTa: {'gender/sexual': 0.7999999999999999, 'political': 0.49999999999999994, 'religious': 0.9, 'racial/ethnic': 0.65, 'other': 0.75}
T2 thresholds ENS (avg): {'gender/sexual': 0.825, 'political': 0.42499999999999993, 'religious': 0.925, 'racial/ethnic': 0.775, 'other': 0.725}
T2 Macro-F1 (TRAIN) XLMR:    0.4713820066883467
T2 Macro-F1 (TRAIN) DeBERTa: 0.5422365856535791
T2 Macro-F1 (TRAIN) ENS:     0.5478476095333539
T2 dev merged shape: (160, 11)
Subtask 2 ensemble submission written to: submissions/ensemble/subtask_2/pred_eng.csv


## Subtask 3 — Manifestation (multi-label: 6)

In [5]:
# ============================================================
# 4) Subtask 3 — Manifestation (multi-label: 6)
# ============================================================
# Soft-voting ensemble per label: the two models' probabilities are averaged
# and each label is thresholded at the mean of the two per-model thresholds.
# Matrix columns follow T3_LABELS order throughout; only the final submission
# frame is re-ordered to the Codabench column order.

print("\n==================== Subtask 3 (manifestation) ====================")

# 4.1 TRAIN: load cached calibrated probs
t3_train_x = pd.read_csv(XLMR_CACHE_ROOT / "t3_train_probs.csv")
t3_train_d = pd.read_csv(DEBERTA_CACHE_ROOT / "t3_train_probs.csv")

# validate="one_to_one" makes pandas raise on duplicated ids instead of
# silently multiplying rows in the join.
t3_train = t3_train_x.merge(
    t3_train_d, on="id", suffixes=("_xlmr", "_deberta"), validate="one_to_one"
)
print("T3 train merged shape:", t3_train.shape)

# The merge is an inner join, so ids present in only one cache would vanish
# without a trace; fail loudly instead.
if not (len(t3_train) == len(t3_train_x) == len(t3_train_d)):
    raise ValueError(
        "T3 train caches do not cover the same ids: "
        f"xlmr={len(t3_train_x)}, deberta={len(t3_train_d)}, merged={len(t3_train)}"
    )

# true labels (take from xlmr side, after confirming the deberta side agrees)
Y3_true_train = t3_train[[f"label_{lab}_xlmr" for lab in T3_LABELS]].values.astype(int)
Y3_true_train_d = t3_train[[f"label_{lab}_deberta" for lab in T3_LABELS]].values.astype(int)
if not np.array_equal(Y3_true_train, Y3_true_train_d):
    raise ValueError("T3 train labels differ between the XLMR and DeBERTa caches")

# model probabilities, shape (n_rows, len(T3_LABELS))
P3_x_train = np.stack(
    [t3_train[f"prob_{lab}_xlmr"].values for lab in T3_LABELS],
    axis=1,
)
P3_d_train = np.stack(
    [t3_train[f"prob_{lab}_deberta"].values for lab in T3_LABELS],
    axis=1,
)

# 4.2 Load per-model calibration thresholds
cal_x_t3 = load_json(XLMR_ART_ROOT / "calib_t3_native.json")
cal_d_t3 = load_json(DEBERTA_ART_ROOT / "calib_t3_native.json")

thr_x_map_t3 = {lab: float(cal_x_t3["thresholds"][lab]) for lab in T3_LABELS}
thr_d_map_t3 = {lab: float(cal_d_t3["thresholds"][lab]) for lab in T3_LABELS}
thr_ens_map_t3 = {
    lab: 0.5 * (thr_x_map_t3[lab] + thr_d_map_t3[lab]) for lab in T3_LABELS
}

print("T3 thresholds XLMR:", thr_x_map_t3)
print("T3 thresholds DeBERTa:", thr_d_map_t3)
print("T3 thresholds ENS (avg):", thr_ens_map_t3)

# Threshold vectors in T3_LABELS order, so one broadcast comparison applies
# each label's own threshold to its column.
thr_x_vec_t3 = np.array([thr_x_map_t3[lab] for lab in T3_LABELS])
thr_d_vec_t3 = np.array([thr_d_map_t3[lab] for lab in T3_LABELS])
thr_ens_vec_t3 = np.array([thr_ens_map_t3[lab] for lab in T3_LABELS])

# 4.3 TRAIN F1: XLMR, DeBERTa, ENS
P3_x_pred = (P3_x_train >= thr_x_vec_t3).astype(int)
P3_d_pred = (P3_d_train >= thr_d_vec_t3).astype(int)
P3_ens_pred = (0.5 * (P3_x_train + P3_d_train) >= thr_ens_vec_t3).astype(int)

print("T3 Macro-F1 (TRAIN) XLMR:   ", macro_f1(Y3_true_train, P3_x_pred))
print("T3 Macro-F1 (TRAIN) DeBERTa:", macro_f1(Y3_true_train, P3_d_pred))
print("T3 Macro-F1 (TRAIN) ENS:    ", macro_f1(Y3_true_train, P3_ens_pred))

# 4.4 DEV: load cached probs, ensemble, submission
t3_dev_x = pd.read_csv(XLMR_CACHE_ROOT / "t3_dev_probs.csv")
t3_dev_d = pd.read_csv(DEBERTA_CACHE_ROOT / "t3_dev_probs.csv")

t3_dev = t3_dev_x.merge(
    t3_dev_d, on="id", suffixes=("_xlmr", "_deberta"), validate="one_to_one"
)
print("T3 dev merged shape:", t3_dev.shape)

# A dropped DEV row would mean an incomplete submission file.
if not (len(t3_dev) == len(t3_dev_x) == len(t3_dev_d)):
    raise ValueError(
        "T3 dev caches do not cover the same ids: "
        f"xlmr={len(t3_dev_x)}, deberta={len(t3_dev_d)}, merged={len(t3_dev)}"
    )

P3_x_dev = np.stack(
    [t3_dev[f"prob_{lab}_xlmr"].values for lab in T3_LABELS],
    axis=1,
)
P3_d_dev = np.stack(
    [t3_dev[f"prob_{lab}_deberta"].values for lab in T3_LABELS],
    axis=1,
)
P3_ens_dev = 0.5 * (P3_x_dev + P3_d_dev)

P3_ens_pred_dev = (P3_ens_dev >= thr_ens_vec_t3).astype(int)

# Build submission in Codabench order:
# id,stereotype,vilification,dehumanization,extreme_language,lack_of_empathy,invalidation
idx3 = {lab: i for i, lab in enumerate(T3_LABELS)}
t3_submission_cols = [
    "stereotype",
    "vilification",
    "dehumanization",
    "extreme_language",
    "lack_of_empathy",
    "invalidation",
]

sub3 = pd.DataFrame({"id": t3_dev["id"].astype(str)})
for lab in t3_submission_cols:
    # idx3 maps the label name back to its column in the T3_LABELS-ordered matrix
    sub3[lab] = P3_ens_pred_dev[:, idx3[lab]]

sub3_path = SUB_ROOT_ENS / "subtask_3" / f"pred_{lang_fname}.csv"
sub3.to_csv(sub3_path, index=False)
print("Subtask 3 ensemble submission written to:", sub3_path)

print("\nAll ensemble submissions ready.")
print("Zip one of these folders for Codabench:")
print("  ", SUB_ROOT_ENS / "subtask_1")
print("  ", SUB_ROOT_ENS / "subtask_2")
print("  ", SUB_ROOT_ENS / "subtask_3")


T3 train merged shape: (3222, 25)
T3 thresholds XLMR: {'vilification': 0.65, 'extreme_language': 0.6, 'stereotype': 0.7, 'invalidation': 0.65, 'lack_of_empathy': 0.65, 'dehumanization': 0.7}
T3 thresholds DeBERTa: {'vilification': 0.44999999999999996, 'extreme_language': 0.65, 'stereotype': 0.75, 'invalidation': 0.49999999999999994, 'lack_of_empathy': 0.65, 'dehumanization': 0.7}
T3 thresholds ENS (avg): {'vilification': 0.55, 'extreme_language': 0.625, 'stereotype': 0.725, 'invalidation': 0.575, 'lack_of_empathy': 0.65, 'dehumanization': 0.7}
T3 Macro-F1 (TRAIN) XLMR:    0.5175818625510322
T3 Macro-F1 (TRAIN) DeBERTa: 0.6336362476490647
T3 Macro-F1 (TRAIN) ENS:     0.614528150933099
T3 dev merged shape: (160, 13)
Subtask 3 ensemble submission written to: submissions/ensemble/subtask_3/pred_eng.csv

All ensemble submissions ready.
Zip one of these folders for Codabench:
   submissions/ensemble/subtask_1
   submissions/ensemble/subtask_2
   submissions/ensemble/subtask_3
