In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score
from torch.optim import AdamW
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm.auto import tqdm

from sklearn.model_selection import train_test_split   # ← TAMBAHKAN BARIS INI


# ============================================================
# 1. DEVICE → PAKAI GPU JIKA ADA
# ============================================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)





# ============================================================
# 2. LOAD DATA FULL (SINGLE CSV) + SPLIT 80:20
# ============================================================
df = pd.read_csv("traindata_final_fixed_rulebased.csv")

label_cols = ["admiration","amusement","gratitude","love","pride","relief","remorse"]

print("Full data shape:", df.shape)

# Split 80:20 → test_size = 0.2
train_df, dev_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42
)

print("Train shape (80%):", train_df.shape)
print("Dev/Test shape (20%):", dev_df.shape)

# ------------------------------------------------------------
# OPSIONAL: subset train (misal maksimal 4000 baris)
# kalau mau pakai semua data train: N_TRAIN = len(train_df)
# ------------------------------------------------------------
N_TRAIN = len(train_df)  # ganti ke 4000 kalau mau dibatasi

train_df_small = train_df.sample(
    n=min(N_TRAIN, len(train_df)),
    random_state=42
).reset_index(drop=True)

dev_df_full = dev_df.reset_index(drop=True)  # dipakai untuk eval + save prediksi

print("Subset Train shape:", train_df_small.shape)
print("Full Dev/Test shape:", dev_df_full.shape)

# ============================================================
# 3. SIAPKAN X, y UNTUK TRAIN & DEV
# ============================================================
X_train = train_df_small["text"].astype(str).tolist()
y_train = train_df_small[label_cols].values.astype("float32")

X_dev = dev_df_full["text"].astype(str).tolist()
y_dev = dev_df_full[label_cols].values.astype("float32")

# ============================================================
# 4. TOKENIZER (DistilRoBERTa) & DATASET / DATALOADER
# ============================================================
model_name = "distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

MAX_LEN = 96

print("Tokenizing train...")
train_enc = tokenizer(
    X_train,
    padding=True,
    truncation=True,
    max_length=MAX_LEN,
    return_tensors="pt"
)

print("Tokenizing dev...")
dev_enc = tokenizer(
    X_dev,
    padding=True,
    truncation=True,
    max_length=MAX_LEN,
    return_tensors="pt"
)

train_dataset = TensorDataset(
    train_enc["input_ids"],
    train_enc["attention_mask"],
    torch.tensor(y_train, dtype=torch.float32)
)

dev_dataset = TensorDataset(
    dev_enc["input_ids"],
    dev_enc["attention_mask"],
    torch.tensor(y_dev, dtype=torch.float32)
)

batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader   = DataLoader(dev_dataset,   batch_size=32, shuffle=False)

print("Train batches:", len(train_loader))
print("Dev batches:", len(dev_loader))






# ============================================================
# 4. MODEL (DistilRoBERTa Multi-Label)
# ============================================================
num_labels = len(label_cols)

print("Loading model:", model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    problem_type="multi_label_classification"
).to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = nn.BCEWithLogitsLoss()

# ============================================================
# 5. EVALUATION FUNCTION (FULL DEV)
# ============================================================
def evaluate(threshold=0.5):
    model.eval()
    all_logits = []
    all_labels = []

    with torch.no_grad():
        for ids, mask, labels in dev_loader:
            ids, mask = ids.to(device), mask.to(device)
            outputs = model(input_ids=ids, attention_mask=mask)
            logits = outputs.logits

            all_logits.append(logits.cpu().numpy())
            all_labels.append(labels.numpy())

    all_logits = np.concatenate(all_logits)
    all_labels = np.concatenate(all_labels)

    probs = 1 / (1 + np.exp(-all_logits))
    preds = (probs >= threshold).astype(int)

    micro = f1_score(all_labels, preds, average="micro")
    macro = f1_score(all_labels, preds, average="macro")

    return micro, macro, probs, preds, all_labels

# ============================================================
# 6. TRAINING LOOP (SUBSET TRAIN, FULL DEV)
# ============================================================
epochs = 10   # 5 epoch biasanya cukup, tiap epoch cuma 4000/16=250 langkah

best_micro = -1.0
best_state = None

for epoch in range(1, epochs+1):
    model.train()
    total_loss = 0.0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}")
    for ids, mask, labels in pbar:
        ids, mask, labels = ids.to(device), mask.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=ids, attention_mask=mask)
        logits = outputs.logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * ids.size(0)
        pbar.set_postfix({"loss": f"{loss.item():.4f}"})

    train_loss = total_loss / len(train_loader.dataset)
    dev_micro, dev_macro, _, _, _ = evaluate(threshold=0.5)

    print(f"\nEpoch {epoch} done | train_loss={train_loss:.4f} | dev_micro={dev_micro:.4f} | dev_macro={dev_macro:.4f}")

    if dev_micro > best_micro:
        best_micro = dev_micro
        best_state = model.state_dict().copy()
        print(f"  → New best model (dev Micro-F1={best_micro:.4f})")

# restore best model
if best_state is not None:
    model.load_state_dict(best_state)
    print("\nBest model restored!")

# ============================================================
# 7. CARI THRESHOLD GLOBAL TERBAIK DI FULL DEV
# ============================================================
print("\nSearching best global threshold on FULL dev...")

best_t = 0.5
best_t_micro = -1.0

for t in np.arange(0.1, 0.9, 0.05):
    micro_t, _, _, _, _ = evaluate(threshold=t)
    print(f"t={t:.2f} → Dev Micro-F1 = {micro_t:.4f}")
    if micro_t > best_t_micro:
        best_t_micro = micro_t
        best_t = t

print(f"\nBEST threshold: {best_t:.2f}")
print(f"BEST dev Micro-F1: {best_t_micro:.4f}")

# ============================================================
# 8. FINAL EVAL + PER-LABEL F1 DI FULL DEV
# ============================================================
final_micro, final_macro, final_probs, final_preds, final_true = evaluate(threshold=best_t)

print("\n========================")
print(" FINAL DEV PERFORMANCE  ")
print("========================")
print(f"Micro-F1 (t={best_t:.2f}): {final_micro:.2f}\n")

for i, col in enumerate(label_cols):
    f1 = f1_score(final_true[:, i], final_preds[:, i])
    print(f"{col.capitalize()} F1: {f1:.2f}")

# ============================================================
# 9. SAVE dev_predictions.csv (FULL DEV)
# ============================================================
output_df = dev_df_full.copy()
for i, col in enumerate(label_cols):
    output_df[col] = final_preds[:, i]

output_df.to_csv("dev_predictions.csv", index=False)
print("\nSaved dev_predictions.csv!")


Using device: cuda
Full data shape: (16386, 8)
Train shape (80%): (13108, 8)
Dev/Test shape (20%): (3278, 8)
Subset Train shape: (13108, 8)
Full Dev/Test shape: (3278, 8)
Tokenizing train...
Tokenizing dev...
Train batches: 820
Dev batches: 103
Loading model: distilroberta-base


2025-12-13 17:57:40.620743: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-13 17:57:40.668314: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-13 17:57:41.768858: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
Some weights of Robe

Epoch 1/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 1 done | train_loss=0.1969 | dev_micro=0.8319 | dev_macro=0.7407
  → New best model (dev Micro-F1=0.8319)


Epoch 2/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 2 done | train_loss=0.1199 | dev_micro=0.8368 | dev_macro=0.7445
  → New best model (dev Micro-F1=0.8368)


Epoch 3/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 3 done | train_loss=0.1043 | dev_micro=0.8390 | dev_macro=0.7568
  → New best model (dev Micro-F1=0.8390)


Epoch 4/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 4 done | train_loss=0.0924 | dev_micro=0.8368 | dev_macro=0.7466


Epoch 5/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 5 done | train_loss=0.0824 | dev_micro=0.8349 | dev_macro=0.7583


Epoch 6/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 6 done | train_loss=0.0735 | dev_micro=0.8374 | dev_macro=0.7675


Epoch 7/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 7 done | train_loss=0.0653 | dev_micro=0.8354 | dev_macro=0.7715


Epoch 8/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 8 done | train_loss=0.0553 | dev_micro=0.8368 | dev_macro=0.7694


Epoch 9/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 9 done | train_loss=0.0463 | dev_micro=0.8216 | dev_macro=0.7986


Epoch 10/10:   0%|          | 0/820 [00:00<?, ?it/s]


Epoch 10 done | train_loss=0.0420 | dev_micro=0.8294 | dev_macro=0.7717

Best model restored!

Searching best global threshold on FULL dev...
t=0.10 → Dev Micro-F1 = 0.8035
t=0.15 → Dev Micro-F1 = 0.8092
t=0.20 → Dev Micro-F1 = 0.8124
t=0.25 → Dev Micro-F1 = 0.8141
t=0.30 → Dev Micro-F1 = 0.8173
t=0.35 → Dev Micro-F1 = 0.8217
t=0.40 → Dev Micro-F1 = 0.8235
t=0.45 → Dev Micro-F1 = 0.8258
t=0.50 → Dev Micro-F1 = 0.8294
t=0.55 → Dev Micro-F1 = 0.8318
t=0.60 → Dev Micro-F1 = 0.8309
t=0.65 → Dev Micro-F1 = 0.8326
t=0.70 → Dev Micro-F1 = 0.8327
t=0.75 → Dev Micro-F1 = 0.8329
t=0.80 → Dev Micro-F1 = 0.8326
t=0.85 → Dev Micro-F1 = 0.8326

BEST threshold: 0.75
BEST dev Micro-F1: 0.8329

 FINAL DEV PERFORMANCE  
Micro-F1 (t=0.75): 0.83

Admiration F1: 0.86
Amusement F1: 0.95
Gratitude F1: 0.95
Love F1: 0.90
Pride F1: 0.49
Relief F1: 0.25
Remorse F1: 0.89

Saved dev_predictions.csv!
