In [1]:
import ast
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    hamming_loss,
    accuracy_score
)
from tqdm import tqdm

# Directory of the attached Kaggle dataset that holds the pre-split CSV files.
DATASET_DIR = "/kaggle/input/dataset-t5"

# Read both splits up front so a missing file fails before any model work.
train_path = f"{DATASET_DIR}/train.csv"
test_path = f"{DATASET_DIR}/test.csv"
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# Report (rows, columns) of each split as a quick sanity check.
print("Train:", train_df.shape)
print("Test :", test_df.shape)



2026-01-03 08:25:46.616039: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767428747.047108      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767428747.162541      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767428748.291304      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767428748.291346      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767428748.291349      55 computation_placer.cc:177] computation placer alr

Train: (180000, 2)
Test : (20000, 2)


In [2]:
# Collect every distinct tag found in the training split. Each string cell of
# `tag_list` is parsed as a Python literal; non-string cells are skipped.
# NOTE(review): the vocabulary comes from train_df only, so a tag that occurs
# only in another split gets no index here -- confirm this is intended.
unique_tags = {
    tag
    for raw_tags in train_df["tag_list"]
    if isinstance(raw_tags, str)
    for tag in ast.literal_eval(raw_tags)
}

# Sorting gives each tag a stable position in the multi-hot vectors.
all_labels = sorted(unique_tags)
label_to_idx = dict(zip(all_labels, range(len(all_labels))))

print("Total label:", len(all_labels))


Total label: 24998


In [3]:
# Location of the fine-tuned T5 checkpoint (tokenizer and weights).
MODEL_PATH = "/kaggle/input/model-t5/t5_final_model"

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH)

# Load the weights, move them to the chosen device and switch to eval mode.
model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)
model = model.to(device).eval()

print("Device:", device)


Device: cuda


In [4]:
class T5EvalDataset(Dataset):
    """Map-style dataset that tokenizes one evaluation example per index.

    Every item is a dict with:
      * ``input_ids`` / ``attention_mask``: 1-D tensors of length ``max_len``
        (the tokenizer is asked to truncate and pad to that length),
      * ``labels``: the entry of the ``tag_list`` column, passed through as-is.

    Args:
        df: DataFrame providing the ``text`` and ``tag_list`` columns.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=..., padding=..., max_length=...,
            return_tensors="pt")`` and returning a mapping that contains
            ``input_ids`` and ``attention_mask`` with a leading batch axis.
        max_len: Length every encoded input is truncated/padded to.
    """

    def __init__(self, df, tokenizer, max_len=512):
        self.texts = df["text"].tolist()
        self.labels = df["tag_list"].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the example at ``idx`` and return it with its raw labels."""
        text = self.texts[idx]
        if not isinstance(text, str):
            # An empty CSV cell is loaded by pandas as float NaN; adding it to
            # the prompt prefix would raise TypeError. Treat missing text as
            # empty and stringify any other non-string value.
            text = "" if pd.isna(text) else str(text)

        enc = self.tokenizer(
            "classify: " + text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )

        return {
            # Drop only the leading batch axis. A bare squeeze() would also
            # remove the sequence axis when max_len == 1 and yield 0-d tensors.
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "labels": self.labels[idx]
        }



In [5]:
# Wrap the test split for batched inference. Order is kept (no shuffling) so
# predictions stay aligned with the rows of test_df.
eval_dataset = T5EvalDataset(df=test_df, tokenizer=tokenizer)
eval_loader = DataLoader(
    dataset=eval_dataset,
    batch_size=8,
    shuffle=False,
)

In [6]:
# Decoded generations and their reference labels, appended batch by batch so
# that position i in both lists refers to the same example.
all_preds, all_true = [], []

# Inference only: gradients are not needed.
with torch.no_grad():
    for batch in tqdm(eval_loader, desc="Evaluating"):
        # Move just the tensors the model consumes onto the target device.
        model_inputs = {
            key: batch[key].to(device)
            for key in ("input_ids", "attention_mask")
        }

        # Generate at most 128 tokens per example.
        generated = model.generate(**model_inputs, max_length=128)

        # Turn token ids back into text, dropping pad/eos markers.
        all_preds += tokenizer.batch_decode(generated, skip_special_tokens=True)
        all_true += batch["labels"]


Evaluating: 100%|██████████| 2500/2500 [13:47<00:00,  3.02it/s]


In [7]:
def to_binary(tags):
    """Encode an iterable of tags as a multi-hot vector over ``all_labels``.

    Position ``label_to_idx[tag]`` is set to 1 for every tag that has an index;
    tags missing from ``label_to_idx`` are ignored.

    Returns:
        1-D integer array of length ``len(all_labels)``.
    """
    vec = np.zeros(len(all_labels), dtype=int)
    known_positions = [label_to_idx[tag] for tag in tags if tag in label_to_idx]
    vec[known_positions] = 1
    return vec


In [8]:
# Encode references and predictions as the rows of two multi-hot matrices.
#
# Both matrices are preallocated as int8 and filled in place. With the shapes
# this notebook prints (20000 x 24998), an int64 matrix takes about 4 GB, and
# collecting rows in a Python list before np.array() briefly holds a second
# copy. int8 stores the same 0/1 values in one eighth of the memory.
assert len(all_true) == len(all_preds), (
    "all_true and all_preds must have the same length"
)

n_samples = len(all_true)
y_true = np.zeros((n_samples, len(all_labels)), dtype=np.int8)
y_pred = np.zeros((n_samples, len(all_labels)), dtype=np.int8)

for row, (true_t, pred_t) in enumerate(zip(all_true, all_preds)):

    # ===== TRUE LABEL =====
    # References arrive as the raw cell value: a stringified list is parsed,
    # a real list is used directly, anything else counts as "no tags".
    if isinstance(true_t, str):
        true_tags = ast.literal_eval(true_t)
    elif isinstance(true_t, list):
        true_tags = true_t
    else:
        true_tags = []

    # ===== PREDICTED LABEL =====
    # The generated text is split on commas; surrounding whitespace and empty
    # fragments are discarded.
    pred_tags = [x.strip() for x in pred_t.split(",") if x.strip()]

    y_true[row] = to_binary(true_tags)
    y_pred[row] = to_binary(pred_tags)

print("y_true shape:", y_true.shape)
print("y_pred shape:", y_pred.shape)


y_true shape: (20000, 24998)
y_pred shape: (20000, 24998)


In [9]:
# Precision / recall / F1 under both averaging modes. A score with a zero
# denominator is reported as 0 (zero_division=0).
prf_scorers = (precision_score, recall_score, f1_score)
micro_kwargs = {"average": "micro", "zero_division": 0}
macro_kwargs = {"average": "macro", "zero_division": 0}

precision_micro, recall_micro, f1_micro = (
    scorer(y_true, y_pred, **micro_kwargs) for scorer in prf_scorers
)
precision_macro, recall_macro, f1_macro = (
    scorer(y_true, y_pred, **macro_kwargs) for scorer in prf_scorers
)

# Hamming loss and accuracy_score on the same indicator matrices; the latter
# is what the report below labels "Subset Accuracy".
hamming = hamming_loss(y_true, y_pred)
subset = accuracy_score(y_true, y_pred)


In [10]:
# Assemble the evaluation report line by line and emit it in one call; the
# printed text is the same as printing each line separately.
report_lines = [
    "\n=== HASIL EVALUASI MULTI-LABEL T5 ===",
    f"Precision (micro) : {precision_micro:.4f}",
    f"Recall (micro)    : {recall_micro:.4f}",
    f"F1-score (micro)  : {f1_micro:.4f}",
    f"Precision (macro) : {precision_macro:.4f}",
    f"Recall (macro)    : {recall_macro:.4f}",
    f"F1-score (macro)  : {f1_macro:.4f}",
    f"Hamming Loss      : {hamming:.6f}",
    f"Subset Accuracy   : {subset:.4f}",
]
print("\n".join(report_lines))



=== HASIL EVALUASI MULTI-LABEL T5 ===
Precision (micro) : 0.5820
Recall (micro)    : 0.5266
F1-score (micro)  : 0.5529
Precision (macro) : 0.0957
Recall (macro)    : 0.0807
F1-score (macro)  : 0.0829
Hamming Loss      : 0.000103
Subset Accuracy   : 0.0924
