
# Fine-Tuning IndoBERTweet untuk Analisis Sentimen Layanan KAI

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Import & Konfigurasi Awal
import os
import numpy as np
import pandas as pd
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split

from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
)
import shutil
import torch
import evaluate

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments
)

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

Device: cuda


In [4]:
import torch
print(torch.cuda.is_available())
print(torch.version.cuda)
print(torch.__version__)


True
12.6
2.9.0+cu126


In [5]:
# Konfigurasi File & Skenario P1–P4
# Path file hasil preprocessing (gabungan A + C)
DATA_FILE = "preprocessed_merged_stepwise.csv"

# Mapping skenario preprocessing ke nama kolom teks
SCENARIO_TO_TEXTCOL = {
    "P1": "text_clean",               # only cleaning
    "P2": "text_nostop_text",         # stopword removal only
    "P3": "text_stemmed_nostopword",  # stemming only
    "P4": "text_stemmed",             # stopword + stemming
}

# pilih skenario yang mau dijalankan (bisa kamu ganti-ganti)
SCENARIO = "P1"   # misal awalnya P1

label_col = "label"

print("Skenario aktif:", SCENARIO, "-> kolom teks:", SCENARIO_TO_TEXTCOL[SCENARIO])


Skenario aktif: P1 -> kolom teks: text_clean


In [6]:
# Data Loading & Normalisasi Label
df = pd.read_csv(DATA_FILE, encoding="utf-8-sig")
df.columns = [c.strip().lower() for c in df.columns]
print("Kolom tersedia:", df.columns.tolist())

text_col = SCENARIO_TO_TEXTCOL[SCENARIO]

# pastikan kolom yang dibutuhkan ada
assert text_col in df.columns, f"Kolom teks '{text_col}' tidak ditemukan."
assert label_col in df.columns, f"Kolom label '{label_col}' tidak ditemukan."

# buang baris yang teks atau labelnya kosong
df = df.dropna(subset=[text_col, label_col])

# normalisasi label ke 3 kelas
df[label_col] = (
    df[label_col]
    .astype(str)
    .str.strip()
    .str.lower()
    .replace({
        "negatif": "neg",
        "negative": "neg",
        "positif": "pos",
        "positive": "pos",
        "netral": "neu",
        "neutral": "neu",
    })
)

# keep hanya 3 label utama
df = df[df[label_col].isin(["neg", "neu", "pos"])]

print("\nDistribusi label setelah normalisasi:")
print(df[label_col].value_counts())


Kolom tersedia: ['id_str', 'created_at', 'user_id_str', 'conversation_id_str', 'full_text', 'label', 'text_casefold', 'text_clean', 'tokens', 'tokens_nostop', 'text_nostop_text', 'tokens_stem', 'text_stemmed', 'emoji_drop_from_raw', 'emoji_map_from_raw', 'emoji_drop_from_stemmed', 'emoji_map_from_stemmed', 'text_stemmed_nostopword']

Distribusi label setelah normalisasi:
label
neu    721
neg    585
pos    128
Name: count, dtype: int64


Mapping Label & Split Train / Validasi / Test (80/10/10)

In [7]:
label2id = {"neg": 0, "neu": 1, "pos": 2}
id2label = {v: k for k, v in label2id.items()}

# 1) Split train + temp (80% train, 20% temp)
train_texts, temp_texts, train_labels, temp_labels = train_test_split(
    df[text_col].astype(str),
    df[label_col],
    test_size=0.2,
    random_state=SEED,
    stratify=df[label_col]
)

# 2) Split temp jadi validation + test (10% + 10%)
val_texts, test_texts, val_labels, test_labels = train_test_split(
    temp_texts,
    temp_labels,
    test_size=0.5,
    random_state=SEED,
    stratify=temp_labels
)

train_df = pd.DataFrame({"text": train_texts, "label": train_labels})
val_df   = pd.DataFrame({"text": val_texts, "label": val_labels})
test_df  = pd.DataFrame({"text": test_texts, "label": test_labels})

# mapping label -> id
train_df["label"] = train_df["label"].map(label2id)
val_df["label"]   = val_df["label"].map(label2id)
test_df["label"]  = test_df["label"].map(label2id)

print("TRAIN:", train_df["label"].value_counts().to_dict())
print("VAL  :", val_df["label"].value_counts().to_dict())
print("TEST :", test_df["label"].value_counts().to_dict())


TRAIN: {1: 577, 0: 468, 2: 102}
VAL  : {1: 72, 0: 58, 2: 13}
TEST : {1: 72, 0: 59, 2: 13}


Konversi ke HuggingFace Dataset & Tokenisasi

In [8]:
ds = DatasetDict({
    "train": Dataset.from_pandas(train_df, preserve_index=False),
    "validation": Dataset.from_pandas(val_df, preserve_index=False),
    "test": Dataset.from_pandas(test_df, preserve_index=False),
})

print(ds)

MODEL_NAME = "indolem/indobertweet-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize(batch):
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1147
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 143
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 144
    })
})


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [9]:
# Definisi Metrik Evaluasi
metric_accuracy = evaluate.load("accuracy")
metric_f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = metric_accuracy.compute(predictions=preds, references=labels)
    f1_macro = metric_f1.compute(predictions=preds, references=labels, average="macro")
    return {
        "accuracy": acc["accuracy"],
        "f1_macro": f1_macro["f1"],
    }


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Fungsi Utility: Jalankan 1 Eksperimen (untuk H1–H3 dan P1–P4)

In [10]:
def run_experiment(
    ds_encoded,
    learning_rate=2e-5,
    batch_size=16,
    num_epochs=5,
    output_dir="indobertweet-exp",
    run_name="",
):
    print(f"\n=== Running experiment: {run_name} ===")
    print(f"lr={learning_rate}, batch_size={batch_size}, epochs={num_epochs}")

    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=3,
        id2label=id2label,
        label2id=label2id
    ).to(device)

    args = TrainingArguments(
        output_dir=output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_epochs,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="eval_f1_macro",
        report_to="none",
        logging_dir=os.path.join(output_dir, "logs"),
        logging_strategy="epoch",
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=ds_encoded["train"],
        eval_dataset=ds_encoded["validation"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # evaluasi di validation (untuk pilih hyperparameter)
    val_results = trainer.evaluate(ds_encoded["validation"])
    print("\n[VALIDATION RESULTS]")
    for k, v in val_results.items():
        if isinstance(v, float):
            print(f"{k}: {v:.4f}")

    # evaluasi akhir di test
    test_results = trainer.evaluate(ds_encoded["test"])
    print("\n[TEST RESULTS]")
    for k, v in test_results.items():
        if isinstance(v, float):
            print(f"{k}: {v:.4f}")

    # simpan model & tokenizer (opsional, bisa pakai run_name)
    save_dir = os.path.join(output_dir, "best_model")
    trainer.save_model(save_dir)
    tokenizer.save_pretrained(save_dir)
    print(f"\n✅ Model & tokenizer disimpan di: {save_dir}")

    return val_results, test_results, trainer


In [11]:
def run_h1_lr_search(
    ds_encoded,
    learning_rates,
    batch_size,
    num_epochs,
    base_output_dir="exp_H1",
):
    """
    H1: Mencari learning rate terbaik dengan batch_size & epoch tetap.
    """
    results = []
    for lr in learning_rates:
        run_name = f"H1_lr_{lr}"
        out_dir = f"{base_output_dir}_lr_{lr}"

        val_res, test_res, _ = run_experiment(
            ds_encoded,
            learning_rate=lr,
            batch_size=batch_size,
            num_epochs=num_epochs,
            output_dir=out_dir,
            run_name=run_name,
        )

        results.append({
            "lr": lr,
            "val_f1_macro": val_res["eval_f1_macro"],
            "val_accuracy": val_res["eval_accuracy"],
            "test_f1_macro": test_res["eval_f1_macro"],
            "test_accuracy": test_res["eval_accuracy"],
        })

    return pd.DataFrame(results)


def run_h2_epoch_search(
    ds_encoded,
    epochs_list,
    learning_rate,
    batch_size,
    base_output_dir="exp_H2",
):
    """
    H2: Mencari jumlah epoch terbaik dengan lr & batch_size tetap.
    """
    results = []
    for n_ep in epochs_list:
        run_name = f"H2_ep_{n_ep}"
        out_dir = f"{base_output_dir}_ep_{n_ep}"

        val_res, test_res, _ = run_experiment(
            ds_encoded,
            learning_rate=learning_rate,
            batch_size=batch_size,
            num_epochs=n_ep,
            output_dir=out_dir,
            run_name=run_name,
        )

        results.append({
            "epochs": n_ep,
            "val_f1_macro": val_res["eval_f1_macro"],
            "val_accuracy": val_res["eval_accuracy"],
            "test_f1_macro": test_res["eval_f1_macro"],
            "test_accuracy": test_res["eval_accuracy"],
        })

    return pd.DataFrame(results)


def run_h3_batch_search(
    ds_encoded,
    batch_sizes,
    learning_rate,
    num_epochs,
    base_output_dir="exp_H3",
):
    """
    H3: Mencari batch size terbaik dengan lr & epoch tetap.
    """
    results = []
    for bs in batch_sizes:
        run_name = f"H3_bs_{bs}"
        out_dir = f"{base_output_dir}_bs_{bs}"

        val_res, test_res, _ = run_experiment(
            ds_encoded,
            learning_rate=learning_rate,
            batch_size=bs,
            num_epochs=num_epochs,
            output_dir=out_dir,
            run_name=run_name,
        )

        results.append({
            "batch_size": bs,
            "val_f1_macro": val_res["eval_f1_macro"],
            "val_accuracy": val_res["eval_accuracy"],
            "test_f1_macro": test_res["eval_f1_macro"],
            "test_accuracy": test_res["eval_accuracy"],
        })

    return pd.DataFrame(results)


Jalankan P1–P4 dengan Hyperparameter Terbaik

In [12]:
# %%
def load_dataset_for_scenario(
    scenario: str,
    data_file: str,
    scenario_to_textcol: dict,
    label_col: str,
    seed: int,
    tokenizer,
):
    """
    Load data, normalisasi label, split train/val/test,
    dan kembalikan DatasetDict yang sudah ditokenisasi
    untuk satu skenario preprocessing (P1–P4).
    """
    text_col = scenario_to_textcol[scenario]

    print("\n==============================")
    print("Skenario:", scenario, "| text col:", text_col)
    print("==============================")

    # --- Load & cleaning dasar ---
    df = pd.read_csv(data_file, encoding="utf-8-sig")
    df.columns = [c.strip().lower() for c in df.columns]

    assert text_col in df.columns, f"Kolom teks '{text_col}' tidak ditemukan."
    assert label_col in df.columns, f"Kolom label '{label_col}' tidak ditemukan."

    df = df.dropna(subset=[text_col, label_col])

    # --- Normalisasi label ke 3 kelas ---
    df[label_col] = (
        df[label_col]
        .astype(str).str.strip().str.lower()
        .replace({
            "negatif": "neg", "negative": "neg",
            "positif": "pos", "positive": "pos",
            "netral": "neu", "neutral": "neu",
        })
    )
    df = df[df[label_col].isin(["neg", "neu", "pos"])]

    # --- Split 80/10/10: train / validation / test ---
    train_texts, temp_texts, train_labels, temp_labels = train_test_split(
        df[text_col].astype(str),
        df[label_col],
        test_size=0.2,
        random_state=seed,
        stratify=df[label_col]
    )
    val_texts, test_texts, val_labels, test_labels = train_test_split(
        temp_texts,
        temp_labels,
        test_size=0.5,
        random_state=seed,
        stratify=temp_labels
    )

    train_df = pd.DataFrame({"text": train_texts, "label": train_labels})
    val_df   = pd.DataFrame({"text": val_texts, "label": val_labels})
    test_df  = pd.DataFrame({"text": test_texts, "label": test_labels})

    train_df["label"] = train_df["label"].map(label2id)
    val_df["label"]   = val_df["label"].map(label2id)
    test_df["label"]  = test_df["label"].map(label2id)

    print("TRAIN:", train_df["label"].value_counts().to_dict())
    print("VAL  :", val_df["label"].value_counts().to_dict())
    print("TEST :", test_df["label"].value_counts().to_dict())

    ds = DatasetDict({
        "train": Dataset.from_pandas(train_df, preserve_index=False),
        "validation": Dataset.from_pandas(val_df, preserve_index=False),
        "test": Dataset.from_pandas(test_df, preserve_index=False),
    })

    # --- Tokenisasi ---
    ds_encoded = ds.map(tokenize, batched=True)

    return ds_encoded


Step 0: Pastikan dataset & ds_encoded siap (misal pakai P1 dulu)

## UNDERSAMPLING

In [13]:
best_lr = 3e-05
best_epochs = 10
best_batch = 16

print("Best hyperparameters:")
print("lr     :", best_lr)
print("epochs :", best_epochs)
print("batch  :", best_batch)

Best hyperparameters:
lr     : 3e-05
epochs : 10
batch  : 16


In [15]:
# ============================================================
# Undersampling pada Skenario Terbaik (P1)
# ============================================================

# 1. Load ulang data P1 untuk keperluan resampling
df_res = pd.read_csv(DATA_FILE, encoding="utf-8-sig")
df_res.columns = [c.strip().lower() for c in df_res.columns]

text_col_p1 = SCENARIO_TO_TEXTCOL["P1"]

# pastikan kolom ada dan buang NA
df_res = df_res.dropna(subset=[text_col_p1, label_col])

# normalisasi label ke 3 kelas (neg, neu, pos)
df_res[label_col] = (
    df_res[label_col]
    .astype(str).str.strip().str.lower()
    .replace({
        "negatif": "neg", "negative": "neg",
        "positif": "pos", "positive": "pos",
        "netral": "neu", "neutral": "neu",
    })
)
df_res = df_res[df_res[label_col].isin(["neg", "neu", "pos"])]

# split 80/10/10 seperti sebelumnya (pakai SEED yang sama)
train_texts_r, temp_texts_r, train_labels_r, temp_labels_r = train_test_split(
    df_res[text_col_p1].astype(str),
    df_res[label_col],
    test_size=0.2,
    random_state=SEED,
    stratify=df_res[label_col]
)

val_texts_r, test_texts_r, val_labels_r, test_labels_r = train_test_split(
    temp_texts_r,
    temp_labels_r,
    test_size=0.5,
    random_state=SEED,
    stratify=temp_labels_r
)

train_df_r = pd.DataFrame({"text": train_texts_r, "label": train_labels_r})
val_df_r   = pd.DataFrame({"text": val_texts_r, "label": val_labels_r})
test_df_r  = pd.DataFrame({"text": test_texts_r, "label": test_labels_r})

# mapping label -> id
train_df_r["label"] = train_df_r["label"].map(label2id)
val_df_r["label"]   = val_df_r["label"].map(label2id)
test_df_r["label"]  = test_df_r["label"].map(label2id)

print("\n[UNDERSAMPLING P1] Distribusi label TRAIN sebelum undersampling:")
print(train_df_r["label"].value_counts())

# # 2. Oversampling kelas minoritas pada data latih
# max_count = train_df_r["label"].value_counts().max()
# train_resampled = (
#     train_df_r
#     .groupby("label", group_keys=False)
#     .apply(lambda x: x.sample(n=max_count, replace=True))
#     .reset_index(drop=True)
# )

# 2. Random Undersampling kelas mayoritas pada data latih
min_count = train_df_r["label"].value_counts().min()
train_resampled = (
    train_df_r
    .groupby("label", group_keys=False)
    .apply(lambda x: x.sample(n=min_count, replace=False, random_state=SEED))
    .reset_index(drop=True)
)


print("\n[UNDERSAMPLING P1] Distribusi label TRAIN setelah undersampling:")
print(train_resampled["label"].value_counts())

# 3. Bangun DatasetDict baru & tokenisasi
ds_resampled = DatasetDict({
    "train": Dataset.from_pandas(train_resampled, preserve_index=False),
    "validation": Dataset.from_pandas(val_df_r, preserve_index=False),
    "test": Dataset.from_pandas(test_df_r, preserve_index=False),
})

ds_resampled_encoded = ds_resampled.map(tokenize, batched=True)

print("\n[UNDERSAMPLING P1] Dataset dengan resampling:")
print(ds_resampled_encoded)

# 4. Jalankan ulang eksperimen dengan hyperparameter terbaik yang sama
val_res_resampled, test_res_resampled, final_trainer_res = run_experiment(
    ds_resampled_encoded,
    learning_rate=best_lr,
    batch_size=best_batch,
    num_epochs=best_epochs,
    output_dir="final_indobertweet_model_p1_undersampled",
    run_name="final_run_p1_undersampled",
)

# (opsional) evaluasi lengkap di test set untuk perbandingan
pred_output_res = final_trainer_res.predict(ds_resampled_encoded["test"])
y_true_res = pred_output_res.label_ids
y_pred_res = np.argmax(pred_output_res.predictions, axis=-1)

test_acc_res = accuracy_score(y_true_res, y_pred_res)
prec_macro_res, rec_macro_res, f1_macro_res, _ = precision_recall_fscore_support(
    y_true_res, y_pred_res, average="macro"
)

print("\n=== EVALUASI LENGKAP TEST SET - P1 + UNDERSAMPLING ===")
print(f"Akurasi (sklearn)         : {test_acc_res:.4f}")
print(f"Precision Macro           : {prec_macro_res:.4f}")
print(f"Recall Macro              : {rec_macro_res:.4f}")
print(f"F1-Score Macro (sklearn)  : {f1_macro_res:.4f}")
print(f"F1-Score Macro (Trainer)  : {test_res_resampled['eval_f1_macro']:.4f}")
print(f"Akurasi (Trainer)         : {test_res_resampled['eval_accuracy']:.4f}")



[UNDERSAMPLING P1] Distribusi label TRAIN sebelum undersampling:
label
1    577
0    468
2    102
Name: count, dtype: int64

[UNDERSAMPLING P1] Distribusi label TRAIN setelah undersampling:
label
0    102
1    102
2    102
Name: count, dtype: int64


  .apply(lambda x: x.sample(n=min_count, replace=False, random_state=SEED))


Map:   0%|          | 0/306 [00:00<?, ? examples/s]

Map:   0%|          | 0/143 [00:00<?, ? examples/s]

Map:   0%|          | 0/144 [00:00<?, ? examples/s]


[UNDERSAMPLING P1] Dataset dengan resampling:
DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 306
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 143
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 144
    })
})

=== Running experiment: final_run_p1_undersampled ===
lr=3e-05, batch_size=16, epochs=10


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,1.0606,0.913283,0.615385,0.51964
2,0.9213,0.961702,0.538462,0.442409
3,0.7674,0.822009,0.664336,0.590264
4,0.5744,0.853951,0.657343,0.583862
5,0.4668,0.811923,0.65035,0.616364
6,0.3495,0.89348,0.622378,0.599794
7,0.2615,0.917623,0.622378,0.57102
8,0.1659,0.934497,0.601399,0.56765
9,0.1204,0.922544,0.643357,0.602159
10,0.1027,0.972642,0.629371,0.585565



[VALIDATION RESULTS]
eval_loss: 0.8119
eval_accuracy: 0.6503
eval_f1_macro: 0.6164
eval_runtime: 1.1644
eval_samples_per_second: 122.8100
eval_steps_per_second: 7.7290
epoch: 10.0000

[TEST RESULTS]
eval_loss: 0.7808
eval_accuracy: 0.6528
eval_f1_macro: 0.5932
eval_runtime: 1.1397
eval_samples_per_second: 126.3480
eval_steps_per_second: 7.8970
epoch: 10.0000

✅ Model & tokenizer disimpan di: final_indobertweet_model_p1_undersampled/best_model

=== EVALUASI LENGKAP TEST SET - P1 + UNDERSAMPLING ===
Akurasi (sklearn)         : 0.6528
Precision Macro           : 0.5817
Recall Macro              : 0.6190
F1-Score Macro (sklearn)  : 0.5932
F1-Score Macro (Trainer)  : 0.5932
Akurasi (Trainer)         : 0.6528


In [16]:
# Laporan per kelas (neg, neu, pos)
target_names = ["neg", "neu", "pos"]
print("\n--- Classification Report per Kelas - P1 + UNDERSAMPLING ---")
print(classification_report(y_true_res, y_pred_res, target_names=target_names, digits=4))

# Confusion Matrix
cm_res = confusion_matrix(y_true_res, y_pred_res)
cm_res_df = pd.DataFrame(cm_res, index=target_names, columns=target_names)
print("\n--- Confusion Matrix - P1 + UNDERSAMPLING (rows = true, cols = pred) ---")
print(cm_res_df)

save_dir_res = "final_indobertweet_model_undersampling/best_model"
final_trainer_res.save_model(save_dir_res)
tokenizer.save_pretrained(save_dir_res)
print(f"\n✅ Final model & tokenizer disimpan di: {save_dir_res}")

# ============================================================
# Menyimpan Model ke Google Drive
# ============================================================

drive_save_dir_res = "/content/drive/MyDrive/ModelSkripsi/skripsi_indobertweet_final_model_undersampling"

# Kalau folder tujuan sudah ada, hapus dulu (opsional, supaya bersih)
if os.path.exists(drive_save_dir_res):
    shutil.rmtree(drive_save_dir_res)

shutil.copytree(save_dir_res, drive_save_dir_res)
print(f"✅ Model & tokenizer juga disalin ke Google Drive: {drive_save_dir_res}")


--- Classification Report per Kelas - P1 + UNDERSAMPLING ---
              precision    recall  f1-score   support

         neg     0.6667    0.6102    0.6372        59
         neu     0.7286    0.7083    0.7183        72
         pos     0.3500    0.5385    0.4242        13

    accuracy                         0.6528       144
   macro avg     0.5817    0.6190    0.5932       144
weighted avg     0.6690    0.6528    0.6585       144


--- Confusion Matrix - P1 + UNDERSAMPLING (rows = true, cols = pred) ---
     neg  neu  pos
neg   36   14    9
neu   17   51    4
pos    1    5    7

✅ Final model & tokenizer disimpan di: final_indobertweet_model_undersampling/best_model
✅ Model & tokenizer juga disalin ke Google Drive: /content/drive/MyDrive/ModelSkripsi/skripsi_indobertweet_final_model_undersampling


Kode Buat Penulisan

In [None]:
# Menampilkan distribusi label sebelum undersampling
print("\n[UNDERSAMPLING P1] Distribusi label TRAIN sebelum undersampling:")
print(train_df_r["label"].value_counts())

# Menentukan target jumlah sampel tiap kelas (mengikuti kelas dengan jumlah paling sedikit)
min_count = train_df_r["label"].value_counts().min()

# Random Undersampling: kurangi kelas mayoritas hingga semua kelas = min_count
train_resampled = (
    train_df_r
    .groupby("label", group_keys=False)
    .apply(lambda x: x.sample(n=min_count, replace=False, random_state=SEED))
    .reset_index(drop=True)
)

# Menampilkan distribusi label setelah undersampling
print("\n[UNDERSAMPLING P1] Distribusi label TRAIN setelah undersampling:")
print(train_resampled["label"].value_counts())

# Membentuk dataset kembali (train hasil undersampling, val & test tetap)
ds_resampled = DatasetDict({
    "train": Dataset.from_pandas(train_resampled, preserve_index=False),
    "validation": Dataset.from_pandas(val_df_r, preserve_index=False),
    "test": Dataset.from_pandas(test_df_r, preserve_index=False),
})

# Tokenisasi agar siap dipakai IndoBERTweet
ds_resampled_encoded = ds_resampled.map(tokenize, batched=True)

# Fine-tuning dan evaluasi menggunakan hyperparameter terbaik yang sama
val_res_under, test_res_under, final_trainer_under = run_experiment(
    ds_resampled_encoded,
    learning_rate=best_lr,
    batch_size=best_batch,
    num_epochs=best_epochs,
    output_dir="final_indobertweet_model_p1_undersampled",
    run_name="final_run_p1_undersampled",
)

# Evaluasi tambahan (opsional) untuk laporan lengkap test set
pred_output_under = final_trainer_under.predict(ds_resampled_encoded["test"])
y_true_under = pred_output_under.label_ids
y_pred_under = np.argmax(pred_output_under.predictions, axis=-1)

print("\n--- Classification Report per Kelas - P1 + UNDERSAMPLING ---")
print(classification_report(y_true_under, y_pred_under, target_names=["neg", "neu", "pos"], digits=4))

cm_under = confusion_matrix(y_true_under, y_pred_under)
cm_under_df = pd.DataFrame(cm_under, index=["neg", "neu", "pos"], columns=["neg", "neu", "pos"])
print("\n--- Confusion Matrix - P1 + UNDERSAMPLING ---")
print(cm_under_df)
