In [1]:
import os
import numpy as np
import pandas as pd
import random
import copy

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from PIL import Image
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.transforms import functional as F

import re
from collections import Counter
from transformers import (
    AutoConfig,
    AutoTokenizer, 
    AutoModel,
    AutoModelForSequenceClassification
)

from tqdm import tqdm
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, 
    classification_report, roc_auc_score, confusion_matrix
)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# %%
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# %%
seed = 0
def seed_everything(seed):
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(seed)

# %%
# Generator untuk DataLoader
g = torch.Generator()
g.manual_seed(seed)

def seed_worker(worker_id):
    """Fungsi untuk memastikan setiap worker memiliki seed yang sama"""
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# %%
judi_images_path = "/kaggle/input/gamblingdet-id/situsjudiid-full/judi"
nonjudi_images_path = "/kaggle/input/gamblingdet-id/situsjudiid-full/non-judi"

train_csv_path = '/kaggle/input/train-test/train_data.csv'
test_csv_path = '/kaggle/input/train-test/test_data.csv'

# %%
# Load data
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)
train_df

# %%
# Drop missing values
print('Missing values in Train:\n', train_df.isna().sum())
train_df = train_df.dropna()

# %%
# Add function to get full image path based on class
def get_image_path(filename, class_label):
    if class_label == 'judi':
        return os.path.join(judi_images_path, filename)
    else:  # non-judi
        return os.path.join(nonjudi_images_path, filename)

# %%
import re

def clean_texts(texts):
    cleaned_texts = []

    # Kata 1–2 huruf yang penting dan tidak boleh dihapus
    exceptions = {"di", "ke", "ya", "jl"}

    for text in texts:
        # ----- BASIC CLEANING -----
        text = re.sub(r"http\S+", "", text)                    # Hapus URL
        text = re.sub(r"\n", " ", text)                        # Ganti newline dengan spasi
        text = re.sub(r"[^a-zA-Z]", " ", text)             # Sisakan huruf, angka, apostrof
        text = re.sub(r"\s{2,}", " ", text).strip().lower()    # Hapus spasi ganda & ke lowercase

        # ----- FILTERING -----
        words = text.split()
        filtered_words = [
            w for w in words
            if (len(w) > 2 or w in exceptions)
            and not re.fullmatch(r"[aeiou]{3,}", w)            # Hindari vokal berulang ("aaa")
            and not re.fullmatch(r"[bcdfghjklmnpqrstvwxyz]{3,}", w)  # Hindari konsonan berulang ("kkk")
            and len(w) <= 20                                   # Batasi kata terlalu panjang
        ]

        cleaned_texts.append(" ".join(filtered_words))

    return cleaned_texts

# %%
# Bersihkan teks
train_df['cleaned_text'] = clean_texts(train_df['Extracted Text'])
test_df['cleaned_text'] = clean_texts(test_df['Extracted Text'])

# %%
# Drop rows with less than 5 words
train_df = train_df[train_df['cleaned_text'].apply(lambda x: len(str(x).split()) >= 5)]
test_df = test_df[test_df['cleaned_text'].apply(lambda x: len(str(x).split()) >= 5)]
train_df

# %%
# Cek jumlah duplikasi sebelum dihapus
print("Duplikasi di train:", train_df.duplicated(subset='cleaned_text').sum())
print("Duplikasi di test :", test_df.duplicated(subset='cleaned_text').sum())

# Hapus duplikasi berdasarkan cleaned_text
train_df = train_df.drop_duplicates(subset='cleaned_text').reset_index(drop=True)
test_df = test_df.drop_duplicates(subset='cleaned_text').reset_index(drop=True)

# Cek ulang setelah pembersihan
print("Setelah dihapus:")
print("Train:", len(train_df), "baris")
print("Test :", len(test_df), "baris")

# %%
# Print jumlah per kelas
print("Distribusi label di Train set:")
print(train_df['Class'].value_counts(), '\n')

print("Distribusi label di Test set:")
print(test_df['Class'].value_counts())

# %%
from sklearn.utils import resample

# Pisahkan data berdasarkan kelas
train_judi = train_df[train_df['Class'] == 'judi']
train_nonjudi = train_df[train_df['Class'] == 'non-judi']

# Undersampling kelas mayoritas (non-judi) agar jumlahnya sama dengan kelas judi
train_nonjudi_undersampled = resample(train_nonjudi,
                                      replace=False,      # tanpa duplikasi
                                      n_samples=len(train_judi),  # samakan jumlahnya dengan kelas minoritas
                                      random_state=seed)    # untuk replikasi hasil

# Gabungkan kembali data yang sudah diundersample
train_df_balanced = pd.concat([train_judi, train_nonjudi_undersampled])

# Cek distribusi baru
print("Distribusi label setelah undersampling:")
print(train_df_balanced['Class'].value_counts())

# %%
label_map = {
    "non-judi": 0,
    "judi": 1
}

train_df_balanced['label'] = train_df_balanced['Class'].map(label_map)
test_df['label'] = test_df['Class'].map(label_map)

train_df_balanced['image'] = train_df_balanced['File Name']
test_df['image'] = test_df['File Name']

# %%
traindf, validdf = train_test_split(
    train_df_balanced, test_size=0.2, stratify=train_df_balanced['label'], random_state=seed
)

print(f"Jumlah data train: {len(traindf)}")
print(f"Jumlah data valid: {len(validdf)}")
print(f"Jumlah data test: {len(test_df)}")

# %%
# Print jumlah per kelas
print("Distribusi label di Train set:")
print(traindf['label'].value_counts(), '\n')

print("Distribusi label di Validation set:")
print(validdf['label'].value_counts(), '\n')

print("Distribusi label di Test set:")
print(test_df['label'].value_counts())

# %%
class ResizePadToSquare:
    def __init__(self, target_size=300):
        self.target_size = target_size

    def __call__(self, img):
        img = img.convert("RGB")
        # Resize dengan thumbnail: jaga rasio, resize sisi terpanjang ke target
        img.thumbnail((self.target_size, self.target_size), Image.BILINEAR)
        # Padding agar menjadi square
        delta_w = self.target_size - img.size[0]
        delta_h = self.target_size - img.size[1]
        padding = (delta_w // 2, delta_h // 2, delta_w - delta_w // 2, delta_h - delta_h // 2)
        img = F.pad(img, padding, fill=0, padding_mode='constant')
        return img

# %%
transform_train = transforms.Compose([
    ResizePadToSquare(300),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

transform_test = transforms.Compose([
    ResizePadToSquare(300),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# %%
tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
max_length= 128

# %%
class CustomCombinedDataset(Dataset):
    def __init__(self, dataframe, images_path_judi, images_path_non_judi, tokenizer, max_length, transforms=None):
        self.dataframe = dataframe.reset_index(drop=True)  # Reset index to avoid KeyError
        self.images_path_judi = images_path_judi
        self.images_path_non_judi = images_path_non_judi
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.transforms = transforms
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        img_name = row['image']
        label = row['label']
        text = str(row['cleaned_text'])
        
        # Tentukan path berdasarkan label
        if label == 1:  # judi
            img_path = os.path.join(self.images_path_judi, img_name)
        else:  # non-judi (label == 0)
            img_path = os.path.join(self.images_path_non_judi, img_name)
        
        image = Image.open(img_path)
        
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            truncation=True,
            return_tensors='pt',
            padding='max_length'
        )
        
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()
        
        if self.transforms is not None:
            image = self.transforms(image)
        
        return {
            'image': image,
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'label': torch.tensor(label, dtype=torch.long),
            'filename': img_name
        }

# %%
# Update inisialisasi dataset dengan kedua path
combined_train_dataset = CustomCombinedDataset(traindf, judi_images_path, nonjudi_images_path, tokenizer, max_length, transforms=transform_train)
combined_valid_dataset = CustomCombinedDataset(validdf, judi_images_path, nonjudi_images_path, tokenizer, max_length, transforms=transform_test)
combined_test_dataset = CustomCombinedDataset(test_df, judi_images_path, nonjudi_images_path, tokenizer, max_length, transforms=transform_test)

# %%
batch_size = 16
combined_train_loader = DataLoader(combined_train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, worker_init_fn=seed_worker, generator=g)
combined_valid_loader = DataLoader(combined_valid_dataset, batch_size=batch_size, shuffle=True, num_workers=0, worker_init_fn=seed_worker, generator=g)
combined_test_loader = DataLoader(combined_test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, worker_init_fn=seed_worker, generator=g)

# %%
image_model_path = "/kaggle/input/gamblingwebi-v16/best_image_model_Adam_lr0.0001_bs16_state_dict.pt"
text_model_path = "/kaggle/input/gamblingwebt-v16/best_text_model_bs32_lr3e-05_ep3_state_dict.pt"

# %%
# Inisialisasi ulang model - SAMA DENGAN gamblingmultimodal-16.py
image_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
num_features = image_model.classifier[1].in_features
image_model.classifier = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(num_features, 1)
)

# Load state dict
image_model.load_state_dict(torch.load(image_model_path, map_location=device))
image_model.to(device)
image_model.eval()

# Inisialisasi ulang model
text_model = AutoModelForSequenceClassification.from_pretrained(
    'indobenchmark/indobert-base-p1',
    num_labels=1
)

# Load state dict
text_model.load_state_dict(torch.load(text_model_path, map_location=device))
text_model.to(device)
text_model.eval()

# %%
# Define function to evaluate image model
def evaluate_image_model(model, data_loader):
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating Image Model"):
            images = batch['image'].to(device)
            labels = batch['label'].to(device)
            outputs = model(images).squeeze(1)
            preds = (torch.sigmoid(outputs) > 0.5).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
    
    metrics = {
        'Accuracy': accuracy_score(all_labels, all_preds),
        'Precision': precision_score(all_labels, all_preds),
        'Recall': recall_score(all_labels, all_preds),
        'F1 Score': f1_score(all_labels, all_preds),
        'ConfusionMatrix': confusion_matrix(all_labels, all_preds)
    }
    return metrics

# %%
print("\n===== IMAGE MODEL EVALUATION =====")
image_metrics = evaluate_image_model(image_model, combined_test_loader)
print("\nTest Set Metrics:")
for metric_name, value in image_metrics.items():
    if metric_name != 'ConfusionMatrix':
        print(f"  {metric_name}: {value:.4f}")

# %%
def evaluate_text_model(model, data_loader):
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating Text Model"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask).logits.squeeze(1)
            preds = (torch.sigmoid(outputs) > 0.5).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
    
    return {
        'Accuracy': accuracy_score(all_labels, all_preds),
        'Precision': precision_score(all_labels, all_preds),
        'Recall': recall_score(all_labels, all_preds),
        'F1 Score': f1_score(all_labels, all_preds),
        'ConfusionMatrix': confusion_matrix(all_labels, all_preds)
    }

# %%
print("\n===== TEXT MODEL EVALUATION =====")
text_metrics = evaluate_text_model(text_model, combined_test_loader)
print("\nTest Set Metrics:")
for metric_name, value in text_metrics.items():
    if metric_name != 'ConfusionMatrix':
        print(f"  {metric_name}: {value:.4f}")

# %%
class MLPFusionModel(nn.Module):
    def __init__(self, image_model, text_model, hidden_dim=16, dropout_rate=0.2):
        super(MLPFusionModel, self).__init__()
        self.image_model = image_model
        self.text_model = text_model
        
        # Freeze the pre-trained models
        for param in self.image_model.parameters():
            param.requires_grad = False
        for param in self.text_model.parameters():
            param.requires_grad = False
            
        # MLP fusion layers
        # Input dimension: 2 (image logit + text logit)
        self.fusion_mlp = nn.Sequential(
            nn.Linear(2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, 1) 
        )
        
        # Initialize MLP weights
        self._initialize_weights()
        
    def _initialize_weights(self):
        """Initialize MLP weights using Xavier initialization"""
        for module in self.fusion_mlp:
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)
    
    def forward(self, images, input_ids, attention_mask):
        # Get predictions from individual models (no gradients needed)
        with torch.no_grad():
            image_logits = self.image_model(images).squeeze(1)
            text_logits = self.text_model(input_ids=input_ids, attention_mask=attention_mask).logits.squeeze(1)
        
        # Concatenate logits as input to MLP
        # Shape: (batch_size, 2)
        combined_features = torch.stack([image_logits, text_logits], dim=1)
        
        # Pass through MLP for fusion
        fused_logits = self.fusion_mlp(combined_features).squeeze(1)
        
        return fused_logits, image_logits, text_logits

# %%
def train_mlp_fusion_model(fusion_model, train_loader, valid_loader, criterion, optimizer, device, fusion_epochs=10, patience=5, exp_name=""):
    best_loss = float('inf')
    best_model_state = None
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    
    patience_counter = 0
    early_stopped = False
    
    # Gunakan nama file yang spesifik untuk setiap eksperimen
    model_save_path = f'best_mlp_fusion_model_{exp_name}_state_dict.pt' if exp_name else 'best_mlp_fusion_model_state_dict.pt'

    for epoch in range(fusion_epochs):
        # === TRAINING ===
        fusion_model.train()
        total_train_loss = 0.0
        correct_train = 0
        total_train = 0

        for batch in tqdm(train_loader, desc=f"[Epoch {epoch+1}] Training"):
            images = batch['image'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].float().to(device)

            optimizer.zero_grad()
            fused_logits, _, _ = fusion_model(images, input_ids, attention_mask)
            loss = criterion(fused_logits, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(fusion_model.fusion_mlp.parameters(), max_norm=1.0)
            optimizer.step()

            total_train_loss += loss.item()

            preds = (torch.sigmoid(fused_logits) > 0.5).long()
            correct_train += (preds.squeeze() == labels.long()).sum().item()
            total_train += labels.size(0)

        avg_train_loss = total_train_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        train_losses.append(avg_train_loss)
        train_accuracies.append(train_accuracy)

        # === VALIDATION ===
        fusion_model.eval()
        total_val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for batch in tqdm(valid_loader, desc=f"[Epoch {epoch+1}] Validation", leave=False):
                images = batch['image'].to(device)
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].float().to(device)

                fused_logits, _, _ = fusion_model(images, input_ids, attention_mask)
                loss = criterion(fused_logits, labels)
                total_val_loss += loss.item()

                preds = (torch.sigmoid(fused_logits) > 0.5).long()
                correct_val += (preds.squeeze() == labels.long()).sum().item()
                total_val += labels.size(0)

        avg_val_loss = total_val_loss / len(valid_loader)
        val_accuracy = correct_val / total_val
        val_losses.append(avg_val_loss)
        val_accuracies.append(val_accuracy)

        # === Early Stopping ===
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            best_model_state = fusion_model.state_dict()  # tidak perlu .copy()
            torch.save(best_model_state, model_save_path)
            patience_counter = 0
            print(f"Epoch {epoch+1}/{fusion_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | ✅ Best model saved!")
        else:
            patience_counter += 1
            print(f"Epoch {epoch+1}/{fusion_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Patience: {patience_counter}/{patience}")
            if patience_counter >= patience:
                print(f"\n🛑 Early stopping at epoch {epoch+1}. No improvement for {patience} epochs.")
                early_stopped = True
                break

    if best_model_state is not None:
        fusion_model.load_state_dict(best_model_state)
        print(f"\n✅ Loaded best model with validation loss: {best_loss:.4f}")

    print(f"🔍 Best Validation Loss: {best_loss:.4f}")
    return train_losses, val_losses, train_accuracies, val_accuracies, model_save_path

# %%
import time

def evaluate_mlp_fusion_model(fusion_model, data_loader):
    fusion_model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    start_time = time.time()  # Mulai pencatatan waktu
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating MLP Fusion Model"):
            images = batch['image'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            
            fused_logits, _, _ = fusion_model(images, input_ids, attention_mask)
            fused_probs = torch.sigmoid(fused_logits).cpu().numpy()
            fused_preds = (fused_probs > 0.5).astype(int)
            
            all_preds.extend(fused_preds)
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(fused_probs)
    
    end_time = time.time()
    total_time = end_time - start_time
    avg_time_per_sample = total_time / len(data_loader.dataset)
    
    print(f"\nTotal inference time: {total_time:.2f} seconds")
    print(f"Average time per sample: {avg_time_per_sample * 1000:.2f} ms")
    
    return {
        'Accuracy': accuracy_score(all_labels, all_preds),
        'Precision': precision_score(all_labels, all_preds),
        'Recall': recall_score(all_labels, all_preds),
        'F1 Score': f1_score(all_labels, all_preds),
        'AUC': roc_auc_score(all_labels, all_probs),  # Use probabilities for AUC
        'ConfusionMatrix': confusion_matrix(all_labels, all_preds),
        'InferenceTime': total_time,
        'TimePerSample': avg_time_per_sample
    }

def run_experiments_consistent():
    """
    Run experiments with EXACTLY the same approach as gamblingmultimodal-16.py
    """
    experiments = [
        ("Exp1_4neurons", 4, 0.1),
        ("Exp2_8neurons", 8, 0.15),
        ("Exp3_16neurons", 16, 0.2), # baseline model
        ("Exp4_32neurons", 32, 0.25),
        ("Exp5_64neurons", 64, 0.3)
    ]

    results = {}

    for exp_name, hidden_dim, dropout_rate in experiments:
        # === RESET SEED DAN MODEL DI SETIAP EKSPERIMEN ===
        seed_everything(seed)

        # Inisialisasi ulang image_model
        image_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
        num_features = image_model.classifier[1].in_features
        image_model.classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(num_features, 1)
        )
        image_model.load_state_dict(torch.load(image_model_path, map_location=device))
        image_model.to(device)
        image_model.eval()

        # Inisialisasi ulang text_model
        text_model = AutoModelForSequenceClassification.from_pretrained(
            'indobenchmark/indobert-base-p1',
            num_labels=1
        )
        text_model.load_state_dict(torch.load(text_model_path, map_location=device))
        text_model.to(device)
        text_model.eval()

        print(f"\n{'='*50}")
        print(f"RUNNING {exp_name.upper()}")
        print(f"{'='*50}")
        
        # 🔁 Buat fusion model baru dengan parameter yang sama seperti gamblingmultimodal-16.py
        fusion_model = MLPFusionModel(image_model, text_model, hidden_dim=hidden_dim, dropout_rate=dropout_rate)
        fusion_model.to(device)

        # 🔧 Setup optimizer dan loss - SAMA DENGAN gamblingmultimodal-16.py
        optimizer = optim.Adam(fusion_model.fusion_mlp.parameters(), lr=0.0001, weight_decay=1e-4)
        criterion = nn.BCEWithLogitsLoss()

        # 🔁 Train model
        train_losses, val_losses, train_accuracies, val_accuracies, model_save_path = train_mlp_fusion_model(
            fusion_model=fusion_model,
            train_loader=combined_train_loader,
            valid_loader=combined_valid_loader,
            criterion=criterion,
            optimizer=optimizer,
            device=device,
            fusion_epochs=15,
            patience=3,
            exp_name=exp_name
        )

        # ✅ Load model terbaik dan evaluasi - SAMA DENGAN gamblingmultimodal-16.py
        # Inisialisasi ulang image_model dan text_model untuk evaluasi
        image_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
        num_features = image_model.classifier[1].in_features
        image_model.classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(num_features, 1)
        )
        image_model.load_state_dict(torch.load(image_model_path, map_location=device))
        image_model.to(device)
        image_model.eval()

        text_model = AutoModelForSequenceClassification.from_pretrained(
            'indobenchmark/indobert-base-p1',
            num_labels=1
        )
        text_model.load_state_dict(torch.load(text_model_path, map_location=device))
        text_model.to(device)
        text_model.eval()

        fusion_model = MLPFusionModel(image_model, text_model, hidden_dim=hidden_dim, dropout_rate=dropout_rate)
        fusion_model.load_state_dict(torch.load(model_save_path, map_location=device))
        fusion_model.to(device)

        test_metrics = evaluate_mlp_fusion_model(fusion_model, combined_test_loader)

        results[exp_name] = {
            'test_metrics': test_metrics,
            'train_losses': train_losses,
            'val_losses': val_losses,
            'train_accuracies': train_accuracies,
            'val_accuracies': val_accuracies,
            'hidden_dim': hidden_dim
        }

        # Save model dengan nama yang spesifik untuk setiap eksperimen
        torch.save(fusion_model, f'best_mlp_fusion_model_{exp_name}.pt')

        print(f"\n{exp_name} Test Results:")
        for metric_name, value in test_metrics.items():
            if metric_name not in ['ConfusionMatrix', 'InferenceTime', 'TimePerSample']:
                print(f"  {metric_name}: {value:.4f}")

    return results

# %%
# MAIN EXECUTION
if __name__ == "__main__":
    print("🚀 Starting experiments with CONSISTENT approach (same as gamblingmultimodal-16.py)")
    print("✅ Using same model loading, seed management, and DataLoader approach")
    
    # Jalankan semua eksperimen dengan pendekatan yang KONSISTEN dengan gamblingmultimodal-16.py
    results = run_experiments_consistent()
    
    # Create comparison DataFrame
    metrics_to_compare = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC']
    comparison_data = []
    
    for exp_name, exp_results in results.items():
        row = [exp_name]
        for metric in metrics_to_compare:
            row.append(exp_results['test_metrics'][metric])
        comparison_data.append(row)
    
    # Create DataFrame
    columns = ['Experiment'] + metrics_to_compare
    comparison_df = pd.DataFrame(comparison_data, columns=columns)
    
    print("\n" + "="*70)
    print("COMPARISON OF ALL EXPERIMENTS")
    print("="*70)
    print(comparison_df.round(4).to_string(index=False))
    
    # Find best experiment for each metric
    print("\n" + "="*70)
    print("BEST PERFORMANCE PER METRIC")
    print("="*70)
    for metric in metrics_to_compare:
        best_idx = comparison_df[metric].idxmax()
        best_exp = comparison_df.iloc[best_idx]['Experiment']
        best_score = comparison_df.iloc[best_idx][metric]
        print(f"{metric:12}: {best_exp:15} ({best_score:.4f})")
    
    # Export results to CSV
    comparison_df.to_csv('mlp_fusion_experiments_consistent.csv', index=False)
    print(f"\nResults exported to: mlp_fusion_experiments_consistent.csv")
    
    # Print summary
    print(f"\n{'='*70}")
    print("EXPERIMENT SUMMARY")
    print(f"{'='*70}")
    print(f"Total experiments run: {len(results)}")
    print(f"Neuron configurations tested: {[exp.split('_')[1] for exp in results.keys()]}")
    
    # Find best overall experiment
    best_overall = comparison_df.loc[comparison_df['F1 Score'].idxmax()]
    print(f"Best overall experiment: {best_overall['Experiment']} (F1: {best_overall['F1 Score']:.4f})")
    
    print(f"\nAll models saved with prefix: 'best_mlp_fusion_model_'")
    print(f"State dicts saved with prefix: 'best_mlp_fusion_model_*_state_dict.pt'")
    print(f"\n✅ This version is COMPLETELY CONSISTENT with gamblingmultimodal-16.py approach!")
    print(f"✅ Exp3_16neurons should produce IDENTICAL results to gamblingmultimodal-16.py") 

Missing values in Train:
 File Name         0
Extracted Text    0
Class             0
dtype: int64
Duplikasi di train: 8
Duplikasi di test : 0
Setelah dihapus:
Train: 3335 baris
Test : 700 baris
Distribusi label di Train set:
Class
non-judi    1740
judi        1595
Name: count, dtype: int64 

Distribusi label di Test set:
Class
judi        350
non-judi    350
Name: count, dtype: int64
Distribusi label setelah undersampling:
Class
judi        1595
non-judi    1595
Name: count, dtype: int64
Jumlah data train: 2552
Jumlah data valid: 638
Jumlah data test: 700
Distribusi label di Train set:
label
1    1276
0    1276
Name: count, dtype: int64 

Distribusi label di Validation set:
label
0    319
1    319
Name: count, dtype: int64 

Distribusi label di Test set:
label
1    350
0    350
Name: count, dtype: int64


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth
100%|██████████| 47.2M/47.2M [00:00<00:00, 90.4MB/s]
2025-07-09 00:23:06.983683: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752020587.173986      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752020587.227865      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]


===== IMAGE MODEL EVALUATION =====


Evaluating Image Model: 100%|██████████| 44/44 [00:33<00:00,  1.32it/s]



Test Set Metrics:
  Accuracy: 0.9729
  Precision: 0.9688
  Recall: 0.9771
  F1 Score: 0.9730

===== TEXT MODEL EVALUATION =====


Evaluating Text Model: 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]



Test Set Metrics:
  Accuracy: 0.9857
  Precision: 0.9885
  Recall: 0.9829
  F1 Score: 0.9857
🚀 Starting experiments with CONSISTENT approach (same as gamblingmultimodal-16.py)
✅ Using same model loading, seed management, and DataLoader approach


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUNNING EXP1_4NEURONS


[Epoch 1] Training: 100%|██████████| 160/160 [02:24<00:00,  1.11it/s]
                                                                     

Epoch 1/15 | Train Loss: 1.2077 | Val Loss: 1.1111 | ✅ Best model saved!


[Epoch 2] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                     

Epoch 2/15 | Train Loss: 1.0756 | Val Loss: 1.0552 | ✅ Best model saved!


[Epoch 3] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                     

Epoch 3/15 | Train Loss: 0.9632 | Val Loss: 0.8556 | ✅ Best model saved!


[Epoch 4] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                     

Epoch 4/15 | Train Loss: 0.8238 | Val Loss: 0.8326 | ✅ Best model saved!


[Epoch 5] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                     

Epoch 5/15 | Train Loss: 0.7503 | Val Loss: 0.7046 | ✅ Best model saved!


[Epoch 6] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                     

Epoch 6/15 | Train Loss: 0.6295 | Val Loss: 0.5715 | ✅ Best model saved!


[Epoch 7] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 7/15 | Train Loss: 0.5501 | Val Loss: 0.5154 | ✅ Best model saved!


[Epoch 8] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 8/15 | Train Loss: 0.4577 | Val Loss: 0.4446 | ✅ Best model saved!


[Epoch 9] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 9/15 | Train Loss: 0.3883 | Val Loss: 0.3770 | ✅ Best model saved!


[Epoch 10] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 10/15 | Train Loss: 0.3158 | Val Loss: 0.3130 | ✅ Best model saved!


[Epoch 11] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 11/15 | Train Loss: 0.2680 | Val Loss: 0.2637 | ✅ Best model saved!


[Epoch 12] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                      

Epoch 12/15 | Train Loss: 0.2288 | Val Loss: 0.2187 | ✅ Best model saved!


[Epoch 13] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 13/15 | Train Loss: 0.1885 | Val Loss: 0.1855 | ✅ Best model saved!


[Epoch 14] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 14/15 | Train Loss: 0.1621 | Val Loss: 0.1581 | ✅ Best model saved!


[Epoch 15] Training: 100%|██████████| 160/160 [01:53<00:00,  1.41it/s]
                                                                      

Epoch 15/15 | Train Loss: 0.1496 | Val Loss: 0.1403 | ✅ Best model saved!

✅ Loaded best model with validation loss: 0.1403
🔍 Best Validation Loss: 0.1403


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating MLP Fusion Model: 100%|██████████| 44/44 [00:28<00:00,  1.56it/s]



Total inference time: 28.20 seconds
Average time per sample: 40.28 ms

Exp1_4neurons Test Results:
  Accuracy: 0.9786
  Precision: 0.9614
  Recall: 0.9971
  F1 Score: 0.9790
  AUC: 0.9994


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUNNING EXP2_8NEURONS


[Epoch 1] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 1/15 | Train Loss: 0.0842 | Val Loss: 0.0568 | ✅ Best model saved!


[Epoch 2] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 2/15 | Train Loss: 0.0743 | Val Loss: 0.0541 | ✅ Best model saved!


[Epoch 3] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 3/15 | Train Loss: 0.0723 | Val Loss: 0.0518 | ✅ Best model saved!


[Epoch 4] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 4/15 | Train Loss: 0.0688 | Val Loss: 0.0507 | ✅ Best model saved!


[Epoch 5] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 5/15 | Train Loss: 0.0682 | Val Loss: 0.0499 | ✅ Best model saved!


[Epoch 6] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 6/15 | Train Loss: 0.0606 | Val Loss: 0.0486 | ✅ Best model saved!


[Epoch 7] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 7/15 | Train Loss: 0.0502 | Val Loss: 0.0479 | ✅ Best model saved!


[Epoch 8] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 8/15 | Train Loss: 0.0477 | Val Loss: 0.0469 | ✅ Best model saved!


[Epoch 9] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 9/15 | Train Loss: 0.0418 | Val Loss: 0.0432 | ✅ Best model saved!


[Epoch 10] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 10/15 | Train Loss: 0.0333 | Val Loss: 0.0439 | Patience: 1/3


[Epoch 11] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 11/15 | Train Loss: 0.0392 | Val Loss: 0.0449 | Patience: 2/3


[Epoch 12] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 12/15 | Train Loss: 0.0358 | Val Loss: 0.0428 | ✅ Best model saved!


[Epoch 13] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 13/15 | Train Loss: 0.0331 | Val Loss: 0.0414 | ✅ Best model saved!


[Epoch 14] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 14/15 | Train Loss: 0.0251 | Val Loss: 0.0461 | Patience: 1/3


[Epoch 15] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 15/15 | Train Loss: 0.0258 | Val Loss: 0.0436 | Patience: 2/3

✅ Loaded best model with validation loss: 0.0414
🔍 Best Validation Loss: 0.0414


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating MLP Fusion Model: 100%|██████████| 44/44 [00:27<00:00,  1.57it/s]



Total inference time: 27.98 seconds
Average time per sample: 39.98 ms

Exp2_8neurons Test Results:
  Accuracy: 0.9957
  Precision: 1.0000
  Recall: 0.9914
  F1 Score: 0.9957
  AUC: 0.9990


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUNNING EXP3_16NEURONS


[Epoch 1] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 1/15 | Train Loss: 1.1418 | Val Loss: 0.7574 | ✅ Best model saved!


[Epoch 2] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 2/15 | Train Loss: 0.7004 | Val Loss: 0.3525 | ✅ Best model saved!


[Epoch 3] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 3/15 | Train Loss: 0.3685 | Val Loss: 0.1347 | ✅ Best model saved!


[Epoch 4] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                     

Epoch 4/15 | Train Loss: 0.2182 | Val Loss: 0.0579 | ✅ Best model saved!


[Epoch 5] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 5/15 | Train Loss: 0.1166 | Val Loss: 0.0423 | ✅ Best model saved!


[Epoch 6] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 6/15 | Train Loss: 0.0842 | Val Loss: 0.0369 | ✅ Best model saved!


[Epoch 7] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 7/15 | Train Loss: 0.0500 | Val Loss: 0.0356 | ✅ Best model saved!


[Epoch 8] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                     

Epoch 8/15 | Train Loss: 0.0457 | Val Loss: 0.0373 | Patience: 1/3


[Epoch 9] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 9/15 | Train Loss: 0.0399 | Val Loss: 0.0354 | ✅ Best model saved!


[Epoch 10] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 10/15 | Train Loss: 0.0288 | Val Loss: 0.0351 | ✅ Best model saved!


[Epoch 11] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                      

Epoch 11/15 | Train Loss: 0.0242 | Val Loss: 0.0361 | Patience: 1/3


[Epoch 12] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                      

Epoch 12/15 | Train Loss: 0.0199 | Val Loss: 0.0356 | Patience: 2/3


[Epoch 13] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                      

Epoch 13/15 | Train Loss: 0.0150 | Val Loss: 0.0354 | Patience: 3/3

🛑 Early stopping at epoch 13. No improvement for 3 epochs.

✅ Loaded best model with validation loss: 0.0351
🔍 Best Validation Loss: 0.0351


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating MLP Fusion Model: 100%|██████████| 44/44 [00:28<00:00,  1.55it/s]



Total inference time: 28.34 seconds
Average time per sample: 40.49 ms

Exp3_16neurons Test Results:
  Accuracy: 0.9971
  Precision: 0.9971
  Recall: 0.9971
  F1 Score: 0.9971
  AUC: 0.9999


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUNNING EXP4_32NEURONS


[Epoch 1] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 1/15 | Train Loss: 0.3574 | Val Loss: 0.1268 | ✅ Best model saved!


[Epoch 2] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                     

Epoch 2/15 | Train Loss: 0.1798 | Val Loss: 0.0677 | ✅ Best model saved!


[Epoch 3] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 3/15 | Train Loss: 0.0859 | Val Loss: 0.0516 | ✅ Best model saved!


[Epoch 4] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 4/15 | Train Loss: 0.0544 | Val Loss: 0.0422 | ✅ Best model saved!


[Epoch 5] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 5/15 | Train Loss: 0.0334 | Val Loss: 0.0379 | ✅ Best model saved!


[Epoch 6] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 6/15 | Train Loss: 0.0271 | Val Loss: 0.0364 | ✅ Best model saved!


[Epoch 7] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                     

Epoch 7/15 | Train Loss: 0.0197 | Val Loss: 0.0356 | ✅ Best model saved!


[Epoch 8] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 8/15 | Train Loss: 0.0171 | Val Loss: 0.0342 | ✅ Best model saved!


[Epoch 9] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 9/15 | Train Loss: 0.0130 | Val Loss: 0.0349 | Patience: 1/3


[Epoch 10] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 10/15 | Train Loss: 0.0135 | Val Loss: 0.0343 | Patience: 2/3


[Epoch 11] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                      

Epoch 11/15 | Train Loss: 0.0119 | Val Loss: 0.0359 | Patience: 3/3

🛑 Early stopping at epoch 11. No improvement for 3 epochs.

✅ Loaded best model with validation loss: 0.0342
🔍 Best Validation Loss: 0.0342


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating MLP Fusion Model: 100%|██████████| 44/44 [00:28<00:00,  1.57it/s]



Total inference time: 28.05 seconds
Average time per sample: 40.07 ms

Exp4_32neurons Test Results:
  Accuracy: 0.9943
  Precision: 0.9915
  Recall: 0.9971
  F1 Score: 0.9943
  AUC: 0.9999


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUNNING EXP5_64NEURONS


[Epoch 1] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 1/15 | Train Loss: 0.8609 | Val Loss: 0.3780 | ✅ Best model saved!


[Epoch 2] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                     

Epoch 2/15 | Train Loss: 0.2759 | Val Loss: 0.1132 | ✅ Best model saved!


[Epoch 3] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 3/15 | Train Loss: 0.0755 | Val Loss: 0.0485 | ✅ Best model saved!


[Epoch 4] Training: 100%|██████████| 160/160 [01:55<00:00,  1.39it/s]
                                                                     

Epoch 4/15 | Train Loss: 0.0291 | Val Loss: 0.0384 | ✅ Best model saved!


[Epoch 5] Training: 100%|██████████| 160/160 [01:55<00:00,  1.38it/s]
                                                                     

Epoch 5/15 | Train Loss: 0.0192 | Val Loss: 0.0329 | ✅ Best model saved!


[Epoch 6] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 6/15 | Train Loss: 0.0130 | Val Loss: 0.0306 | ✅ Best model saved!


[Epoch 7] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 7/15 | Train Loss: 0.0106 | Val Loss: 0.0295 | ✅ Best model saved!


[Epoch 8] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                     

Epoch 8/15 | Train Loss: 0.0082 | Val Loss: 0.0289 | ✅ Best model saved!


[Epoch 9] Training: 100%|██████████| 160/160 [01:55<00:00,  1.38it/s]
                                                                     

Epoch 9/15 | Train Loss: 0.0056 | Val Loss: 0.0286 | ✅ Best model saved!


[Epoch 10] Training: 100%|██████████| 160/160 [01:56<00:00,  1.38it/s]
                                                                      

Epoch 10/15 | Train Loss: 0.0051 | Val Loss: 0.0288 | Patience: 1/3


[Epoch 11] Training: 100%|██████████| 160/160 [01:56<00:00,  1.38it/s]
                                                                      

Epoch 11/15 | Train Loss: 0.0045 | Val Loss: 0.0287 | Patience: 2/3


[Epoch 12] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                      

Epoch 12/15 | Train Loss: 0.0040 | Val Loss: 0.0279 | ✅ Best model saved!


[Epoch 13] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                      

Epoch 13/15 | Train Loss: 0.0039 | Val Loss: 0.0286 | Patience: 1/3


[Epoch 14] Training: 100%|██████████| 160/160 [01:54<00:00,  1.40it/s]
                                                                      

Epoch 14/15 | Train Loss: 0.0034 | Val Loss: 0.0293 | Patience: 2/3


[Epoch 15] Training: 100%|██████████| 160/160 [01:54<00:00,  1.39it/s]
                                                                      

Epoch 15/15 | Train Loss: 0.0038 | Val Loss: 0.0285 | Patience: 3/3

🛑 Early stopping at epoch 15. No improvement for 3 epochs.

✅ Loaded best model with validation loss: 0.0279
🔍 Best Validation Loss: 0.0279


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating MLP Fusion Model: 100%|██████████| 44/44 [00:28<00:00,  1.57it/s]



Total inference time: 28.07 seconds
Average time per sample: 40.09 ms

Exp5_64neurons Test Results:
  Accuracy: 0.9914
  Precision: 1.0000
  Recall: 0.9829
  F1 Score: 0.9914
  AUC: 1.0000

COMPARISON OF ALL EXPERIMENTS
    Experiment  Accuracy  Precision  Recall  F1 Score    AUC
 Exp1_4neurons    0.9786     0.9614  0.9971    0.9790 0.9994
 Exp2_8neurons    0.9957     1.0000  0.9914    0.9957 0.9990
Exp3_16neurons    0.9971     0.9971  0.9971    0.9971 0.9999
Exp4_32neurons    0.9943     0.9915  0.9971    0.9943 0.9999
Exp5_64neurons    0.9914     1.0000  0.9829    0.9914 1.0000

BEST PERFORMANCE PER METRIC
Accuracy    : Exp3_16neurons  (0.9971)
Precision   : Exp2_8neurons   (1.0000)
Recall      : Exp1_4neurons   (0.9971)
F1 Score    : Exp3_16neurons  (0.9971)
AUC         : Exp5_64neurons  (1.0000)

Results exported to: mlp_fusion_experiments_consistent.csv

EXPERIMENT SUMMARY
Total experiments run: 5
Neuron configurations tested: ['4neurons', '8neurons', '16neurons', '32neurons', '64