In [1]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so convergence/deprecation warnings
# from the models are hidden as well -- confirm that is intended.
warnings.filterwarnings('ignore')
# Global configuration
# Seed passed as `random_state` to every estimator built in get_models().
RANDOM_STATE = 42
# Shared accumulator: evaluate_models() appends one metrics dict per model,
# and the summary cell turns the list into a DataFrame.
all_scenario_results = []

In [2]:
def get_models(scaled=False):
    """Build a fresh set of unfitted classifiers keyed by display name.

    Args:
        scaled: Whether the caller standardized the features. The flag is
            accepted but not consulted: every estimator, including Logistic
            Regression (C=0.01), is configured identically either way.

    Returns:
        dict: model name -> unfitted estimator, inserted in the order
        SVM, Logistic Regression, Decision Tree, Random Forest, XGBoost.
    """
    # Value passed as `C` to LogisticRegression; fixed regardless of `scaled`.
    logreg_c = 0.01

    return {
        'SVM': SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE),
        'Logistic Regression': LogisticRegression(
            C=logreg_c, max_iter=1000, random_state=RANDOM_STATE
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
        'Random Forest': RandomForestClassifier(
            n_estimators=200, random_state=RANDOM_STATE
        ),
        'XGBoost': xgb.XGBClassifier(
            eval_metric='logloss', random_state=RANDOM_STATE
        ),
    }

def evaluate_models(X_train, y_train, X_test, y_test, scenario_name, scaled_status):
    """Train every model from get_models() and record its test-set metrics.

    For each model this fits on the training data, predicts on the test data,
    computes accuracy plus weighted precision/recall/F1, prints the F1 score,
    and stores one result dict in the global `all_scenario_results` list.

    Re-running the same scenario is idempotent: rows previously stored under
    `scenario_name` are replaced rather than duplicated. The global list is
    only updated after all models have finished, so a failure part-way
    through leaves the earlier results untouched.

    Args:
        X_train: Training features passed to `model.fit`.
        y_train: Training labels passed to `model.fit`.
        X_test: Test features passed to `model.predict`.
        y_test: True test labels the predictions are scored against.
        scenario_name: Label stored under the 'Skenario' key and printed
            in the header line.
        scaled_status: Forwarded to get_models() as `scaled`.

    Returns:
        None. Results are published through `all_scenario_results`.
    """
    models = get_models(scaled=scaled_status)

    print(f"Running Skenario: {scenario_name}")
    print("-" * 60)

    # Collected locally first so the shared list is updated in one step.
    scenario_rows = []

    for name, model in models.items():
        kategori = "ENSEMBLE" if name in ['Random Forest', 'XGBoost'] else "SINGLE"

        # Training
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluation: weighted averages; zero_division=0 is passed so a class
        # with no predicted samples does not trigger a warning.
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        scenario_rows.append({
            'Skenario': scenario_name,
            'Category': kategori,
            'Model': name,
            'Accuracy': acc,
            'Precision': prec,
            'Recall': rec,
            'F1-Score': f1
        })
        print(f"{name:<20} | F1: {f1:.2%}")
    print("\n")

    # Replace any rows from an earlier run of this scenario. Slice assignment
    # mutates the existing global list, so no `global` statement is needed
    # and other references to the list stay valid.
    all_scenario_results[:] = [
        row for row in all_scenario_results if row['Skenario'] != scenario_name
    ] + scenario_rows

In [3]:
# ================= COMBINATION A =================
# Input: non-normalized CSV
# Processing: no scaler
train_path = 'Dataset-Split/train_efficientnet_features(non normalized).csv'
test_path = 'Dataset-Split/test_efficientnet_features(non normalized).csv'

# Both splits are read below, so verify both files before loading anything;
# checking only the train file would let a missing test file crash read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train file.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Run the evaluation on the raw features (scaled_status=False)
    evaluate_models(X_train, y_train, X_test, y_test,
                    scenario_name="Non-Norm + No Scaler",
                    scaled_status=False)
else:
    # Report every missing file, not just the training one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Non-Norm + No Scaler
------------------------------------------------------------
SVM                  | F1: 89.53%
Logistic Regression  | F1: 84.70%
Decision Tree        | F1: 73.76%
Random Forest        | F1: 86.85%
XGBoost              | F1: 89.70%




In [4]:
# ================= COMBINATION B =================
# Input: non-normalized CSV
# Processing: with StandardScaler
train_path = 'Dataset-Split/train_efficientnet_features(non normalized).csv'
test_path = 'Dataset-Split/test_efficientnet_features(non normalized).csv'

# Both splits are read below, so verify both files before loading anything;
# checking only the train file would let a missing test file crash read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train file.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Fit the scaler on the training split only, then apply the same
    # transform to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Run the evaluation on the standardized features (scaled_status=True)
    evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test,
                    scenario_name="Non-Norm + With Scaler",
                    scaled_status=True)
else:
    # Report every missing file, not just the training one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Non-Norm + With Scaler
------------------------------------------------------------
SVM                  | F1: 90.57%
Logistic Regression  | F1: 91.10%
Decision Tree        | F1: 73.76%
Random Forest        | F1: 86.85%
XGBoost              | F1: 89.70%




In [5]:
# ================= COMBINATION C =================
# Input: normalized CSV (EfficientNet's built-in normalization)
# Processing: no scaler
train_path = 'Dataset-Split/train_efficientnet_features(normalized).csv'
test_path = 'Dataset-Split/test_efficientnet_features(normalized).csv'

# Both splits are read below, so verify both files before loading anything;
# checking only the train file would let a missing test file crash read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train file.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Run the evaluation on the features as stored (scaled_status=False)
    evaluate_models(X_train, y_train, X_test, y_test,
                    scenario_name="Normalized + No Scaler",
                    scaled_status=False)
else:
    # Report every missing file, not just the training one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Normalized + No Scaler
------------------------------------------------------------
SVM                  | F1: 89.81%
Logistic Regression  | F1: 86.99%
Decision Tree        | F1: 76.81%
Random Forest        | F1: 86.86%
XGBoost              | F1: 89.89%




In [6]:
# ================= COMBINATION D =================
# Input: normalized CSV
# Processing: with StandardScaler
train_path = 'Dataset-Split/train_efficientnet_features(normalized).csv'
test_path = 'Dataset-Split/test_efficientnet_features(normalized).csv'

# Both splits are read below, so verify both files before loading anything;
# checking only the train file would let a missing test file crash read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train file.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Fit the scaler on the training split only, then apply the same
    # transform to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Run the evaluation on the standardized features (scaled_status=True)
    evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test,
                    scenario_name="Normalized + With Scaler",
                    scaled_status=True)
else:
    # Report every missing file, not just the training one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Normalized + With Scaler
------------------------------------------------------------
SVM                  | F1: 91.79%
Logistic Regression  | F1: 91.27%
Decision Tree        | F1: 76.81%
Random Forest        | F1: 86.86%
XGBoost              | F1: 89.89%




In [7]:
# ================= RESULTS SUMMARY =================
if not all_scenario_results:
    print("Belum ada hasil yang dijalankan.")
else:
    metric_cols = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    # Rank every (scenario, model) row from highest to lowest F1-Score.
    df_final = (
        pd.DataFrame(all_scenario_results)
        .sort_values(by='F1-Score', ascending=False)
        .reset_index(drop=True)
    )

    print(heavy_rule)
    print("HASIL AKHIR: PERBANDINGAN KOMBINASI PREPROCESSING (EFFICIENTNET)")
    print(heavy_rule)

    # Display copy with the metric columns rendered as percentage strings;
    # df_final itself keeps the raw float values.
    output_table = df_final.assign(
        **{col: df_final[col].map('{:.2%}'.format) for col in metric_cols}
    )

    print(output_table.to_string(index=False))

    # After the descending sort, the first row is the best combination.
    best = df_final.iloc[0]
    print("\n" + light_rule)
    print(f"BEST COMBINATION: {best['Skenario']}")
    print(f"MODEL           : {best['Model']}")
    print(f"F1-SCORE        : {best['F1-Score']:.2%}")
    print(light_rule)

HASIL AKHIR: PERBANDINGAN KOMBINASI PREPROCESSING (EFFICIENTNET)
                Skenario Category               Model Accuracy Precision Recall F1-Score
Normalized + With Scaler   SINGLE                 SVM   91.82%    91.90% 91.82%   91.79%
Normalized + With Scaler   SINGLE Logistic Regression   91.23%    91.33% 91.23%   91.27%
  Non-Norm + With Scaler   SINGLE Logistic Regression   91.11%    91.11% 91.11%   91.10%
  Non-Norm + With Scaler   SINGLE                 SVM   90.64%    90.67% 90.64%   90.57%
Normalized + With Scaler ENSEMBLE             XGBoost   89.93%    89.90% 89.93%   89.89%
  Normalized + No Scaler ENSEMBLE             XGBoost   89.93%    89.90% 89.93%   89.89%
  Normalized + No Scaler   SINGLE                 SVM   89.81%    89.88% 89.81%   89.81%
  Non-Norm + With Scaler ENSEMBLE             XGBoost   89.81%    89.77% 89.81%   89.70%
    Non-Norm + No Scaler ENSEMBLE             XGBoost   89.81%    89.77% 89.81%   89.70%
    Non-Norm + No Scaler   SINGLE            