In [2]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Silence all warnings so the printed output stays clean
warnings.filterwarnings('ignore')

# Optional boosting libraries (ensemble models): when a package is not
# installed its name is bound to None, and get_models() skips that model.
try:
    import xgboost as xgb
except ImportError:
    xgb = None
try:
    import lightgbm as lgb
except ImportError:
    lgb = None

# ================= PATH CONFIGURATION =================
# Maps a feature-extractor name to the CSV file holding its features.
# The path points into the "Dataset CSV" folder and is relative, so it is
# resolved against the current working directory.
DATASETS = {
    'ResNet50': 'Dataset CSV/resnet50_features(normalized).csv'
}

# Seed passed to train_test_split and to every model for reproducible runs
RANDOM_STATE = 42
# Fraction of the samples held out as the test set
TEST_SIZE = 0.2

# ================= MODEL DEFINITIONS (AS IN THE FLOWCHART) =================
def get_models():
    """Build the classifiers that are compared in the experiment.

    Returns:
        dict: display name -> fresh, unfitted estimator. Insertion order is
        SVM, Logistic Regression, Decision Tree, Random Forest, then XGBoost
        and LightGBM when the corresponding package could be imported.
    """
    seed = RANDOM_STATE

    catalogue = {
        # --- GROUP 1: SINGLE MODELS ---
        # RBF-kernel SVM with probability estimates enabled.
        'SVM': SVC(kernel='rbf', probability=True, random_state=seed),
        # Logistic regression with C=0.1.
        # NOTE(review): an earlier comment claimed C=100.0 was used to offset
        # the effect of scaling, but the value actually passed is 0.1 --
        # confirm which one is intended.
        'Logistic Regression': LogisticRegression(
            C=0.1,
            max_iter=1000,
            random_state=seed,
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=seed),
        # --- GROUP 2: ENSEMBLE MODELS ---
        'Random Forest': RandomForestClassifier(n_estimators=200, random_state=seed),
    }

    # Boosting models are optional: the module-level names are None when the
    # package is not installed.
    if xgb is not None:
        catalogue['XGBoost'] = xgb.XGBClassifier(eval_metric='logloss', random_state=seed)
    if lgb is not None:
        catalogue['LightGBM'] = lgb.LGBMClassifier(random_state=seed, verbose=-1)

    return catalogue

# ================= MAIN ROUTINE =================
def _score_predictions(y_true, y_pred):
    """Return accuracy and class-weighted precision/recall/F1 as a dict."""
    weighted = {'average': 'weighted', 'zero_division': 0}
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, **weighted),
        'Recall': recall_score(y_true, y_pred, **weighted),
        'F1-Score': f1_score(y_true, y_pred, **weighted),
    }


def _print_leaderboard(results):
    """Print the result rows sorted by F1-Score and highlight the best model."""
    print("\n" + "="*80)
    print("🏆 HASIL SKENARIO 1: PERBANDINGAN MODEL (BASE)")
    print("="*80)

    # Highest F1-Score first
    df_results = (
        pd.DataFrame(results)
        .sort_values('F1-Score', ascending=False)
        .reset_index(drop=True)
    )

    # Format the metric columns as percentages on a copy, so the numeric
    # values stay available for the "best model" line below.
    output_table = df_results.copy()
    for col in ['Accuracy', 'Precision', 'Recall', 'F1-Score']:
        output_table[col] = output_table[col].map('{:.2%}'.format)

    print(output_table.to_string(index=False))

    best = df_results.iloc[0]
    print("\n" + "-"*80)
    print(f"🥇 BEST MODEL: {best['Model']}")
    print(f"   Score: {best['F1-Score']:.2%}")
    print("-"*80)


def run_scenario_1_base_model():
    """Train and compare every model from get_models() on each dataset.

    For each entry of DATASETS the CSV is loaded, split into stratified
    train/test sets, standardized with a scaler fitted on the training split
    only, and every model is fitted and scored on the test split. Progress
    and a final table sorted by F1-Score are printed to stdout.

    Datasets whose file is missing, or whose CSV lacks 'feature_*' columns
    or the 'label_encoded' column, are skipped with a warning.

    Returns:
        None. All results are reported via print().
    """
    # Read C from the estimator that is really used, so the banner cannot
    # drift from the configuration (it used to claim C=100 while the model
    # was built with C=0.1).
    logreg_c = get_models()['Logistic Regression'].C

    print("="*80)
    print("SKENARIO 1: BASE MODEL COMPARISON (ResNet50 Only)")
    print(f"Pipeline: Scaler + Tuned LogReg (C={logreg_c:g})")
    print("="*80)

    ensemble_names = {'Random Forest', 'XGBoost', 'LightGBM'}
    final_results = []

    # 1. Loop over the datasets (currently ResNet50 only)
    for dataset_name, csv_path in DATASETS.items():
        if not os.path.exists(csv_path):
            print(f"⚠️ File tidak ditemukan di path: {csv_path}")
            print("   Pastikan folder 'Dataset CSV' ada di lokasi yang sama dengan script ini.")
            continue

        print(f"\n📂 Memproses Fitur: {dataset_name}")
        print(f"   Path File: {csv_path}")

        # Load the data
        df = pd.read_csv(csv_path)
        feature_cols = [c for c in df.columns if c.startswith('feature_')]
        if not feature_cols or 'label_encoded' not in df.columns:
            # Without this guard the failure surfaces later as an opaque
            # KeyError / empty-array error from pandas or scikit-learn.
            print(f"⚠️ Kolom 'feature_*' atau 'label_encoded' tidak ditemukan di: {csv_path}")
            continue
        X = df[feature_cols].values
        y = df['label_encoded'].values

        # Stratified split keeps the class proportions in both subsets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
        )

        # Standardize (z-score) for the benefit of SVM and logistic
        # regression. The file name suggests the features were already
        # normalized; standardization is applied on top of that regardless.
        # The scaler is fitted on the training split only.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # 2. Train and evaluate each model (fresh estimators per dataset)
        for model_name, model in get_models().items():
            kategori = "ENSEMBLE" if model_name in ensemble_names else "SINGLE"

            # flush so the progress line is visible while fit() is running
            print(f"   👉 [{kategori}] Training {model_name}...", end=" ", flush=True)

            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
            scores = _score_predictions(y_test, y_pred)

            print(f"Done. (F1: {scores['F1-Score']:.2%})")

            final_results.append({
                'Feature Extractor': dataset_name,
                'Category': kategori,
                'Model': model_name,
                **scores,
            })

    # ================= RESULT OUTPUT =================
    if final_results:
        _print_leaderboard(final_results)

# Run the experiment only when this code is executed directly, not on import
if __name__ == '__main__':
    run_scenario_1_base_model()

SKENARIO 1: BASE MODEL COMPARISON (ResNet50 Only)
Pipeline: Scaler + Tuned LogReg (C=100)

📂 Memproses Fitur: ResNet50
   Path File: Dataset CSV/resnet50_features(normalized).csv
   👉 [SINGLE] Training SVM... Done. (F1: 90.49%)
   👉 [SINGLE] Training Logistic Regression... Done. (F1: 90.75%)
   👉 [SINGLE] Training Decision Tree... Done. (F1: 76.42%)
   👉 [ENSEMBLE] Training Random Forest... Done. (F1: 87.86%)
   👉 [ENSEMBLE] Training XGBoost... 

KeyboardInterrupt: 