In [2]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Silence every warning for the rest of the kernel session. This is installed
# before the optional imports below so that it is already active when they run.
warnings.filterwarnings('ignore')

# Optional boosting libraries: each alias defaults to None and is rebound to
# the module only when the import succeeds. Downstream code tests `is not None`.
xgb = None
lgb = None
try:
    import xgboost as xgb
except ImportError:
    pass
try:
    import lightgbm as lgb
except ImportError:
    pass

# Global configuration
RANDOM_STATE = 42
# Module-level list that accumulates result rows produced by every cell
all_scenario_results = list()

In [3]:
def get_models(scaled=False):
    """
    Build a fresh dict of unfitted classifiers keyed by display name.

    Logistic Regression is created with C=100 when ``scaled`` is truthy and
    C=1.0 otherwise; every other model is configured the same in both cases.
    The XGBoost and LightGBM entries are present only when the corresponding
    module alias (``xgb`` / ``lgb``) is not None.

    Returns:
        dict mapping model name -> estimator instance, in the order
        SVM, Logistic Regression, Decision Tree, Random Forest,
        then XGBoost and LightGBM when available.
    """
    logreg_c = 1.0 if not scaled else 100.0

    # Core scikit-learn models; all share the global seed.
    models = {
        'SVM': SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE),
        'Logistic Regression': LogisticRegression(
            C=logreg_c, max_iter=1000, random_state=RANDOM_STATE
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
        'Random Forest': RandomForestClassifier(
            n_estimators=200, random_state=RANDOM_STATE
        ),
    }

    # Boosting models are optional extras.
    if xgb is not None:
        models['XGBoost'] = xgb.XGBClassifier(
            eval_metric='logloss', random_state=RANDOM_STATE
        )
    if lgb is not None:
        models['LightGBM'] = lgb.LGBMClassifier(
            random_state=RANDOM_STATE, verbose=-1
        )

    return models

def evaluate_models(X_train, y_train, X_test, y_test, scenario_name, scaled_status):
    """
    Fit every model from get_models() and record its test-split metrics.

    For each model this fits on (X_train, y_train), predicts on X_test, and
    appends one row to the module-level ``all_scenario_results`` list holding
    the scenario name, the model category ("ENSEMBLE" or "SINGLE"), the model
    name, accuracy, and weighted-average precision / recall / F1.

    Rows already recorded under the same ``scenario_name`` are removed first,
    so re-executing a scenario cell replaces its results instead of stacking
    duplicates into the summary table.

    Args:
        X_train, y_train: training features and labels passed to ``fit``.
        X_test, y_test: held-out features and labels used for scoring.
        scenario_name: label stored in the 'Skenario' field of each row.
        scaled_status: forwarded to get_models(scaled=...); selects the
            Logistic Regression regularisation strength.

    Returns:
        None. Results are written to ``all_scenario_results`` and a one-line
        F1 summary per model is printed.
    """
    models = get_models(scaled=scaled_status)
    ensemble_names = ('Random Forest', 'XGBoost', 'LightGBM')

    # Purge stale rows for this scenario. Slice assignment mutates the shared
    # list in place, so no `global` rebinding is needed and other references
    # to the list stay valid.
    all_scenario_results[:] = [
        row for row in all_scenario_results if row['Skenario'] != scenario_name
    ]

    print(f"Running Skenario: {scenario_name}")
    print("-" * 60)

    for name, model in models.items():
        kategori = "ENSEMBLE" if name in ensemble_names else "SINGLE"

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Weighted averaging accounts for class imbalance; zero_division=0
        # scores classes with no predicted samples as 0 instead of warning.
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        # Appended per model so that results gathered before a mid-run
        # failure are still kept.
        all_scenario_results.append({
            'Skenario': scenario_name,
            'Category': kategori,
            'Model': name,
            'Accuracy': acc,
            'Precision': prec,
            'Recall': rec,
            'F1-Score': f1
        })
        print(f"{name:<20} | F1: {f1:.2%}")
    print("\n")

In [4]:
# ================= COMBINATION A (RESNET) =================
# Input: non-normalized feature CSVs
# Processing: no scaler
train_path = 'Dataset Split/train_resnet50_features(non normalized).csv'
test_path = 'Dataset Split/test_resnet50_features(non normalized).csv'

# Both splits are required. Checking only the train file would let a missing
# test file surface as an unhandled FileNotFoundError from pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Features are the columns prefixed 'feature_'; the label is
    # 'label_encoded' when present, otherwise the last column of the train CSV.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    evaluate_models(X_train, y_train, X_test, y_test,
                    scenario_name="Non-Norm + No Scaler",
                    scaled_status=False)
else:
    # Report every missing file, not just the first one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

File tidak ditemukan: Dataset Split/train_resnet50_features(non normalized).csv


In [5]:
# ================= COMBINATION B (RESNET) =================
# Input: non-normalized feature CSVs
# Processing: with StandardScaler
train_path = 'Dataset Split/train_resnet50_features(non normalized).csv'
test_path = 'Dataset Split/test_resnet50_features(non normalized).csv'

# Both splits are required. Checking only the train file would let a missing
# test file surface as an unhandled FileNotFoundError from pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Features are the columns prefixed 'feature_'; the label is
    # 'label_encoded' when present, otherwise the last column of the train CSV.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Fit the scaler on the training split only, then apply the same
    # transformation to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test,
                    scenario_name="Non-Norm + With Scaler",
                    scaled_status=True)
else:
    # Report every missing file, not just the first one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

File tidak ditemukan: Dataset Split/train_resnet50_features(non normalized).csv


In [6]:
# ================= COMBINATION C (RESNET) =================
# Input: normalized feature CSVs
# Processing: no scaler
train_path = 'Dataset Split/train_resnet50_features(normalized).csv'
test_path = 'Dataset Split/test_resnet50_features(normalized).csv'

# Both splits are required. Checking only the train file would let a missing
# test file surface as an unhandled FileNotFoundError from pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Features are the columns prefixed 'feature_'; the label is
    # 'label_encoded' when present, otherwise the last column of the train CSV.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    evaluate_models(X_train, y_train, X_test, y_test,
                    scenario_name="Normalized + No Scaler",
                    scaled_status=False)
else:
    # Report every missing file, not just the first one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

File tidak ditemukan: Dataset Split/train_resnet50_features(normalized).csv


In [7]:
# ================= COMBINATION D (RESNET) =================
# Input: normalized feature CSVs
# Processing: with StandardScaler
train_path = 'Dataset Split/train_resnet50_features(normalized).csv'
test_path = 'Dataset Split/test_resnet50_features(normalized).csv'

# Both splits are required. Checking only the train file would let a missing
# test file surface as an unhandled FileNotFoundError from pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Features are the columns prefixed 'feature_'; the label is
    # 'label_encoded' when present, otherwise the last column of the train CSV.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Fit the scaler on the training split only, then apply the same
    # transformation to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test,
                    scenario_name="Normalized + With Scaler",
                    scaled_status=True)
else:
    # Report every missing file, not just the first one.
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

File tidak ditemukan: Dataset Split/train_resnet50_features(normalized).csv


In [8]:
# ================= RESULTS SUMMARY (RESNET50) =================
if not all_scenario_results:
    print("Belum ada hasil yang dijalankan.")
else:
    # Rank every recorded (scenario, model) row by F1-Score, highest first,
    # with a fresh 0..n-1 index.
    df_final = pd.DataFrame(all_scenario_results).sort_values(
        by='F1-Score', ascending=False, ignore_index=True
    )

    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print("HASIL AKHIR: PERBANDINGAN KOMBINASI PREPROCESSING (RESNET50)")
    print(heavy_rule)

    # Display copy with the metric columns rendered as percentage strings;
    # df_final keeps the raw floats for the lookup below.
    output_table = df_final.copy()
    for col in ('Accuracy', 'Precision', 'Recall', 'F1-Score'):
        output_table[col] = output_table[col].map(lambda value: f"{value:.2%}")

    print(output_table.to_string(index=False))

    # After the descending sort, the first row holds the top F1-Score.
    best = df_final.iloc[0]
    print(light_rule)
    print(f"BEST COMBINATION: {best['Skenario']}")
    print(f"MODEL           : {best['Model']} ({best['Category']})")
    print(f"F1-SCORE        : {best['F1-Score']:.2%}")
    print(light_rule)

Belum ada hasil yang dijalankan.
