In [1]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so convergence/deprecation warnings
# raised while fitting the models below are hidden as well.
warnings.filterwarnings('ignore')
# Global configuration
# Seed passed as random_state to every estimator built in get_models().
RANDOM_STATE = 42
# Accumulator of per-model metric rows: evaluate_models() appends to it and the
# recap cell at the end of the notebook turns it into a DataFrame.
all_scenario_results = []

In [2]:
def get_models(scaled=False):
    """
    Build the set of fresh, unfitted classifiers compared in every scenario.

    Every estimator is seeded with RANDOM_STATE. Logistic Regression uses a
    fixed inverse regularisation strength of C=0.01 for all scenarios.

    NOTE(review): the previous docstring stated "C=100 if scaled, C=1.0 if
    not scaled", but the code never switched on `scaled` and always used
    C=0.01. The fixed value is kept here so the metrics already recorded in
    this notebook remain reproducible; confirm which setting was intended.

    Args:
        scaled: Whether the caller standardised the features. Accepted for
            interface compatibility only; it currently has no effect on the
            returned models.

    Returns:
        dict mapping a display name to an unfitted estimator, in the order
        SVM, Logistic Regression, Decision Tree, Random Forest, XGBoost.
    """
    # Single source of truth for the Logistic Regression regularisation.
    logreg_c = 0.01

    return {
        'SVM': SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE),
        'Logistic Regression': LogisticRegression(
            C=logreg_c, max_iter=1000, random_state=RANDOM_STATE
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
        'Random Forest': RandomForestClassifier(
            n_estimators=200, random_state=RANDOM_STATE
        ),
        'XGBoost': xgb.XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE),
    }

def evaluate_models(X_train, y_train, X_test, y_test, scenario_name, scaled_status):
    """
    Fit every model from get_models() and record its test-set metrics.

    For each model the function fits on the training split, predicts the test
    split, computes accuracy plus weighted precision/recall/F1 (with
    zero_division=0), prints the F1 score, and stores one row per model in the
    module-level list `all_scenario_results`.

    Re-running the same scenario replaces its earlier rows instead of
    appending duplicates, so executing a notebook cell twice does not inflate
    the final recap table. The replacement happens only after every model has
    been evaluated; if a fit raises, previously stored rows are left intact.

    Args:
        X_train: Training feature matrix passed to `model.fit`.
        y_train: Training labels passed to `model.fit`.
        X_test: Test feature matrix passed to `model.predict`.
        y_test: True test labels used for the metrics.
        scenario_name: Label stored under the 'Skenario' key of each row and
            used to identify rows from an earlier run of the same scenario.
        scaled_status: Forwarded to get_models() as `scaled`.

    Returns:
        None. Results are written to `all_scenario_results` and printed.
    """
    models = get_models(scaled=scaled_status)

    print(f"Running Skenario: {scenario_name}")
    print("-" * 60)

    scenario_rows = []
    for name, model in models.items():
        kategori = "ENSEMBLE" if name in ['Random Forest', 'XGBoost'] else "SINGLE"

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        scenario_rows.append({
            'Skenario': scenario_name,
            'Category': kategori,
            'Model': name,
            'Accuracy': acc,
            'Precision': prec,
            'Recall': rec,
            'F1-Score': f1
        })
        print(f"{name:<20} | F1: {f1:.2%}")

    # Drop rows left over from a previous run of this scenario, then add the
    # fresh ones. Slice assignment mutates the shared list in place, so no
    # `global` statement is needed and other references stay valid.
    all_scenario_results[:] = [
        row for row in all_scenario_results if row['Skenario'] != scenario_name
    ] + scenario_rows
    print("\n")

In [3]:
# ================= COMBINATION A (RESNET) =================
# Input: non-normalized feature CSVs
# Processing: no scaler
train_path = 'Dataset-Split/train_resnet50_features(non normalized).csv'
test_path = 'Dataset-Split/test_resnet50_features(non normalized).csv'

# Both splits are read below, so both files must exist. Checking only the
# training file would let a missing test file crash inside pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed with 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train set.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    evaluate_models(X_train, y_train, X_test, y_test,
                   scenario_name="Non-Norm + No Scaler",
                   scaled_status=False)
else:
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Non-Norm + No Scaler
------------------------------------------------------------
SVM                  | F1: 86.70%
Logistic Regression  | F1: 86.57%
Decision Tree        | F1: 75.18%
Random Forest        | F1: 87.46%
XGBoost              | F1: 88.55%




In [4]:
# ================= COMBINATION B (RESNET) =================
# Input: non-normalized feature CSVs
# Processing: with StandardScaler
train_path = 'Dataset-Split/train_resnet50_features(non normalized).csv'
test_path = 'Dataset-Split/test_resnet50_features(non normalized).csv'

# Both splits are read below, so both files must exist. Checking only the
# training file would let a missing test file crash inside pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed with 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train set.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so no test-set information leaks into preprocessing.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test,
                   scenario_name="Non-Norm + With Scaler",
                   scaled_status=True)
else:
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Non-Norm + With Scaler
------------------------------------------------------------
SVM                  | F1: 90.39%
Logistic Regression  | F1: 90.73%
Decision Tree        | F1: 75.18%
Random Forest        | F1: 87.46%
XGBoost              | F1: 88.55%




In [5]:
# ================= COMBINATION C (RESNET) =================
# Input: normalized feature CSVs
# Processing: no scaler
train_path = 'Dataset-Split/train_resnet50_features(normalized).csv'
test_path = 'Dataset-Split/test_resnet50_features(normalized).csv'

# Both splits are read below, so both files must exist. Checking only the
# training file would let a missing test file crash inside pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed with 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train set.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    evaluate_models(X_train, y_train, X_test, y_test,
                   scenario_name="Normalized + No Scaler",
                   scaled_status=False)
else:
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Normalized + No Scaler
------------------------------------------------------------
SVM                  | F1: 88.72%
Logistic Regression  | F1: 88.15%
Decision Tree        | F1: 76.42%
Random Forest        | F1: 87.84%
XGBoost              | F1: 90.48%




In [6]:
# ================= COMBINATION D (RESNET) =================
# Input: normalized feature CSVs
# Processing: with StandardScaler
train_path = 'Dataset-Split/train_resnet50_features(normalized).csv'
test_path = 'Dataset-Split/test_resnet50_features(normalized).csv'

# Both splits are read below, so both files must exist. Checking only the
# training file would let a missing test file crash inside pd.read_csv.
missing_paths = [p for p in (train_path, test_path) if not os.path.exists(p)]

if not missing_paths:
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    # Feature columns are those prefixed with 'feature_'; the target is
    # 'label_encoded' when present, otherwise the last column of the train set.
    feat_cols = [c for c in df_train.columns if c.startswith('feature_')]
    target_col = 'label_encoded' if 'label_encoded' in df_train.columns else df_train.columns[-1]

    X_train = df_train[feat_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feat_cols].values
    y_test = df_test[target_col].values

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so no test-set information leaks into preprocessing.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test,
                   scenario_name="Normalized + With Scaler",
                   scaled_status=True)
else:
    for missing_path in missing_paths:
        print(f"File tidak ditemukan: {missing_path}")

Running Skenario: Normalized + With Scaler
------------------------------------------------------------
SVM                  | F1: 90.49%
Logistic Regression  | F1: 90.42%
Decision Tree        | F1: 76.42%
Random Forest        | F1: 87.84%
XGBoost              | F1: 90.12%




In [7]:
# ================= RESULTS RECAP (RESNET50) =================
# Ranks every recorded (scenario, model) row by F1 and reports the winner.
if not all_scenario_results:
    print("Belum ada hasil yang dijalankan.")
else:
    # One row per evaluated model, best F1-Score first.
    df_final = (
        pd.DataFrame(all_scenario_results)
        .sort_values(by='F1-Score', ascending=False)
        .reset_index(drop=True)
    )

    heavy_rule = "=" * 80
    print(heavy_rule)
    print("HASIL AKHIR: PERBANDINGAN KOMBINASI PREPROCESSING (RESNET50)")
    print(heavy_rule)

    # Display copy only: metrics become percentage strings here, while
    # df_final keeps the raw floats for any further computation.
    as_percent = '{:.2%}'.format
    output_table = df_final.copy()
    for col in ('Accuracy', 'Precision', 'Recall', 'F1-Score'):
        output_table[col] = output_table[col].map(as_percent)

    print(output_table.to_string(index=False))

    # The frame is sorted descending, so the first row is the top performer.
    best = df_final.iloc[0]
    light_rule = "-" * 80
    print(light_rule)
    print(f"BEST COMBINATION: {best['Skenario']}")
    print(f"MODEL           : {best['Model']} ({best['Category']})")
    print(f"F1-SCORE        : {best['F1-Score']:.2%}")
    print(light_rule)

HASIL AKHIR: PERBANDINGAN KOMBINASI PREPROCESSING (RESNET50)
                Skenario Category               Model Accuracy Precision Recall F1-Score
  Non-Norm + With Scaler   SINGLE Logistic Regression   90.76%    90.71% 90.76%   90.73%
Normalized + With Scaler   SINGLE                 SVM   90.52%    90.73% 90.52%   90.49%
  Normalized + No Scaler ENSEMBLE             XGBoost   90.52%    90.57% 90.52%   90.48%
Normalized + With Scaler   SINGLE Logistic Regression   90.40%    90.45% 90.40%   90.42%
  Non-Norm + With Scaler   SINGLE                 SVM   90.40%    90.47% 90.40%   90.39%
Normalized + With Scaler ENSEMBLE             XGBoost   90.17%    90.17% 90.17%   90.12%
  Normalized + No Scaler   SINGLE                 SVM   88.74%    89.29% 88.74%   88.72%
  Non-Norm + With Scaler ENSEMBLE             XGBoost   88.63%    88.63% 88.63%   88.55%
    Non-Norm + No Scaler ENSEMBLE             XGBoost   88.63%    88.63% 88.63%   88.55%
  Normalized + No Scaler   SINGLE Logistic Regres