In [None]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
# Silence every warning for the rest of the session. Note that this is global:
# it also hides warnings emitted by the libraries imported above (for example
# convergence or deprecation warnings), not just cosmetic noise.
warnings.filterwarnings('ignore')

In [None]:
# ================= PATH CONFIGURATION =================
# CSV files holding the training and test splits. Skenario_1() reads the
# columns whose names start with 'feature_' plus a target column from them.
# Judging by the file names these are presumably ResNet50-extracted features
# that were already normalized upstream -- TODO confirm.
TRAIN_FILE = 'Dataset Split/train_resnet50_features(normalized).csv'
TEST_FILE  = 'Dataset Split/test_resnet50_features(normalized).csv'
# Seed passed as random_state to every estimator built in get_models().
RANDOM_STATE = 42

def get_models():
    """Build the set of classifiers compared in the experiment.

    Returns:
        dict: Maps a display name to a fresh, unfitted estimator. Insertion
        order is SVM, Logistic Regression, Decision Tree, Random Forest,
        XGBoost, LightGBM. Every estimator receives ``RANDOM_STATE`` as its
        ``random_state``.
    """
    seed = RANDOM_STATE
    return {
        'SVM': SVC(kernel='rbf', probability=True, random_state=seed),
        'Logistic Regression': LogisticRegression(
            C=0.01, max_iter=1000, random_state=seed
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=seed),
        'Random Forest': RandomForestClassifier(
            n_estimators=200, random_state=seed
        ),
        'XGBoost': xgb.XGBClassifier(eval_metric='logloss', random_state=seed),
        'LightGBM': lgb.LGBMClassifier(random_state=seed, verbose=-1),
    }

def Skenario_1(train_file=None, test_file=None):
    """Scenario 1: fit every model from ``get_models()`` and compare test metrics.

    Reads the train and test CSV files, takes the columns whose names start
    with ``feature_`` as inputs and ``label_encoded`` as the target (falling
    back to the last column of the training frame when ``label_encoded`` is
    absent), standardizes the inputs with a scaler fitted on the training
    split only, then fits each model and prints a table of accuracy and
    weighted precision / recall / F1 sorted by F1-score, followed by the
    best-scoring model.

    Args:
        train_file: Path of the training CSV. ``None`` (default) means the
            module-level ``TRAIN_FILE``.
        test_file: Path of the test CSV. ``None`` (default) means the
            module-level ``TEST_FILE``.

    Returns:
        None. All results are printed. If an input file does not exist, a
        message naming the missing path(s) is printed and the function
        returns early.

    Raises:
        ValueError: If the training CSV has no column starting with
            ``feature_``.
    """
    # Resolve the defaults at call time so later edits to the config
    # constants are picked up without redefining this function.
    if train_file is None:
        train_file = TRAIN_FILE
    if test_file is None:
        test_file = TEST_FILE

    # Check the input files and report exactly which ones are missing.
    missing_paths = [str(p) for p in (train_file, test_file) if not os.path.exists(p)]
    if missing_paths:
        print(f"File tidak ditemukan. Cek path: {', '.join(missing_paths)}")
        return

    # Load the data.
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)

    # Separate features and target.
    feature_cols = [c for c in df_train.columns if c.startswith('feature_')]
    if not feature_cols:
        # Fail early with a clear message instead of an opaque scaler error
        # about an array with zero features.
        raise ValueError(
            f"Tidak ada kolom berawalan 'feature_' di file: {train_file}"
        )
    target_col = 'label_encoded'
    if target_col not in df_train.columns:
        target_col = df_train.columns[-1]

    X_train = df_train[feature_cols].values
    y_train = df_train[target_col].values
    X_test = df_test[feature_cols].values
    y_test = df_test[target_col].values

    # Scaling: fit on the training split only, then apply to the test split.
    # NOTE(review): the default file names say "(normalized)"; this
    # standardizes the features again -- confirm that is intended.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Training loop.
    ensemble_names = {'Random Forest', 'XGBoost', 'LightGBM'}
    weighted = {'average': 'weighted', 'zero_division': 0}
    final_results = []

    for model_name, model in get_models().items():
        # Fit and predict.
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        # Metrics (precision / recall / F1 are class-weighted averages).
        final_results.append({
            'Category': "ENSEMBLE" if model_name in ensemble_names else "SINGLE",
            'Model': model_name,
            'Accuracy': accuracy_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred, **weighted),
            'Recall': recall_score(y_test, y_pred, **weighted),
            'F1-Score': f1_score(y_test, y_pred, **weighted),
        })

    # Print the results only.
    if final_results:
        print("="*80)
        print("HASIL SKENARIO 1: PERBANDINGAN MODEL (RESNET50)")
        print("="*80)

        df_results = pd.DataFrame(final_results)
        df_results = df_results.sort_values('F1-Score', ascending=False).reset_index(drop=True)

        # Format a copy as percentages so df_results keeps numeric values
        # for the best-model lookup below.
        output_table = df_results.copy()
        for col in ['Accuracy', 'Precision', 'Recall', 'F1-Score']:
            output_table[col] = output_table[col].map('{:.2%}'.format)

        print(output_table.to_string(index=False))

        # Rows are sorted by F1-score descending, so the first row is the best.
        best = df_results.iloc[0]
        print("-" * 80)
        print(f"BEST MODEL: {best['Model']}")
        print(f"F1-Score  : {best['F1-Score']:.2%}")
        print("-" * 80)

# Entry point: run the scenario only when this code executes as the main
# module, not when it is imported from elsewhere.
if __name__ == '__main__':
    Skenario_1()

HASIL SKENARIO 1: PERBANDINGAN MODEL (RESNET50)
Category               Model Accuracy Precision Recall F1-Score
ENSEMBLE            LightGBM   91.35%    91.49% 91.35%   91.32%
  SINGLE                 SVM   90.52%    90.73% 90.52%   90.49%
  SINGLE Logistic Regression   90.40%    90.45% 90.40%   90.42%
ENSEMBLE             XGBoost   90.05%    90.09% 90.05%   89.99%
ENSEMBLE       Random Forest   87.91%    88.14% 87.91%   87.86%
  SINGLE       Decision Tree   76.42%    76.43% 76.42%   76.42%
--------------------------------------------------------------------------------
BEST MODEL: LightGBM
F1-Score  : 91.32%
--------------------------------------------------------------------------------
