In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import numpy as np

In [2]:
fitur = pd.read_csv("fitur.csv")
fitur.head()

Unnamed: 0,id,ptb,lbp_0,lbp_1,lbp_2,lbp_3,lbp_4,lbp_5,lbp_6,lbp_7,...,pca_40,pca_41,pca_42,pca_43,pca_44,pca_45,pca_46,pca_47,pca_48,pca_49
0,1000,0,0.004856,0.003116,0.004787,0.00798,0.010015,0.016832,0.027246,0.052803,...,-0.011331,0.001496,0.010384,0.000556,0.003336,0.010161,-0.008201,0.006299,-0.007575,0.006094
1,1001,0,0.007681,0.005304,0.007644,0.012449,0.014378,0.024201,0.037289,0.069132,...,-0.186799,-0.05009,-0.372538,0.213923,0.023157,-0.554682,0.203674,-0.015846,-0.339592,0.298513
2,1002,0,0.006378,0.004307,0.006257,0.010432,0.012042,0.020382,0.032347,0.062255,...,0.339326,0.056459,-0.071077,0.104583,-0.097675,-0.356484,0.328379,-0.18855,0.450332,-0.136584
3,1003,1,0.006966,0.004943,0.006512,0.010015,0.012441,0.020827,0.033185,0.060859,...,0.261261,-0.593214,-0.371235,-0.177743,-0.21315,-0.311263,0.303972,-0.387887,-0.228045,-0.043416
4,1004,1,0.007841,0.006019,0.007455,0.011519,0.013818,0.022939,0.036557,0.064593,...,-0.259318,0.442234,0.347933,0.856989,-0.183228,0.069757,-0.584401,-0.126902,-0.234651,-0.782384


In [3]:
X = fitur.drop(columns=["ptb", "id"])
y = fitur["ptb"]

X_scalled = StandardScaler().fit_transform(X)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X_scalled, y, test_size=0.3, random_state=1)

In [5]:
# ============================================
# METODE 1: Grid Search CV (Recommended)
# ============================================
print("=" * 50)
print("METODE 1: Grid Search dengan Cross Validation")
print("=" * 50)

# Define parameter grid
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100, 500],               # Regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],             # Kernel types umum
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1],    # Kernel coefficient
    'degree': [2, 3, 4, 5],                          # Degree untuk poly kernel
    'coef0': [0.0, 1.0],                             # Independent term
    'class_weight': ['balanced', None]               # Imbalanced data handling
}

# Initialize SVC
svc = SVC(random_state=42, class_weight='balance')

# Grid Search dengan Cross Validation
grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    cv=5,                          # 5-fold cross validation
    scoring='accuracy',            # Bisa juga: 'f1', 'roc_auc', dll
    n_jobs=-1,                     # Use all CPU cores
    verbose=2
)

# Fit model
print("\nüîç Mencari hyperparameter terbaik...")
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_

print("\n‚úÖ Best Parameters:", grid_search.best_params_)
print("‚úÖ Best Cross-Validation Score:", f"{grid_search.best_score_:.4f}")

METODE 1: Grid Search dengan Cross Validation

üîç Mencari hyperparameter terbaik...
Fitting 5 folds for each of 1440 candidates, totalling 7200 fits
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=linear; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=linear; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=linear; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=linear; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=poly; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END C=0.01, class_weight=balanced, coef0=0.0, degree=2, gamma=scale, kernel=poly; total time=   

In [6]:
# ============================================
# EVALUASI MODEL
# ============================================
print("\n" + "=" * 50)
print("EVALUASI MODEL")
print("=" * 50)

# Prediksi
y_pred_train = best_model.predict(X_train)
y_pred_test = best_model.predict(X_test)

# Accuracy
train_acc = accuracy_score(y_train, y_pred_train)
test_acc = accuracy_score(y_test, y_pred_test)

print(f"\nüìä Training Accuracy: {train_acc:.4f}")
print(f"üìä Testing Accuracy: {test_acc:.4f}")
print(f"üìä Overfitting Gap: {train_acc - test_acc:.4f}")


EVALUASI MODEL

üìä Training Accuracy: 0.8516
üìä Testing Accuracy: 0.8208
üìä Overfitting Gap: 0.0309


In [7]:
# Classification Report
print("\nüìã Classification Report (Test Set):")
print(classification_report(y_test, y_pred_test))


üìã Classification Report (Test Set):
              precision    recall  f1-score   support

           0       0.83      0.83      0.83       113
           1       0.81      0.81      0.81        99

    accuracy                           0.82       212
   macro avg       0.82      0.82      0.82       212
weighted avg       0.82      0.82      0.82       212



In [8]:
# Confusion Matrix
print("\nüìä Confusion Matrix (Test Set):")
print(confusion_matrix(y_test, y_pred_test))


üìä Confusion Matrix (Test Set):
[[94 19]
 [19 80]]


In [9]:
# Cross Validation Score
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='accuracy')
print(f"\nüîÑ Cross-Validation Scores: {cv_scores}")
print(f"üîÑ Mean CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")


üîÑ Cross-Validation Scores: [0.74747475 0.74747475 0.73469388 0.73469388 0.73469388]
üîÑ Mean CV Score: 0.7398 (+/- 0.0125)


In [10]:
# # Model dengan parameter yang sudah di-tune
# optimal_model = SVC(
#     C=10,                          # Regularization
#     kernel='rbf',                  # Biasanya rbf memberikan hasil terbaik
#     gamma='scale',                 # Automatic gamma calculation
#     class_weight='balanced',       # Handle imbalanced data
#     probability=True,              # Enable probability estimates
#     random_state=42,
#     cache_size=1000,              # Increase cache for faster training
#     max_iter=1000                 # Maximum iterations
# )

# optimal_model.fit(X_train, y_train)

# # Evaluasi
# y_pred_optimal = optimal_model.predict(X_test)
# optimal_acc = accuracy_score(y_test, y_pred_optimal)

# print(f"\nüìä Optimal Model Test Accuracy: {optimal_acc:.4f}")