In [28]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

In [29]:
# Hyperparameter grid explored by the grid search.
# The second scale_pos_weight candidate is the negative-to-positive ratio of
# the training labels (count of label 0 divided by count of label 1).
n_negative = len(y_train[y_train == 0])
n_positive = len(y_train[y_train == 1])

param_grid = {
    # Number of trees in the model
    "n_estimators": [50, 100, 200],
    # Maximum depth of each tree
    "max_depth": [3, 5, 7],
    # Learning rate
    "learning_rate": [0.01, 0.1, 0.2],
    # Fraction of samples drawn for each tree
    "subsample": [0.7, 0.8, 1.0],
    # Class balancing: no re-weighting (1) vs. negative/positive ratio
    "scale_pos_weight": [1, n_negative / n_positive],
}

In [30]:
# Initialise the base XGBoost classifier that the grid search will clone and tune.
# random_state fixes the seed for reproducible runs; eval_metric='logloss' sets
# the evaluation metric explicitly.
# `use_label_encoder` is intentionally not passed: the installed XGBoost no
# longer accepts it and only emits a "Parameters: { "use_label_encoder" } are
# not used." warning on every fit.
xgb = XGBClassifier(random_state=42, eval_metric='logloss')

In [31]:
# Exhaustive search over param_grid with 5-fold cross-validation; candidates
# are ranked by macro-averaged F1 and fits are dispatched on all available
# cores (n_jobs=-1). verbose=1 prints the fold/candidate summary.
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 162 candidates, totalling 810 fits


Parameters: { "use_label_encoder" } are not used.



In [32]:
# Best result: the winning parameter combination and its cross-validated score
# (the score is the 'f1_macro' metric configured on the grid search).
print("Best Parameters: " + str(grid_search.best_params_))
print("Best F1 Score: " + str(grid_search.best_score_))

Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'scale_pos_weight': 1, 'subsample': 0.7}
Best F1 Score: 0.7989843621204684


In [33]:
# Evaluate the estimator fitted with the best parameters on the test split.
best_xgb = grid_search.best_estimator_
y_pred_best_xgb = best_xgb.predict(X_test_scaled)

# Heading and report are emitted on consecutive lines, preceded by a blank line.
print(
    "\nClassification Report for Optimized XGBoost:",
    classification_report(y_test, y_pred_best_xgb),
    sep="\n",
)



Classification Report for Optimized XGBoost:
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       153
           1       0.67      0.50      0.57        16

    accuracy                           0.93       169
   macro avg       0.81      0.74      0.77       169
weighted avg       0.92      0.93      0.92       169



In [34]:
# Predicted probability of the positive class (PE) for each test sample:
# take column index 1 of the per-class probability matrix.
y_pred_proba = np.take(best_xgb.predict_proba(X_test_scaled), 1, axis=1)

In [35]:
# Apply a custom decision threshold (e.g. 0.3) instead of the default cut-off:
# samples whose positive-class probability is >= threshold are labelled 1,
# everything else 0.
# NOTE(review): 0.3 is a hand-picked value and is evaluated on the test split
# below — consider selecting it on validation data instead.
threshold = 0.3
y_pred_adjusted = (y_pred_proba >= threshold).astype(int)

In [36]:
# Evaluate the predictions produced with the adjusted threshold:
# per-class precision/recall/F1 first, then the raw confusion matrix.
print(
    "Classification Report with Adjusted Threshold:",
    classification_report(y_test, y_pred_adjusted),
    sep="\n",
)
print(
    "\nConfusion Matrix with Adjusted Threshold:",
    confusion_matrix(y_test, y_pred_adjusted),
    sep="\n",
)

Classification Report with Adjusted Threshold:
              precision    recall  f1-score   support

           0       0.97      0.94      0.95       153
           1       0.55      0.69      0.61        16

    accuracy                           0.92       169
   macro avg       0.76      0.81      0.78       169
weighted avg       0.93      0.92      0.92       169


Confusion Matrix with Adjusted Threshold:
[[144   9]
 [  5  11]]
