# Final Model

In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from xgboost import XGBClassifier


# Load the dataset and create a stratified train/test split

In [2]:
# Read the loan dataset; "loan_status" is the label column and every
# remaining column is kept as a feature.
df = pd.read_csv("loan_preprocess_dataset.csv")
y = df["loan_status"]
X = df.drop(columns=["loan_status"])

# Hold out 20% of the rows for testing. The split is stratified on the label
# and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


# Best hyperparameters from Optuna (learning_rate corrected to 0.2)

In [3]:
# Hyperparameters taken from the Optuna search.
best_params = {
    "n_estimators": 454,
    "max_depth": 5,
    "learning_rate": 0.2,  # corrected from 2 -> 0.2
}

# `use_label_encoder` is deliberately not passed: the installed XGBoost no
# longer accepts it and emits
#   Parameters: { "use_label_encoder" } are not used.
# on every fit (once per CV fold plus the final fit).
# Unpacking `best_params` keeps the dict as the single source of truth for the
# tuned values.
model = XGBClassifier(
    **best_params,
    eval_metric="logloss",
    random_state=42,
)


# Cross-validation (Stratified K-Fold), training, test-set evaluation, and model saving

In [4]:
# --- Cross-validation: 5 stratified, shuffled folds, scored by accuracy ---
# NOTE(review): the folds are drawn from the full X / y, so the hold-out test
# rows also take part in cross-validation. Confirm this is intended, or switch
# to X_train / y_train to keep the test set untouched.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring="accuracy",
    cv=cv,
)

print("🔄 Cross-validation results:")
print("CV Accuracy scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

# --- Fit on the training split, then predict the hold-out split ---
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Hold-out metrics ---
# Precision, recall and F1 are support-weighted averages over the classes.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average="weighted")
    for metric in (precision_score, recall_score, f1_score)
)

print("\n Test Set Evaluation:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Per-class breakdown and raw confusion counts for the same predictions.
print("\n Classification Report:\n", classification_report(y_test, y_pred))
print("\n Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# --- Persist the fitted model to disk ---
joblib.dump(value=model, filename="xgboost_model.joblib")
print("\n Model saved as xgboost_model.joblib")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🔄 Cross-validation results:
CV Accuracy scores: [0.98083832 0.99041916 0.98562874 0.99040767 0.97721823]
Mean CV Accuracy: 0.9849024253650972


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



 Test Set Evaluation:
Accuracy: 0.9796407185628743
Precision: 0.9796288068076231
Recall: 0.9796407185628743
F1 Score: 0.9796212920349671

 Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.98       520
           1       0.98      0.97      0.97       315

    accuracy                           0.98       835
   macro avg       0.98      0.98      0.98       835
weighted avg       0.98      0.98      0.98       835


 Confusion Matrix:
 [[513   7]
 [ 10 305]]

 Model saved as xgboost_model.joblib
