In [19]:
import os

import pandas as pd
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC

In [4]:
# 1. Load the pre-encoded dataset.
# The location can be overridden through the CREDIT_RISK_DATA_PATH environment
# variable so the notebook is not tied to one machine's directory layout; when
# the variable is unset the original path is used.
DATA_PATH = os.environ.get(
    "CREDIT_RISK_DATA_PATH",
    "C:/Github/Credit Risk Prediction/data/processed/df_encoded.csv",
)
df_encoded = pd.read_csv(DATA_PATH)

In [5]:
# 2. Split the frame into the feature matrix (every column except the target)
#    and the target column itself
X = df_encoded.drop(columns=['Approved_Flag'])
y = df_encoded['Approved_Flag']

In [6]:
# 3. Map the class labels to integer codes (e.g. P1, P2, P3, P4 → 0, 1, 2, 3).
#    `le.classes_` keeps the original label for each code and is used later
#    when printing per-class metrics.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

In [7]:
# 4. Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on the target — consider whether
# `stratify=y_encoded` is wanted given the uneven class sizes.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, random_state=42, test_size=0.2)

In [9]:
# Standardise the features for the SVM. The scaling statistics are learned
# from the training rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek

# Rebalance the training classes with SMOTETomek. Only the training split is
# resampled; the test split is left as drawn.
smote_tomek = SMOTETomek(random_state=42)
X_train_balanced, y_train_balanced = smote_tomek.fit_resample(X_train, y_train)

# Resampling the scaled features is an independent run: neighbour distances
# differ after scaling, so the rows that are synthesised or removed need not
# match the unscaled run. Keep the labels returned by *this* call instead of
# discarding them, so the scaled features always have an aligned target.
X_train_scaled_balanced, y_train_scaled_balanced = smote_tomek.fit_resample(
    X_train_scaled, y_train
)

In [12]:
# 1. Random Forest

# A 200-tree forest trained on the unscaled training features
rf_classifier = RandomForestClassifier(random_state=42, n_estimators=200).fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

# precision / recall / f1 hold one value per class; the fourth return value is discarded
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("=== RandomForest ===")
print(f"Accuracy: {acc:.3f}")
for i in range(len(le.classes_)):
    cls = le.classes_[i]
    print(f"Class {cls}: Precision={precision[i]:.3f}, Recall={recall[i]:.3f}, F1 Score={f1[i]:.3f}")
    

=== RandomForest ===
Accuracy: 0.764
Class P1: Precision=0.837, Recall=0.704, F1 Score=0.765
Class P2: Precision=0.796, Recall=0.928, F1 Score=0.857
Class P3: Precision=0.442, Recall=0.211, F1 Score=0.286
Class P4: Precision=0.718, Recall=0.727, F1 Score=0.722


In [13]:
# 2. XGBoost

import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# Multi-class boosted trees over the four encoded classes, trained on the
# unscaled training features
xgb_classifier = xgb.XGBClassifier(num_class=4, objective='multi:softmax')
xgb_classifier.fit(X_train, y_train)
y_pred = xgb_classifier.predict(X_test)

# precision / recall / f1 hold one value per class; the fourth return value is discarded
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("=== XGBoost ===")
print(f"Accuracy: {acc:.3f}")
for i in range(len(le.classes_)):
    cls = le.classes_[i]
    print(f"Class {cls}: Precision={precision[i]:.3f}, Recall={recall[i]:.3f}, F1 Score={f1[i]:.3f}")


=== XGBoost ===
Accuracy: 0.778
Class P1: Precision=0.824, Recall=0.761, F1 Score=0.791
Class P2: Precision=0.826, Recall=0.914, F1 Score=0.867
Class P3: Precision=0.476, Recall=0.309, F1 Score=0.375
Class P4: Precision=0.734, Recall=0.736, F1 Score=0.735


In [14]:
# 3. Decision Tree
from sklearn.tree import DecisionTreeClassifier

# A depth- and split-size-limited tree. The seed is fixed because the tree
# builder permutes candidate features at each split, so an unseeded fit can
# give different trees (and different metrics) from run to run.
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=10, random_state=42)
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_test)

# Overall accuracy plus per-class precision / recall / F1
acc = accuracy_score(y_test, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)

print("=== Decision Tree ===")
print(f"Accuracy: {acc:.3f}")
for i, cls in enumerate(le.classes_):
    print(f"Class {cls}: Precision={precision[i]:.3f}, Recall={recall[i]:.3f}, F1 Score={f1[i]:.3f}")


=== Decision Tree ===
Accuracy: 0.712
Class P1: Precision=0.722, Recall=0.724, F1 Score=0.723
Class P2: Precision=0.810, Recall=0.826, F1 Score=0.818
Class P3: Precision=0.349, Recall=0.331, F1 Score=0.340
Class P4: Precision=0.649, Recall=0.626, F1 Score=0.637


In [15]:
from sklearn.pipeline import make_pipeline
from lightgbm import LGBMClassifier

# 4. LightGBM
# Two-step pipeline: standardise the features, then fit a seeded multiclass
# LightGBM model on them
lgb_pipeline = make_pipeline(
    StandardScaler(),
    LGBMClassifier(random_state=42, num_class=4, objective='multiclass'),
)
lgb_pipeline.fit(X_train, y_train)

# Score the held-out rows
y_pred = lgb_pipeline.predict(X_test)

# precision / recall / f1 hold one value per class; the fourth return value is discarded
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("=== LightGBM Classifier ===")
print(f"Accuracy: {acc:.3f}")
for i in range(len(le.classes_)):
    cls = le.classes_[i]
    print(f"Class {cls}: Precision={precision[i]:.3f}, Recall={recall[i]:.3f}, F1 Score={f1[i]:.3f}")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013004 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3157
[LightGBM] [Info] Number of data points in the train set: 33651, number of used features: 54
[LightGBM] [Info] Start training from score -2.156606
[LightGBM] [Info] Start training from score -0.500165
[LightGBM] [Info] Start training from score -1.883865
[LightGBM] [Info] Start training from score -2.072659
=== LightGBM Classifier ===
Accuracy: 0.779
Class P1: Precision=0.829, Recall=0.772, F1 Score=0.799
Class P2: Precision=0.822, Recall=0.917, F1 Score=0.867
Class P3: Precision=0.469, Recall=0.289, F1 Score=0.358
Class P4: Precision=0.741, Recall=0.739, F1 Score=0.740


  message = "The feature names should match those that were passed during fit.\n"


In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# One-vs-rest specialist for the P3 class.
# Look the integer code for "P3" up in the fitted encoder instead of
# hard-coding it, so the binary target stays correct if the label set changes
# (an unknown label raises instead of silently selecting the wrong class).
p3_class_index = le.transform(["P3"])[0]

# Binary labels: 1 if class == P3, else 0
y_train_p3 = (y_train == p3_class_index).astype(int)
y_test_p3 = (y_test == p3_class_index).astype(int)

# class_weight='balanced' re-weights the two classes to offset P3 being the
# minority; max_iter=1000 matches the P3 classifier used with the stacking
# model and gives the solver more room to converge than the default.
clf_p3 = make_pipeline(
    StandardScaler(),
    LogisticRegression(class_weight='balanced', max_iter=1000),
)
clf_p3.fit(X_train, y_train_p3)
p3_preds = clf_p3.predict(X_test)

print(classification_report(y_test_p3, p3_preds, target_names=["Not P3", "P3"]))


              precision    recall  f1-score   support

      Not P3       0.93      0.62      0.74      7088
          P3       0.27      0.74      0.39      1325

    accuracy                           0.64      8413
   macro avg       0.60      0.68      0.57      8413
weighted avg       0.82      0.64      0.69      8413



In [20]:
# 2. Define base models with scaling
# The SVC (whose probability estimates involve internal data shuffling) and
# the decision tree are seeded so that repeated runs build the same stack.
rf_model = make_pipeline(StandardScaler(), RandomForestClassifier(random_state=42))
xgb_model = make_pipeline(StandardScaler(), xgb.XGBClassifier(objective='multi:softmax', num_class=4))
svm_model = make_pipeline(StandardScaler(), SVC(probability=True, random_state=42))
dt_model = make_pipeline(StandardScaler(), DecisionTreeClassifier(random_state=42))

# 3. Train stacking classifier
# The base models' cross-validated outputs (cv=3) feed a logistic-regression
# meta-learner.
estimators = [
    ('rf', rf_model),
    ('xgb', xgb_model),
    ('svm', svm_model),
    ('dt', dt_model),
]

stacked_model = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=3,
    n_jobs=-1
)

stacked_model.fit(X_train, y_train)

# 4. Train P3-specialist binary classifier (P3 vs. Not P3)
# Resolve the integer code of "P3" from the fitted encoder rather than
# hard-coding it; an unknown label raises instead of picking a wrong class.
p3_class_index = le.transform(["P3"])[0]
y_train_p3 = (y_train == p3_class_index).astype(int)
y_test_p3 = (y_test == p3_class_index).astype(int)

p3_clf = make_pipeline(StandardScaler(), LogisticRegression(class_weight='balanced', max_iter=1000))
p3_clf.fit(X_train, y_train_p3)

# 5. Predict stacking and P3 probabilities
# The untouched stacking output is kept in `raw_stacked_preds` so the effect
# of the override below can be inspected afterwards.
raw_stacked_preds = stacked_model.predict(X_test)
p3_probs = p3_clf.predict_proba(X_test)[:, 1]  # P3 confidence (column for label 1)

# 6. Override logic: change to P3 if probability > threshold
# NOTE(review): 0.85 is a hand-picked cut-off; if it was chosen by looking at
# X_test the metrics printed below are optimistic — confirm how it was tuned.
threshold = 0.85
override_mask = (p3_probs > threshold)
stacked_preds = raw_stacked_preds.copy()
stacked_preds[override_mask] = p3_class_index

# 7. Final evaluation
acc = accuracy_score(y_test, stacked_preds)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, stacked_preds)

print("=== Stacking + P3 Override ===")
print(f"Accuracy: {acc:.3f}")
for i, cls in enumerate(le.classes_):
    print(f"Class {cls}: Precision={precision[i]:.3f}, Recall={recall[i]:.3f}, F1 Score={f1[i]:.3f}")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  )
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  )
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  )
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  )
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  )


=== Stacking + P3 Override ===
Accuracy: 0.776
Class P1: Precision=0.837, Recall=0.761, F1 Score=0.798
Class P2: Precision=0.825, Recall=0.914, F1 Score=0.867
Class P3: Precision=0.444, Recall=0.293, F1 Score=0.353
Class P4: Precision=0.735, Recall=0.737, F1 Score=0.736
