In [None]:
import sys
import os


# Notebook kernels do not define __file__, so the project layout is resolved
# from the kernel's working directory. This assumes the kernel was started in
# the notebook's own folder, one level below the ML directory.
current_dir = os.getcwd()
ml_dir = os.path.dirname(current_dir)
backend_dir = os.path.dirname(ml_dir)

# Make the project packages importable. The membership check keeps the cell
# idempotent: re-running it must not pile up duplicate sys.path entries.
for project_dir in (ml_dir, backend_dir):
    if project_dir not in sys.path:
        sys.path.append(project_dir)

print("ML directory:", ml_dir)
print("Backend directory:", backend_dir)
print("Python path:", [ml_dir, backend_dir])

from train import train_model
from sklearn.model_selection import train_test_split
import pandas as pd

ML directory: c:\Users\Administrator\OneDrive\Desktop\credit_scoring_project\ML
Backend directory: c:\Users\Administrator\OneDrive\Desktop\credit_scoring_project
Python path: ['c:\\Users\\Administrator\\OneDrive\\Desktop\\credit_scoring_project\\ML', 'c:\\Users\\Administrator\\OneDrive\\Desktop\\credit_scoring_project']


In [None]:
# Column names applied to the data file through `names=` below: 20 predictor
# columns followed by the label column 'Target'.
columns = [
    'Checking_Status', 'Duration', 'Credit_History', 'Purpose', 
    'Credit_Amount', 'Savings', 'Employment', 'Installment_Rate', 
    'Personal_Status', 'Other_Debtors', 'Residence_Since', 'Property', 
    'Age', 'Other_Installment', 'Housing', 'Existing_Credits', 
    'Job', 'People_Liable', 'Telephone', 'Foreign_Worker', 'Target'
]

# Notebook kernels do not define __file__, so the data file is looked up in
# the kernel's working directory (expected to be the notebook's folder).
data_path = os.path.join(os.getcwd(), 'german.data')
print("Loading data from:", data_path)

df = pd.read_csv(data_path, sep=" ", names=columns)

# Recode the raw labels 1/2 to 0/1. Series.map() turns every value missing
# from the mapping into NaN, which would silently corrupt the target, so any
# unexpected label is rejected before mapping.
target_encoding = {1: 0, 2: 1}
unexpected_labels = set(df["Target"].unique()) - set(target_encoding)
if unexpected_labels:
    raise ValueError(
        f"Unexpected Target labels in {data_path}: "
        f"{sorted(unexpected_labels, key=str)}"
    )
df["Target"] = df["Target"].map(target_encoding)

print("Data shape:", df.shape)
print("Target distribution:")
print(df["Target"].value_counts())

Loading data from: c:\Users\Administrator\OneDrive\Desktop\credit_scoring_project\ML\notebooks\german.data
Data shape: (1000, 21)
Target distribution:
Target
0    700
1    300
Name: count, dtype: int64


In [3]:
# Separate the label from the predictors, then hold out 30% of the rows for
# evaluation. The fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified although the classes are
# imbalanced (700 vs 300); consider stratify=y, which would change the
# metrics reported below.
y = df["Target"]
X = df.drop(columns=["Target"])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [4]:

# Fit a pipeline on the training split through the project's train_model
# helper (imported from train). The third argument presumably selects the
# estimator type -- verify against train.py.
# NOTE(review): whether train_model also persists the fitted model to disk
# cannot be seen from this notebook -- confirm.
RandomForest_pipeline = train_model(X_train, y_train, "RandomForest")
print("RandomForest model retrained")

RandomForest model retrained


In [5]:

# Fit a second pipeline on the same training split through the project's
# train_model helper; "xgb" presumably selects the XGBoost estimator --
# verify against train.py.
xgb_pipeline = train_model(X_train, y_train, "xgb")
print("XGBoost model retrained")

XGBoost model retrained


In [6]:

# Score the held-out rows with both pipelines and keep column 1 of the
# predict_proba output for each.
# NOTE(review): column 1 is presumably the probability of class 1 (the
# recoded Target) -- this holds if the pipelines follow the scikit-learn
# classes_ ordering; confirm against train.py.
y_proba_rf = RandomForest_pipeline.predict_proba(X_test)[:, 1]
y_proba_xgb = xgb_pipeline.predict_proba(X_test)[:, 1]
print("Predictions successful!")

Predictions successful!


In [7]:
from sklearn.metrics import roc_auc_score

# Report ROC-AUC on the held-out split for each model, Random Forest first.
for caption, scores in (
    ("ROC-AUC RF :", y_proba_rf),
    ("ROC-AUC XGB :", y_proba_xgb),
):
    print(caption, roc_auc_score(y_test, scores))

ROC-AUC RF : 0.7711236132288765
ROC-AUC XGB : 0.7455702192544299


In [8]:
import numpy as np

def ks_stat(y_true, y_proba):
    """Return the Kolmogorov-Smirnov separation between goods and bads.

    Observations are ordered by score, and the cumulative share of goods
    (label 0) is compared with the cumulative share of bads (label 1). The
    statistic is the largest absolute gap between the two cumulative curves.

    The gap is only measured after the last observation of each distinct
    score. Rows that share a score cannot be separated by any threshold, so
    measuring inside a group of ties would make the result depend on the
    arbitrary order of the tied rows and overstate the separation.

    Parameters
    ----------
    y_true : array-like
        Labels, 0 for good and 1 for bad. Other values are ignored by both
        cumulative curves.
    y_proba : array-like
        Scores, one per label.

    Returns
    -------
    numpy.float64
        The KS statistic in [0, 1], or NaN when either class is absent
        (the statistic is undefined in that case).

    Raises
    ------
    ValueError
        If the inputs are empty or have different lengths.
    """
    labels = np.asarray(y_true).ravel()
    scores = np.asarray(y_proba, dtype=float).ravel()
    if labels.shape != scores.shape:
        raise ValueError("y_true and y_proba must have the same length")
    if labels.size == 0:
        raise ValueError("y_true and y_proba must not be empty")

    order = np.argsort(scores)
    sorted_scores = scores[order]
    good_flags = labels[order] == 0
    bad_flags = labels[order] == 1

    n_good = good_flags.sum()
    n_bad = bad_flags.sum()
    if n_good == 0 or n_bad == 0:
        # Undefined without both classes; return NaN explicitly instead of
        # relying on a 0/0 division and its RuntimeWarning.
        return np.float64(np.nan)

    cum_good = np.cumsum(good_flags) / n_good
    cum_bad = np.cumsum(bad_flags) / n_bad

    # True on the last row of every run of equal scores (always on the final
    # row), i.e. the only positions that correspond to a real threshold.
    ends_tie_group = np.append(sorted_scores[1:] != sorted_scores[:-1], True)
    gaps = np.abs(cum_good[ends_tie_group] - cum_bad[ends_tie_group])
    return np.max(gaps)

# Report the KS statistic on the held-out split for each model, Random
# Forest first. ks_stat receives the labels as a plain array.
for caption, scores in (
    ("KS RF :", y_proba_rf),
    ("KS XGB :", y_proba_xgb),
):
    print(caption, ks_stat(y_test.values, scores))

KS RF : 0.4353541195646459
KS XGB : 0.4252589515747411


In [9]:
from sklearn.metrics import f1_score

# Candidate decision thresholds: 0.20 to 0.80 inclusive in steps of 0.05
# (13 points). np.arange with a fractional step accumulates floating-point
# error (0.39999999999999997, 0.49999999999999994, ...) and includes the stop
# value only by rounding accident, so the grid is built with linspace and
# rounded to two decimals to get exact, predictable cut-offs.
thresholds = np.round(np.linspace(0.2, 0.8, 13), 2)

# NOTE(review): only the XGBoost probabilities are swept here; repeat the
# sweep on y_proba_rf if the Random Forest threshold is to be tuned.
for t in thresholds:
    # Classify as positive (1) when the predicted probability reaches t.
    preds = (y_proba_xgb >= t).astype(int)
    print(t, "F1:", f1_score(y_test, preds))

0.2 F1: 0.5982142857142857
0.25 F1: 0.5550239234449761
0.3 F1: 0.5520833333333334
0.35 F1: 0.5161290322580645
0.39999999999999997 F1: 0.4942528735632184
0.44999999999999996 F1: 0.4727272727272727
0.49999999999999994 F1: 0.43037974683544306
0.5499999999999999 F1: 0.43243243243243246
0.5999999999999999 F1: 0.4117647058823529
0.6499999999999999 F1: 0.36220472440944884
0.7 F1: 0.336
0.7499999999999998 F1: 0.3140495867768595
0.7999999999999998 F1: 0.2956521739130435


Conclusion and Final Model Selection

After comparing the two models (Random Forest and XGBoost) on the German Credit dataset, the following results were observed:

ROC-AUC (separation ability):

Random Forest: 0.771

XGBoost: 0.746
→ Random Forest shows better overall discrimination.

KS statistic (credit scoring performance):

Random Forest: 0.435

XGBoost: 0.425
→ Random Forest is slightly better, which is important for credit scoring.

Confusion matrix analysis:

XGBoost detects more “bad” cases at a threshold of 0.5.

However, the confusion matrix depends on the decision threshold (0.5 by default).
In credit scoring, the threshold should be adjusted according to the acceptable risk level.

Final Decision

The model selected for production is Random Forest, because it provides:

better separation ability (ROC-AUC),

better KS performance,

higher stability.

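The decision threshold will be adjusted based on business strategy (e.g., threshold between 0.20 and 0.35 depending on the acceptable risk level). Note that the F1 threshold sweep above was run on the XGBoost probabilities only; it should be repeated on the Random Forest probabilities before a production threshold is fixed.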

The final model saved for production is: credit_scoring_model.pkl (Random Forest).