# Imports

In [1]:
import joblib

import constants.paths as pth
from src.preprocessing import Preprocessor  # noqa: F401
from src.utils.load_data import load_data


In [2]:
# Use the preprocessor that was trained during model training
# (deserialised with joblib from the path configured in `constants.paths`).
# NOTE(review): the `Preprocessor` import in the imports cell is presumably
# only there so the pickled object can be reconstructed (hence its
# `noqa: F401`) — confirm before removing it.
preprocessor = joblib.load(pth.PREPROCESSOR_PATH)

In [3]:
# Load the data stored at NEW_LOANS_PATH, then run it through the loaded
# preprocessor. `data_to_predict` is kept as loaded (result columns are added
# to it later); `scaled_data_to_predict` is the transformed version that is
# passed to the model.
data_to_predict = load_data(pth.NEW_LOANS_PATH)
scaled_data_to_predict = preprocessor.transform(data_to_predict)

In [4]:
# Load the trained model selected by name and score the preprocessed data.
model_name = "XGBoost"  # Change as needed
model_path = pth.MODELS_DIR / f"{model_name}.pkl"
model = joblib.load(model_path)

# Column 1 of predict_proba holds the probability of the positive class
# (default probability).
positive_class_column = 1
class_probabilities = model.predict_proba(scaled_data_to_predict)
proba_predictions = class_probabilities[:, positive_class_column]

In [5]:
# Attach the model's positive-class probability to each row.
# Note: this adds a column to `data_to_predict` in place.
data_to_predict["predicted_default_proba"] = proba_predictions

In [6]:
# Compute the break-even interest rate for each loan: r = p / (1 - p),
# where p is the predicted default probability.
default_proba = data_to_predict["predicted_default_proba"]
data_to_predict["breakeven_interest_rate"] = default_proba / (1 - default_proba)

In [7]:
# Display the scored frame, including the two columns added above
# (pandas truncates the rendering of large frames).
data_to_predict

Unnamed: 0,id,sex,employment,married,income,signal2,predicted_default_proba,breakeven_interest_rate
0,1,M,employed,0,29758,0.000000,0.189274,0.233463
1,2,F,retired,1,6140,0.000000,0.532457,1.138839
2,3,F,employed,1,27186,0.008139,0.300983,0.430579
3,4,M,employed,1,8429,0.624570,0.481592,0.928981
4,5,M,retired,1,2820,0.097230,0.719252,2.561913
...,...,...,...,...,...,...,...,...
99995,99996,M,retired,1,9027,0.070968,0.523854,1.100195
99996,99997,M,student,0,6229,0.635176,0.563191,1.289329
99997,99998,M,employed,1,6471,0.748987,0.581244,1.388024
99998,99999,M,unemployed,0,32654,0.000000,0.177984,0.216522


The function
$$
f : x \mapsto \frac{x}{1-x}
$$
is strictly increasing on $[0, 1)$ and $f(\frac{1}{2}) = 1$.

Thus, any predicted default probability above $0.5$ gives a break-even interest rate above $100\%$. We cannot set interest rates above $100\%$.

In [9]:
# Show only the loans whose break-even interest rate is below 0.1 (10%).
low_breakeven_mask = data_to_predict["breakeven_interest_rate"] < 0.1
data_to_predict.loc[low_breakeven_mask]

Unnamed: 0,id,sex,employment,married,income,signal2,predicted_default_proba,breakeven_interest_rate
20,21,M,retired,0,53474,0.342223,0.012830,0.012997
61,62,F,employed,0,80829,0.000000,0.021365,0.021831
68,69,M,employed,1,48064,0.515877,0.058822,0.062499
75,76,M,student,0,43162,0.985836,0.045398,0.047557
99,100,F,employed,1,139120,0.608255,0.008287,0.008356
...,...,...,...,...,...,...,...,...
99974,99975,F,employed,1,75375,0.308819,0.018663,0.019018
99979,99980,M,employed,1,43677,0.660603,0.079606,0.086491
99986,99987,F,student,0,37122,0.738878,0.080004,0.086961
99993,99994,F,employed,1,56829,0.587365,0.027572,0.028353


In [10]:
# Turn probabilities into hard 0/1 labels: 1 when the predicted default
# probability reaches the decision threshold, 0 otherwise.
# NOTE(review): the origin of this threshold value is not shown in this
# notebook — presumably tuned during model training; confirm and consider
# loading it from a shared constant instead of hardcoding it.
threshold = 0.6214760622417385
meets_threshold = proba_predictions >= threshold
predictions = meets_threshold.astype(int)

In [12]:
# Count how many rows were labelled 1 (probability >= threshold).
predictions.sum()

np.int64(24194)