In [1]:
import numpy as np
import pandas as pd

from constants.paths import OUTPUT_DIR

In [2]:
# Load the predicted probabilities stored in y_pred_proba_PLTR.csv: the first CSV
# column is used as the index and the remaining values are flattened to a 1-D array.
proba_table = pd.read_csv(OUTPUT_DIR / "y_pred_proba_PLTR.csv", index_col=0)
y_pred_proba = proba_table.values.flatten()

In [3]:
# Cut-off above which an application is considered too risky for us.
# We decided to reject the riskiest 17.5% of the applications; there is no hard
# justification for this share, it is a gut feeling taken from the train set.
rejected_share = 0.175
quantile_level = 1.0 - rejected_share
threshold = np.quantile(y_pred_proba, q=quantile_level)

In [4]:
# Wrap the probabilities in a single-column frame ("rate") indexed by 1-based ids.
y_pred_proba_df = pd.DataFrame(
    {"rate": y_pred_proba},
    index=np.arange(1, len(y_pred_proba) + 1),
)

In [5]:
# Show the frame; at this stage the "rate" column still holds the raw predicted probabilities.
y_pred_proba_df

Unnamed: 0,rate
1,0.023750
2,0.126406
3,0.022749
4,0.084825
5,0.141071
...,...
99996,0.076966
99997,0.134010
99998,0.144755
99999,0.014185


In [6]:
# Flag the applications whose predicted probability is strictly above the rejection threshold.
mask = y_pred_proba_df["rate"].gt(threshold)

In [7]:
# Blank out the rate of every flagged (rejected) application; the others are kept as they are.
y_pred_proba_df["rate"] = y_pred_proba_df["rate"].mask(mask, pd.NA)

In [8]:
# Apply the breakeven formula p / (1 - p) to turn the default probability p
# into an interest rate.
default_proba = y_pred_proba_df["rate"]
y_pred_proba_df["rate"] = default_proba / (1 - default_proba)

In [9]:
# Scale the breakeven rate up by a premium so that we cover costs and make a profit.
# The 1.2 multiplier is a gut feeling that seems reasonable.
premium_multiplier = 1.2
y_pred_proba_df["rate"] = premium_multiplier * y_pred_proba_df["rate"]

In [10]:
# Minimum interest rate, playing the role of a risk-free rate.
# German bonds are the closest thing to risk-free in the euro area, but as a French
# company we benchmark against the French bond rate. The German rate is defined for
# comparison and is not used by the cells shown in this notebook.
# Both quotes are given in percent and converted to fractions.
french_3_month_bond_rate, german_3_month_bond_rate = (
    quote_in_percent / 100 for quote_in_percent in (1.959, 1.427)
)

In [11]:
# Floor the offered rate at the French bond rate using clip (a handy function, recommended).
# The frame only has the "rate" column, so clipping that column is the same as clipping the frame.
y_pred_proba_df = y_pred_proba_df.assign(
    rate=y_pred_proba_df["rate"].clip(lower=french_3_month_bond_rate)
)

In [None]:
# Save the submission: the index is written under the "id" header and the single
# data column under "rate".
submission_path = OUTPUT_DIR / "submission_round_1.csv"
y_pred_proba_df.to_csv(submission_path, header=["rate"], index_label="id")

In [15]:
# Read the submission back to check what was written.
# The path is built from OUTPUT_DIR and the same file name used when saving, instead of
# a hard-coded absolute home-directory path, so the notebook runs on any machine and
# always reads the file the write cell produced.
pd.read_csv(OUTPUT_DIR / "submission_round_1.csv")

Unnamed: 0,id,rate
0,1,0.029193
1,2,0.173636
2,3,0.027934
3,4,0.111224
4,5,0.197088
...,...,...
99995,99996,0.100060
99996,99997,0.185697
99997,99998,0.203107
99998,99999,0.019590
