In [1]:
import numpy as np
import pandas as pd

from constants.paths import OUTPUT_DIR

In [2]:
# Read the predicted probabilities written to the PLTR output CSV (the first
# column of the file is used as the index) and flatten them into a 1-D array.
predictions_path = OUTPUT_DIR / "y_pred_proba_PLTR.csv"
y_pred_proba = pd.read_csv(predictions_path, index_col=0).values.flatten()

In [3]:
# Pick the probability cut-off above which an application is too risky for us.
# The riskiest 17.5% of the applications are rejected. There is no formal
# justification for that share: it is a gut feeling taken from the train set.
rejected_share = 0.175
quantile_level = 1.0 - rejected_share
threshold = np.quantile(y_pred_proba, q=quantile_level)

In [4]:
# Wrap the flat probability array in a single-column frame; the column is
# already named "rate" because later cells turn it into an interest rate.
y_pred_proba_df = pd.DataFrame({"rate": y_pred_proba})

In [5]:
# True for every row whose value is strictly above the rejection threshold.
mask = y_pred_proba_df["rate"].gt(threshold)

In [6]:
# Blank out the rows flagged by `mask` (value above the threshold); every other
# row keeps its value.
y_pred_proba_df["rate"] = y_pred_proba_df["rate"].mask(mask, pd.NA)

In [7]:
# Apply the breakeven formula to turn the default probability p into an
# interest rate: rate = p / (1 - p).
default_proba = y_pred_proba_df["rate"]
repayment_proba = 1 - default_proba
y_pred_proba_df["rate"] = default_proba / repayment_proba

In [8]:
# Scale the breakeven rate up by a premium so that it covers costs and leaves
# a profit. The 1.2 multiplier is a gut-feeling choice, not a derived value.
premium_factor = 1.2
y_pred_proba_df["rate"] = y_pred_proba_df["rate"] * premium_factor

In [9]:
# Reference 3-month bond rates, stored as decimal fractions (percent / 100).
# German bonds are the closest thing to a risk-free rate in the euro area, but
# since we are a French company the French rate is the one used as our minimum
# interest rate (a kind of risk-free floor).
# The German rate is not referenced by the other cells visible here.
french_3_month_bond_rate: float = 1.959 / 100
german_3_month_bond_rate: float = 1.427 / 100

In [10]:
# Enforce a minimum interest rate with `clip` (a handy function, recommended):
# any rate below the French bond rate is raised to it.
minimum_rate = french_3_month_bond_rate
y_pred_proba_df = y_pred_proba_df.clip(lower=minimum_rate)

In [11]:
# Write the final rates to the round-1 submission file; the index is exported
# under the column name "id" and the value column under "rate".
submission_path = OUTPUT_DIR / "submission_round_1.csv"
y_pred_proba_df.to_csv(submission_path, index_label="id", header=["rate"])