### Import necessary packages

In [None]:
import pandas as pd
from src.model_dispatcher import large_models, small_models
from src import config
from src.evaluation import evaluate_report
import matplotlib as mpl
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.metrics import roc_curve, roc_auc_score


%matplotlib inline

# Apply the seaborn theme first, then layer the project-wide plotting
# defaults from config on top of it.
sns.set_style("whitegrid")
sns.set_palette("deep")
mpl.rcParams.update(
    {
        "figure.figsize": config.DEFAULT_FIGSIZE,
        "lines.linewidth": config.DEFAULT_PLOT_LINEWIDTH,
        "lines.linestyle": config.DEFAULT_PLOT_LINESTYLE,
        "font.size": config.DEFAULT_AXIS_FONT_SIZE,
    }
)

# Keep the "deep" palette around as hex strings so individual curves can be
# coloured explicitly by position.
pal = sns.color_palette("deep")
pal_hex = pal.as_hex()

### Run #1 

### Large loans model candidates

In [None]:
# Held-out test split for the large-loan models.
large_test_path = config.FIN_FILE_PATH / "test_df_large_loans_300000.parquet"
ll_test = pd.read_parquet(large_test_path)

# Separate the target column from the feature columns.
y_large_test = ll_test[config.TARGET]
X_large_test = ll_test.drop(columns=config.TARGET)

In [None]:
# Overlay the ROC curve of every large-loan candidate on a single figure,
# save it under the reports folder and display it.
fig, ax = plt.subplots()

# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], ls="--", color="black")
ax.set_xlabel("FPR")
ax.set_ylabel("TPR")
ax.set_title("Large loans - ROC Curves")

for run, (name, model) in enumerate(large_models.items()):
    # ROC/AUC are computed from scores rather than hard labels; column 1 of
    # predict_proba is used (presumably the positive class - confirm against
    # the models' class ordering).
    y_pred_prob = model.predict_proba(X_large_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_large_test, y_pred_prob)
    auc = roc_auc_score(y_large_test, y_pred_prob)

    ax.plot(
        fpr, tpr,
        label=f"{name}: AUC {auc:.2%}",
        linestyle="solid",
        linewidth=2,
        # Wrap around the palette so having more candidates than colours
        # reuses colours instead of raising IndexError.
        color=pal_hex[run % len(pal_hex)],
    )

ax.legend(loc="lower right")
# Adjust the layout only once labels, title and legend exist, so they are
# taken into account.
fig.tight_layout()
fig.savefig(config.REPORTS_PATH / "roc/all_large_models_300000.jpeg", bbox_inches="tight")
plt.show()

In [None]:
# One evaluation report per large-loan candidate, tagged with the model name.
ll_results = []

for model_name, estimator in large_models.items():
    positive_scores = estimator.predict_proba(X_large_test)[:, 1]
    predicted_labels = estimator.predict(X_large_test)
    model_report = evaluate_report(
        y_test=y_large_test,
        y_pred=predicted_labels,
        y_pred_prob=positive_scores,
    )
    # Tag the report so it can be indexed by model later on.
    model_report["model"] = model_name
    ll_results.append(model_report)

In [None]:
# results
large_model_metrics = pd.DataFrame(ll_results).set_index("model")
large_model_metrics

### Small loans model candidates

In [None]:
# Held-out test split for the small-loan models.
small_test_path = config.FIN_FILE_PATH / "test_df_small_loans_300000.parquet"
sl_test = pd.read_parquet(small_test_path)

# Separate the target column from the feature columns.
y_small_test = sl_test[config.TARGET]
X_small_test = sl_test.drop(columns=config.TARGET)

In [None]:
# Overlay the ROC curve of every small-loan candidate on a single figure,
# save it under the reports folder and display it.
fig, ax = plt.subplots()

# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], ls="--", color="black")
ax.set_xlabel("FPR")
ax.set_ylabel("TPR")
ax.set_title("Small loans - ROC Curves")

for run, (name, model) in enumerate(small_models.items()):
    # ROC/AUC are computed from scores rather than hard labels; column 1 of
    # predict_proba is used (presumably the positive class - confirm against
    # the models' class ordering).
    y_pred_prob = model.predict_proba(X_small_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_small_test, y_pred_prob)
    auc = roc_auc_score(y_small_test, y_pred_prob)

    ax.plot(
        fpr, tpr,
        label=f"{name}: AUC {auc:.2%}",
        linestyle="solid",
        linewidth=2,
        # Wrap around the palette so having more candidates than colours
        # reuses colours instead of raising IndexError.
        color=pal_hex[run % len(pal_hex)],
    )

ax.legend(loc="lower right")
# Adjust the layout only once labels, title and legend exist, so they are
# taken into account.
fig.tight_layout()
fig.savefig(config.REPORTS_PATH / "roc/all_small_models_300000.jpeg", bbox_inches="tight")
plt.show()

In [None]:
# One evaluation report per small-loan candidate, tagged with the model name.
sl_results = []

for model_name, estimator in small_models.items():
    positive_scores = estimator.predict_proba(X_small_test)[:, 1]
    predicted_labels = estimator.predict(X_small_test)
    model_report = evaluate_report(
        y_test=y_small_test,
        y_pred=predicted_labels,
        y_pred_prob=positive_scores,
    )
    # Tag the report so it can be indexed by model later on.
    model_report["model"] = model_name
    sl_results.append(model_report)

In [None]:
# results
small_model_metrics = pd.DataFrame(sl_results).set_index("model")
small_model_metrics

# export results to csv if needed
# small_model_metrics.to_csv("small_model_metrics.csv")