# Evaluate model performance on test set

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import relplot as rp
from sklearn.calibration import calibration_curve
from sklearn.metrics import (
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split

In [None]:
# Load the fitted model. NOTE: pickle.load can execute arbitrary code embedded
# in the file, so only load artifacts produced by this project's own training.
with open("../output/models/no_show_model_cv.pickle", "rb") as f:
    model = pickle.load(f)

featuretable = pd.read_parquet("../data/processed/featuretable.parquet")

# Encode the target as integers (no_show -> 1, show -> 0). The explicit cast
# avoids depending on `replace` silently downcasting the column to int, which
# pandas deprecated in 2.2; it also fails loudly if an unmapped label remains.
featuretable["no_show"] = (
    featuretable["no_show"].replace({"no_show": 1, "show": 0}).astype("int64")
)

X, y = featuretable.drop(columns="no_show"), featuretable["no_show"]

# shuffle=False keeps the row order, so the test set is the last 20% of rows.
# NOTE(review): this must match the split used when the model was trained,
# otherwise the "test" rows may have been seen during fitting -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, shuffle=False
)

## ROC curve

In [None]:
# Class probabilities for the test rows; column 1 is used below as the score
# of the positive class (no_show encoded as 1).
y_pred = model.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_pred[:, 1])
auc_score = roc_auc_score(y_test, y_pred[:, 1])

fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(fpr, tpr, label=f"Hist Gradient Boosting (AUC={round(auc_score, 2)})")
# Diagonal = performance of a classifier that ranks at random.
ax.plot([0, 1], [0, 1], label="Random (AUC=0.5)", linestyle="dotted")
# Label the axes and title the figure, like the other plots in this notebook.
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")
ax.set_title("ROC curve")
ax.legend()
plt.show()

## Precision, recall

In [None]:
# Precision and recall of the positive class for every decision threshold.
precision, recall, thresholds = precision_recall_curve(y_test, y_pred[:, 1])

fig, ax = plt.subplots(figsize=(8, 8))
# precision_recall_curve returns one more precision/recall value than there
# are thresholds, so the last point is dropped to align the arrays.
for curve_label, curve_values in (("precision", precision), ("recall", recall)):
    ax.plot(thresholds, curve_values[:-1], label=curve_label)
ax.legend()
plt.show()

In [None]:
# Histogram of the daily number of test rows: bucket on the "start" index
# level per calendar day and count the non-missing "age" values in each bucket.
appointments_per_day = X_test.resample("1D", level="start")["age"].count()
appointments_per_day.plot.hist(title="Number of appointments per day")
plt.show()

In [None]:
# Combine test features, predicted positive-class probability and true label
# in one frame so rows can be grouped per calendar day.
total_test_data = X_test.copy()
total_test_data["y_pred"] = y_pred[:, 1]
total_test_data["y_true"] = y_test

# Calendar date of every row, used as the grouping key below.
test_dates = total_test_data.index.get_level_values("start").date

# The per-day grouping, weekday filter and ranking do not depend on the number
# of people called, so compute them once instead of once per sweep step.
# Each entry holds one day's true labels, ordered from highest to lowest
# predicted probability.
ranked_truth_per_day = [
    day_rows.sort_values("y_pred", ascending=False)["y_true"]
    for day, day_rows in total_test_data.groupby(test_dates)
    if day.weekday() < 5  # Monday-Friday only; Saturday (5) and Sunday (6) skipped
]

# One inner list per sweep step, each holding one score per weekday.
precisions = []
precisions_random = []
recalls = []
recalls_random = []

# Sweep over the number of people flagged ("called") per day.
for i in range(5, 155, 5):
    prec = []
    prec_random = []
    rec = []
    rec_random = []
    for ranked_truth in ranked_truth_per_day:
        # Flag the first `i` rows; on days with fewer than `i` rows every row
        # ends up flagged.
        called = np.zeros(len(ranked_truth), dtype=int)
        called[:i] = 1

        # Baseline: flag `i` rows in a random order.
        # NOTE: the shuffle is unseeded, so the baseline differs between runs.
        shuffled_truth = ranked_truth.sample(frac=1)
        prec_random.append(precision_score(shuffled_truth, called))
        rec_random.append(recall_score(shuffled_truth, called))

        # Model: flag the `i` rows with the highest predicted probability.
        # NOTE(review): a day without any positive label has undefined recall;
        # with sklearn's default `zero_division` that is expected to be scored
        # as 0 with a warning, which would pull the mean recall down -- confirm.
        prec.append(precision_score(ranked_truth, called))
        rec.append(recall_score(ranked_truth, called))
    precisions.append(prec)
    precisions_random.append(prec_random)
    recalls.append(rec)
    recalls_random.append(rec_random)

In [None]:
# Index 4 of the sweep over range(5, 155, 5) is the step with 25 people called
# per day; report mean and standard deviation over the days for that step.
daily_precision = precisions[4]
daily_recall = recalls[4]
print(f"Daily precision: {np.mean(daily_precision)} (+- {np.std(daily_precision)})")
print(f"Daily recall: {np.mean(daily_recall)} (+- {np.std(daily_recall)})")

In [None]:
# X axis: number of people called per day (same steps as the sweep).
x_values = range(5, 155, 5)

# Mean and standard deviation over days (axis 1) for every sweep step.
mean_precision, std_precision = np.mean(precisions, axis=1), np.std(precisions, axis=1)
mean_recall, std_recall = np.mean(recalls, axis=1), np.std(recalls, axis=1)

mean_precision_random = np.mean(precisions_random, axis=1)
std_precision_random = np.std(precisions_random, axis=1)
mean_recall_random = np.mean(recalls_random, axis=1)
std_recall_random = np.std(recalls_random, axis=1)

fig, ax = plt.subplots(figsize=(15, 8))

# Model curves first, then the random baseline. Within each pair both mean
# lines are drawn before both +-1 std bands, which keeps the legend order
# (and the colour assignment) of lines and bands unchanged.
curve_pairs = (
    (
        ("precision", mean_precision, std_precision),
        ("recall", mean_recall, std_recall),
    ),
    (
        ("precision_random", mean_precision_random, std_precision_random),
        ("recall_random", mean_recall_random, std_recall_random),
    ),
)
for pair in curve_pairs:
    for curve_label, curve_mean, _ in pair:
        ax.plot(x_values, curve_mean, label=curve_label)
    for curve_label, curve_mean, curve_std in pair:
        ax.fill_between(
            x_values,
            curve_mean - curve_std,
            curve_mean + curve_std,
            label=f"std {curve_label}",
            alpha=0.3,
        )

ax.legend()
ax.set(
    title="Recall and precision for number of people called per day",
    xlabel="People called per day",
    ylabel="Total score over all days",
    ylim=(0, 1),
)
plt.show()

## Calibration curve

In [None]:
# Bin the predicted positive-class probabilities into 20 bins and compare the
# mean prediction per bin with the observed fraction of positives.
prob_true, prob_pred = calibration_curve(y_test, y_pred[:, 1], n_bins=20)

# `fig` (previously misspelled `fix`) now refers to this figure instead of
# keeping a stale reference to the figure of an earlier cell.
fig, ax = plt.subplots()
ax.plot(prob_pred, prob_true, label="model calibration")
# Diagonal = predictions that match the observed frequencies exactly.
ax.plot([0, 1], [0, 1], label="perfect calibration")
ax.set_xlabel("Mean predicted probability")
ax.set_ylabel("Fraction of positives")
ax.set_title("Calibration curve")
ax.legend()
plt.show()

## Reliability plot

In [None]:
# NOTE(review): this scores every row of X, i.e. the training part of the
# split as well as X_test, unlike the other cells in this notebook -- confirm
# that evaluating calibration on the full data set is intended.
y_pred_total = model.predict_proba(X)
positive_proba_total = y_pred_total[:, 1]

print("calibration error:", rp.smECE(positive_proba_total, y))
fig, ax = rp.rel_diagram(positive_proba_total, y)
fig.show()

## Feature importance

In [None]:
# Importances of the final estimator, labelled with the feature names emitted
# by the first step; assumes `model` is an indexable pipeline -- TODO confirm.
# NOTE(review): the ROC legend calls this model "Hist Gradient Boosting";
# scikit-learn's histogram-based gradient boosting estimators do not expose
# `feature_importances_`, so confirm the final step provides this attribute.
forest_importances = pd.Series(
    model[-1].feature_importances_, index=model[0].get_feature_names_out()
).sort_values()

fig, ax = plt.subplots()
ax.barh(
    forest_importances.index,
    forest_importances,
)
ax.set_title("Feature importances using MDI")
# Horizontal bars: the importance values run along the x-axis, so the value
# label belongs there (it was previously attached to the y-axis).
ax.set_xlabel("Mean decrease in impurity")
fig.tight_layout()
plt.show()

## Try out predictions

In [None]:
# Display the five test rows with the lowest predicted positive-class probability.
total_test_data.sort_values("y_pred").head()

In [None]:
# Display all test rows whose second index level matches 2022-04-05 (any value
# on the first level), highest predicted probability first.
day_selector = pd.IndexSlice[:, "2022-04-05"]
total_test_data.loc[day_selector, :].sort_values(by="y_pred", ascending=False)

In [None]:
from ipywidgets import FloatSlider, IntSlider, interact


@interact
def sim_prediction(
    hour=IntSlider(9, 0, 24, 1),
    weekday=IntSlider(0, 0, 6, 1),
    specialty_code="REV",
    minutesDuration=IntSlider(30, 5, 120, 5),
    prev_no_show=IntSlider(1, 0, 20, 1),
    prev_no_show_perc=FloatSlider(0.03, min=0, max=1, step=0.01),
    age=IntSlider(20, 0, 100, 1),
    dist_umcu=IntSlider(45, 0, 200, 1),
    prev_minutes_early=IntSlider(-3, -60, 60, 1),
    earlier_appointments=IntSlider(10, 0, 100, 1),
    appointments_same_day=IntSlider(1, 0, 7, 1),
    appointments_last_days=IntSlider(1, 0, 100, 1),
    days_since_created=IntSlider(150, 0, 365, 10),
    days_since_last_appointment=IntSlider(21, 0, 365, 1),
    poli_ident="Consult",
):
    """Print the model's positive-class probability for one hand-built row.

    Every parameter becomes a column of a single-row DataFrame with the same
    name, which is passed to ``model.predict_proba``. The defaults are the
    widgets (or initial text values) that ``interact`` uses to collect input.
    """
    feature_values = {
        "hour": hour,
        "weekday": weekday,
        "specialty_code": specialty_code,
        "minutesDuration": minutesDuration,
        "prev_no_show": prev_no_show,
        "prev_no_show_perc": prev_no_show_perc,
        "age": age,
        "dist_umcu": dist_umcu,
        "prev_minutes_early": prev_minutes_early,
        "earlier_appointments": earlier_appointments,
        "appointments_same_day": appointments_same_day,
        "appointments_last_days": appointments_last_days,
        "days_since_created": days_since_created,
        "days_since_last_appointment": days_since_last_appointment,
        "poli_ident": poli_ident,
    }
    # A dict of scalars needs an explicit index to become a one-row frame.
    prediction_df = pd.DataFrame(feature_values, index=[0])

    # Column 1 of predict_proba; printed as a one-element array.
    positive_proba = model.predict_proba(prediction_df)[:, 1]
    print(f"Predicted value is: {positive_proba}")