# Performance monitoring
This notebook can be used to monitor the performance of the No-Show model over time. Performance is calculated over the patient group who were not called, but did have a prediction. 

### Import dependencies

In [None]:
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import relplot as rp
import seaborn as sns
from sklearn.metrics import (
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sqlalchemy import select

from noshow.config import CLINIC_CONFIG
from noshow.database.connection import get_engine
from noshow.database.models import ApiCallResponse, ApiPrediction
from noshow.features.feature_pipeline import create_features
from noshow.preprocessing.load_data import (
    load_appointment_csv,
    process_appointments,
    process_postal_codes,
)

### *Reminder: change the .env file to select the production database*

## Data exploration

### Extract patients who are not called
Extract from ApiPrediction and ApiCallResponse. Select patients who had a prediction for an appointment, but were not called.

In [None]:
# Prediction fields plus the call status of the (optional) call response.
selected_columns = [
    ApiPrediction.start_time,
    ApiPrediction.prediction,
    ApiPrediction.id,
    ApiPrediction.patient_id,
    ApiPrediction.appointment_id,
    ApiCallResponse.call_status,
]

# Outer joins keep predictions that have no matching call response row,
# so their call_status comes back as missing.
query = select(*selected_columns)
query = query.outerjoin(ApiPrediction.callresponse_relation)
query = query.outerjoin(ApiPrediction.patient_relation)

engine = get_engine()
predictions = pd.read_sql(query, engine)

predictions.head()

### Percentage and count of not-called patients

In [None]:
# Monthly share and count of predictions whose patient was not called.
# Only completed months are considered; the running month is excluded.
predictions["month"] = predictions["start_time"].dt.to_period("M")
current_month = pd.Timestamp("today").to_period("M")
predictions_filtered = predictions[predictions["month"] < current_month]

# "Not called" = no call response at all, or an explicit "Niet gebeld" status.
not_called_mask = predictions_filtered["call_status"].isna() | (
    predictions_filtered["call_status"] == "Niet gebeld"
)

monthly_total = predictions_filtered["month"].value_counts().sort_index()

# Reindex on every month that has predictions: a month in which everybody was
# called would otherwise be missing here, which yields NaN percentages and an
# x/y length mismatch in the count plot below.
monthly_not_called = (
    predictions_filtered.loc[not_called_mask, "month"]
    .value_counts()
    .reindex(monthly_total.index, fill_value=0)
)

# Name the index explicitly so the "month" column exists after reset_index
# regardless of how value_counts labels its index.
monthly_percent = (
    (monthly_not_called / monthly_total * 100)
    .rename("percent_none_or_niet_gebeld")
    .rename_axis("month")
    .reset_index()
)

months = monthly_percent["month"].astype(str)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

ax1.bar(
    months,
    monthly_percent["percent_none_or_niet_gebeld"],
    color="salmon",
)
ax1.set_title("Percentage of not-called patients", fontsize=12)
ax1.set_ylabel("Percentage", fontsize=10)
ax1.set_ylim(0, 100)
ax1.grid(axis="y", linestyle="--", alpha=0.7)
ax1.tick_params(axis="both", labelsize=8)

ax2.plot(months, monthly_not_called.values, color="gray", marker="o")
ax2.set_title("Count of not-called patients", fontsize=12)
ax2.set_ylabel("Count", fontsize=10)
ax2.set_xlabel("Month", fontsize=10)
ax2.grid(axis="y", linestyle="--", alpha=0.7)
ax2.tick_params(axis="x", rotation=45, labelsize=8)
ax2.tick_params(axis="y", labelsize=8)

plt.tight_layout()
plt.show()


### Prediction distribution

In [None]:
# Density of predicted values per month for not-called patients, limited to
# the 12 most recent months; the latest month is highlighted in black.
df_not_called = predictions_filtered[
    predictions_filtered["call_status"].isnull()
    | (predictions_filtered["call_status"] == "Niet gebeld")
].copy()

# Use a dedicated name instead of rebinding `predictions`: the earlier cells
# still need the original query DataFrame when they are re-run.
monthly_predictions = df_not_called.groupby("month")["prediction"].apply(list)
recent_months = sorted(monthly_predictions.index)[-12:]
monthly_predictions = monthly_predictions.loc[recent_months]
most_recent_month = recent_months[-1]

plt.figure(figsize=(10, 5))
# One palette colour per month except the latest one, which is drawn in black.
palette = sns.color_palette("tab10", n_colors=len(monthly_predictions) - 1)
palette_iter = iter(palette)

for month, preds in monthly_predictions.items():
    is_latest = month == most_recent_month
    sns.kdeplot(
        preds,
        fill=True,
        alpha=0.2 if is_latest else 0.05,
        bw_adjust=0.3,
        # The palette is only consumed for the non-highlighted months.
        color="black" if is_latest else next(palette_iter),
        label=str(month),
        linewidth=2.5 if is_latest else 1.5,
    )

plt.title("Prediction Distributions by Month (Last 12 Months)", fontsize=12)
plt.xlabel("Prediction Value", fontsize=10)
plt.ylabel("Density", fontsize=10)
legend = plt.legend(title="Month", fontsize=8)
legend.set_title("Month", prop={"size": 9})
plt.grid(axis="y", linestyle="--", alpha=0.6)
plt.tick_params(axis="x", labelsize=8)
plt.tick_params(axis="y", labelsize=8)
plt.tight_layout()
plt.show()


## Results performance monitoring

### Load SQL data

In [None]:
# Optional: make a new export and save it to poliafspraken_no_show.csv
# (uncomment the two lines below to run it).
# from noshow.database.export import export_data
# export_data()

In [None]:
# Read the exported appointments and run the clinic-specific processing step.
appointments_df = process_appointments(
    load_appointment_csv("../data/raw/poliafspraken_no_show.csv"),
    CLINIC_CONFIG,
)
appointments_df.head()

### ROC and AUC scores

In [None]:
# Build the feature table from the processed appointments and postal codes.
all_postalcodes = process_postal_codes("../data/raw/NL.txt")
featuretable = create_features(appointments_df, all_postalcodes)

n_featuretable_rows = len(featuretable)
print(f"Number of total patients in featuretable: {n_featuretable_rows}")

In [None]:
# Keep only the feature rows that belong to appointments of not-called patients.
df_not_called["appointment_id"] = df_not_called["appointment_id"].astype(int)
featuretable_filtered = featuretable[
    featuretable["APP_ID"].isin(df_not_called["appointment_id"])
]
# Report the size of the filtered table (not of the full feature table).
print(
    "Number of patients in featuretable who are not called: "
    f"{len(featuretable_filtered)}"
)


In [None]:
# Columns kept for evaluation: the `no_show` target plus the feature columns.
evaluation_columns = [
    "hoofdagenda",
    "hour",
    "weekday",
    "minutesDuration",
    "no_show",
    "prev_no_show",
    "prev_no_show_perc",
    "age",
    "dist_umcu",
    "prev_minutes_early",
    "earlier_appointments",
    "appointments_same_day",
    "appointments_last_days",
    "days_since_created",
    "days_since_last_appointment",
]

featuretable_filtered = featuretable_filtered[evaluation_columns]
# Move the existing index into columns, then index on patient, start and agenda.
featuretable_filtered = featuretable_filtered.reset_index()
featuretable_filtered = featuretable_filtered.set_index(
    ["pseudo_id", "start", "hoofdagenda"]
)

In [None]:
# Encode the target as integers: "no_show" -> 1, "show" -> 0.
outcome_codes = {"no_show": "1", "show": "0"}
no_show_encoded = featuretable_filtered["no_show"].replace(outcome_codes)
featuretable_filtered["no_show"] = no_show_encoded.astype(int)

# Hour and weekday are cast to categorical dtype.
for categorical_column in ("hour", "weekday"):
    featuretable_filtered[categorical_column] = featuretable_filtered[
        categorical_column
    ].astype("category")

print(featuretable_filtered.dtypes)

In [None]:
# Monthly ROC curves and AUC trend of the stored model on not-called patients.
# Fills `roc_data` with one entry per evaluated month; the calibration cells
# further down read the "month", "y_pred" and "y_true" keys of each entry.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6), sharex=False)

roc_data = []
current_month = pd.Timestamp.now().to_period("M")
# Months up to and including this one are left out of the monitoring.
last_excluded_month = pd.Period("2024-12", freq="M")

# NOTE: unpickling can execute arbitrary code; only load model files that come
# from a trusted location.
with open("../output/models/no_show_model_cv.pickle", "rb") as f:
    model = pickle.load(f)

for month, month_data in featuretable_filtered.groupby(
    pd.Grouper(level="start", freq="ME")
):
    month_label = month.strftime("%Y-%m")
    month_period = month.to_period("M")
    print(f"Processing data for {month_label}")
    print(f"Shape: {month_data.shape}")

    if month_period <= last_excluded_month:
        continue

    if month_period >= current_month:
        print(f"Skipping current month: {month_label}")
        continue

    # Grouping by calendar month can yield empty (or near-empty) bins, which
    # cannot be split into a train and a test part.
    if len(month_data) < 2:
        print(f"Skipping {month_label}: too few appointments to evaluate")
        continue

    X, y = month_data.drop(columns="no_show"), month_data["no_show"]
    # NOTE(review): the model is loaded already fitted and the train part is
    # only used for the shape print below, so just 20% of each month is
    # scored. Confirm whether the full month should be evaluated instead.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0
    )

    print(f"Training set shape: {X_train.shape}")
    print(f"Test set shape: {X_test.shape}")

    # ROC/AUC is undefined when the evaluated sample contains a single class.
    if y_test.nunique() < 2:
        print(f"Skipping {month_label}: only one outcome class in test set")
        continue

    # Second predict_proba column is used as the no-show score.
    y_score = model.predict_proba(X_test)[:, 1]

    fpr, tpr, _ = roc_curve(y_test, y_score)
    auc_score = roc_auc_score(y_test, y_score)

    roc_data.append(
        {
            "month": month_label,
            "fpr": fpr,
            "tpr": tpr,
            "auc": auc_score,
            "y_pred": y_score,
            "y_true": y_test,
            "n_samples": len(month_data),
        }
    )

    print(f"AUC for {month_label}: {auc_score:.3f}")


for entry in roc_data:
    ax1.plot(
        entry["fpr"],
        entry["tpr"],
        label=f"{entry['month']} (AUC = {entry['auc']:.2f})",
        alpha=1,
        linewidth=1.5,
    )

ax1.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Random Classifier")
ax1.set_title("ROC Curves by Month", fontsize=12)
ax1.set_xlabel("False Positive Rate", fontsize=10)
ax1.set_ylabel("True Positive Rate", fontsize=10)
ax1.grid(True, linestyle="--", alpha=0.7)
ax1.legend(loc="lower right", fontsize=8)
ax1.tick_params(axis="both", labelsize=8)

# Month labels are already "YYYY-MM" strings.
months = [entry["month"] for entry in roc_data]
auc_scores = [entry["auc"] for entry in roc_data]

ax2.plot(months, auc_scores, marker="o", linestyle="-", color="blue")
ax2.set_title("AUC Score Trend Over Time", fontsize=12)
ax2.set_xlabel("Month", fontsize=10)
ax2.set_ylabel("AUC Score", fontsize=10)
ax2.set_ylim(0.0, 1.0)
ax2.grid(True, axis="y", linestyle="--", alpha=0.7)
ax2.tick_params(axis="x", rotation=45, labelsize=8)
ax2.tick_params(axis="y", labelsize=8)

fig.set_layout_engine("tight")
# plt.show() matches the other cells; fig.show() warns on non-GUI backends.
plt.show()

print(f"\nProcessed {len(roc_data)} months")
if auc_scores:
    print(f"Mean AUC: {sum(auc_scores) / len(auc_scores):.3f}")
    print(f"Min AUC: {min(auc_scores):.3f}")
    print(f"Max AUC: {max(auc_scores):.3f}")
else:
    # Avoid a division by zero / empty min-max when nothing was evaluated.
    print("No months could be evaluated; AUC summary not available")

### Reliability diagram and calibration errors

In [None]:
# Create reliability diagram for the last month
roc_data_sorted = sorted(roc_data, key=lambda x: x["month"])
last_month_entry = roc_data_sorted[-1]
print(f"Showing reliability diagram for month: {last_month_entry['month']}")

fig, ax = rp.rel_diagram(last_month_entry["y_pred"], last_month_entry["y_true"])
fig.set_size_inches(7, 6)
fig.suptitle(f"Reliability Diagram - {last_month_entry['month']}", fontsize=12)
ax.set_xlabel("Mean Predicted Probability", fontsize=11)
ax.set_ylabel("Fraction of Positives", fontsize=11)
ax.tick_params(axis="both", labelsize=9)
ax.legend(fontsize=9, loc="lower right")
for txt in ax.texts:
    txt.set_fontsize(12)
plt.tight_layout()
plt.show()

In [None]:
# Calibration error (smECE) per evaluated month, in chronological order.
months = [str(entry["month"]) for entry in roc_data_sorted]
calib_errors = [
    rp.smECE(entry["y_pred"], entry["y_true"]) for entry in roc_data_sorted
]

plt.figure(figsize=(10, 5))
plt.plot(months, calib_errors, marker="o", linestyle="-", color="darkorange")

plt.title("Calibration Error (smECE) Over Time", fontsize=12)
plt.xlabel("Month", fontsize=10)
plt.ylabel("Calibration Error (smECE)", fontsize=10)
plt.tick_params(axis="both", labelsize=9)

# Leave 10% headroom above the largest error.
upper_limit = max(calib_errors) * 1.1
plt.ylim(0, upper_limit)
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
