### Import dependencies

In [None]:
import pickle
from datetime import date, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import relplot as rp
from sklearn.calibration import calibration_curve
from sklearn.metrics import (
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sqlalchemy import Date, cast, select

from noshow.config import CLINIC_CONFIG
from noshow.dashboard.connection import init_session
from noshow.database.export import export_data
from noshow.database.models import ApiCallResponse, ApiPatient, ApiPrediction
from noshow.features.feature_pipeline import create_features, select_feature_columns
from noshow.preprocessing.load_data import (
    load_appointment_csv,
    process_appointments,
    process_postal_codes,
)

### Extract patients who are not called
Query `ApiPrediction` joined with `ApiCallResponse`, and keep the patients who had an appointment prediction in the past 365 days but were not called (no call response, or call status "Niet gebeld").

In [None]:
# init_session() returns a callable; calling it yields a session that is used
# as a context manager below.
session_object = init_session()

# Only predictions whose start date lies within the past 365 days.
one_year_ago = date.today() - timedelta(days=365)

# "Not called" means: no call response row at all (NULL after the outer join),
# or a call response whose status is "Niet gebeld" (Dutch for "not called").
not_called = ApiCallResponse.call_status.is_(None) | (
    ApiCallResponse.call_status == "Niet gebeld"
)

uncalled_predictions_query = (
    select(
        ApiPrediction.start_time,
        ApiPrediction.prediction,
        ApiPrediction.id,
        ApiPrediction.patient_id,
        ApiPrediction.appointment_id,
        ApiCallResponse.call_status,
    )
    .outerjoin(ApiPrediction.callresponse_relation)
    .outerjoin(ApiPrediction.patient_relation)
    .where(not_called)
    .where(cast(ApiPrediction.start_time, Date) >= one_year_ago)
)

with session_object() as session:
    # .all() materialises every row before the session is closed.
    predicted_patients = session.execute(uncalled_predictions_query).all()

In [None]:
# Put the query rows in a dataframe. The column names are listed in the same
# order as the columns of the select() that produced predicted_patients.
prediction_columns = [
    "start_time",
    "prediction",
    "id",
    "patient_id",
    "appointment_id",
    "call_status",
]
df = pd.DataFrame(predicted_patients, columns=prediction_columns)

# Preview the first 100 rows as the cell output.
df.head(100)

### Load SQL data

In [None]:
# Optional: uncomment the call below to make a new export, saved to poliafspraken_no_show.csv
# export_data()

In [None]:
# Read the appointment export from CSV, then process it using CLINIC_CONFIG.
appointments_raw = load_appointment_csv("../data/raw/poliafspraken_no_show.csv")
appointments_df = process_appointments(appointments_raw, CLINIC_CONFIG)

# Preview the first 100 processed appointments as the cell output.
appointments_df.head(100)

In [None]:
# Keep only appointments whose "end" lies between one year ago and today.
# today is read once so both bounds are derived from the same date.
# NOTE(review): the bounds are bare dates, so if "end" holds timestamps,
# appointments ending later today are presumably excluded -- confirm intended.
today = date.today()
window_start = (today - timedelta(days=365)).strftime("%Y-%m-%d")
window_end = today.strftime("%Y-%m-%d")

# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
appointments_df = appointments_df[
    appointments_df["end"].between(window_start, window_end)
].copy()

# Compare the ids as strings on both sides so the isin() match does not depend
# on the dtypes of the two sources.
df["appointment_id"] = df["appointment_id"].astype(str)
appointments_df["APP_ID"] = appointments_df["APP_ID"].astype(str)

# Keep only appointments that have a prediction in df (the not-called
# selection); copy again so later cells can add columns without warnings.
appointments_df = appointments_df[
    appointments_df["APP_ID"].isin(df["appointment_id"])
].copy()

appointments_df.head(100)

In [None]:
# Set to True to write one parquet file per month; with the default (False)
# the features are only computed, matching the previously disabled save.
SAVE_MONTHLY_FEATURES = False

# assign() returns a new frame, so this works without SettingWithCopyWarning
# even when appointments_df is a filtered slice. The second lambda sees the
# already converted "end" column.
appointments_df = appointments_df.assign(
    end=lambda frame: pd.to_datetime(frame["end"]),
    year_month=lambda frame: frame["end"].dt.to_period("M"),
)
grouped = appointments_df.groupby("year_month")

# The postal code table does not depend on the month: load it once instead of
# re-reading the file on every iteration.
all_postalcodes = process_postal_codes("../data/raw/NL.txt")

for period, df_month in grouped:
    df_month = df_month.reset_index(drop=True)

    # Build the feature table for this month only
    appointments_features = create_features(df_month, all_postalcodes).pipe(
        select_feature_columns
    )

    # Define filename using the period (e.g., '2025-04')
    filename = f"appointments_features_{period}.parquet"

    # Only report "Saved" when a file was actually written.
    if SAVE_MONTHLY_FEATURES:
        appointments_features.to_parquet(filename, index=False)
        print(f"Saved: {filename}")
    else:
        print(f"Computed features for {period} (not saved)")


In [None]:
# Build the feature table for all appointments in appointments_df and store it
# as the monitoring feature table.
all_postalcodes = process_postal_codes("../data/raw/NL.txt")
all_features = create_features(appointments_df, all_postalcodes)
appointments_features = select_feature_columns(all_features)
appointments_features.to_parquet("../data/processed/featuretable_monitoring.parquet")

In [None]:
# Inspection only: shows the object type of appointments_df as the cell output.
type(appointments_df)


In [None]:
# NOTE: pickle.load can execute arbitrary code from the file; only load model
# files that come from a trusted source.
model_path = "../output/models/no_show_model_cv.pickle"
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)

# Read back the monitoring feature table from parquet.
featuretable = pd.read_parquet("../data/processed/featuretable_monitoring.parquet")

# Preview the first 100 rows as the cell output.
featuretable.head(100)

In [None]:
# Encode the outcome as an integer: "no_show" -> 1, "show" -> 0.
label_encoding = {"no_show": "1", "show": "0"}
featuretable["no_show"] = (
    featuretable["no_show"].replace(label_encoding).astype(int)
)

# Target is the no_show column; every other column is passed to the model.
y = featuretable["no_show"]
X = featuretable.drop(columns="no_show")

# shuffle=False keeps the row order, so the test set is the last 20% of rows.
# NOTE(review): the model is loaded from disk and not refit here, so only the
# last 20% of the monitoring data is scored -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    shuffle=False,
)

# Class probabilities for the test rows, one column per class.
y_pred = model.predict_proba(X_test)

In [None]:
# ROC curve and AUC of the model on the test rows.
# Column 1 of predict_proba is used as the score -- presumably the no-show
# class (label 1); confirm against model.classes_.
roc_scores = y_pred[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, roc_scores)
auc_score = roc_auc_score(y_test, roc_scores)

fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(fpr, tpr, label=f"Hist Gradient Boosting (AUC={round(auc_score, 2)})")
# Diagonal reference line: the expected curve of a random classifier.
ax.plot([0, 1], [0, 1], label="Random (AUC=0.5)", linestyle="dotted")
# Label the axes and add a title so the figure can be read on its own.
ax.set(
    xlabel="False positive rate",
    ylabel="True positive rate",
    title="ROC curve",
)
ax.legend()
plt.show()

In [None]:
# Precision and recall as a function of the decision threshold.
precision, recall, thresholds = precision_recall_curve(y_test, y_pred[:, 1])

fig, ax = plt.subplots(figsize=(8, 8))
# precision and recall have one more element than thresholds (the final point
# has no threshold), so the last element is dropped to align the arrays.
ax.plot(thresholds, precision[:-1], label="precision")
ax.plot(thresholds, recall[:-1], label="recall")
# Label the axes and add a title so the figure can be read on its own.
ax.set(
    xlabel="Decision threshold",
    ylabel="Score",
    title="Precision and recall per threshold",
)
ax.legend()
plt.show()

**TODO:** Calculate the AUC for each month and plot the trend of the monthly AUC values in a curve.