# Evaluate no-show percentages during pilot

This notebook evaluates the no-show pilot by comparing the no-show percentage of appointments inside and outside the pilot period.

In [None]:
from noshow.preprocessing.load_data import (
    load_appointment_csv,
    process_appointments,
    process_postal_codes,
)
from noshow.features.feature_pipeline import create_features, select_feature_columns
from noshow.model.predict import create_prediction
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import seaborn as sns

In [None]:
# Load the raw pilot export, run the appointment preprocessing and sort the
# index; the label-based range selection below is done on the sorted index.
appointments_df = process_appointments(
    load_appointment_csv("../data/raw/poliafspraken_pilot.csv")
).sort_index()

# Every appointment starts out labelled as outside the pilot. Rows whose second
# index level (presumably `start` -- confirm) lies in the pilot window are then
# relabelled. Label-based slices include both endpoints.
pilot_window = pd.IndexSlice[:, "2023-10-05":"2023-11-01"]
appointments_df["pilot"] = "Geen pilot"
appointments_df.loc[pilot_window, "pilot"] = "pilot"

# Show (rows, columns) as the cell output.
appointments_df.shape

## No-show percentage per month

In [None]:
# Tag each appointment with the year-month period of its "start" index level.
start_times = appointments_df.index.get_level_values("start")
appointments_df["month"] = start_times.to_period("M")

# Relative frequency of each `no_show` value per month, one column per value;
# only the "no_show" column is drawn as a bar chart.
monthly_shares = (
    appointments_df.groupby("month")["no_show"]
    .value_counts(normalize=True)
    .unstack()
)
monthly_shares["no_show"].plot.bar(figsize=(20, 6))

### Per clinic

In [None]:
# One bar chart per distinct "hoofdagenda" value: the share of "no_show"
# appointments per value of the "month" column, restricted to that agenda.
for clinic in appointments_df["hoofdagenda"].unique():
    is_clinic = appointments_df["hoofdagenda"] == clinic
    clinic_shares = (
        appointments_df[is_clinic]
        .groupby("month")["no_show"]
        .value_counts(normalize=True)
        .unstack()
    )
    clinic_shares["no_show"].plot.bar(figsize=(20, 6), title=clinic)
    # Render now so every agenda gets its own figure.
    plt.show()

## No-show percentage aggregated by calendar month

In [None]:
# Reassign "month" to the calendar month number of the "start" index level, so
# the same month of different years ends up in one group.
start_times = appointments_df.index.get_level_values("start")
appointments_df["month"] = start_times.month

# Relative frequency of each `no_show` value per (month, pilot) group. Moving
# `no_show` and `pilot` to the columns and selecting "no_show" leaves one
# column per pilot label, drawn as side-by-side bars per month.
pooled_shares = (
    appointments_df.groupby(["month", "pilot"])["no_show"]
    .value_counts(normalize=True)
    .unstack(["no_show", "pilot"])
)
pooled_shares["no_show"].plot.bar(figsize=(15, 6))

### Per clinic

In [None]:
# One bar chart per distinct "hoofdagenda" value: the share of "no_show"
# appointments per value of the "month" column, split by pilot label.
for clinic in appointments_df["hoofdagenda"].unique():
    is_clinic = appointments_df["hoofdagenda"] == clinic
    clinic_shares = (
        appointments_df[is_clinic]
        .groupby(["month", "pilot"])["no_show"]
        .value_counts(normalize=True)
        .unstack(["no_show", "pilot"])
    )
    clinic_shares["no_show"].plot.bar(figsize=(15, 6), title=clinic)
    # Render now so every agenda gets its own figure.
    plt.show()

## No-show percentage per prediction bin

In [None]:
# Postal-code reference data, passed to the prediction step below.
all_postalcodes = process_postal_codes("../data/raw/NL.txt")

# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
with open("../output/models/no_show_model_cv.pickle", "rb") as model_file:
    model = pickle.load(model_file)

# Predict for all loaded appointments using the unpickled model.
predictions_df = create_prediction(model, appointments_df, all_postalcodes)

In [None]:
# Keep only predictions whose "start" falls in October.
# Per the original note, the control group is October 2015 - 2022.
# NOTE(review): the filter looks at the month only, so October of every year
# present in the data is kept, and pilot-labelled rows outside October (the
# pilot window runs up to 2023-11-01) are dropped -- confirm this is intended.
in_october = predictions_df.index.get_level_values("start").month == 10
predictions_df = predictions_df.loc[in_october]

In [None]:
# Keep only appointments that still have a prediction (inner join on the index).
total_appointments = appointments_df.join(predictions_df, how="inner")

# Bucket the predictions into risk categories. `pd.cut` builds right-closed
# intervals by default, i.e. (0, 0.05], (0.05, 0.1], ..., (0.25, 1]; a value of
# exactly 0 falls outside every interval and becomes NaN.
risk_bin_edges = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 1]
total_appointments["predict_bin"] = pd.cut(
    total_appointments["prediction"],
    bins=risk_bin_edges,
)

In [None]:
# Relative frequency of each `no_show` value per (pilot, predict_bin) group.
# After moving `no_show` and `pilot` to the columns, the "no_show" selection
# holds one column per pilot label, plotted per prediction bin.
bin_shares = (
    total_appointments.groupby(["pilot", "predict_bin"])["no_show"]
    .value_counts(normalize=True)
    .unstack(["no_show", "pilot"])
)
bin_shares["no_show"].plot.bar()

In [None]:
# Numeric indicator: 1 where `no_show` equals "no_show", 0 everywhere else.
is_no_show = total_appointments["no_show"] == "no_show"
total_appointments["noshow_num"] = 0
total_appointments.loc[is_no_show, "noshow_num"] = 1

# Mean, standard deviation and group size of the indicator per
# (pilot, predict_bin) group; the mean is the no-show fraction of the group.
indicator_by_group = total_appointments.groupby(["pilot", "predict_bin"])["noshow_num"]
total_appointments_plot = indicator_by_group.agg(["mean", "std", "size"])

# Show the summary table as the cell output.
total_appointments_plot

In [None]:
# Grouped bar chart of the no-show indicator per prediction bin, one bar colour
# per pilot label. Bar height is seaborn's default aggregate of `noshow_num`
# (the mean, i.e. a fraction between 0 and 1).
fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(
    data=total_appointments,
    x="predict_bin",
    y="noshow_num",
    hue="pilot",
    ax=ax,
)
ax.set_title("No-Show percentage per risico-categorie")
ax.set_xlabel("Risico-categorieen")
ax.set_ylabel("Percentage No-Show")
plt.show()