# Notebook to analyse the results from the implementation of the No-Show project

Mainly focused on the information stored in the database.

In [None]:
import json
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from noshow.api.app_helpers import apply_bins

In [None]:
# Start from matplotlib's default settings before applying the tweaks below.
mpl.rcdefaults()

# Hide the top and right spines and keep the axis below the plotted artists.
mpl.rcParams.update(
    {
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.axisbelow": True,
    }
)

## Connect to Database

In [None]:
# Load the variables defined in the .env file one directory up.
load_dotenv("../.env")

# Database connection settings; a missing variable raises KeyError right away.
db_user, db_passwd, db_host, db_port, db_database = (
    os.environ[variable]
    for variable in ("DB_USER", "DB_PASSWD", "DB_HOST", "DB_PORT", "DB_DATABASE")
)

# Bounds of the analysed period, given as ISO date strings.
PERIOD_START, PERIOD_END = "2024-06-20", "2024-07-08"

In [None]:
from urllib.parse import quote

# Percent-encode the credentials before embedding them in the URL: characters
# such as "@", ":", "/" or "%" in a user name or password would otherwise be
# interpreted as URL delimiters and break (or silently alter) the connection.
CONNECTSTRING = (
    f"mssql+pymssql://{quote(db_user, safe='')}:{quote(db_passwd, safe='')}"
    f"@{db_host}:{db_port}/{db_database}"
)
engine = create_engine(CONNECTSTRING)
# Session factory bound to the engine created above.
session_object = sessionmaker(bind=engine)

In [None]:
# Read the four tables of the "noshow" schema into DataFrames, in this order.
call_response, prediction, requests, patients = (
    pd.read_sql_table(table_name, engine, schema="noshow")
    for table_name in ("apicallresponse", "apiprediction", "apirequest", "apipatient")
)

## Preprocess data

In [None]:
# Combine the tables: call responses are matched to predictions on
# prediction_id, patients are matched on the prediction's patient_id.
prediction_call = (
    prediction.set_index("id")
    .join(call_response.set_index("prediction_id"))
    .set_index("patient_id")
    .join(patients.set_index("id"))
)

# Keep rows with PERIOD_START <= start_time < PERIOD_END. The filter is applied
# once and the result is copied explicitly, so the column assignment below
# writes to an independent frame instead of a slice of the joined frame
# (this avoids pandas' SettingWithCopyWarning).
in_period = (prediction_call["start_time"] >= PERIOD_START) & (
    prediction_call["start_time"] < PERIOD_END
)
prediction_call = prediction_call.loc[in_period].copy()

# Reduce start_time to a calendar date so that grouping happens per day.
prediction_call["start_time"] = prediction_call["start_time"].dt.date
result_per_day = prediction_call.groupby(
    ["start_time", "clinic_name", "call_outcome"]
).size()

## Analyze data

In [None]:
# Call outcomes per clinic for rows with treatment_group == 1; rows with a
# missing clinic or outcome are kept (dropna=False).
treated_calls = prediction_call.loc[prediction_call["treatment_group"] == 1]
clinic_outcome_counts = (
    treated_calls.groupby(["clinic_name", "call_outcome"], dropna=False)
    .size()
    .unstack()
)

fig, ax = plt.subplots(figsize=(10, 5))
clinic_outcome_counts.plot.bar(stacked=True, ax=ax)
ax.grid(axis="y")
fig.suptitle("Uitkomst van bellen huidige status per kliniek")
fig.show()

In [None]:
# Call outcomes per treatment_group value; rows with missing values are kept
# (dropna=False).
group_outcome_counts = (
    prediction_call.groupby(["treatment_group", "call_outcome"], dropna=False)
    .size()
    .unstack()
)

fig, ax = plt.subplots(figsize=(10, 5))
group_outcome_counts.plot.bar(stacked=True, ax=ax)
ax.grid(axis="y")
fig.suptitle("Vergelijking control en treatment groep")
fig.show()

In [None]:
# Load the bin definition that is handed to apply_bins below.
with open("../data/processed/fixed_pred_score_bin.json", "r") as f:
    fixed_pred_score_bin = json.load(f)

# Run apply_bins separately for every clinic. The result is expected to carry
# a "score_bin" column, which the plots below group on.
# NOTE(review): the exact behaviour of apply_bins is defined in
# noshow.api.app_helpers - confirm there.
predictions_with_bins = (
    prediction_call.groupby("clinic_name")
    .apply(apply_bins, bin_dict=fixed_pred_score_bin, include_groups=False)
    .reset_index()
)

# Bug fix: this used to read `fix, ax = ...`, which left `fig` pointing at the
# figure of the previous cell, so fig.show() below displayed the wrong figure.
fig, ax = plt.subplots(figsize=(10, 5))
predictions_with_bins.groupby(
    ["treatment_group", "score_bin"]
).size().unstack().plot.bar(stacked=True, ax=ax)
ax.set_title("Vergelijking control en treatment groep per score bin")
ax.grid(axis="y")
fig.show()

In [None]:
# Call outcomes per score bin for rows with treatment_group == 1; rows with a
# missing bin or outcome are kept (dropna=False).
treated_with_bins = predictions_with_bins.loc[
    predictions_with_bins["treatment_group"] == 1
]
bin_outcome_counts = (
    treated_with_bins.groupby(["score_bin", "call_outcome"], dropna=False)
    .size()
    .unstack()
)

fig, ax = plt.subplots(figsize=(10, 5))
bin_outcome_counts.plot.bar(stacked=True, ax=ax)
ax.set_title("Uitkomst van bellen huidige status per score bin")
ax.grid(axis="y")
fig.show()

In [None]:
# Total number of rows per call outcome, summed over all days and clinics.
outcome_totals = result_per_day.groupby("call_outcome").sum()

plt.figure(figsize=(10, 5))
# The Series plot draws on the current axes, i.e. the figure created above.
totals_ax = outcome_totals.plot.bar()
totals_ax.set_title("Uitkomst Implementatie")
totals_ax.set_xlabel("")
totals_ax.grid(axis="y")
plt.show()

In [None]:
rct_groups = prediction_call.groupby("treatment_group")

# One histogram of the prediction values per treatment_group value, stacked
# vertically with shared axes so the distributions can be compared directly.
# Only two subplots are created, so at most two groups can be drawn.
fig, ax = plt.subplots(2, figsize=(10, 7), sharex=True, sharey=True)
# The loop index is named `row` rather than `id`: binding `id` here would
# shadow the builtin id() for the rest of the notebook session.
for row, (group, data) in enumerate(rct_groups):
    ax[row].hist(data["prediction"], bins=50, alpha=0.5, label=f"Group {group}")
    ax[row].set_title(f"{'treatment' if group == 1 else 'control'} group")
    ax[row].grid(axis="y")

fig.suptitle("Histogram van voorspellingen per groep")
fig.set_layout_engine("tight")
fig.show()

In [None]:
# Number of rows in the filtered prediction/call data per clinic.
prediction_call.groupby("clinic_name").size()

In [None]:
# Number of rows per clinic and call outcome. groupby drops missing keys by
# default, so rows without a clinic or an outcome are not counted here.
prediction_call.groupby(["clinic_name", "call_outcome"]).size()

In [None]:
# Frequency of every call_status value, most frequent first.
prediction_call.value_counts("call_status")

In [None]:
# Distinct values of the remarks column, returned as an array.
prediction_call["remarks"].drop_duplicates().values

In [None]:
# Attach the request record to every row by matching request_id against the
# id of the requests table.
requests_by_id = requests.set_index("id")
prediction_call_request = prediction_call.set_index("request_id").join(
    requests_by_id
)

prediction_call_request

In [None]:
# Reduce the timestamp column to a calendar date and count the rows per day.
call_days = prediction_call_request["timestamp"].dt.date
prediction_call_request["day_called"] = call_days
prediction_call_request.groupby("day_called").size()

## Analyse text

In [None]:
# Distinct, non-missing remarks that contain none of the fragments below.
# The fragments omit their first letter, presumably so that capitalised and
# lower-case spellings both match - confirm against the actual remarks.
excluded_fragments = ("oicemail", "oorverbin", "eschikbaar")
text_contents = [
    remark
    for remark in prediction_call["remarks"].drop_duplicates().dropna().to_list()
    if not any(fragment in remark for fragment in excluded_fragments)
]
text_contents

## Analyse when to call

In [None]:
# Count the call outcomes per hour of the call_response timestamp: one row per
# hour, one column per outcome.
call_response["hour_called"] = call_response["timestamp"].dt.hour
hourly_outcome = (
    call_response.groupby(["hour_called", "call_outcome"]).size().unstack()
)

# Absolute numbers per hour.
fig, ax = plt.subplots(figsize=(10, 5))
hourly_outcome.plot.bar(stacked=True, ax=ax)
ax.set_title("Uitkomst van bellen huidige status per uur")
ax.set_ylabel("Aantal")
ax.set_xlabel("Uur")
ax.legend(title="Uitkomst")
fig.show()

# Share of every outcome within its hour, drawn as bars of equal height.
# The shares are scaled to 0-100 so the values match the "Percentage" axis
# label; previously the axis showed fractions between 0 and 1.
hourly_percentage = hourly_outcome.div(hourly_outcome.sum(axis=1), axis=0).mul(100)

fig, ax = plt.subplots(figsize=(10, 5))
hourly_percentage.plot.bar(stacked=True, ax=ax)
ax.set_title("Percentage uitkomst van bellen huidige status per uur")
ax.set_ylabel("Percentage")
ax.set_xlabel("Uur")
ax.legend(title="Uitkomst")
fig.show()