# Export and analyze RCT results

This notebook exports and analyzes the data collected during the randomized controlled trial (RCT) period.

## Initialize packages and defaults

In [None]:
import os
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

In [None]:
# Reset matplotlib to its built-in defaults before applying the notebook style
mpl.rcdefaults()

# Hide the top and right spines and enable the "axisbelow" setting
mpl.rcParams.update(
    {
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.axisbelow": True,
    }
)

# Switch pandas to copy-on-write mode
pd.set_option("mode.copy_on_write", True)

## Connect to DB and export data

In [None]:
# Read the .env file that lives one directory above the notebook
load_dotenv("../.env")

# Database connection settings taken from the environment.
# A missing variable raises KeyError here instead of failing later on connect.
db_user, db_passwd, db_host, db_port, db_database = (
    os.environ[variable]
    for variable in ("DB_USER", "DB_PASSWD", "DB_HOST", "DB_PORT", "DB_DATABASE")
)

In [None]:
# Build the SQLAlchemy URL for the SQL Server database (pymssql driver).
# The user name and password are percent-encoded: characters such as "@", ":",
# "/" or "%" would otherwise be read as URL delimiters and break the connection.
# safe="" makes sure "/" is encoded as well.
CONNECTSTRING = (
    f"mssql+pymssql://{quote(db_user, safe='')}:{quote(db_passwd, safe='')}"
    f"@{db_host}:{db_port}/{db_database}"
)
engine = create_engine(CONNECTSTRING)
# Session factory bound to the engine
session_object = sessionmaker(bind=engine)

In [None]:
# Load the four API tables from the "noshow" schema, one DataFrame per table
call_response, prediction, requests, patients = (
    pd.read_sql_table(table_name, con=engine, schema="noshow")
    for table_name in (
        "apicallresponse",
        "apiprediction",
        "apirequest",
        "apipatient",
    )
)

## Read and preprocess data

In [None]:
# Load the raw appointment export; "start" and "end" are parsed as datetimes
data_export = pd.read_csv(
    "../data/raw/poliafspraken_rct.csv",
    parse_dates=["start", "end"],
)
data_export = data_export.drop(
    columns=["specialty_code", "name", "soort_consult", "afspraak_code"]
)

# Derive the "outcome" label from the status codes. The "Show" rule runs last,
# so it wins for rows that satisfy both conditions.
data_export.loc[data_export["mutationReason_code"].eq("N"), "outcome"] = "No-Show"
data_export.loc[data_export["status_code_original"].eq("J"), "outcome"] = "Show"

# The code/status columns are no longer needed once "outcome" is set
data_export = data_export.drop(
    columns=[
        "mutationReason_code",
        "status_code_original",
        "status",
        "mutationReason_display",
    ]
)
data_export

In [None]:
# Start from the predictions, exposing their "id" as "prediction_id" so it can
# serve as the join key for the call responses
live_data_rct = prediction.rename(columns={"id": "prediction_id"})
live_data_rct = live_data_rct.merge(call_response, on="prediction_id", how="left")

# Attach the patient record; the duplicated "id" columns get _x/_y suffixes
live_data_rct = live_data_rct.merge(
    patients, left_on="patient_id", right_on="id", how="left"
)

# Remove join artefacts and columns the analysis does not use
live_data_rct = live_data_rct.drop(
    columns=[
        "id_y",
        "id_x",
        "clinic_phone_number",
        "clinic_teleq_unit",
        "call_number",
        "opt_out",
        "active",
        "clinic_reception",
        "request_id",
        "remarks",
    ]
)

live_data_rct = live_data_rct.astype({"prediction_id": "int64"})

# Keep every row whose treatment group is not 2.
# NOTE: a filter on active == 1 existed at this point but is disabled.
live_data_rct = live_data_rct.loc[live_data_rct["treatment_group"].ne(2)]

In [None]:
# Display the live RCT data for a visual check
live_data_rct

## Combine data export and live data

In [None]:
# Left-join the export onto the live data: every live prediction is kept, and
# export columns stay empty where no export row has a matching APP_ID
combined_data = pd.merge(
    live_data_rct,
    data_export,
    how="left",
    left_on="prediction_id",
    right_on="APP_ID",
)
combined_data

In [None]:
# Persist the combined data set as CSV, leaving out the row index
combined_data.to_csv(
    path_or_buf="../data/processed/data_rct.csv",
    index=False,
)

## Analyze No-Show

In [None]:
# Share of each final outcome among appointments with call outcome "Herinnerd",
# i.e. how many of those appointments were actually completed
combined_data[combined_data["call_outcome"].eq("Herinnerd")].value_counts(
    subset="outcome", normalize=True
)

In [None]:
# Share of each final outcome among appointments with call outcome "Geen"
combined_data[combined_data["call_outcome"].eq("Geen")].value_counts(
    subset="outcome", normalize=True
)

In [None]:
# Count how often each call outcome occurs
combined_data["call_outcome"].value_counts()

In [None]:
# How many appointments with call outcome "Verzet/Geannuleerd" were actually
# moved, and what was their final outcome?
# An appointment counts as moved when "start_time" differs from the "start"
# timestamp of the export. One comparison labels every row and produces a real
# boolean column, rather than an object column filled by two masked assignments.
# NOTE(review): rows where either timestamp is missing compare as "not equal"
# and are therefore counted as moved — confirm this is intended.
combined_data["app_moved"] = combined_data["start_time"] != combined_data["start"]
combined_data.loc[combined_data["call_outcome"] == "Verzet/Geannuleerd"].value_counts(
    ["app_moved", "outcome"], dropna=False
).unstack()

In [None]:
# Number of appointments per treatment group, split by call outcome.
# dropna=False keeps rows with a missing group or call outcome as their own category.
outcome_counts = (
    combined_data.groupby(["treatment_group", "call_outcome"], dropna=False)
    .size()
    .unstack()
)

fig, ax = plt.subplots(figsize=(10, 6))
ax = outcome_counts.plot.bar(stacked=True, ax=ax)
ax.set_ylabel("Aantal afspraken")
ax.set_xlabel("")
ax.set_title("Aantal afspraken in de RCT")
ax.legend(title="Uitkomst", loc="upper left")

# Name each bar after the group it actually represents instead of relying on
# bar position: an unexpected or missing group (possible because of
# dropna=False) then keeps its raw value as label rather than shifting the
# names or breaking on a label-count mismatch.
group_names = {0: "Controle", 1: "Interventie"}
ax.set_xticklabels(
    [group_names.get(group, str(group)) for group in outcome_counts.index]
)
fig.set_layout_engine("tight")
fig.show()

In [None]:
# Flag no-shows on every row, then keep only appointments with a known outcome
combined_data["no_show"] = combined_data["outcome"].eq("No-Show")
combined_data = combined_data.dropna(subset=["outcome"])

# Stacked bars: show vs. no-show counts per treatment group
(
    combined_data.groupby(["treatment_group", "no_show"])
    .size()
    .unstack()
    .plot.bar(stacked=True)
)

In [None]:
# No-show rate ("mean") and number of appointments ("count") per treatment group
no_show_rate = combined_data.groupby("treatment_group").agg(
    mean=("no_show", "mean"),
    count=("no_show", "count"),
)
no_show_rate

In [None]:
# Compare the no-show rate of the control group (treatment_group == 0, kept in
# full) with the remaining rows restricted to patients who were reached by
# phone, i.e. call outcome "Herinnerd" or "Verzet/Geannuleerd"
called_patients = combined_data.loc[
    combined_data["treatment_group"].eq(0)
    | combined_data["call_outcome"].isin(["Herinnerd", "Verzet/Geannuleerd"])
]
no_show_rate_called = (
    called_patients["no_show"]
    .groupby(called_patients["treatment_group"])
    .agg(["mean", "size"])
)
no_show_rate_called

In [None]:
# Cut the prediction scores into ten quantile-based bins
combined_data.loc[:, "score_bin"] = pd.qcut(combined_data["prediction"], q=10)

# Mean no-show rate per (treatment group, score bin) combination.
# NOTE: this rebinds no_show_rate, which an earlier cell uses for the
# per-group table.
rate_groups = ["treatment_group", "score_bin"]
no_show_rate = combined_data.groupby(rate_groups, observed=True)["no_show"].mean()

# Bars per score bin, one bar for each treatment group
no_show_rate.unstack("treatment_group").plot.bar()