# Analyse the time measurements of Metavision

Additional info:
- Baseline measurements started on 2024-09-18
- AI-generated letters were introduced on 2024-10-15
- Pilot officially ended on 2024-12-10 (AI was still used afterwards, but went offline during the Christmas holidays, from 2024-12-24)


## Load packages

In [None]:
from datetime import date, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import mannwhitneyu

In [None]:
# Key dates that delimit the measurement periods used throughout this notebook.
BASELINE_START_DATE = date(2024, 9, 18)  # start of the baseline time measurements
PILOT_START_DATE = date(2024, 10, 15)  # AI-generated letters were introduced
PILOT_MID_DATE = date(2024, 11, 15)  # boundary between first and second pilot half
PILOT_END_DATE = date(2024, 12, 10)  # end of the pilot period used in the analyses
# The AI went offline during the Christmas holidays of 2024. The year used to
# be 2025, which lies a full year after the other dates and presumably turned
# every "discharged before OFFLINE_START_DATE" filter into a no-op.
OFFLINE_START_DATE = date(2024, 12, 24)

## Analysis datadump

In [None]:
# Location of the raw export, relative to the directory above the working directory.
raw_measurements_path = (
    Path.cwd().parent / "data" / "raw" / "metavision_time_measurements.csv"
)

# Columns that pandas should parse as datetimes while reading the file.
datetime_columns = [
    "AddmissionDate",
    "DischargeDate",
    "FormRelease",
    "SessieCreate",
    "StartSchrijven",
    "EindeSchrijven",
]

df_total = pd.read_csv(raw_measurements_path, sep=",", parse_dates=datetime_columns)

# The export spells the admission column with a double "d"; use the corrected
# name in the rest of the notebook.
df_total = df_total.rename(columns={"AddmissionDate": "AdmissionDate"})
df_total

In [None]:
# Keep only rows with a non-negative writing time below 180 minutes; anything
# outside that range is treated as an outlier.
writing_minutes = df_total["Schrijven_minuten"]
df_total = df_total[(writing_minutes >= 0) & (writing_minutes < 180)]
df_total["Schrijven_minuten"].hist(bins=100)


### Explanation of the filtering

The baseline should contain all admissions where no AI was used and a time measurement is available. Time measurements are available when a patient was admitted on or after the BASELINE_START_DATE. AI was not used when either the discharge date is before the PILOT_START_DATE or the patient remained in a session created before the PILOT_START_DATE. It is important to note that the AI was offline after OFFLINE_START_DATE. However, patients in the baseline group as described above were not affected by this, as they were not using AI, so no additional filtering is needed. The baseline group is analysed in four variants:
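- Baseline: the admission date is on or after BASELINE_START_DATE, and either the discharge date is before PILOT_START_DATE or all sessions of the patient were created before PILOT_START_DATE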
- Baseline strictly: the admission and discharge date fall in between BASELINE_START_DATE and PILOT_START_DATE
- Baseline (strict) with admissions shorter than one week
- Baseline (strict) with admissions of one week or longer

The AI measurement should contain all admissions where AI was used (and time measurement is available but there is no option for AI without time measurement). AI was used when the admission date is after the PILOT_START_DATE. As the AI was also utilized after the pilot ended there are a few different analyses we will perform. It is important to note that the AI was offline after OFFLINE_START_DATE, so we will not consider any admissions where the discharge date is after this date.
- AI during the pilot: the admission date falls in between PILOT_START_DATE and PILOT_END_DATE
- AI strictly during the pilot: the admission and discharge date fall in between PILOT_START_DATE and PILOT_END_DATE
- AI during first half of the pilot: the admission date is in between the PILOT_START_DATE and the PILOT_MID_DATE, regardless of the discharge date
- AI strictly during first half of the pilot: the admission and discharge date fall in between the PILOT_START_DATE and the PILOT_MID_DATE
- AI during second half of the pilot: the admission date is in between the PILOT_MID_DATE and the PILOT_END_DATE, regardless of the discharge date
- AI strictly during second half of the pilot: the admission and discharge date fall in between the PILOT_MID_DATE and the PILOT_END_DATE
- AI strictly after the pilot: the admission date and discharge date fall in between the PILOT_END_DATE and the OFFLINE_START_DATE
- AI during the "extended" pilot: the admission and discharge date fall in between the PILOT_START_DATE and the OFFLINE_START_DATE. This is the "strict" pilot setup above, extended up to the date the AI went offline.
- AI during the pilot (strict) with admissions shorter than one week
- AI during the pilot (strict) with admissions of one week or longer

In [None]:
# Filtering
#
# Every cohort is a row subset of df_total. The calendar dates and the length
# of stay are therefore derived once and combined into boolean masks, instead
# of copying df_total for every cohort and recomputing the same expressions.
admission_date = df_total["AdmissionDate"].dt.date
discharge_date = df_total["DischargeDate"].dt.date
length_of_stay = discharge_date - admission_date

# Length-of-stay splits: "short" is less than 7 days, "long" is 7 days or more,
# and the "no short" cohorts keep only stays of more than 2 days.
is_short_stay = length_of_stay < timedelta(days=7)
is_long_stay = length_of_stay >= timedelta(days=7)
is_not_very_short_stay = length_of_stay > timedelta(days=2)

# Admission-date conditions.
admitted_from_baseline_start = admission_date >= BASELINE_START_DATE
admitted_from_pilot_start = admission_date >= PILOT_START_DATE
admitted_from_pilot_mid = admission_date >= PILOT_MID_DATE
admitted_from_pilot_end = admission_date >= PILOT_END_DATE
admitted_before_pilot_mid = admission_date < PILOT_MID_DATE
admitted_before_pilot_end = admission_date < PILOT_END_DATE

# Discharge-date conditions.
discharged_before_pilot_start = discharge_date < PILOT_START_DATE
discharged_before_pilot_mid = discharge_date < PILOT_MID_DATE
discharged_before_pilot_end = discharge_date < PILOT_END_DATE
discharged_before_offline = discharge_date < OFFLINE_START_DATE

# True on every row of a patient whose sessions were all created before the
# pilot started (the per-patient result is broadcast back to the rows).
all_sessions_before_pilot = df_total.groupby("PatientID")["SessieCreate"].transform(
    lambda x: (x.dt.date < PILOT_START_DATE).all()
)

# Masks shared by several cohorts.
baseline_strict_mask = admitted_from_baseline_start & discharged_before_pilot_start
pilot_strict_mask = admitted_from_pilot_start & discharged_before_pilot_end

# --- Baseline cohorts ---
baseline_df = df_total[
    admitted_from_baseline_start
    & (discharged_before_pilot_start | all_sessions_before_pilot)
]
baseline_strict_df = df_total[baseline_strict_mask]
baseline_short_df = df_total[baseline_strict_mask & is_short_stay]
baseline_long_df = df_total[baseline_strict_mask & is_long_stay]

# --- AI cohorts ---
ai_pilot_df = df_total[
    admitted_from_pilot_start & admitted_before_pilot_end & discharged_before_offline
]
ai_pilot_strict_df = df_total[pilot_strict_mask]
ai_first_half_df = df_total[
    admitted_from_pilot_start & admitted_before_pilot_mid & discharged_before_offline
]
ai_first_half_strict_df = df_total[
    admitted_from_pilot_start & discharged_before_pilot_mid
]
ai_second_half_df = df_total[
    admitted_from_pilot_mid & admitted_before_pilot_end & discharged_before_offline
]
ai_second_half_strict_df = df_total[
    admitted_from_pilot_mid & discharged_before_pilot_end
]
ai_post_strict_df = df_total[admitted_from_pilot_end & discharged_before_offline]
ai_extended_df = df_total[admitted_from_pilot_start & discharged_before_offline]
ai_pilot_short_df = df_total[pilot_strict_mask & is_short_stay]
ai_pilot_long_df = df_total[pilot_strict_mask & is_long_stay]

# --- Strict cohorts without stays of two days or less ---
baseline_strict_no_short_df = df_total[baseline_strict_mask & is_not_very_short_stay]
ai_pilot_no_short_df = df_total[pilot_strict_mask & is_not_very_short_stay]

In [None]:
# Report the number of distinct patients in every cohort, in a fixed order.
cohort_frames = (
    ("baseline measurement", baseline_df),
    ("baseline measurement (strict)", baseline_strict_df),
    ("baseline measurement (strict & short)", baseline_short_df),
    ("baseline measurement (strict & long)", baseline_long_df),
    ("AI measurement during the pilot", ai_pilot_df),
    ("AI measurement during the pilot (strict)", ai_pilot_strict_df),
    ("AI measurement during the pilot (strict & short)", ai_pilot_short_df),
    ("AI measurement during the pilot (strict & long)", ai_pilot_long_df),
    ("AI measurement during the first half of the pilot", ai_first_half_df),
    (
        "AI measurement during the first half of the pilot (strict)",
        ai_first_half_strict_df,
    ),
    ("AI measurement during the second half of the pilot", ai_second_half_df),
    (
        "AI measurement during the second half of the pilot (strict)",
        ai_second_half_strict_df,
    ),
    ("AI measurement post pilot (strict)", ai_post_strict_df),
    ("AI measurement during the extended pilot", ai_extended_df),
    ("baseline measurement (strict & no short)", baseline_strict_no_short_df),
    ("AI measurement during the pilot (no short)", ai_pilot_no_short_df),
)

for cohort_label, cohort_frame in cohort_frames:
    patient_count = cohort_frame["PatientID"].nunique()
    print(f"Number of patients in {cohort_label}: {patient_count}")

## Analyse difference between measurements

In [None]:
def _writing_minutes_per_patient(cohort_df):
    """Return the summed ``Schrijven_minuten`` per ``PatientID`` of a cohort."""
    return cohort_df.groupby("PatientID")["Schrijven_minuten"].sum()


# Total writing time per patient for every cohort.
baseline_sum = _writing_minutes_per_patient(baseline_df)
baseline_strict_sum = _writing_minutes_per_patient(baseline_strict_df)
baseline_short_sum = _writing_minutes_per_patient(baseline_short_df)
baseline_long_sum = _writing_minutes_per_patient(baseline_long_df)
ai_pilot_sum = _writing_minutes_per_patient(ai_pilot_df)
ai_pilot_strict_sum = _writing_minutes_per_patient(ai_pilot_strict_df)
ai_pilot_short_sum = _writing_minutes_per_patient(ai_pilot_short_df)
ai_pilot_long_sum = _writing_minutes_per_patient(ai_pilot_long_df)
ai_first_half_sum = _writing_minutes_per_patient(ai_first_half_df)
ai_first_half_strict_sum = _writing_minutes_per_patient(ai_first_half_strict_df)
ai_second_half_sum = _writing_minutes_per_patient(ai_second_half_df)
ai_second_half_strict_sum = _writing_minutes_per_patient(ai_second_half_strict_df)
ai_post_strict_sum = _writing_minutes_per_patient(ai_post_strict_df)
ai_extended_sum = _writing_minutes_per_patient(ai_extended_df)

baseline_strict_no_short_sum = _writing_minutes_per_patient(
    baseline_strict_no_short_df
)
ai_pilot_no_short_sum = _writing_minutes_per_patient(ai_pilot_no_short_df)

# Print mean and standard deviation of the per-patient totals, in a fixed order.
reported_totals = (
    ("baseline", baseline_sum),
    ("during pilot", ai_pilot_sum),
    ("during the first half of the pilot", ai_first_half_sum),
    ("during the second half of the pilot", ai_second_half_sum),
    ("baseline (strict)", baseline_strict_sum),
    ("baseline (strict & short)", baseline_short_sum),
    ("baseline (strict & long)", baseline_long_sum),
    ("during pilot (strict)", ai_pilot_strict_sum),
    ("during pilot (strict & short)", ai_pilot_short_sum),
    ("during pilot (strict & long)", ai_pilot_long_sum),
    ("during the first half of the pilot (strict)", ai_first_half_strict_sum),
    ("during the second half of the pilot (strict)", ai_second_half_strict_sum),
    ("post pilot (strict)", ai_post_strict_sum),
    ("during the extended pilot", ai_extended_sum),
    ("baseline (strict & no short)", baseline_strict_no_short_sum),
    ("during pilot (no short)", ai_pilot_no_short_sum),
)

for totals_label, totals in reported_totals:
    print(f"Average time {totals_label}: {totals.mean()} ({totals.std()})")


In [None]:
# Compare the mean writing time per department ("Afdeling") between the strict
# baseline and strict AI-pilot cohorts that exclude the shortest stays.
baseline_by_department = baseline_strict_no_short_df.groupby("Afdeling")
ai_pilot_by_department = ai_pilot_no_short_df.groupby("Afdeling")

baseline_average_per_department = baseline_by_department["Schrijven_minuten"].mean()
ai_pilot_average_per_department = ai_pilot_by_department["Schrijven_minuten"].mean()

for heading, department_averages in (
    ("Average time per department in baseline:", baseline_average_per_department),
    ("Average time per department in AI pilot:", ai_pilot_average_per_department),
):
    print(heading)
    print(department_averages)

In [None]:
# Two stacked histograms of the total writing time per patient, sharing the
# x-axis so the baseline and AI-pilot distributions can be compared directly.
fig, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=(10, 10))
baseline_sum.hist(bins=100, ax=ax1)
ai_pilot_sum.hist(bins=100, ax=ax2)

ax1.set_ylabel("Frequency")
ax1.set_title("Baseline Distribution")
ax2.set_xlabel("Totaal aantal minuten geschreven per patient")
ax2.set_ylabel("Frequency")
ax2.set_title("AI Pilot Distribution")

# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only
# emits a warning on non-GUI (e.g. inline notebook) backends. This also matches
# the other plotting cell of this notebook.
plt.show()

In [None]:
# Overlay the kernel density estimates of the per-patient totals of both
# cohorts. The curves are labelled so the legend tells them apart.
baseline_sum.plot.density(color="red", label="Baseline")
ai_pilot_sum.plot.density(color="blue", label="AI pilot")
plt.legend()
plt.show()

In [None]:
def get_statistical_significance(variable_1, variable_2, *, alpha=0.05):
    """Run a two-sided Mann-Whitney U test on two samples and print the outcome.

    Args:
        variable_1: First sample of observations (any sized sequence, e.g. a
            list or a pandas Series).
        variable_2: Second sample of observations.
        alpha: Significance level the p-value is compared against. Defaults to
            0.05, the threshold that was previously hard-coded.

    Returns:
        None. The test statistic, the p-value and the interpretation are
        printed.
    """
    # The test is undefined for an empty sample (this can happen when a
    # department has no patients in one of the cohorts), so report that
    # explicitly instead of failing or printing NaN results.
    if len(variable_1) == 0 or len(variable_2) == 0:
        print("Not enough data to perform the Mann-Whitney U test.")
        return

    # Perform the Mann-Whitney U test
    stat, p = mannwhitneyu(variable_1, variable_2, alternative="two-sided")

    # Output results
    print("Mann-Whitney U statistic:", stat)
    print("p-value:", p)

    # Interpret the results
    if p < alpha:
        print("There is a statistically significant difference.")
    else:
        print("No statistically significant difference.")

In [None]:
# Each entry is (heading, first sample, second sample); the comparisons are run
# and reported in this order.
cohort_comparisons = (
    ("Baseline vs AI Pilot", baseline_sum, ai_pilot_sum),
    ("First Half Pilot vs Second Half Pilot", ai_first_half_sum, ai_second_half_sum),
    (
        "First Half Pilot vs Second Half Pilot (strict)",
        ai_first_half_strict_sum,
        ai_second_half_strict_sum,
    ),
    (
        "Second Half Pilot vs Post Pilot (strict)",
        ai_second_half_strict_sum,
        ai_post_strict_sum,
    ),
    ("Baseline vs second half AI pilot", baseline_sum, ai_second_half_sum),
    (
        "Baseline vs second half AI pilot (strict)",
        baseline_strict_sum,
        ai_second_half_strict_sum,
    ),
    (
        "Baseline short vs AI Pilot short (strict)",
        baseline_short_sum,
        ai_pilot_short_sum,
    ),
    ("Baseline long vs AI Pilot long (strict)", baseline_long_sum, ai_pilot_long_sum),
)

for heading, first_sample, second_sample in cohort_comparisons:
    print(heading)
    get_statistical_significance(first_sample, second_sample)


In [None]:
# For every department present in the baseline averages, test whether the total
# writing time per patient differs between the baseline and the AI pilot.
for department in baseline_average_per_department.index:
    print(f"Department: {department}")
    # Per-patient totals restricted to this department, baseline first.
    department_totals = [
        cohort_df[cohort_df["Afdeling"] == department]
        .groupby("PatientID")["Schrijven_minuten"]
        .sum()
        for cohort_df in (baseline_strict_no_short_df, ai_pilot_no_short_df)
    ]
    get_statistical_significance(*department_totals)
    print("\n")