<h1> Patient Arrival Rate </h1>

This notebook was used to analyse patient arrivals in the region and to test them for seasonality.

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import truncnorm
from scipy.stats import kruskal
import scipy.stats as stats

import matplotlib.pyplot as plt
# Seaborn settings: context, style and palette are applied in a single call.
# NOTE(review): the former `font_scale=1.5` argument was reset by the separate
# `sns.set_context("paper")` call that followed it, so it is omitted here to keep
# the rendered figures unchanged. Add `font_scale=1.5` below if larger text is wanted.
sns.set_theme(context="paper", style="whitegrid", palette="colorblind")
# `sns.despine()` is not called in this setup cell: it only acts on the axes of
# an existing figure, so with no figure open it merely created an empty one.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Path of the CSV file analysed in this notebook
DATA_PATH = "example_dataset.csv"
df = pd.read_csv(DATA_PATH)

<h2>Data preparation </h2>

In [None]:
# Parse start_date once and derive the calendar month of every row from it.
start_month = pd.to_datetime(df['start_date']).dt.month

# Season label for each calendar month number.
SEASON_BY_MONTH = {
    12: 'winter', 1: 'winter', 2: 'winter',
    3: 'spring', 4: 'spring', 5: 'spring',
    6: 'summer', 7: 'summer', 8: 'summer',
    9: 'autumn', 10: 'autumn', 11: 'autumn',
}

# Rows without a parsable start_date get <NA> instead of falling through to 'autumn'.
df['season'] = start_month.map(SEASON_BY_MONTH).astype('string')
df['month'] = start_month

# Keep rows with stay_number == 1. The .copy() makes the subset an independent
# frame, so columns added to it later are not writes into a slice of `df`.
df_filtered = df[df['stay_number'] == 1].copy()



In [None]:
# Number of rows per ward level in the filtered data
ward_level_counts = df_filtered['ward_level'].value_counts()
ward_level_counts

In [None]:
# Number of rows per region in the filtered data
region_counts = df_filtered['region'].value_counts()
region_counts

<h2>Patient admissions </h2>

In [None]:
# Admissions per calendar year. assign() returns a new frame, so the column is
# never written into what may be a filtered slice of `df`, and the cell can be
# re-run safely.
df_filtered = df_filtered.assign(
    year=pd.to_datetime(df_filtered['start_date']).dt.year
)
df_filtered['year'].value_counts()

In [None]:
# Count rows per calendar date, ordered chronologically, as a two-column frame
admission_dates = pd.to_datetime(df_filtered['start_date']).dt.date
daily_admissions = (
    admission_dates
    .value_counts()
    .sort_index()
    .rename_axis('date')
    .reset_index(name='admissions')
)

In [None]:
# First and last admission timestamps in the filtered data
start_times = pd.to_datetime(df_filtered['start_date'])
min_date = start_times.min()
max_date = start_times.max()

# One entry per calendar day. normalize=True snaps both ends to midnight, so the
# range lines up with the per-day counts even if start_date carries a time of day.
date_range = pd.date_range(start=min_date, end=max_date, normalize=True)

# Calendar days with no admission row. The daily dates are converted to
# datetimes first so both sides of the membership test share one type.
observed_days = pd.to_datetime(daily_admissions['date'])
missing_dates = date_range[~date_range.isin(observed_days)]
missing_dates

In [None]:
# Line chart of the raw admission count for every recorded date
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x='date', y='admissions', data=daily_admissions, ax=ax)
ax.set_title('Number of admissions per day')
ax.set_xlabel('Date')
ax.set_ylabel('Number of admissions')
plt.show()

In [None]:
# Plot a 7-row moving average of the daily admissions.
# The window counts rows, not calendar days: it spans exactly one week only
# where no dates are absent from `daily_admissions`.
rolling_mean = daily_admissions['admissions'].rolling(7).mean()
daily_admissions['7_day_avg'] = rolling_mean

fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x='date', y='7_day_avg', data=daily_admissions, ax=ax)
ax.set_title('Weekly moving average of admissions per day')
ax.set_xlabel('Date')
ax.set_ylabel('Number of admissions')
plt.show()


In [None]:
# Smallest start_date value in the filtered data (column as loaded, not re-parsed)
first_start_date = df_filtered['start_date'].min()
first_start_date

In [None]:
# Convert the `date` column of daily_admissions to pandas datetimes.
# The column was built with `.dt.date`, i.e. it holds Python date objects; the
# later cells compare it with Timestamps and use it as a DatetimeIndex key.
daily_admissions['date'] = pd.to_datetime(daily_admissions['date'])


In [None]:
# Keep the daily rows dated on or before the last observed admission
on_or_before_max = pd.to_datetime(daily_admissions['date']) <= pd.to_datetime(max_date)
daily_admissions_filtered = daily_admissions.loc[on_or_before_max]

In [None]:
# Calendar days in `date_range` that have no row in the filtered daily counts
observed_days = daily_admissions_filtered['date']
missing_dates = date_range[~date_range.isin(observed_days)]

# Re-index onto the full calendar: days without a row become rows of NaN
# (they are filled with 0 in a later step). `date` becomes the index here.
daily_admissions_filtered = (
    daily_admissions_filtered
    .set_index('date')
    .reindex(date_range)
)

In [None]:
#fill admissions with 0 for missing dates
# Treat days that were absent from the data as days with zero admissions
daily_admissions_filtered = daily_admissions_filtered.fillna({'admissions': 0})

In [None]:
# Final daily series used by every analysis below, on a complete calendar.
# Re-selecting rows straight from `daily_admissions` here would drop the days
# that have no admission record, so those days are re-inserted with a count of 0
# and `date` is kept as a regular column for the cells that follow.
admission_days = pd.to_datetime(daily_admissions['date'])
within_period = admission_days <= pd.to_datetime(max_date)
full_calendar = pd.date_range(
    admission_days[within_period].min(),
    admission_days[within_period].max(),
    freq='D',
)
daily_admissions_filtered = (
    daily_admissions[within_period]
    .assign(date=admission_days[within_period])
    .set_index('date')
    .reindex(full_calendar)          # adds a NaN row for every absent day
    .rename_axis('date')
    .fillna({'admissions': 0})       # no record for a day means zero admissions
    .reset_index()
)

In [None]:
# Summary statistics of the daily admission counts
admissions_summary = daily_admissions_filtered['admissions'].describe()
print(admissions_summary)

In [None]:
# Truncation limits used as starting guesses, in admissions per day.
# NOTE(review): presumably the observed min/max of the daily counts — confirm.
lower_bound = 9
upper_bound = 49

observed_admissions = daily_admissions_filtered['admissions']
mu, std = np.mean(observed_admissions), np.std(observed_admissions)
# truncnorm takes its limits in standardised units relative to loc/scale.
a, b = (lower_bound - mu) / std, (upper_bound - mu) / std

# The positional a, b and the loc/scale keywords are only starting values:
# fit() estimates all four parameters and returns (a, b, loc, scale).
fitted_params = truncnorm.fit(observed_admissions, a, b, loc=mu, scale=std)
fitted_a, fitted_b, fitted_mu, fitted_std = fitted_params
print(fitted_mu, fitted_std)

# Evaluate the density with the fitted a, b as well. Combining the starting
# a, b with the fitted loc/scale would plot a different distribution than the
# one that was estimated.
x = np.linspace(lower_bound, upper_bound, 100)
fitted_pdf = truncnorm.pdf(x, fitted_a, fitted_b, loc=fitted_mu, scale=fitted_std)

fig, ax = plt.subplots()
# Labels are attached to the artists themselves so the legend cannot be
# mismatched by the order in which matplotlib enumerates them.
sns.histplot(observed_admissions, bins=20, stat='density', label='Admissions', ax=ax)
ax.plot(x, fitted_pdf, 'r', label='Truncated normal fit')
ax.set_title('Daily admissions')
ax.set_xlabel('Number of admissions')
ax.set_ylabel('Density')
ax.legend()
plt.show()

In [None]:
# Q-Q plot of the daily admission counts against a normal distribution.
# probplot is reached through the `stats` module imported at the top of the
# notebook, so no import is needed in the middle of the analysis.
fig, ax = plt.subplots(figsize=(10, 5), dpi=400)
res = stats.probplot(daily_admissions_filtered['admissions'], dist="norm", plot=ax)
ax.set_title('Q-Q plot normal distribution against observed daily admissions')
plt.show()

In [None]:

 
# Daily admission counts that the distribution test in the following cells is run on
data = daily_admissions_filtered['admissions']
 

 
def normal_cdf(x, mu, sigma):
    """Return the normal cumulative distribution function evaluated at ``x``.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which the CDF is evaluated.
    mu : float
        Location (mean) of the normal distribution.
    sigma : float
        Scale (standard deviation) of the normal distribution.
    """
    return stats.norm.cdf(x, loc=mu, scale=sigma)



In [None]:
# Population standard deviation (NumPy default ddof=0) of the daily admissions.
# NOTE(review): the value assigned here does not appear to be read by any later
# cell visible in this notebook — confirm whether this cell is still needed.
std = np.std(data)


In [None]:
# One-sample Kolmogorov-Smirnov test of the daily counts against a normal
# distribution. The normal's mean and standard deviation are estimated from the
# same data with stats.norm.fit. They are not taken from the truncated-normal
# fit, whose loc/scale belong to a different (truncated) model.
# Because the parameters are estimated from the sample being tested, the
# reported p-value is only approximate and tends to be too large.
normal_mu, normal_sigma = stats.norm.fit(data)

ks_stat, p_value = stats.kstest(data, normal_cdf, args=(normal_mu, normal_sigma))

print(f'K-S Statistic: {ks_stat}')
print(f'P-Value: {p_value}')

# Interpretation based on the significance level
alpha = 0.05
if p_value > alpha:
    print("Fail to reject the null hypothesis. The data fits a Normal distribution.")
else:
    print("Reject the null hypothesis. The data does not fit a Normal distribution.")

In [None]:
# Tag every day with its calendar month and season for the seasonality analysis.
day_month = pd.to_datetime(daily_admissions_filtered['date']).dt.month

# Season label for each calendar month number.
season_by_month = {
    12: 'winter', 1: 'winter', 2: 'winter',
    3: 'spring', 4: 'spring', 5: 'spring',
    6: 'summer', 7: 'summer', 8: 'summer',
    9: 'autumn', 10: 'autumn', 11: 'autumn',
}

# assign() builds a new frame, so the columns are never written into a frame
# that was produced by boolean filtering, and re-running the cell is harmless.
daily_admissions_filtered = daily_admissions_filtered.assign(
    season=day_month.map(season_by_month).astype('string'),
    month=day_month,
)


In [None]:
# Distribution of daily admission counts for each calendar month
fig, ax = plt.subplots(figsize=(10, 6), dpi=400)
sns.boxenplot(x='month', y='admissions', data=daily_admissions_filtered, ax=ax)
ax.set_title('Average number of daily admissions per month')
ax.set_xlabel('Month')
ax.set_ylabel('Average number of daily  admissions')
plt.show()


In [None]:
# Kruskal-Wallis H test: do the daily admission counts differ between calendar months?
months = daily_admissions_filtered['month'].unique()
month_groups = [
    daily_admissions_filtered.loc[daily_admissions_filtered['month'] == month, 'admissions']
    for month in months
]
stat, p = kruskal(*month_groups)
print('Kruskal-Wallis H test statistic:', stat)
print('p-value:', p)

# The groups compared here are months, so the conclusion is worded accordingly.
if p < 0.05:
    print('The differences between months are statistically significant.')
else:
    print('The differences between months are not statistically significant.')

In [None]:
# Bar chart of the mean daily admission count in each season
fig, ax = plt.subplots(figsize=(10, 6), dpi=400)
sns.barplot(x='season', y='admissions', data=daily_admissions_filtered, ax=ax)
ax.set_title('Average number of admissions per season')
ax.set_xlabel('Season')
ax.set_ylabel('Average number of admissions')
plt.show()


In [None]:
# Summary statistics of daily admissions for each season, as a table
season_summary = daily_admissions_filtered.groupby('season')['admissions'].describe()
print(season_summary)

In [None]:

# Distribution of daily admission counts per season.
# `kruskal` is already imported in the notebook's import cell, so it is not
# imported again here.
fig, ax = plt.subplots(figsize=(12, 6), dpi=400)
sns.boxenplot(x='season', y='admissions', data=daily_admissions_filtered, ax=ax)
ax.set_title('Admissions by Season')
ax.set_xlabel('Season')
ax.set_ylabel('Admissions')
ax.grid(True)
plt.show()

# Kruskal-Wallis H test: do the daily admission counts differ between seasons?
seasons = daily_admissions_filtered['season'].unique()
season_groups = [
    daily_admissions_filtered.loc[daily_admissions_filtered['season'] == season, 'admissions']
    for season in seasons
]
stat, p = kruskal(*season_groups)
print('Kruskal-Wallis H test statistic:', stat)
print('p-value:', p)

if p < 0.05:
    print('The differences between seasons are statistically significant.')
else:
    print('The differences between seasons are not statistically significant.')