In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [11]:
df = pd.read_csv("ScanRecords.csv")
df['Date'] = pd.to_datetime(df['Date'])
df1 = df[df["PatientType"] == "Type 1"]
df2 = df[df["PatientType"] == "Type 2"]

# Type 1

## 1.1. Duration

In [3]:
# MEAN AND VARIANCE OF DURATION
np.random.seed(515)
B1 = 100000
alpha = 0.05

n = len(df1)

X_bar = np.mean(df1['Duration'])
St_Dev = np.std(df1['Duration'], ddof=1)

# empty arrays for bootstrapped values
X_star_bar = np.empty(B1)
X_star_sd = np.empty(B1)
Q_star = np.empty(B1)  # Statistic for variance
T_star = np.empty(B1)  # Statistic for mean

# Bootstrapping
for b in range(B1):
    J = np.random.choice(np.arange(n), size=n, replace=True)  # Resampling with replacement
    X_star = df1['Duration'].iloc[J]  # Construct the bootstrap sample
    X_star_bar[b] = np.mean(X_star)  # Calculate the bootstrap sample mean
    X_star_sd[b] = np.std(X_star, ddof=1)  # Calculate the bootstrap sample standard deviation
    T_star[b] = np.sqrt(n) * (X_star_bar[b] - X_bar) / X_star_sd[b]  
    Q_star[b] = (n - 1) * (X_star_sd[b]**2) / (St_Dev**2)  

# Critical values for T-statistic and Q-statistic
cv_LB_T = np.quantile(T_star, alpha / 2)
cv_UB_T = np.quantile(T_star, 1 - alpha / 2)

cv_LB_Q = np.quantile(Q_star, alpha / 2)
cv_UB_Q = np.quantile(Q_star, 1 - alpha / 2)

# Confidence interval for variance
CI_LB_Q = (n - 1) * St_Dev**2 / cv_UB_Q
CI_UB_Q = (n - 1) * St_Dev**2 / cv_LB_Q

# Confidence interval for mean
CI_LB_T = X_bar - cv_UB_T * St_Dev / np.sqrt(n)
CI_UP_T = X_bar - cv_LB_T * St_Dev / np.sqrt(n)

results = {
    "Bootstrap mean of Duration": np.mean(X_star_bar) * 60,
    "95% CI for Mean Duration": (CI_LB_T * 60, CI_UP_T * 60),
    "Bootstrap std of Variance": ((np.mean(X_star_sd)) ** 2 )* 60,
    "95% CI for std Variance": (CI_LB_Q * 60, CI_UB_Q * 60),
}

for key, value in results.items():
    print(f"{key}: {value}")


Bootstrap mean of Duration: 25.959161825682816
95% CI for Mean Duration: (np.float64(25.362994321596993), np.float64(26.550661222688273))
Bootstrap std of Variance: 0.5712553802951649
95% CI for std Variance: (np.float64(0.4971193963500315), np.float64(0.6727786369350718))


In [13]:
quantiles = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

actual_quantiles = np.quantile(df1['Duration'], quantiles)

# bootstrap samples and their quantiles
bootstrap_data = np.random.choice(df1['Duration'], size=(B1, n), replace=True)
bootstrap_quantiles = np.quantile(bootstrap_data, quantiles, axis=1)

differences = bootstrap_quantiles - actual_quantiles[:, np.newaxis]

# CI for each quantile
confidence_intervals = []
for i, quantile in enumerate(quantiles):
    differences_quantile = differences[i]
    cv_LB = np.quantile(differences_quantile, alpha / 2)
    cv_UB = np.quantile(differences_quantile, 1 - alpha / 2)
    ci_LB = actual_quantiles[i] - cv_UB
    ci_UB = actual_quantiles[i] - cv_LB
    confidence_intervals.append((ci_LB*60, ci_UB*60))


for i, quantile in enumerate(quantiles):
    lower, upper = confidence_intervals[i]
    print(f"{quantile:.1%}: CI = ({lower:.2f}, {upper:.2f}) min")


50.0%: CI = (25.35, 27.09) min
55.0%: CI = (26.25, 27.76) min
60.0%: CI = (26.88, 28.37) min
65.0%: CI = (27.85, 29.24) min
70.0%: CI = (28.37, 29.65) min
75.0%: CI = (28.94, 30.49) min
80.0%: CI = (30.23, 31.66) min
85.0%: CI = (30.85, 32.26) min
90.0%: CI = (31.07, 33.16) min
95.0%: CI = (32.42, 35.84) min


## 1.2. Interarrival time

In [4]:
numRows = len(df1)
interArrivals1 = []

for i in range(numRows - 1):
    if df1.iloc[i]["Date"] == df1.iloc[i + 1]["Date"]:
        # interaarival time of arrivals on the same day
        interval = df1.iloc[i + 1]["Time"] - df1.iloc[i]["Time"]
    else:
        interval = (17 - df1.iloc[i]["Time"]) + (df1.iloc[i + 1]["Time"] - 8)
    interArrivals1.append(interval)

interArrivals1 = np.array(interArrivals1)

print("Mean Inter-arrival time: ", np.mean(interArrivals1))
print( np.mean(interArrivals1)*60, "min")

Mean Inter-arrival time:  0.5453439153439154
32.72063492063492 min


In [5]:
# INTERARRIVAL TIME

np.random.seed(515)
B1 = 100000
alpha = 0.05

n = len(interArrivals1)

X_bar = np.mean(interArrivals1)
St_Dev = np.std(interArrivals1, ddof=1)

# Initialize empty arrays for bootstrapped values
X_star_bar = np.empty(B1)
X_star_sd = np.empty(B1)
Q_star = np.empty(B1)  # Statistic for variance
T_star = np.empty(B1)  # Statistic for mean

# Bootstrapping
for b in range(B1):
    J = np.random.choice(np.arange(n), size=n, replace=True)  # Resampling with replacement
    X_star = interArrivals1[J]  # Construct the bootstrap sample
    X_star_bar[b] = np.mean(X_star)  # Calculate the bootstrap sample mean
    X_star_sd[b] = np.std(X_star, ddof=1)  # Calculate the bootstrap sample standard deviation
    T_star[b] = np.sqrt(n) * (X_star_bar[b] - X_bar) / X_star_sd[b]  
    Q_star[b] = (n - 1) * (X_star_sd[b]**2) / (St_Dev**2)  

# Critical values for T-statistic and Q-statistic
cv_LB_T = np.quantile(T_star, alpha / 2)
cv_UB_T = np.quantile(T_star, 1 - alpha / 2)

cv_LB_Q = np.quantile(Q_star, alpha / 2)
cv_UB_Q = np.quantile(Q_star, 1 - alpha / 2)

# Confidence interval for variance
CI_LB_Q = (n - 1) * St_Dev**2 / cv_UB_Q
CI_UB_Q = (n - 1) * St_Dev**2 / cv_LB_Q

# Confidence interval for mean
CI_LB_T = X_bar - cv_UB_T * St_Dev / np.sqrt(n)
CI_UP_T = X_bar - cv_LB_T * St_Dev / np.sqrt(n)

results = {
    "Bootstrap mean interarrival time": np.mean(X_star_bar) * 60,
    "95% CI for Mean interarrival time": (CI_LB_T * 60, CI_UP_T * 60),
    "Bootstrap Variance of interarrival time": ((np.mean(X_star_sd)) ** 2 )* 60,
    "95% CI for Variance of interarrival time": (CI_LB_Q * 60, CI_UB_Q * 60),
}

for key, value in results.items():
    print(f"{key}: {value}")

Bootstrap mean interarrival time: 32.73159025396826
95% CI for Mean interarrival time: (np.float64(29.413812475577068), np.float64(36.522848803529804))
Bootstrap Variance of interarrival time: 20.304647903460648
95% CI for Variance of interarrival time: (np.float64(16.334074258618422), np.float64(26.708241683100496))


# Type 2

## 2.1. Duration

In [3]:
# MEAN AND VARIANCE OF DURATION

np.random.seed(515)
B1 = 100000
alpha = 0.05

n = len(df2)

X_bar = np.mean(df2['Duration'])
St_Dev = np.std(df2['Duration'], ddof=1)

# Initialize empty arrays for bootstrap values
X_star_bar = np.empty(B1)
X_star_sd = np.empty(B1)
Q_star = np.empty(B1)  # Statistic for variance
T_star = np.empty(B1)  # Statistic for mean

# Bootstrapping
for b in range(B1):
    J = np.random.choice(np.arange(n), size=n, replace=True)  # Resampling with replacement
    X_star = df2['Duration'].iloc[J]  # Construct the bootstrap sample
    X_star_bar[b] = np.mean(X_star)  # Calculate the bootstrap sample mean
    X_star_sd[b] = np.std(X_star, ddof=1)  # Calculate the bootstrap sample standard deviation
    T_star[b] = np.sqrt(n) * (X_star_bar[b] - X_bar) / X_star_sd[b]  
    Q_star[b] = (n - 1) * (X_star_sd[b]**2) / (St_Dev**2)  

# Critical values for T-statistic and Q-statistic
cv_LB_T = np.quantile(T_star, alpha / 2)
cv_UB_T = np.quantile(T_star, 1 - alpha / 2)

cv_LB_Q = np.quantile(Q_star, alpha / 2)
cv_UB_Q = np.quantile(Q_star, 1 - alpha / 2)

# Confidence interval for variance
CI_LB_Q = (n - 1) * St_Dev**2 / cv_UB_Q
CI_UB_Q = (n - 1) * St_Dev**2 / cv_LB_Q

# Confidence interval for mean
CI_LB_T = X_bar - cv_UB_T * St_Dev / np.sqrt(n)
CI_UP_T = X_bar - cv_LB_T * St_Dev / np.sqrt(n)

results = {
    "Bootstrap mean of Duration": np.mean(X_star_bar) * 60,
    "95% CI for Mean Duration": (CI_LB_T * 60, CI_UP_T * 60),
    "Bootstrap std of Variance": ((np.mean(X_star_sd)) ** 2 )* 60,
    "95% CI for std Variance": (CI_LB_Q * 60, CI_UB_Q * 60),
}

for key, value in results.items():
    print(f"{key}: {value}")

Bootstrap mean of Duration: 40.16281591932623
95% CI for Mean Duration: (np.float64(38.750817197086555), np.float64(41.62005085837455))
Bootstrap std of Variance: 2.09244626856855
95% CI for std Variance: (np.float64(1.8177422263788867), np.float64(2.4985252141756975))


In [14]:
quantiles = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

actual_quantiles = np.quantile(df2['Duration'], quantiles)

# bootstrap samples and their quantiles
bootstrap_data = np.random.choice(df2['Duration'], size=(B1, n), replace=True)
bootstrap_quantiles = np.quantile(bootstrap_data, quantiles, axis=1)

differences = bootstrap_quantiles - actual_quantiles[:, np.newaxis]

# CI for each quantile
confidence_intervals = []
for i, quantile in enumerate(quantiles):
    differences_quantile = differences[i]
    cv_LB = np.quantile(differences_quantile, alpha / 2)
    cv_UB = np.quantile(differences_quantile, 1 - alpha / 2)
    ci_LB = actual_quantiles[i] - cv_UB
    ci_UB = actual_quantiles[i] - cv_LB
    confidence_intervals.append((ci_LB*60, ci_UB*60))


for i, quantile in enumerate(quantiles):
    lower, upper = confidence_intervals[i]
    print(f"{quantile:.1%}: CI = ({lower:.2f}, {upper:.2f}) min")


50.0%: CI = (36.49, 39.78) min
55.0%: CI = (40.04, 43.35) min
60.0%: CI = (40.87, 43.10) min
65.0%: CI = (41.06, 44.58) min
70.0%: CI = (43.62, 47.97) min
75.0%: CI = (46.34, 49.88) min
80.0%: CI = (47.55, 51.60) min
85.0%: CI = (51.08, 55.27) min
90.0%: CI = (53.60, 58.47) min
95.0%: CI = (58.74, 64.00) min


## 2.2. Interarrival

In [9]:
numRows = len(df2)
interArrivals2 = []

for i in range(numRows - 1):
    if df2.iloc[i]["Date"] == df2.iloc[i + 1]["Date"]:
        # interaarival time of arrivals on the same day
        interval = df2.iloc[i + 1]["Time"] - df2.iloc[i]["Time"]
    else:
        interval = (17 - df2.iloc[i]["Time"]) + (df2.iloc[i + 1]["Time"] - 8)
    interArrivals2.append(interval)

interArrivals2 = np.array(interArrivals2)

print("Mean Inter-arrival time: ", np.mean(interArrivals2))
print( np.mean(interArrivals2)*60, "min")

Mean Inter-arrival time:  0.8666386554621849
51.998319327731096 min


In [10]:
# INTERARRIVAL TIME:

np.random.seed(515)
B1 = 100000
alpha = 0.05

n = len(interArrivals2)

X_bar = np.mean(interArrivals2)
St_Dev = np.std(interArrivals2, ddof=1)

# Initialize empty arrays for bootstrap values
X_star_bar = np.empty(B1)
X_star_sd = np.empty(B1)
Q_star = np.empty(B1)  # Statistic for variance
T_star = np.empty(B1)  # Statistic for mean

# Bootstrapping
for b in range(B1):
    J = np.random.choice(np.arange(n), size=n, replace=True)  # Resampling with replacement
    X_star = interArrivals2[J]  # Construct the bootstrap sample
    X_star_bar[b] = np.mean(X_star)  # Calculate the bootstrap sample mean
    X_star_sd[b] = np.std(X_star, ddof=1)  # Calculate the bootstrap sample standard deviation
    T_star[b] = np.sqrt(n) * (X_star_bar[b] - X_bar) / X_star_sd[b]  
    Q_star[b] = (n - 1) * (X_star_sd[b]**2) / (St_Dev**2)  

# Critical values for T-statistic and Q-statistic
cv_LB_T = np.quantile(T_star, alpha / 2)
cv_UB_T = np.quantile(T_star, 1 - alpha / 2)

cv_LB_Q = np.quantile(Q_star, alpha / 2)
cv_UB_Q = np.quantile(Q_star, 1 - alpha / 2)

# Confidence interval for variance
CI_LB_Q = (n - 1) * St_Dev**2 / cv_UB_Q
CI_UB_Q = (n - 1) * St_Dev**2 / cv_LB_Q

# Confidence interval for mean
CI_LB_T = X_bar - cv_UB_T * St_Dev / np.sqrt(n)
CI_UP_T = X_bar - cv_LB_T * St_Dev / np.sqrt(n)

results = {
    "Bootstrap mean interarrival time": np.mean(X_star_bar) * 60,
    "95% CI for Mean interarrival time": (CI_LB_T * 60, CI_UP_T * 60),
    "Bootstrap Variance of interarrival time": ((np.mean(X_star_sd)) ** 2 )* 60,
    "95% CI for Variance of interarrival time": (CI_LB_Q * 60, CI_UB_Q * 60),
}

for key, value in results.items():
    print(f"{key}: {value}")


Bootstrap mean interarrival time: 51.99819768907564
95% CI for Mean interarrival time: (np.float64(49.63299860602454), np.float64(54.39909823141045))
Bootstrap Variance of interarrival time: 5.759607001575478
95% CI for Variance of interarrival time: (np.float64(4.890567632535156), np.float64(7.052303462032274))
