In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy.stats import norm, jarque_bera, kstest, gamma, expon

# 1. Parametric: Bootstrapping

In [20]:
df = pd.read_csv("ScanRecords.csv")
df['Date'] = pd.to_datetime(df['Date'])
print("Shape of df: ", df.shape)
df.head()

Shape of df:  (618, 4)


Unnamed: 0,Date,Time,Duration,PatientType
0,2023-08-01,8.23,0.949176,Type 2
1,2023-08-01,8.49,0.479593,Type 1
2,2023-08-01,9.12,0.496112,Type 2
3,2023-08-01,10.26,0.691947,Type 2
4,2023-08-01,10.64,0.345412,Type 1


In [21]:
df1 = df[df["PatientType"] == "Type 1"]
df2 = df[df["PatientType"] == "Type 2"]

In [22]:
print(df.groupby('PatientType')['Duration'].describe())  

             count      mean       std       min       25%       50%  \
PatientType                                                            
Type 1       379.0  0.432661  0.097774  0.093731  0.368123  0.435888   
Type 2       239.0  0.669339  0.187286  0.220708  0.523962  0.646603   

                  75%       max  
PatientType                      
Type 1       0.496907  0.708922  
Type 2       0.796523  1.146789  


## Type 1

## 1.1. Mean and std

In [23]:
df1.head()

Unnamed: 0,Date,Time,Duration,PatientType
1,2023-08-01,8.49,0.479593,Type 1
4,2023-08-01,10.64,0.345412,Type 1
5,2023-08-01,11.07,0.42227,Type 1
6,2023-08-01,11.13,0.356129,Type 1
8,2023-08-01,11.56,0.423303,Type 1


In [25]:
def bootstrap_Type1(df1, B1, alpha):

    n1 = len(df1)
    X_bar = np.mean(df1['Duration'])
    St_Dev = np.std(df1['Duration'])

    # Initialize empty arrays for bootstrap quantities
    X_star_bar = np.empty(B1)
    X_star_sd = np.empty(B1)
    Q_star = np.empty(B1)

    # bootstrapping
    for b in range(B1):
        J1 = np.random.choice(np.arange(n1), size=n1, replace=True) # Resampling with replacement
        X_star = df1['Duration'].iloc[J1] # Construct the bootstrap sample
        X_star_bar[b] = np.mean(X_star) # Calculate the bootstrap sample mean
        X_star_sd[b] = np.std(X_star) # Calculate the bootstrap sample variance
        Q_star[b] = np.sqrt(n1) * (X_star_bar[b] - X_bar) / X_star_sd[b] # Calculate the bootstrap quantity

    # Critical values
    cv_LB = np.quantile(Q_star, alpha / 2)
    cv_UB = np.quantile(Q_star, 1 - alpha / 2)

    # Confidence interval
    CI_LB = X_bar - cv_LB * St_Dev / np.sqrt(n1)
    CI_UP = X_bar - cv_UB * St_Dev / np.sqrt(n1)

    results = {
        "Bootstrap mean of Duration": np.mean(X_star_bar),
        "95% CI for Mean Duration": (CI_LB, CI_UP),
        "Bootstrap std of Duration": np.mean(X_star_sd) ,
        "95% CI for std Duration": (cv_LB, cv_UB)
    }
    
    return results

results = bootstrap_Type1(df1, B1=499, alpha=0.05)
for key, value in results.items():
    print(f"{key}: {value}")

Bootstrap mean of Duration: 0.43252118884419655
95% CI for Mean Duration: (np.float64(0.4420646488477063), np.float64(0.42306011545002176))
Bootstrap std of Duration: 0.09741698230455403
95% CI for std Duration: (np.float64(-1.8748848753183738), np.float64(1.9141278238596104))


In [None]:
## TODO: ADD QUANTILES W CI

## 1.2. Mean Interarrival time

In [30]:
numRows = len(df1)
interArrivals1 = []

for i in range(numRows - 1):
    if df1.iloc[i]["Date"] == df1.iloc[i + 1]["Date"]:
        # interaarival time of arrivals on the same day
        interval = df1.iloc[i + 1]["Time"] - df1.iloc[i]["Time"]
    else:
        interval = (17 - df1.iloc[i]["Time"]) + (df1.iloc[i + 1]["Time"] - 8)
    interArrivals1.append(interval)

interArrivals1 = np.array(interArrivals1)

print("Mean Inter-arrival time: ", np.mean(interArrivals1))

Mean Inter-arrival time:  0.5453439153439154


In [None]:
def bootstrap_Type1(interArrivals, B1, alpha):

    n1 = len(interArrivals)
    X_bar = np.mean(interArrivals)
    St_Dev = np.std(interArrivals)

    # Initialize empty arrays for bootstrap quantities
    X_star_bar = np.empty(B1)
    X_star_sd = np.empty(B1)
    Q_star = np.empty(B1)

    # bootstrapping
    for b in range(B1):
        J1 = np.random.choice(np.arange(n1), size=n1, replace=True) # Resampling with replacement
        X_star = interArrivals[J1] # Construct the bootstrap sample
        X_star_bar[b] = np.mean(X_star) # Calculate the bootstrap sample mean
        X_star_sd[b] = np.std(X_star) # Calculate the bootstrap sample variance
        Q_star[b] = np.sqrt(n1) * (X_star_bar[b] - X_bar) / X_star_sd[b] # Calculate the bootstrap quantity

    # Critical values
    cv_LB = np.quantile(Q_star, alpha / 2)
    cv_UB = np.quantile(Q_star, 1 - alpha / 2)

    # Confidence interval
    CI_LB = X_bar - cv_LB * St_Dev / np.sqrt(n1)
    CI_UP = X_bar - cv_UB * St_Dev / np.sqrt(n1)

    results = {
        "Bootstrap mean of interArrivals": np.mean(X_star_bar),
        "95% CI for Mean interArrivals": (CI_LB, CI_UP),
        "Bootstrap std of interArrivals": np.mean(X_star_sd),
        "95% CI for std interArrivals": (cv_LB, cv_UB)
    }
    
    return results

results = bootstrap_Type1(interArrivals1, B1=499, alpha=0.05)
for key, value in results.items():
    print(f"{key}: {value}")

Bootstrap mean of interArrivals: 0.5446245400854618
95% CI for Mean interArrivals: (np.float64(0.6203758653854785), np.float64(0.4895444229586992))
Bootstrap std of interArrivals: 0.5795309007316473
95% CI for std interArrivals: (np.float64(-2.50422752248658), np.float64(1.8623349718944266))


# Type 2