In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy.stats import norm, jarque_bera, kstest, gamma, expon

# Parametric: Bootstrapping

In [2]:
df = pd.read_csv("ScanRecords.csv")
df['Date'] = pd.to_datetime(df['Date'])
print("Shape of df: ", df.shape)
df.head()

Shape of df:  (618, 4)


Unnamed: 0,Date,Time,Duration,PatientType
0,2023-08-01,8.23,0.949176,Type 2
1,2023-08-01,8.49,0.479593,Type 1
2,2023-08-01,9.12,0.496112,Type 2
3,2023-08-01,10.26,0.691947,Type 2
4,2023-08-01,10.64,0.345412,Type 1


In [3]:
df1 = df[df["PatientType"] == "Type 1"]
df2 = df[df["PatientType"] == "Type 2"]

In [6]:
print(df.groupby('PatientType')['Duration'].describe())  

             count      mean       std       min       25%       50%  \
PatientType                                                            
Type 1       379.0  0.432661  0.097774  0.093731  0.368123  0.435888   
Type 2       239.0  0.669339  0.187286  0.220708  0.523962  0.646603   

                  75%       max  
PatientType                      
Type 1       0.496907  0.708922  
Type 2       0.796523  1.146789  


## Type 1

In [4]:
df1.head()

Unnamed: 0,Date,Time,Duration,PatientType
1,2023-08-01,8.49,0.479593,Type 1
4,2023-08-01,10.64,0.345412,Type 1
5,2023-08-01,11.07,0.42227,Type 1
6,2023-08-01,11.13,0.356129,Type 1
8,2023-08-01,11.56,0.423303,Type 1


In [12]:
def bootstrap_Type1(df1, B1, alpha):

    n1 = len(df1)
    X_bar = np.mean(df1['Duration'])
    St_Dev = np.std(df1['Duration'])

    # Initialize empty arrays for bootstrap quantities
    X_star_bar = np.empty(B1)
    X_star_sd = np.empty(B1)
    Q_star = np.empty(B1)

    # bootstrapping
    for b in range(B1):
        J1 = np.random.choice(np.arange(n1), size=n1, replace=True)
        X_star = df1['Duration'].iloc[J1] # Construct the bootstrap sample
        X_star_bar[b] = np.mean(X_star) # Calculate the bootstrap sample mean
        X_star_sd[b] = np.std(X_star) # Calculate the bootstrap sample variance
        Q_star[b] = np.sqrt(n1) * (X_star_bar[b] - X_bar) / X_star_sd[b] # Calculate the bootstrap quantity

    # Critical values
    cv_LB = np.quantile(Q_star, alpha / 2)
    cv_UB = np.quantile(Q_star, 1 - alpha / 2)

    # Confidence interval
    CI_LB = X_bar - cv_LB * St_Dev / np.sqrt(n1)
    CI_UP = X_bar - cv_UB * St_Dev / np.sqrt(n1)

    results = {
        "Bootstrap mean of Duration": np.mean(X_star_bar),
        "95% CI for Mean Duration": (CI_LB, CI_UP),
        "Bootstrap std of Duration": np.mean(X_star_sd) ,
        "95% CI for std Duration": (cv_LB, cv_UB)
    }
    
    return results

results = bootstrap_Type1(df1, B1=499, alpha=0.05)
for key, value in results.items():
    print(f"{key}: {value}")

Bootstrap mean of Duration: 0.43270909528508605
95% CI for Mean Duration: (np.float64(0.44245287539648026), np.float64(0.42357441169877136))
Bootstrap std of Duration: 0.09784846899259866
95% CI for std Duration: (np.float64(-1.9522872136279104), np.float64(1.811590446231109))


# Type 2