In [7]:
import scipy.stats as st 
import numpy as np 

In [10]:
#Compute confidence/credible intervals based on the four methods decsrcivbed for stimulated data sampled
#from a population that is Guassian distributed with mean (mu=10) and standard deviation = 2 
#for n=5, 10, 20, 40, 80, 160, 1000 at a 95% confidence level 


mu = 10 
sigma = 2 
alpha = 0.95 
num_bootstraps = 1000 

# Loop through the ns. Note that the different approaches converge on the same answer as n gets large 
for n in [5, 10, 20, 40, 80, 160, 1000]:

  # simulate some data 
  samples = np.random.normal(mu, sigma, n) 

  # save the mean 
  sample_mean = np.mean(samples)

  # show the mean, n 
  print(f'n = {n}, mean = {sample_mean:.2f}') 

  # Method 1: analytic solution assuming Gaussian 
  # We 
  # Get the z-score for the given confidence level (make it negative so we can subtract it to make the lower interval)
  z = -st.norm.ppf((1-alpha)/2) 

  #1a. Use the given sigma 
  sem = sigma/np.sqrt(n) 
  print(f'la: CI=[{sample_mean-sem*z:.2f}, {sample_mean+sem*z:.2f}]')

  #1b. Use the sample sigma 
  # Best if n is large (>30)
  #sem = np.std(samples)/np.sqrt(n) 
  #print(f'1b: CI=[{sample_mean-sem*z:.2f}, {sample_mean+sem*z:.2f}]')




n = 5, mean = 10.43
la: CI=[8.68, 12.18]
n = 10, mean = 10.67
la: CI=[9.43, 11.91]
n = 20, mean = 10.39
la: CI=[9.51, 11.27]
n = 40, mean = 9.95
la: CI=[9.33, 10.57]
n = 80, mean = 10.46
la: CI=[10.03, 10.90]
n = 160, mean = 10.07
la: CI=[9.76, 10.38]
n = 1000, mean = 9.97
la: CI=[9.85, 10.09]


In [14]:
 
 
mu = 10 
sigma = 2 
alpha = 0.95 
num_bootstraps = 1000 

# Loop through the ns. Note that the different approaches converge on the same answer as n gets large 
for n in [5, 10, 20, 40, 80, 160, 1000]:

  # simulate some data 
  samples = np.random.normal(mu, sigma, n) 

  # save the mean 
  sample_mean = np.mean(samples)

  # show the mean, n 
  print(f'n = {n}, mean = {sample_mean:.2f}') 
 
 #1b. Use the sample sigma 
   # Best if n is large (>30)
  sem = np.std(samples)/np.sqrt(n) 
  print(f'2: CI=[{sample_mean-sem*z:.2f}, {sample_mean+sem*z:.2f}]')

n = 5, mean = 9.80
2: CI=[7.90, 11.69]
n = 10, mean = 10.55
2: CI=[9.58, 11.53]
n = 20, mean = 9.86
2: CI=[8.98, 10.74]
n = 40, mean = 9.89
2: CI=[9.18, 10.61]
n = 80, mean = 9.96
2: CI=[9.49, 10.43]
n = 160, mean = 9.83
2: CI=[9.53, 10.13]
n = 1000, mean = 10.03
2: CI=[9.91, 10.16]


In [20]:
mu = 10 
sigma = 2 
alpha = 0.95 
num_bootstraps = 1000 

# Loop through the ns. Note that the different approaches converge on the same answer as n gets large 
for n in [5, 10, 20, 40, 80, 160, 1000]:

  # simulate some data 
  samples = np.random.normal(mu, sigma, n) 

  # save the mean 
  sample_mean = np.mean(samples)

  # show the mean, n 
  print(f'n = {n}, mean = {sample_mean:.2f}') 

  # Method 2: analytic solution assuming t-distribution 
  # Best is n is small (<30) ... note that as n increases, the t distribution approaches a Gaussian and methods 1 and 2 become more and more similar

  # Get the cutoff using the t distribution, which is said to have n-1 degrees of freedom 
  t = -st.t.ppf((1-alpha)/2,df=n-1)
  sem = np.std(samples)/np.sqrt(n); 
  print(f'3 : CI=[{sample_mean-sem*t:.2f}, {sample_mean+sem*t:.2f}]')

n = 5, mean = 9.47
3 : CI=[7.75, 11.19]
n = 10, mean = 9.30
3 : CI=[8.53, 10.06]
n = 20, mean = 10.20
3 : CI=[9.61, 10.79]
n = 40, mean = 10.14
3 : CI=[9.62, 10.66]
n = 80, mean = 9.85
3 : CI=[9.38, 10.32]
n = 160, mean = 9.93
3 : CI=[9.66, 10.21]
n = 1000, mean = 9.98
3 : CI=[9.85, 10.11]


In [19]:
mu = 10 
sigma = 2 
alpha = 0.95 
num_bootstraps = 1000 

# Loop through the ns. Note that the different approaches converge on the same answer as n gets large 
for n in [5, 10, 20, 40, 80, 160, 1000]:

  # simulate some data 
  samples = np.random.normal(mu, sigma, n) 

  # save the mean 
  sample_mean = np.mean(samples)

  # show the mean, n 
  print(f'n = {n}, mean = {sample_mean:.2f}') 

  # Method 3: bootstrap 
  # Resample the data with replacement to get new estimates of mu 
  # Note that here we do not make assumptions about the nature of the real distribution. 
  mu_star = [np.mean(np.random.choice(samples, size=n)) for ii in np.arange(num_bootstraps)]

  # Now report the CI directly from the bootstrapped distribution 
  print(f'4 : CI=[{np.percentile(mu_star, 100*(1-alpha)/2):.2f}, {np.percentile(mu_star, 100*(alpha+(1-alpha)/2)):.2f}]')


n = 5, mean = 11.24
4 : CI=[9.83, 12.73]
n = 10, mean = 9.98
4 : CI=[9.12, 10.88]
n = 20, mean = 10.14
4 : CI=[9.39, 11.03]
n = 40, mean = 9.91
4 : CI=[9.36, 10.46]
n = 80, mean = 9.97
4 : CI=[9.58, 10.41]
n = 160, mean = 10.31
4 : CI=[10.01, 10.58]
n = 1000, mean = 9.97
4 : CI=[9.85, 10.09]


In [18]:
mu = 10 
sigma = 2 
alpha = 0.95 
num_bootstraps = 1000 

# Loop through the ns. Note that the different approaches converge on the same answer as n gets large 
for n in [5, 10, 20, 40, 80, 160, 1000]:

  # simulate some data 
  samples = np.random.normal(mu, sigma, n) 

  # save the mean 
  sample_mean = np.mean(samples)

  # show the mean, n 
  print(f'n = {n}, mean = {sample_mean:.2f}') 

  # Method 4: Credible interval 
  print(f'----')

n = 5, mean = 12.21
----
n = 10, mean = 9.55
----
n = 20, mean = 10.41
----
n = 40, mean = 10.16
----
n = 80, mean = 10.41
----
n = 160, mean = 9.82
----
n = 1000, mean = 9.97
----
