<a href="https://colab.research.google.com/github/djamoreland/Quantitative-Neuroscience-Moreland/blob/main/Moreland_Confidence_Intervals_Exercise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Compute confidence/credible intervals based on the four methods above for simulated data sampled from a population that is Gaussian distributed

mean =10 and standard deviation =2,

for n=5, 10, 20, 40, 80, 160, 1000 at a 95% confidence level.

In [45]:
import numpy as np
import math
import scipy.stats as stats


#Cosmetics
np.set_printoptions(legacy='1.25')

In [9]:
mean = 10
std=2
n_list = [5,10,20,40,80,160,1000]

In [24]:
#Simple Analytical Approach

CI_dict = {}

for n in n_list:
  #Calculate SEM
  SEM = std / math.sqrt(n)

  #Calculate 95% z_score and factor to SEM
  z_score_95 = 1.96
  norm_SEM = SEM * z_score_95

  #Calculate Confidence Interval
  Confidence_Interval = [(mean - (norm_SEM)),mean + (norm_SEM)]

  CI_dict[f'95% Confidence Interval when n = {str(n)}'] = Confidence_Interval
  print(f'A 95% Confidence Interval at n = {n} is {Confidence_Interval}')

A 95% Confidence Interval at n = 5 is [8.246922705640165, 11.753077294359835]
A 95% Confidence Interval at n = 10 is [8.760387157213994, 11.239612842786006]
A 95% Confidence Interval at n = 20 is [9.123461352820083, 10.876538647179917]
A 95% Confidence Interval at n = 40 is [9.380193578606997, 10.619806421393003]
A 95% Confidence Interval at n = 80 is [9.561730676410042, 10.438269323589958]
A 95% Confidence Interval at n = 160 is [9.6900967893035, 10.3099032106965]
A 95% Confidence Interval at n = 1000 is [9.8760387157214, 10.1239612842786]


In [47]:
#Simple Analytical Approach with Small N

for n in n_list:

  #Correct standard deviation w Bessel's Correction
  std_corrected = math.sqrt((1/(n-1)) * std**2)

  #Calculate SEM
  SEM = std_corrected / math.sqrt(n)

  #Calculate 95% t_score and factor to SEM
  df = n-1
  t_score_95 = stats.t.ppf((0.5*(1-0.95)), df, loc=0, scale=1)
  t_score_95 *= -1

  norm_SEM = SEM * t_score_95

  #Calculate Confidence Interval
  Confidence_Interval = [(mean - (norm_SEM)),mean + (norm_SEM)]


  print(f'A 95% Confidence Interval at n = {n} is {Confidence_Interval}')

A 95% Confidence Interval at n = 5 is [8.758336001796234, 11.241663998203766]
A 95% Confidence Interval at n = 10 is [9.523095396007774, 10.476904603992226]
A 95% Confidence Interval at n = 20 is [9.785260262978362, 10.214739737021638]
A 95% Confidence Interval at n = 40 is [9.897577063891804, 10.102422936108196]
A 95% Confidence Interval at n = 80 is [9.949924790492515, 10.050075209507485]
A 95% Confidence Interval at n = 160 is [9.975235035524467, 10.024764964475533]
A 95% Confidence Interval at n = 1000 is [9.996073353263288, 10.003926646736712]


In [72]:
#Bootstrapping - made with help of chat gpt


for n in n_list:

  #Create sample data from normal distribution centered around mean
  sample = stats.norm.rvs(loc = mean,scale = std, size = n)

  #Create number of samples
  boot_samples = 1000
  boot_means = []

  #Resample
  for i in range(boot_samples):
    new_sample = np.random.choice(sample, size=n, replace=True)
    boot_means.append(np.mean(new_sample))

  #Calculate Confidence Interval
  ci_bootstrap = np.percentile(boot_means, [2.5, 97.5])

  print(f'Bootstrap 95% Confidence Interval for n of {n} is {ci_bootstrap}')



Bootstrap 95% Confidence Interval for n of 5 is [ 7.09039151 11.39585994]
Bootstrap 95% Confidence Interval for n of 10 is [ 7.77088543 11.48589734]
Bootstrap 95% Confidence Interval for n of 20 is [ 8.47908868 10.4367196 ]
Bootstrap 95% Confidence Interval for n of 40 is [ 9.20383732 10.46172171]
Bootstrap 95% Confidence Interval for n of 80 is [ 9.59817381 10.45687304]
Bootstrap 95% Confidence Interval for n of 160 is [ 9.86927001 10.48232609]
Bootstrap 95% Confidence Interval for n of 1000 is [ 9.96433948 10.21018926]


In [78]:
#Bayesian Credible Intervals - made with help of chatgpt

for n in n_list:

  #Create sample data from normal distribution centered around mean
  sample = stats.norm.rvs(loc = mean,scale = std, size = n)

  #Get sample mean and std from simulated data
  sample_mean = np.mean(sample)
  sample_std = np.std(sample, ddof=1)

  #Calculate posterior using t distribution
  posterior = stats.t(df=n-1, loc=sample_mean, scale=sample_std/np.sqrt(n))

  #Calculate credible interval
  ci_bayes = posterior.ppf([0.025, 0.975])


  print(f'95% Credible Interval for n of {n} is {ci_bayes}')



95% Credible Interval for n of 5 is [ 6.2725314  11.65103858]
95% Credible Interval for n of 10 is [ 8.03360305 10.87221569]
95% Credible Interval for n of 20 is [ 9.24471773 11.01036694]
95% Credible Interval for n of 40 is [ 9.6291338  10.75288202]
95% Credible Interval for n of 80 is [ 9.68977975 10.54835852]
95% Credible Interval for n of 160 is [ 9.59431352 10.21730111]
95% Credible Interval for n of 1000 is [ 9.81781532 10.07092645]
