<a href="https://colab.research.google.com/github/emsiefert/NeuroCoreCourse/blob/main/ConfidenceIntervalsAndBootstrapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Confidence Intervals and Bootstrapping HW assignment

Exercises
Compute confidence/credible intervals based on the four methods above for simulated data sampled from a population that is Gaussian distributed with mean 
μ
=10 and standard deviation 
σ
=2, for n=5, 10, 20, 40, 80, 160, 1000 at a 95% confidence level.

In [1]:
import numpy as np
import random as rnd
import collections
import matplotlib.pyplot as plt
import time
import scipy.stats as st

from scipy.stats import bernoulli, binom, poisson, chi2
from IPython.display import clear_output
from operator import itemgetter
from statsmodels.stats import proportion

from numpy import matlib

In [9]:
# 1. The simple, analytic approach with large n and/or known standard deviation. 
# Based on that paragraph, sounds like we want to do a standard error of the mean
mu = 10
sigma = 2
sample = [5, 10, 20, 40, 80, 160, 1000]

for n in sample:
  data_generated = np.random.normal(mu,sigma,n) # simulate data
  sd = np.std(data_generated) # get actual standard deviation of simulated data
  mean = np.mean(data_generated) # get actual mean of the simualted data
  SEM_cf = (sd/np.sqrt(n))*1.96 # 1.96 is from the documentation
  confidence_interval = [mean-SEM_cf, mean+SEM_cf] # calculate confidence interval based on the dcoumentation
  print(confidence_interval) # print!



[8.325511440931951, 12.343692567660899]
[7.341418538751203, 10.35636562264009]
[9.351903267137878, 11.105679621463334]
[9.747151681395815, 10.776917193318353]
[9.791910553634795, 10.688013576914665]
[9.60190173091741, 10.193554324216558]
[9.901977918482613, 10.153686258624964]


In [14]:
#2. The simple, analytic approach with small n and unknown population standard deviation
# sounds like we wanna do the same as above, but this time with t-values instead of z-values
mu = 10
sigma = 2
sample = [5, 10, 20, 40, 80, 160, 1000]

t_vals = [2.776, 2.262, 2.093, 2.023, 1.99, 1.975, 1.962] #t_vals are from online t val calculator, for each n (with df = n-1)

for idx, n in enumerate(sample):
  data_generated = np.random.normal(mu,sigma,n) # simulate data
  sd = np.std(data_generated) # get actual standard deviation of simulated data
  mean = np.mean(data_generated) # get actual mean of the simualted data

  SEM_cf = (sd/np.sqrt(n))*t_vals[idx] # t val is from t_vals
  confidence_interval = [mean-SEM_cf, mean+SEM_cf] # calculate confidence interval based on documentation
  print(confidence_interval) # print!


[6.752149579947492, 13.61682319826092]
[9.107872256840334, 12.605064666412254]
[10.578425835772885, 12.66686611264988]
[9.274846972404031, 10.439535534754562]
[9.501042626902185, 10.386638916280637]
[9.667862201849275, 10.251089537667973]
[9.901207107173995, 10.151390673201789]


In [22]:
#3. Bootstrapped confidence intervals
mu = 10
sigma = 2
sample = [5, 10, 20, 40, 80, 160, 1000]

for n in sample:
  data_generated = np.random.normal(mu,sigma,n) # simulate data
  data_generated = (data_generated,)
  bootstrapped_confidence_interval = st.bootstrap(data_generated, np.mean, confidence_level = .95) #function found from googling
  ci_l, ci_u =  bootstrapped_confidence_interval.confidence_interval #pull out info you want
  print(ci_l, ci_u)


10.67964231474015 13.097638527981275
9.721730522011724 11.388530691799858
8.814466537358138 10.413447967050663
9.399458935296982 10.578341577125329
9.608130275240969 10.593516549249479
9.598835142817135 10.266151108045255
9.820063819905773 10.0621156923036


In [None]:
#4. Bayesian credible intervals
# feels like here, its asking me to do the same hting i did in the first one but on the posterior probability distribution?
mu = 10
sigma = 2
sample = [5, 10, 20, 40, 80, 160, 1000]

for n in sample:
  data_generated = np.random.normal(mu,sigma,n) # simulate data
  #here, i wanna get the posterior probability distribution
  sd = np.std(data_generated) # get actual standard deviation of simulated data
  mean = np.mean(data_generated) # get actual mean of the simualted data
  SEM_cf = (sd/np.sqrt(n))*1.96 # 1.96 is from the documentation
  confidence_interval = [mean-SEM_cf, mean+SEM_cf] # calculate confidence interval based on the dcoumentation
  print(confidence_interval) # print!


