**The Bootstrap!**

In [1]:
import pandas as pd
import random
import numpy as np
from sklearn.linear_model import LinearRegression

def bootstrap_sample(data, rng=None):
    """Draw one bootstrap resample of ``data`` along its first axis.

    ``data.shape[0]`` positions are drawn uniformly with replacement and the
    corresponding entries (elements of a 1-D input, whole rows of a 2-D
    input) are returned, so the result has the same shape as ``data``.

    Parameters
    ----------
    data : numpy.ndarray, pandas.Series or pandas.DataFrame
        Observations to resample; anything exposing ``.shape`` and either
        ``.iloc`` or integer-array indexing.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so ``np.random.seed`` still controls the draw.

    Returns
    -------
    Same container type as ``data``, holding the resampled observations.
    """
    n_obs = data.shape[0]
    sampler = np.random if rng is None else rng
    random_inds = sampler.choice(n_obs, size=n_obs)

    # pandas objects treat ``obj[int_array]`` as a *label* lookup, which only
    # coincides with positions for a default 0..n-1 index. ``.iloc`` is always
    # positional, so filtered / re-indexed Series and DataFrames work too.
    if hasattr(data, "iloc"):
        return data.iloc[random_inds]
    return data[random_inds]

In [2]:
# Demo on a 1-D array: the resample has the same length and may repeat values.
demo_1d = np.array([1, 2, 3, 4])
bootstrap_sample(demo_1d)

array([2, 1, 2, 2])

In [3]:
# Demo on a 2-D array: whole rows are resampled, so each row stays intact.
demo_2d = np.array([[3, 4, 5], [1, 2, 1]])
bootstrap_sample(demo_2d)

array([[3, 4, 5],
       [1, 2, 1]])

**Part 1: 95% percentile-bootstrap confidence interval for the median of the target `y`**

In [4]:
df = pd.read_csv('pair_boot.csv')
y = df.y

# Fix the seed so the resamples, and therefore the printed interval, are
# reproducible after Restart Kernel -> Run All.
np.random.seed(0)

# Resample from the underlying array: indexing a Series with an integer array
# looks the integers up as index labels, which only matches positions when the
# index is the default 0..n-1 range.
y_values = y.values

medians = []
bootstrap_num = 1000
for _ in range(bootstrap_num):
    boot_sample = bootstrap_sample(y_values)
    medians.append(np.median(boot_sample))

# Point estimate: mean of the bootstrapped medians.
avg = np.mean(medians)
# Percentile interval: the central 95% of the bootstrapped medians.
p_lower, p_upper = np.percentile(medians, [2.5, 97.5])

print('%d Bootstrap samples estimated median: %.3f' % 
      (bootstrap_num, avg))
print('With a 95%% confidence interval from %.3f to %.3f \n' % 
      (p_lower, p_upper))

1000 Bootstrap samples estimated median: -0.400
With a 95% confidence interval from -0.521 to -0.261 



**Part 2: 95% percentile-bootstrap confidence intervals for the regression coefficients (rows resampled as (X, y) pairs)**

In [5]:
# Fix the seed so the resamples, and therefore the printed intervals, are
# reproducible after Restart Kernel -> Run All.
np.random.seed(0)

bootstrap_num = 1000

# Convert the frame to an array once; it does not change between iterations.
data_matrix = df.values

coef_draws = []
for _ in range(bootstrap_num):
    # Whole rows are resampled, so each predictor row stays paired with its
    # own response value.
    boot_data = bootstrap_sample(data_matrix)
    # The first five columns are used as predictors, the last as the response.
    boot_X, boot_y = boot_data[:,:5], boot_data[:,-1]
    lm = LinearRegression().fit(boot_X,boot_y)
    coef_draws.append(lm.coef_)

# One row per bootstrap fit, one column per coefficient.
coefs = np.array(coef_draws)
coef_avgs = np.mean(coefs,axis=0)
coef_p_lowers = np.percentile(coefs,2.5,axis=0)
coef_p_uppers = np.percentile(coefs,97.5,axis=0)

for i in range(coefs.shape[1]):
    print('Bootstrapped coef estimate for x%d: %.3f' % 
          (i+1,coef_avgs[i]))
    print('With a 95%% conf interval from %.3f to %.3f \n' %
          (coef_p_lowers[i],coef_p_uppers[i]))



Bootstrapped coef estimate for x1: -1.412
With a 95% conf interval from -1.474 to -1.351 

Bootstrapped coef estimate for x2: -0.242
With a 95% conf interval from -0.311 to -0.176 

Bootstrapped coef estimate for x3: 0.552
With a 95% conf interval from 0.491 to 0.614 

Bootstrapped coef estimate for x4: 0.431
With a 95% conf interval from 0.366 to 0.498 

Bootstrapped coef estimate for x5: 0.254
With a 95% conf interval from 0.188 to 0.321 

