# Appendices

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm


In [2]:
from joblib import Parallel, delayed


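## A1: Bootstrap

Monte Carlo check that the bootstrap variance of the sample mean agrees with the analytic estimate $s^2/n$.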

In [12]:
# Simulation settings: sample size, Monte Carlo replications, bootstrap draws.
n, MC, nboot = 500, 1000, 500


def onerep(seed=None):
    """Run one Monte Carlo replication of the bootstrap-variance experiment.

    Draws ``n`` observations from N(1, 1), then recomputes the sample mean on
    ``nboot`` bootstrap resamples (``n`` indices drawn with replacement).

    Parameters
    ----------
    seed : int or None, optional
        Seed for a private ``np.random.RandomState``. With the default
        ``None`` the global ``np.random`` state is used, so existing
        zero-argument calls behave exactly as before. Passing a distinct
        seed per replication makes a simulation run reproducible.

    Returns
    -------
    tuple
        ``(sample mean, analytic variance of the mean s^2/n,
        bootstrap variance of the mean)``.
    """
    # The np.random module and a RandomState instance expose the same
    # normal/randint methods, so either can act as the randomness source.
    rs = np.random if seed is None else np.random.RandomState(seed)
    Y = rs.normal(1, 1, n)
    # One resampled mean per bootstrap draw; the draw order matches the
    # original lambda-based loop.
    boot_mu_hat = np.array([np.mean(Y[rs.randint(0, n, n)]) for _ in range(nboot)])
    return np.mean(Y), np.var(Y, ddof=1) / n, np.var(boot_mu_hat, ddof=1)


onerep()


(0.9889370765217561, 0.002111898300454035, 0.0021215077032831677)

In [15]:
# Run MC independent replications of onerep through joblib (n_jobs=-1) and
# stack the returned triples into an (MC, 3) array.
res = np.array(
    Parallel(n_jobs=-1)(delayed(onerep)() for _ in range(MC))
)
# Column-wise averages over the replications, rounded for display.
res.mean(axis=0).round(3)


array([1.002, 0.002, 0.002])

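## A2: Robust SEs

Heteroskedasticity-robust (HC0–HC3) standard errors for an OLS regression of `medv` on the remaining Boston housing variables.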

In [21]:
import statsmodels.formula.api as smf


In [52]:
# Load the Boston housing CSV straight from its GitHub-hosted copy, then show
# summary statistics for every column.
boston = pd.read_csv(
    "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
)
boston.describe()


Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [29]:
# Formula regressing medv on every column except the last one (medv itself).
f = "medv ~ " + "+".join(boston.columns[:-1])
f


'medv ~ crim+zn+indus+chas+nox+rm+age+dis+rad+tax+ptratio+b+lstat'

In [49]:
m = smf.ols(f, data=boston)
coef = m.fit().params
se_types = ["HC0", "HC1", "HC2", "HC3"]
SEs = [m.fit(cov_type = x).bse for x in se_types]

pd.DataFrame(np.c_[coef, *SEs], columns = ["coef", *se_types], index = m.exog_names).round(3)


Unnamed: 0,coef,HC0,HC1,HC2,HC3
Intercept,36.459,7.89,8.001,8.145,8.412
crim,-0.108,0.029,0.029,0.031,0.034
zn,0.046,0.014,0.014,0.014,0.014
indus,0.021,0.05,0.05,0.051,0.052
chas,2.687,1.276,1.294,1.31,1.345
nox,-17.767,3.733,3.786,3.827,3.924
rm,3.81,0.833,0.845,0.861,0.89
age,0.001,0.016,0.016,0.017,0.017
dis,-1.476,0.212,0.215,0.217,0.222
rad,0.306,0.061,0.061,0.062,0.064
