# Homework Reflections Week 9 - Week 12

In [11]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
from sklearn.utils import resample


# Week 9

Question 1

In [10]:
def simulate_se(true_effect=1, var_x=1, sample_size=1000, n_sims=1000, seed=0):
    """Contrast the slope's actual sampling spread with the SE that OLS reports.

    Each replication draws a regressor x ~ N(0, var_x), builds
    y = true_effect * x + eps where eps has standard deviation |x|
    (so the noise is heteroskedastic), and fits OLS with an intercept.

    Parameters
    ----------
    true_effect : slope used to generate y.
    var_x : variance of the simulated regressor.
    sample_size : number of observations per replication.
    n_sims : number of replications.
    seed : value passed to ``np.random.seed`` (resets NumPy's global RNG).

    Prints the standard deviation of the fitted slopes across replications
    and the mean of the OLS-reported slope standard errors, both rounded to
    six decimals. Returns nothing.
    """
    np.random.seed(seed)

    def _single_replication():
        # The regressor is drawn before the noise; this order fixes the
        # random stream and therefore the printed numbers.
        regressor = np.random.normal(0, np.sqrt(var_x), sample_size)
        noise = np.random.normal(0, np.abs(regressor), sample_size)
        response = true_effect * regressor + noise

        result = sm.OLS(response, sm.add_constant(regressor)).fit()
        # Position 1 holds the slope term; position 0 is the added constant.
        return result.params[1], result.bse[1]

    replications = [_single_replication() for _ in range(n_sims)]

    slope_sd = float(np.std([slope for slope, _ in replications]))
    avg_reported_se = float(np.mean([reported for _, reported in replications]))

    print(round(slope_sd, 6), "empirical std of slope")
    print(round(avg_reported_se, 6), "mean OLS-reported standard error")

# Run with the defaults: true_effect=1, var_x=1, sample_size=1000, n_sims=1000, seed=0.
simulate_se()

0.053725 empirical std of slope
0.03155 mean OLS-reported standard error


Question 2

In [12]:
# Seed NumPy's global RNG; make_data below draws its errors from np.random.
np.random.seed(0)

def make_data(n, rho):
    """Generate a linear series y = 2 * x + e with serially correlated errors.

    Parameters
    ----------
    n : number of observations; x is an evenly spaced grid on [0, 10].
    rho : base of the error covariance, Cov(e_i, e_j) = rho ** |i - j|.

    Returns
    -------
    (x, y) : two length-n arrays. The errors are drawn from NumPy's global
    random state, so results depend on the current seed.
    """
    positions = np.arange(n)
    # |i - j| for every pair of observations, built by broadcasting
    lags = np.abs(positions[:, None] - positions[None, :])
    error_cov = rho ** lags

    errors = np.random.multivariate_normal(np.zeros(n), error_cov)
    x = np.linspace(0, 10, n)
    return x, 2 * x + errors

def run_sim(n=100, rho=0.9, sims=200, boots=100):
    """Compare three slope-uncertainty measures under correlated errors.

    For each of ``sims`` datasets from ``make_data(n, rho)`` an OLS line is
    fitted, and a residual bootstrap with ``boots`` replicates is run on
    that fit: residuals are resampled, added back to the fitted values, and
    the model is refitted on the same design matrix.

    Parameters
    ----------
    n : observations per dataset.
    rho : error-correlation parameter forwarded to ``make_data``.
    sims : number of simulated datasets.
    boots : bootstrap replicates per dataset.

    Prints rho, the standard deviation of the fitted slopes across datasets
    ("True SD"), the mean OLS-reported slope SE, and the mean bootstrap SE,
    followed by a blank line. Returns nothing.
    """
    fitted_slopes, reported_se, bootstrap_se = [], [], []

    for _ in range(sims):
        x, y = make_data(n, rho)
        design = sm.add_constant(x)
        ols_fit = sm.OLS(y, design).fit()

        fitted_slopes.append(ols_fit.params[1])
        reported_se.append(ols_fit.bse[1])

        # Residual bootstrap: each replicate refits the same design on
        # fitted values plus a resampled copy of the residuals.
        replicate_slopes = [
            sm.OLS(ols_fit.fittedvalues + resample(ols_fit.resid), design)
            .fit()
            .params[1]
            for _ in range(boots)
        ]
        bootstrap_se.append(np.std(replicate_slopes))

    print("rho =", rho)
    print("True SD:", np.std(fitted_slopes))
    print("OLS SE:", np.mean(reported_se))
    print("Bootstrap SE:", np.mean(bootstrap_se))
    print()

# Sweep the error-correlation parameter; rho = 0 gives uncorrelated errors
# (covariance rho ** |i - j| reduces to the identity).
for rho in [0, 0.5, 0.9]:
    run_sim(rho=rho)

rho = 0
True SD: 0.034961095241031345
OLS SE: 0.034365599495386354
Bootstrap SE: 0.03397123700874447

rho = 0.5
True SD: 0.05480789361052515
OLS SE: 0.0335361108459322
Bootstrap SE: 0.03282796958384208

rho = 0.9
True SD: 0.11967134331095981
OLS SE: 0.02809543976701594
Bootstrap SE: 0.02756967309808162

