# DX702 Homework Reflections 9, 11, 12
Justin Hawkins

## Reflection 9.1 – Heteroskedasticity and Standard Errors

In [4]:

import numpy as np
import statsmodels.api as sm
import pandas as pd

rng = np.random.default_rng(42)

def simulate_heteroskedastic(n=200):
    """Simulate one heteroskedastic sample and fit OLS of the outcome on x.

    The regressor is drawn from a standard normal, the outcome is
    ``1 + 2*x + noise``, and the noise standard deviation is
    ``0.5 + 1.5*|x|``, so the error spread grows with the magnitude of the
    regressor.  All draws come from the ``rng`` defined in this cell.

    Parameters
    ----------
    n : int
        Number of observations in the simulated sample.

    Returns
    -------
    tuple
        ``(slope, plain_se, hc1_se)``: the fitted slope, the standard error
        reported by the plain fit, and the standard error after switching
        the covariance estimator to HC1.
    """
    regressor = rng.normal(size=n)
    noise_scale = 0.5 + 1.5 * np.abs(regressor)
    outcome = 1 + 2 * regressor + rng.normal(scale=noise_scale)

    design = sm.add_constant(regressor)
    fit = sm.OLS(outcome, design).fit()
    robust_fit = fit.get_robustcov_results(cov_type="HC1")

    slope_idx = 1  # index 0 holds the constant term
    return fit.params[slope_idx], fit.bse[slope_idx], robust_fit.bse[slope_idx]

# Monte Carlo experiment: run the simulation R times and keep, for every
# replication, the slope estimate and both standard-error estimates.
R = 1000
replications = [simulate_heteroskedastic() for _ in range(R)]

betas = [slope for slope, _, _ in replications]
ses_default = [plain_se for _, plain_se, _ in replications]
ses_robust = [hc1_se for _, _, hc1_se in replications]

# One-row summary: the spread of the slope estimates across replications
# next to the average of each reported standard error.
summary_row = {
    "true_sd": np.std(betas),
    "avg_default_se": np.mean(ses_default),
    "avg_robust_se": np.mean(ses_robust),
}
pd.DataFrame([summary_row])


Unnamed: 0,true_sd,avg_default_se,avg_robust_se
0,0.214266,0.135709,0.211494


## Reflection 9.2 – Correlated Errors and Bootstrap Limitations

In [5]:

import numpy as np
import statsmodels.api as sm
import pandas as pd

rng = np.random.default_rng(123)

def correlated_errors(n, rho):
    """Draw ``n`` normal errors with unit variance and pairwise correlation ``rho``.

    Builds the n-by-n covariance matrix with ones on the diagonal and ``rho``
    everywhere else, takes its Cholesky factor, and multiplies that factor by
    a vector of independent standard-normal draws from the cell-level ``rng``.

    Parameters
    ----------
    n : int
        Length of the error vector.
    rho : float
        Off-diagonal entry of the covariance matrix.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of length ``n``.
    """
    covariance = np.full((n, n), rho, dtype=float)
    np.fill_diagonal(covariance, 1)

    factor = np.linalg.cholesky(covariance)
    independent_draws = rng.normal(size=n)
    return factor @ independent_draws

def simulate(n=40, rho=0.8):
    """Generate one sample with correlated errors and fit OLS on it.

    The regressor is an evenly spaced grid of ``n`` points on [-1, 1]; the
    outcome is ``1 + 1.5*x`` plus a draw from ``correlated_errors(n, rho)``.

    Returns
    -------
    tuple
        ``(X, y, model)``: the regressor grid, the simulated outcome, and the
        fitted OLS results for the outcome on a constant and the regressor.
    """
    grid = np.linspace(-1, 1, n)
    noise = correlated_errors(n, rho)
    outcome = 1 + 1.5 * grid + noise

    design = sm.add_constant(grid)
    fitted_model = sm.OLS(outcome, design).fit()
    return grid, outcome, fitted_model

# Step 1 - Monte Carlo benchmark: spread of the slope estimate over R
# independently simulated samples.
R = 2000
betas = []
for _ in range(R):
    fit = simulate()[2]
    betas.append(fit.params[1])
true_sd = np.std(betas)

# Step 2 - residual bootstrap on a single sample: resample the fitted
# residuals with replacement, rebuild the outcome, and refit.
X, y, model = simulate()
resid = model.resid
n_resid = len(resid)


def _bootstrap_slope():
    """Refit OLS on one residual-resampled outcome and return the slope."""
    resampled = rng.choice(resid, size=n_resid, replace=True)
    pseudo_outcome = model.fittedvalues + resampled
    return sm.OLS(pseudo_outcome, sm.add_constant(X)).fit().params[1]


boot = [_bootstrap_slope() for _ in range(2000)]

# Side-by-side comparison of the two spreads.
pd.DataFrame([{"true_sd": true_sd, "bootstrap_sd": np.std(boot)}])


Unnamed: 0,true_sd,bootstrap_sd
0,0.119098,0.099715


## Reflection 11.1 – Event Study with Level, Slope, and Curvature Changes

In [6]:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Simulated single-series event study: T periods, event at period T0.
T = 200
t = np.arange(T)
T0 = 100
post = (t >= T0).astype(int)

# Seeded generator so that Restart & Run All reproduces the same coefficient
# tables; the unseeded global np.random state gave different numbers each run.
rng = np.random.default_rng(11)

# Data-generating process: quadratic trend 1 + 0.03*t + 0.0005*t^2 before the
# event; from T0 onwards an extra 2 + 0.02*t + 0.0005*t^2 is added.  The
# post-event terms use raw t (not t - T0), so the jump in the mean at t = T0
# is 2 + 0.02*100 + 0.0005*100**2 = 9.
y = 1 + 0.03*t + 0.0005*t**2 + post*(2 + 0.02*t + 0.0005*t**2) + rng.normal(size=T)

df = pd.DataFrame({"y": y, "t": t, "t2": t**2, "post": post})

# m1 allows only a shift in level after the event;
# m2 additionally lets the slope and curvature change after the event.
m1 = smf.ols("y ~ t + t2 + post", df).fit()
m2 = smf.ols("y ~ t + t2 + post + post:t + post:t2", df).fit()

print("Model with value only")
print(m1.summary().tables[1])
print("\nModel with value, slope, and curvature shifts")
print(m2.summary().tables[1])


Model with value only
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.7790      0.241      7.386      0.000       1.304       2.254
t             -0.0424      0.006     -7.331      0.000      -0.054      -0.031
t2             0.0013   2.58e-05     51.147      0.000       0.001       0.001
post           7.6603      0.308     24.879      0.000       7.053       8.268

Model with value, slope, and curvature shifts
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.6016      0.277      2.171      0.031       0.055       1.148
t              0.0390      0.013      3.015      0.003       0.013       0.065
t2             0.0005      0.000      3.571      0.000       0.000       0.001
post          -2.8807      2.779     -1.037      0.301      -8

## Reflection 11.2 – Group Fixed Effects with Common Event Shift

In [7]:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Simulated panel: G groups observed for T periods, with a common event at
# period `event` that shifts every group's outcome by `tau`.
T = 40
G = 3
event = 25
alphas = [0, 3, 6]   # group-specific intercepts
slope = 0.2          # common linear time trend
tau = 2.5            # common post-event level shift
rows = []

# Seeded generator so that Restart & Run All reproduces the same coefficient
# table; the unseeded global np.random state gave different numbers each run.
rng = np.random.default_rng(112)

for g in range(G):
    for tt in range(T):
        post = int(tt >= event)
        y = alphas[g] + slope*tt + tau*post + rng.normal()
        rows.append({"group": g, "t": tt, "post": post, "y": y})

# Build the frame once from the collected records.
panel = pd.DataFrame(rows)
panel["group"] = panel["group"].astype("category")

# Group dummies absorb the intercept differences; `post` estimates tau.
model = smf.ols("y ~ C(group) + t + post", panel).fit()
print(model.summary().tables[1])


                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept         0.2428      0.257      0.944      0.347      -0.267       0.753
C(group)[T.1]     2.9133      0.234     12.440      0.000       2.449       3.377
C(group)[T.2]     5.9059      0.234     25.219      0.000       5.442       6.370
t                 0.1824      0.015     11.992      0.000       0.152       0.213
post              2.8273      0.363      7.795      0.000       2.109       3.546


## Reflection 12 – Differences-in-Differences with Non-parallel Trends

In [8]:

import numpy as np
import pandas as pd

# Two groups observed for T periods, with the event at period `event`.
T = 40
event = 20
t = np.arange(T)
post = (t >= event).astype(int)

# Seeded generator so that Restart & Run All reproduces the same estimate;
# the unseeded global np.random state gave a different number each run.
rng = np.random.default_rng(12)

# Neither series contains a treatment effect: both start at 2 and differ only
# in their time slope (0.1 for control, 0.3 for treated), so the two groups
# do not share a common trend.
y_c = 2 + 0.1*t + rng.normal(size=T)
y_t = 2 + 0.3*t + rng.normal(size=T)

# Stack control (group 0) on top of treated (group 1) in long format.
df = pd.DataFrame({
    "t": np.concatenate([t, t]),
    "post": np.concatenate([post, post]),
    "group": np.concatenate([np.zeros(T), np.ones(T)]),
    "y": np.concatenate([y_c, y_t])
})

# Boolean masks for the four group-by-period cells.
pre = df["post"] == 0
postm = df["post"] == 1
treated = df["group"] == 1
control = df["group"] == 0

mean_treated_pre = df.loc[treated & pre, "y"].mean()
mean_treated_post = df.loc[treated & postm, "y"].mean()
mean_control_pre = df.loc[control & pre, "y"].mean()
mean_control_post = df.loc[control & postm, "y"].mean()

# Difference-in-differences of cell means: (treated post - pre) minus
# (control post - pre).
did = (mean_treated_post - mean_treated_pre) - (mean_control_post - mean_control_pre)

did


np.float64(4.75659986415388)