# Homework 7 — Regression, Correlation, and Serial Correlation
This notebook reproduces the concepts and answers from the Week 7 coding quiz.

**Topics covered:**
- Correlation between X and error
- Omitted variable bias
- Conditional regression by W
- Serial correlation in errors


In [None]:

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Seed NumPy's global random generator so the draws made via np.random in the
# cells below are repeatable when the notebook is run top to bottom.
np.random.seed(42)


## Question 1 — Correlation of X with error when model includes W and Z

In [None]:

# Data-generating process for Questions 1-3:
#   W, Z and eps are independent standard normals,
#   X is W plus its own independent standard-normal noise,
#   Y = X + Z + W + eps.
n_obs = 1000

W = np.random.normal(loc=0, scale=1, size=n_obs)
x_noise = np.random.normal(loc=0, scale=1, size=n_obs)
X = W + x_noise
Z = np.random.normal(loc=0, scale=1, size=n_obs)
eps = np.random.normal(loc=0, scale=1, size=n_obs)
Y = X + Z + W + eps

# When W and Z are both in the model, the leftover error is eps alone,
# so the quantity of interest is the sample correlation of X with eps.
corr_matrix = np.corrcoef(X, eps)
corr = corr_matrix[0, 1]
print("Correlation between X and error:", round(corr, 3))


## Question 2 — Correlation when W omitted

In [None]:

# If W is left out of the regression, its contribution to Y is absorbed
# into the error term, so the effective error becomes u = W + eps.
u = eps + W

# Off-diagonal entry of the 2x2 correlation matrix of (X, u).
corr_matrix_xu = np.corrcoef(X, u)
corr_X_u = corr_matrix_xu[0, 1]
print("Correlation between X and omitted-variable error term:", round(corr_X_u, 3))


## Question 3 — Regression by slices of W

In [None]:

# Simulate dataset similar to homework_7.1.csv
df = pd.DataFrame({'X': X, 'W': W, 'Z': Z, 'Y': Y})

# Each slice keeps the rows whose W lies strictly within `half_width` of the
# slice centre; Y is then regressed on X and Z (plus an intercept) within it.
half_width = 0.25
# Largest deviation of a slice estimate from the mean of the slice estimates
# that still counts as "roughly constant".
tolerance = 0.2

betas = []
for w0 in [-1, 0, 1]:
    in_slice = (df["W"] > w0 - half_width) & (df["W"] < w0 + half_width)
    subset = df[in_slice]
    model = sm.OLS(subset["Y"], sm.add_constant(subset[["X", "Z"]])).fit()
    betas.append(model.params["X"])
    print(f"W ≈ {w0}: β_X = {model.params['X']:.3f}")

# Check the claim against the estimates actually obtained instead of
# printing a fixed conclusion regardless of the results.
beta_mean = sum(betas) / len(betas)
max_deviation = max(abs(b - beta_mean) for b in betas)
if max_deviation <= tolerance:
    print(f"Conclusion: Coefficient of X remains roughly constant (within ±{tolerance}).")
else:
    print(
        "Conclusion: Coefficient of X varies across W slices "
        f"(max deviation from mean = {max_deviation:.3f})."
    )


## Question 4 — Serial correlation in errors

In [None]:

def make_error(corr_const, num):
    """Generate a serially correlated error series of length ``num``.

    The series starts from a standard-normal draw that is not itself
    returned. Each returned value is a weighted blend of the previous value
    (weight ``corr_const``) and a fresh standard-normal draw
    (weight ``1 - corr_const``).

    Draws come from NumPy's global random state, so results depend on the
    current seed.

    Returns:
        A 1-D NumPy array holding the ``num`` generated values.
    """
    innovation_weight = 1 - corr_const
    state = np.random.normal(0, 1)  # starting value, not part of the output
    series = []
    for _ in range(num):
        shock = np.random.normal(0, 1)
        state = corr_const * state + innovation_weight * shock
        series.append(state)
    return np.array(series)

# Simulation parameters
trials = 500   # number of simulated datasets per corr_const value
n = 200        # observations per simulated dataset

# For each corr_const, repeatedly simulate a regression of y on x where both
# the regressor and the error contain a serially correlated component, then
# compare the spread of the slope estimates across trials (SD_est) with the
# average standard error OLS reports for the slope (mean_SE).
results = []
for corr_const in [0.2, 0.5, 0.8]:
    # Simulation-local names are used here on purpose: rebinding X, Y, betas
    # or model would overwrite the Question 1-3 data and break those cells
    # if they are re-run after this one.
    slope_estimates = []
    slope_ses = []
    for _ in range(trials):
        eX = make_error(corr_const, n)
        eY = make_error(corr_const, n)
        x_sim = np.random.normal(0, 1, n) + eX
        y_sim = 2 * x_sim + np.random.normal(0, 1, n) + eY
        fit = sm.OLS(y_sim, sm.add_constant(x_sim)).fit()
        # Index 1 is the slope; index 0 is the intercept added by add_constant.
        slope_estimates.append(fit.params[1])
        slope_ses.append(fit.bse[1])
    sd_est = np.std(slope_estimates)
    mean_se = np.mean(slope_ses)
    results.append((corr_const, sd_est, mean_se, sd_est / mean_se))

res_df = pd.DataFrame(results, columns=["corr_const", "SD_est", "mean_SE", "ratio"])
print(res_df)

# Report the trend that the simulated ratios actually show rather than a
# fixed sentence; ratio is SD_est / mean_SE, so a ratio above 1 means the
# reported standard errors understate the spread of the estimates.
ratio_steps = np.diff(res_df["ratio"].to_numpy())
if np.all(ratio_steps > 0):
    trend = "increases"
elif np.all(ratio_steps < 0):
    trend = "decreases"
else:
    trend = "does not change monotonically"
print(f"\nAs corr_const increases, the ratio SD_est / mean_SE {trend}.")


### Summary of Answers
- **Q1:** corr(X, error) ≈ 0 → Option A
- **Q2:** corr(X, error) ≈ 0.5 → Option D
- **Q3:** β_X stable → Option C
- **Q4:** Ratio (i)/(ii) decreases → Option C
