Part 2 concerns adding measurement error into the simulation.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Simulation configuration shared by the cells below.
iterations = 1000   # Monte Carlo replications per (var_u, var_eps) setting
n = 100             # observations per simulated sample
secret_beta = 2     # true slope that the estimators try to recover

# Seed NumPy's global generator so that a fresh "Restart & Run All" reproduces
# the same bias/MSE tables; the simulation cells draw from np.random directly.
SEED = 0
np.random.seed(SEED)

$$
y = \beta x + \epsilon
$$

We only have data on $\tilde{x} = x + u$, where $u \sim N(0, \sigma_u^2)$

The OLS estimator for $\beta$ is

$$
\hat{\beta} = \frac{cov(\tilde{x}, y)}{var(\tilde{x})} = \frac{cov(x + u, \beta x + \epsilon)}{var(\tilde{x})}
$$

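Assuming $x$, $u$ and $\epsilon$ are mutually uncorrelated, the numerator converges to $\beta \sigma^2_x$ and the denominator to $\sigma^2_x + \sigma^2_u$, so

$$
\operatorname{plim} \hat{\beta} = \frac{\sigma^2_x}{\sigma^2_x + \sigma^2_u} \beta
$$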

Thus, the OLS estimator $\hat{\beta}$ is inconsistent: it is attenuated toward zero. The asymptotic bias is $\operatorname{plim} \hat{\beta} - \beta = -\frac{\sigma^2_u}{\sigma^2_x + \sigma^2_u} \beta$

Also, $\sigma^2_x + \sigma^2_u = var(\tilde{x})$

If $\sigma^2_u$ is known, we can obtain a consistent (asymptotically unbiased) estimator $\tilde{\beta} = \hat{\beta} \cdot \frac{var(\tilde{x})}{var(\tilde{x}) - \sigma^2_u}$

Here we try different values of $\sigma^2_u$ and $\sigma^2_\epsilon$ and calculate the bias and MSE of the estimator before and after adjusting for the bias.

In [3]:
# Monte Carlo study of measurement-error (attenuation) bias with normally
# distributed regression noise. For every (var_u, var_eps) pair we simulate
# `iterations` samples of size `n`, estimate beta by OLS on the noisy regressor,
# and print the mean error (bias) and mean squared error of the raw estimator
# and of the estimator corrected with the known measurement-error variance.
print("Normally distributed epsilon")

# The true regressor is the same grid in every replication, so build it once.
x = np.linspace(-10, 10, n)

for var_u in [0, 0.5, 1, 2, 3]:
    for var_eps in [2, 3, 4]:
        # Preallocate the per-replication errors; np.append copies the whole
        # array on every call, which made the original loop quadratic.
        bias_beta = np.empty(iterations)
        bias_beta_adj = np.empty(iterations)

        # Use `i` rather than `_`, which IPython reserves for the last output.
        for i in range(iterations):
            # Draw order (measurement noise first, then regression noise) is
            # kept so a seeded run yields the same numbers as before.
            x_obs = x + np.random.normal(scale=np.sqrt(var_u), size=n)
            y = x*secret_beta + np.random.normal(scale=np.sqrt(var_eps), size=n)

            # Compute each sample moment once and reuse it for both estimators.
            cov_xy = np.cov(x_obs, y)[0][1]
            var_x_obs = np.var(x_obs, ddof=1)

            beta_est_normal = cov_xy / var_x_obs
            # Undo the attenuation factor var(x_obs) / (var(x_obs) - var_u).
            beta_est_normal_adj = beta_est_normal * var_x_obs / (var_x_obs - var_u)

            bias_beta[i] = beta_est_normal - secret_beta
            bias_beta_adj[i] = beta_est_normal_adj - secret_beta

        # Squared errors follow directly from the stored signed errors.
        sqerr_beta = bias_beta ** 2
        sqerr_beta_adj = bias_beta_adj ** 2

        print("var_u=%.2f, var_eps=%.2f:  Unadjusted: Bias = %f, MSE = %f"%(var_u, var_eps, bias_beta.mean(), sqerr_beta.mean()))

        print("var_u=%.2f, var_eps=%.2f:  Adjusted:   Bias = %f, MSE = %f"%(var_u, var_eps, bias_beta_adj.mean(), sqerr_beta_adj.mean()))

Normally distributed epsilon
var_u=0.00, var_eps=2.00:  Unadjusted: Bias = 0.001192, MSE = 0.000589
var_u=0.00, var_eps=2.00:  Adjusted:   Bias = 0.001192, MSE = 0.000589
var_u=0.00, var_eps=3.00:  Unadjusted: Bias = -0.000913, MSE = 0.000860
var_u=0.00, var_eps=3.00:  Adjusted:   Bias = -0.000913, MSE = 0.000860
var_u=0.00, var_eps=4.00:  Unadjusted: Bias = 0.000051, MSE = 0.001213
var_u=0.00, var_eps=4.00:  Adjusted:   Bias = 0.000051, MSE = 0.001213
var_u=0.50, var_eps=2.00:  Unadjusted: Bias = -0.029005, MSE = 0.001933
var_u=0.50, var_eps=2.00:  Adjusted:   Bias = -0.000313, MSE = 0.001157
var_u=0.50, var_eps=3.00:  Unadjusted: Bias = -0.026972, MSE = 0.002203
var_u=0.50, var_eps=3.00:  Adjusted:   Bias = 0.001812, MSE = 0.001559
var_u=0.50, var_eps=4.00:  Unadjusted: Bias = -0.029626, MSE = 0.002536
var_u=0.50, var_eps=4.00:  Adjusted:   Bias = -0.000965, MSE = 0.001740
var_u=1.00, var_eps=2.00:  Unadjusted: Bias = -0.055578, MSE = 0.004767
var_u=1.00, var_eps=2.00:  Adjusted:   B

In [5]:
# Monte Carlo study of measurement-error (attenuation) bias with Student-t
# distributed regression noise. For every (var_u, var_eps) pair we simulate
# `iterations` samples of size `n`, estimate beta by OLS on the noisy regressor,
# and print the mean error (bias) and mean squared error of the raw estimator
# and of the estimator corrected with the known measurement-error variance.
print("T distributed epsilon")

# The true regressor is the same grid in every replication, so build it once.
x = np.linspace(-10, 10, n)

for var_u in [0, 0.5, 1, 2, 3]:
    for var_eps in [2,3,4]:
        # A Student-t variable with df degrees of freedom has variance
        # df / (df - 2); solving df / (df - 2) = var_eps gives the value below.
        # This needs var_eps > 1, which holds for every value in the grid.
        t_df = 2 * var_eps / (var_eps - 1)

        # Preallocate the per-replication errors; np.append copies the whole
        # array on every call, which made the original loop quadratic.
        bias_beta = np.empty(iterations)
        bias_beta_adj = np.empty(iterations)

        # Use `i` rather than `_`, which IPython reserves for the last output.
        for i in range(iterations):
            # Draw order (measurement noise first, then regression noise) is
            # kept so a seeded run yields the same numbers as before.
            x_obs = x + np.random.normal(scale=np.sqrt(var_u), size=n)
            y = x*secret_beta + np.random.standard_t(df=t_df, size=n)

            # Compute each sample moment once and reuse it for both estimators.
            cov_xy = np.cov(x_obs, y)[0][1]
            var_x_obs = np.var(x_obs, ddof=1)

            # Names kept from the original cell even though the noise here is
            # t-distributed, in case later cells read them.
            beta_est_normal = cov_xy / var_x_obs
            # Undo the attenuation factor var(x_obs) / (var(x_obs) - var_u).
            beta_est_normal_adj = beta_est_normal * var_x_obs / (var_x_obs - var_u)

            bias_beta[i] = beta_est_normal - secret_beta
            bias_beta_adj[i] = beta_est_normal_adj - secret_beta

        # Squared errors follow directly from the stored signed errors.
        sqerr_beta = bias_beta ** 2
        sqerr_beta_adj = bias_beta_adj ** 2

        print("var_u=%.2f, var_eps=%.2f:  Unadjusted: Bias = %f, MSE = %f"%(var_u, var_eps, bias_beta.mean(), sqerr_beta.mean()))

        print("var_u=%.2f, var_eps=%.2f:  Adjusted:   Bias = %f, MSE = %f"%(var_u, var_eps, bias_beta_adj.mean(), sqerr_beta_adj.mean()))

T distributed epsilon
var_u=0.00, var_eps=2.00:  Unadjusted: Bias = -0.000093, MSE = 0.000624
var_u=0.00, var_eps=2.00:  Adjusted:   Bias = -0.000093, MSE = 0.000624
var_u=0.00, var_eps=3.00:  Unadjusted: Bias = 0.000675, MSE = 0.000807
var_u=0.00, var_eps=3.00:  Adjusted:   Bias = 0.000675, MSE = 0.000807
var_u=0.00, var_eps=4.00:  Unadjusted: Bias = -0.000588, MSE = 0.001080
var_u=0.00, var_eps=4.00:  Adjusted:   Bias = -0.000588, MSE = 0.001080
var_u=0.50, var_eps=2.00:  Unadjusted: Bias = -0.029665, MSE = 0.002038
var_u=0.50, var_eps=2.00:  Adjusted:   Bias = -0.000978, MSE = 0.001228
var_u=0.50, var_eps=3.00:  Unadjusted: Bias = -0.029147, MSE = 0.002285
var_u=0.50, var_eps=3.00:  Adjusted:   Bias = -0.000465, MSE = 0.001512
var_u=0.50, var_eps=4.00:  Unadjusted: Bias = -0.029276, MSE = 0.002505
var_u=0.50, var_eps=4.00:  Adjusted:   Bias = -0.000583, MSE = 0.001730
var_u=1.00, var_eps=2.00:  Unadjusted: Bias = -0.055385, MSE = 0.004664
var_u=1.00, var_eps=2.00:  Adjusted:   Bias 