# Omitted Variable Bias

This notebook displays the effect of Omitted Variable Bias. (https://en.wikipedia.org/wiki/Omitted-variable_bias)
<br>
Full model: $Y=\alpha+\beta_1 \cdot X+\beta_2 \cdot Z + \epsilon$ with $\alpha=0.5, \beta_1=2$.
<br>
Reduced model (omitting Z): $Y=\alpha+\beta_1 \cdot X + \epsilon$
<br><br>
Omitted Variable Bias occurs when both of the following hold:
<br>1) Z is correlated with X ($\mathrm{cov}(Z, X) \neq 0$)
<br>2) Z has a non-zero coefficient ($\beta_2 \neq 0$).
<br><br>
Run the code cell below, adjust the sliders and press "Run Interact" to create the visualizations.
<br>Make sure both parameters, beta2 and cov, are set to nonzero values to see the effect of the bias.

In [13]:
import numpy as np
import math 
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import scipy.stats as stats
from sklearn import datasets, linear_model
from mpl_toolkits.mplot3d import Axes3D

def _simulate_ovb_sample(alpha, beta1, beta2, cov_xz, n):
    """Draw ``n`` observations (X, Z, Y) from the full model.

    X and Z are jointly normal with zero means, unit variances and
    covariance ``cov_xz``.  Y = alpha + beta1 * X + beta2 * Z + noise, where
    the noise is a standard-normal draw scaled by 4.

    Returns a tuple ``(x, z, y)`` of 1-D arrays of length ``n``.
    """
    covariance = [[1, cov_xz], [cov_xz, 1]]
    x, z = np.random.multivariate_normal([0, 0], covariance, n).T
    noise = np.random.normal(0, 1, n) * 4
    y = alpha + x * beta1 + z * beta2 + noise
    return x, z, y


def _draw_3d_panel(ax, x, z, y, line_x, line_z, line_y, title):
    """Scatter the sample in (X, Z, Y) space and overlay a fitted line."""
    ax.plot3D(line_x, line_z, zs=line_y, c='r', label="fitted line")
    ax.scatter3D(x, z, y, label="sample points")
    ax.view_init(azim=280)
    ax.set_xlabel("X")
    ax.set_ylabel("Z")
    ax.set_zlabel("Y")
    ax.set_title(title)
    ax.legend()


def OVB(beta2, cov):
    """Visualize omitted-variable bias for a simulated linear model.

    Simulates Y = alpha + beta1 * X + beta2 * Z + noise (alpha = 0.5,
    beta1 = 2), fits the full model (Y on X and Z) and the reduced model
    (Y on X only), shows two figures comparing them, and prints the beta1
    estimates.  The reduced model is then re-fitted on a fresh sample of 5000
    points to show that the bias does not shrink with sample size.

    Parameters
    ----------
    beta2 : float
        True coefficient of Z in the full model.
    cov : float
        Covariance between X and Z.  Both have unit variance, so the value
        must lie in [-1, 1].

    Raises
    ------
    ValueError
        If ``cov`` is outside [-1, 1] (the covariance matrix would not be
        positive semi-definite).
    """
    if not -1 <= cov <= 1:
        raise ValueError("cov must lie in [-1, 1], got " + str(cov))

    alpha = 0.5
    beta1 = 2.0
    n_small = 500
    n_large = 5000

    sampleX, sampleZ, sampleY = _simulate_ovb_sample(alpha, beta1, beta2, cov, n_small)

    # Fit with a 1-D target so that coef_ is 1-D and intercept_ is a scalar;
    # the fitted-line arrays below are then plain 1-D arrays.
    fullregr = linear_model.LinearRegression()
    fullregr.fit(np.column_stack((sampleX, sampleZ)), sampleY)

    smallregr = linear_model.LinearRegression()
    smallregr.fit(sampleX.reshape(-1, 1), sampleY)

    grid = np.linspace(-3, 3, 300)
    # 3D lines run along the diagonal Z = X of the (X, Z) plane.
    full_diag_y = fullregr.intercept_ + grid * fullregr.coef_[0] + grid * fullregr.coef_[1]
    small_y = smallregr.intercept_ + grid * smallregr.coef_[0]
    # In the 2D (Y ~ X) panels the full model is drawn with Z held at its
    # mean (0), i.e. with slope beta1hat.  Re-using the diagonal line here
    # would show slope beta1hat + beta2hat, which is not the effect of X.
    full_partial_y = fullregr.intercept_ + grid * fullregr.coef_[0]

    # Figure 1: full model.
    fig = plt.figure(figsize=(20, 7))
    ax = fig.add_subplot(1, 2, 1, projection='3d')
    _draw_3d_panel(ax, sampleX, sampleZ, sampleY, grid, grid, full_diag_y,
                   "3D plot of the full model")

    ax = fig.add_subplot(1, 2, 2)
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.plot(grid, full_partial_y, 'r')
    ax.scatter(sampleX, sampleY)
    ax.set_title("2D plot (Y~X) of the full model")
    plt.show()

    # Figure 2: reduced model, with the full model's X-effect for comparison.
    fig = plt.figure(figsize=(20, 7))
    ax = fig.add_subplot(1, 2, 1, projection='3d')
    _draw_3d_panel(ax, sampleX, sampleZ, sampleY, grid, grid, small_y,
                   "3D plot of the reduced model")

    ax = fig.add_subplot(1, 2, 2)
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.plot(grid, small_y, 'r', label="reduced model")
    ax.plot(grid, full_partial_y, 'gray', label="full model")
    ax.scatter(sampleX, sampleY)
    ax.set_title("2D plot (Y~X) of the reduced model")
    ax.legend()
    plt.show()

    print("\nTrue beta1 of the full model : 2")
    print("beta1hat from the full model : " + str(fullregr.coef_[0]))
    print("beta1hat from the reduced model, with Z omitted (N=" + str(n_small) + "): "
          + str(smallregr.coef_[0]))

    # Re-estimate the reduced model on a much larger, fresh sample.
    largeX, _, largeY = _simulate_ovb_sample(alpha, beta1, beta2, cov, n_large)
    largeregr = linear_model.LinearRegression()
    largeregr.fit(largeX.reshape(-1, 1), largeY)
    print("beta1hat from the reduced model, with Z omitted (N=" + str(n_large) + "): "
          + str(largeregr.coef_[0]))
    print("-> Thus, the bias is maintained even with very large Ns")
    
# Build the slider UI for OVB; the plots are drawn when "Run Interact" is pressed.
# The trailing semicolon keeps the notebook from echoing the returned function's repr.
interact_manual(OVB, beta2=(-5, 5), cov=(-0.9, 0.9));

<function __main__.OVB>