# Standard Errors

# In [20]:
import numpy as np
import math 
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import scipy.stats as stats
from sklearn import datasets, linear_model

def _fit_ols(x, y):
    """Regress ``y`` on ``x`` with an intercept.

    Returns the slope, the fitted values and the residuals; the latter two
    are 1-D arrays aligned with ``x``.
    """
    design = x.reshape(-1, 1)
    model = linear_model.LinearRegression().fit(design, y)
    fitted = model.predict(design)
    return model.coef_[0], fitted, y - fitted


def _sandwich_variance(x, residuals, n_groups):
    """Sandwich variance estimate of the OLS slope.

    Observations are split into ``n_groups`` consecutive, equally sized
    blocks.  With ``n_groups == len(x)`` every observation is its own block
    (heteroscedasticity-robust form); with fewer blocks the within-block
    scores are summed before squaring (cluster-robust form).
    """
    # The regression includes an intercept, so the slope's variance depends
    # on deviations of x from its mean, not on raw x.
    centered = x - x.mean()
    scores = (centered * residuals).reshape(n_groups, -1).sum(axis=1)
    return np.sum(scores ** 2) / np.sum(centered ** 2) ** 2


def SE(option):
    """Simulate three error structures, plot one of them, print all estimates.

    Three independent samples of size 300 are drawn from
    ``y = 0.5 + 2 * x + error`` with

    * unit-variance errors ("Homoscedastic"),
    * errors whose variance grows with the rank of ``x``
      ("Heteroscedastic"),
    * three consecutive clusters of 100 observations, each with its own
      randomly drawn error standard deviation ("Cluster-Robust").

    Each sample is fitted by OLS.  The sample named by ``option`` is drawn as
    a scatter plot with the fitted line (left axes) and a residual plot
    (right axes).  Whatever ``option`` is, the three slope estimates and their
    variance estimates are printed.

    Parameters
    ----------
    option : str
        'Homoscedastic', 'Heteroscedastic' or 'Cluster-Robust'.  Any other
        value produces no plot but still prints the estimates.
    """
    alpha = 0.5
    beta = 2
    N = 300
    n_clusters = 3
    cluster_size = N // n_clusters

    fig = plt.figure(figsize=(20, 7))
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    # --- Homoscedastic errors: i.i.d. N(0, 1) -------------------------------
    xx = np.random.normal(0, 1, N)
    yy = alpha + beta * xx + np.random.normal(0, 1, N)
    betahomo, fitted, residuals = _fit_ols(xx, yy)
    # Error variance is 1 by construction, so Var(betahat) = 1 / Sxx.
    sehomo = 1 / np.sum((xx - xx.mean()) ** 2)

    if option == 'Homoscedastic':
        ax1.plot(xx, yy, 'o')
        ax1.plot(xx, fitted)
        ax2.plot(residuals, 'o')
        ax2.axhline(y=0, c='C1')
        plt.show()

    # --- Heteroscedastic errors: variance increases with the index ---------
    # x is sorted so that the error variance also increases with x.
    xx = np.sort(np.random.normal(0, 1, N))
    error_sd = np.sqrt(np.arange(1, N + 1) / (N / 2.0))
    yy = alpha + beta * xx + np.random.normal(0, error_sd)
    betahetero, fitted, residuals = _fit_ols(xx, yy)
    sehetero = _sandwich_variance(xx, residuals, N)

    if option == 'Heteroscedastic':
        ax1.plot(xx, yy, 'o')
        ax1.plot(xx, fitted)
        ax2.plot(residuals, 'o')
        ax2.axhline(y=0, c='C1')
        plt.show()

    # --- Clustered errors: one random standard deviation per cluster -------
    xx = np.random.normal(0, 1, N)
    cluster_sd = np.random.rand(n_clusters) * 4 + 0.5
    # Every cluster pairs its own slice of x with its own errors.
    yy = alpha + beta * xx + np.random.normal(0, np.repeat(cluster_sd, cluster_size))
    betacluster, fitted, residuals = _fit_ols(xx, yy)
    secluster = _sandwich_variance(xx, residuals, n_clusters)

    if option == "Cluster-Robust":
        for g, marker in enumerate(('bo', 'ro', 'go')):
            block = slice(g * cluster_size, (g + 1) * cluster_size)
            ax1.plot(xx[block], yy[block], marker)
            ax2.plot(residuals[block], marker)
        ax1.plot(xx, fitted, 'C1')
        ax2.axhline(y=0, c='C1')
        plt.show()

    print("Estimated betahats: " + str(betahomo) + " " + str(betahetero) + " " + str(betacluster))
    print("Variance of betahats: " + str(sehomo) + " " + str(sehetero) + " " + str(secluster))

# Expose SE through a dropdown of the three error structures; with
# interact_manual the function is run on demand rather than on every change.
interact_manual(
    SE,
    option=widgets.Dropdown(
        options=['Homoscedastic', 'Heteroscedastic', 'Cluster-Robust'],
    ),
)

# <function __main__.SE>