In [15]:
import pandas as pd
import numpy as np
import sklearn.metrics as metrics
import scipy.stats as stats
from sklearn import feature_selection

In [16]:
def add_constant(x):
    """Prepend an intercept column of ones to a 2-D design matrix.

    Parameters
    ----------
    x : ndarray, shape (nobs, k)
        Regressor matrix.

    Returns
    -------
    ndarray, shape (nobs, k + 1)
        A new array whose first column is all ones, followed by the
        columns of ``x``.
    """
    intercept = np.ones((x.shape[0], 1))
    return np.hstack((intercept, x))


def gen_data(nobs=1000, a=.5, corr_factor=1):
    """Simulate a linear model with two regressors that share a common shock.

    Each regressor is an independent standard normal draw plus
    ``corr_factor`` times one shared standard normal draw, so a larger
    ``corr_factor`` makes the two columns more alike. The outcome is
    ``y = 1 + a * (x1 + x2) + e`` with standard normal noise ``e``.

    Parameters
    ----------
    nobs : int
        Number of observations to draw.
    a : float
        Common slope applied to the sum of the two regressors.
    corr_factor : float
        Weight on the shared shock added to both regressors.

    Returns
    -------
    y : ndarray, shape (nobs,)
    x : ndarray, shape (nobs, 3)
        Constant column followed by the two regressors.
    nobs : int
        The sample size, echoed back.
    """
    # The draws use the global np.random stream; their order is significant
    # for reproducing a given seeded sequence.
    own_part = np.random.normal(scale=1., size=(nobs, 2))
    shared_part = np.random.normal(scale=1., size=(nobs, 1))

    # The (nobs, 1) shared shock broadcasts across both regressor columns.
    regressors = own_part + corr_factor * shared_part

    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    y = 1 + a * regressors.sum(axis=1) + noise
    return y, add_constant(regressors), nobs

def ols(y,x):
    """Ordinary least squares coefficients for regressing ``y`` on ``x``.

    Solves the normal equations ``(x'x) beta = x'y`` directly instead of
    forming ``inv(x'x)``; this avoids the explicit inverse (less round-off,
    less work) and never materialises the intermediate k-by-n matrix.

    Parameters
    ----------
    y : array_like, shape (nobs,) or (nobs, m)
        Outcome(s).
    x : array_like, shape (nobs, k)
        Design matrix; include a constant column if an intercept is wanted.

    Returns
    -------
    ndarray, shape (k,) or (k, m)
        Estimated coefficients.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``x'x`` is singular (perfectly collinear regressors).
    """
    # Accept plain lists as well as arrays.
    x = np.asarray(x)
    y = np.asarray(y)
    gram = x.T @ x
    return np.linalg.solve(gram, x.T @ y)

def predict(y,x):
    """Return the in-sample OLS fitted values of ``y`` regressed on ``x``.

    The coefficients are estimated with ``ols(y, x)`` and applied back to
    the same design matrix ``x``.
    """
    return np.dot(x, ols(y, x))
    

In [20]:
import statsmodels.api as sm
# Question: does the HC0 coefficient covariance grow when a covariate is
# dropped? In the printed results below it does, both with and without a
# shared component in the regressors.
# For each setting: first the full model (constant + both regressors), then
# the model that omits the last regressor.
for run_idx, corr_factor in enumerate((1, 0)):
    if run_idx > 0:
        print('--')
    y, x, nobs = gen_data(nobs=1000, a=.5, corr_factor=corr_factor)
    print(sm.OLS(y, x).fit().cov_HC0)
    print(sm.OLS(y, x[:, 0:2]).fit().cov_HC0)

[[ 1.05278213e-03 -1.29307884e-05 -8.25780802e-05]
 [-1.29307884e-05  6.76278197e-04 -3.37312239e-04]
 [-8.25780802e-05 -3.37312239e-04  6.45911289e-04]]
[[ 1.41070301e-03 -3.39771314e-05]
 [-3.39771314e-05  7.02260532e-04]]
--
[[ 9.43254019e-04  3.57845187e-05 -8.24154045e-06]
 [ 3.57845187e-05  9.96449816e-04  2.37022299e-05]
 [-8.24154045e-06  2.37022299e-05  9.78094138e-04]]
[[0.00119785 0.00013352]
 [0.00013352 0.00125276]]


In [14]:
def compare_mse(corr_factor=1,v=False):
    """Simulate one data set and fit it with and without the last regressor.

    Parameters
    ----------
    corr_factor : float
        Passed through to ``gen_data``.
    v : bool
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    (y_hat1, y_hat) : tuple of ndarray
        ``y_hat1`` holds the fitted values from the regression on the first
        two columns of ``x`` (constant and first regressor); ``y_hat`` holds
        the fitted values from the regression on all columns.
    """
    y, x, nobs = gen_data(corr_factor=corr_factor)
    full_fit = predict(y, x)
    restricted_design = x[:, :2].reshape(nobs, 2)
    restricted_fit = predict(y, restricted_design)
    return restricted_fit, full_fit


# Covariance and correlation between the fitted values of the restricted
# regression (y_hat1) and the full regression (y_hat).
y_hat1, y_hat = compare_mse(corr_factor=1, v=True)
nobs = len(y_hat1)
print('cov', np.cov(np.vstack((y_hat1, y_hat))))
print('corr', np.corrcoef(np.vstack((y_hat1, y_hat))))

cov [[1.1144768  1.1144768 ]
 [1.1144768  1.47421922]]
corr [[1.         0.86946976]
 [0.86946976 1.        ]]


In [None]:
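# TODO: compute the covariance for reg1: xx * sigma^2
# TODO: compute the covariance for reg2: xx * sigma^2
# Open question: is the cross term cov(reg2, reg1) = X1'X * sigma^2?
# Open question: is the cross term cov(reg1, reg2) = X'X1 * sigma^2?

# TODO: compute cov(reg1, reg2) -- should it use sigma1 * sigma2?
# TODO: compute cov(reg2, reg1) -- should it use sigma1 * sigma2?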

In [None]:
def compute_eigen2(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2):
    """Eigenvalues of ``B^(1/2) A^(-1) B^(1/2)`` for a pair of fitted models.

    ``A`` is the block-diagonal matrix ``diag(hess1 / n, -hess2 / n)`` and
    ``B`` is the sample covariance of the stacked columns ``[grad1, -grad2]``.
    The code comment in the original describes the result as the eigenvalues
    "for weighted chisq".

    Parameters
    ----------
    ll1 : ndarray
        Only ``ll1.shape[0]`` is read; it is used as the sample size ``n``.
        Presumably the per-observation log-likelihood of model 1 -- confirm
        against the caller.
    grad1, grad2 : ndarray, shape (n, k1) and (n, k2)
        Concatenated column-wise and passed to ``np.cov``; presumably the
        per-observation scores of each model -- confirm against the caller.
    hess1, hess2 : ndarray, shape (k1, k1) and (k2, k2)
        Divided by ``n`` before use.
    params1, params2 : ndarray
        Only their lengths ``k1`` and ``k2`` are used.
    ll2 : ndarray
        Not used by this function.

    Returns
    -------
    ndarray, shape (k1 + k2,)
        Eigenvalues exactly as returned by ``np.linalg.eig`` (unsorted, and
        complex if the matrix square root or the eigen-solver produce
        complex values).
    """
    # No `linalg` name is imported by the notebook's import cell, so bring
    # scipy.linalg into scope here; sqrtm is only available from scipy.
    from scipy import linalg

    n = ll1.shape[0]
    k1 = params1.shape[0]
    k2 = params2.shape[0]

    # A_hat: block-diagonal [[hess1/n, 0], [0, -hess2/n]]
    A_hat = np.block([
        [hess1 / n, np.zeros((k1, k2))],
        [np.zeros((k2, k1)), -hess2 / n],
    ])

    # B_hat: covariance of the stacked columns.
    # NOTE(review): the original author flagged this line as possibly wrong
    # ("might be a mistake here.."); the sign convention on grad2 is kept
    # as written and should be confirmed.
    stacked_grad = np.concatenate([grad1, -grad2], axis=1)
    B_hat = np.cov(stacked_grad.transpose())

    # Eigenvalues of B^(1/2) A^(-1) B^(1/2)
    sqrt_B_hat = linalg.sqrtm(B_hat)
    W_hat = sqrt_B_hat @ linalg.inv(A_hat) @ sqrt_B_hat
    eigenvalues, _ = np.linalg.eig(W_hat)

    return eigenvalues