In [26]:
#math
import numpy as np
import scipy.stats as stats
import scipy.special
#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [31]:
# NOTE(review): not referenced by any code visible in this notebook excerpt --
# presumably a leftover from another analysis; confirm before removing.
num_bidders = 4

def gen_data(nobs=1000, seed=None):
    """Simulate a linear model whose regressor ``x`` is censored at zero.

    Three independent normal samples are drawn (``epsilon``, ``xi``, ``z``),
    then::

        x = gamma0 + gamma1*z + xi
        y = beta0 + beta1*x + beta2*z + epsilon

    with every coefficient and every scale equal to 1.  ``y`` is built from
    the *uncensored* ``x``; only afterwards is every ``x <= 0`` replaced by 0.

    Parameters
    ----------
    nobs : int, default 1000
        Number of observations to simulate.
    seed : int or None, default None
        When given, a private ``numpy.random.RandomState(seed)`` drives all
        three draws, so repeated calls with the same seed return identical
        data.  When ``None`` the draws use scipy's default random state
        (NumPy's global one), i.e. the previous behaviour.

    Returns
    -------
    y : ndarray, shape (nobs,)
        Outcome variable.
    exog : ndarray, shape (nobs, 2)
        Columns are ``z`` and the censored ``x`` (no constant column).
    nobs : int
        The number of observations, echoed back.
    """
    # One shared generator for all draws: handing the same *integer* seed to
    # each rvs call would make epsilon, xi and z identical copies.
    rng = None if seed is None else np.random.RandomState(seed)

    #parameters
    beta0 = 1
    beta1 = 1
    beta2 = 1
    sigma = 1

    gamma0 = 1
    gamma1 = 1
    eta = 1

    # Draw order (epsilon, xi, z) is kept so that a seeded global state
    # yields the same sample as before.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs, random_state=rng)
    xi = stats.norm.rvs(loc=0, scale=eta, size=nobs, random_state=rng)
    z = stats.norm.rvs(loc=0, scale=sigma, size=nobs, random_state=rng)

    x = gamma0 + gamma1*z + xi
    y = beta0 + beta1*x + beta2*z + epsilon

    #censor: x is only observed when positive, otherwise it is recorded as 0
    x[x <= 0] = 0

    return y, np.column_stack([z, x]), nobs


# Draw one sample and eyeball the design matrix: its shape without and with
# the constant column, and the per-column mode (the censored x column piles
# up at 0).
yn, xn, nobs = gen_data()
print(
    xn.shape,
    sm.add_constant(xn).shape,
    stats.mode(sm.add_constant(xn)),
    sep="\n",
)

(1000, 2)
(1000, 3)
ModeResult(mode=array([[ 1.        , -3.69724161,  0.        ]]), count=array([[1000,    1,  238]]))


In [32]:
# Seed NumPy's global generator with a fixed value so the sample -- and every
# estimate reported below -- is reproduced on Restart & Run All.  Calling
# np.random.seed() with no argument re-seeds from fresh OS entropy instead,
# which makes each run different.
np.random.seed(0)
yn, xn, nobs = gen_data()

In [39]:
class MARregression(GenericLikelihoodModel):
    """Maximum-likelihood regression with a regressor censored at zero.

    The last column of ``exog`` is treated as the censored regressor ``x``;
    rows where it equals exactly 0 are treated as censored.  All remaining
    columns form ``z``.

    With ``k = exog.shape[1]`` the parameter vector has ``2*k + 1`` entries:

    ==================  =====================================================
    ``params[:k-1]``    ``betaz``   -- coefficients of ``z`` in the y equation
    ``params[k-1]``     ``betax``   -- coefficient of ``x`` in the y equation
    ``params[k]``       ``sigma^2`` -- variance term of the y equation
    ``params[k+1:-1]``  ``gamma``   -- coefficients of ``z`` in the x equation
    ``params[-1]``      ``eta^2``   -- variance term of the x equation
    ==================  =====================================================

    NOTE(review): the formulas in ``loglikeobs`` match the model
    ``y = z@betaz + betax*x + eps``, ``x = z@gamma + xi`` with independent
    normal errors -- confirm this is the intended specification.
    """

    # Lower bound applied to every variance term so that a trial step of the
    # optimiser can never produce a non-positive scale.
    _VARIANCE_FLOOR = 1e-3

    def __init__(self, *args, **kwargs):
        """Build the model and register names / start values for the extra parameters.

        Arguments are forwarded unchanged to ``GenericLikelihoodModel``.
        """
        super(MARregression, self).__init__(*args, **kwargs)
        n_exog = self.exog.shape[1]
        names = (['sigma^2'] +
                 ['gamma' + str(i) for i in range(0, n_exog - 1)] +
                 ['eta^2'])
        self._set_extra_params_names(names)
        # k betas (z columns + x), k-1 gammas and 2 variances, all started at 1.
        # Float dtype so the optimiser never starts from an integer array.
        self.start_params = np.ones(2 * n_exog + 1)

    def loglikeobs(self, params):
        """Return the log-likelihood contribution of every observation.

        Parameters
        ----------
        params : array_like
            Parameter vector laid out as described in the class docstring.

        Returns
        -------
        ndarray
            One value per row: ``log f(y | z)`` plus ``log f(x | y, z)`` when
            ``x`` is observed, or ``log P(x <= 0 | y, z)`` when ``x`` is
            censored (recorded as exactly 0).
        """
        params = np.asarray(params)
        floor = self._VARIANCE_FLOOR

        y = self.endog
        x = self.exog[:, -1]
        z = self.exog[:, 0:-1]
        censored = x == 0

        n_exog = self.exog.shape[1]
        betaz = params[0:n_exog - 1]
        betax = params[n_exog - 1]
        sigma2 = max(params[n_exog], floor)
        gamma = params[n_exog + 1:-1]
        eta2 = max(params[-1], floor)

        # Variance and mean coefficients of y given z (x integrated out).
        var_y = betax**2 * eta2 + sigma2
        a2 = max(var_y, floor)
        b = gamma*betax + betaz

        # Variance, slope on y and coefficients on z of x given (y, z).
        c2 = max(eta2 * sigma2 / var_y, floor)
        d = (betax*eta2) / var_y
        e = gamma - (betax*eta2) * b / var_y

        mu_x = y*d + np.matmul(z, e)
        mu_y = np.matmul(z, b)

        pr_y = stats.norm.logpdf(y, loc=mu_y, scale=np.sqrt(a2))
        pr_x = stats.norm.logpdf(x, loc=mu_x, scale=np.sqrt(c2))
        pr_m = stats.norm.logcdf(x, loc=mu_x, scale=np.sqrt(c2))

        # Select the relevant term per row instead of weighting by 0/1:
        # 0 * -inf would turn an irrelevant underflow into NaN.
        return pr_y + np.where(censored, pr_m, pr_x)
    


# Maximum-likelihood fit on the full sample, censored rows included; the
# design matrix is (const, z, x).
model = MARregression(endog=yn, exog=sm.add_constant(xn))
model_fit = model.fit(disp=False)
model_fit.summary()

0,1,2,3
Dep. Variable:,y,Log-Likelihood:,-2704.7
Model:,MARregression,AIC:,5415.0
Method:,Maximum Likelihood,BIC:,5430.0
Date:,"Tue, 24 Nov 2020",,
Time:,14:16:26,,
No. Observations:,1000,,
Df Residuals:,997,,
Df Model:,2,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.9150,0.050,18.399,0.000,0.818,1.013
x1,0.9472,0.045,20.831,0.000,0.858,1.036
x2,1.0589,0.034,30.963,0.000,0.992,1.126
sigma^2,0.9861,0.047,21.013,0.000,0.894,1.078
gamma0,0.9866,0.033,29.728,0.000,0.922,1.052
gamma1,0.9701,0.034,28.559,0.000,0.904,1.037
eta^2,0.9896,0.051,19.474,0.000,0.890,1.089


In [34]:
# Benchmark: ordinary least squares on the rows whose x is non-zero.
# Despite the "_cens" suffix, these arrays hold the rows that were NOT censored.
y_cens, x_cens = (arr[xn[:, -1] != 0] for arr in (yn, xn))
model2 = sm.OLS(y_cens, sm.add_constant(x_cens))
model2_fit = model2.fit()
model2_fit.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.749
Model:,OLS,Adj. R-squared:,0.748
Method:,Least Squares,F-statistic:,1144.0
Date:,"Tue, 24 Nov 2020",Prob (F-statistic):,6.13e-231
Time:,14:10:13,Log-Likelihood:,-1086.8
No. Observations:,770,AIC:,2180.0
Df Residuals:,767,BIC:,2193.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.9666,0.069,14.100,0.000,0.832,1.101
x1,0.9465,0.049,19.324,0.000,0.850,1.043
x2,1.0330,0.042,24.359,0.000,0.950,1.116

0,1,2,3
Omnibus:,0.572,Durbin-Watson:,2.019
Prob(Omnibus):,0.751,Jarque-Bera (JB):,0.451
Skew:,0.046,Prob(JB):,0.798
Kurtosis:,3.075,Cond. No.,4.85
