
### The estimation of the MLE is based on the material from Quantecon

First, we need the following imports:

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (11, 5)  #set default figure size
import numpy as np
from numpy import exp
from scipy.special import factorial
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import statsmodels.api as sm
from statsmodels.api import Poisson
from statsmodels.api import Probit
from statsmodels.api import Logit
from scipy import stats
from scipy.stats import norm
from statsmodels.iolib.summary2 import summary_col


Consider the probit model where the dependent variable ($Y$) is binary, and the independent variables are given by the vector $X$. 
In addition, assume that $Pr(Y=1|X)= Φ(X^T β)$ where $Φ$ is the CDF of the standard normal distribution. 

The objective is to maximize the likelihood function over the parameters to find the Maximum Likelihood Estimator (MLE). 
To find the MLE estimator we maximize the next Log-Likelihood function:

$$
\ln L(\beta \mid X, Y)=\sum_{i=1}^n\left[y_i \ln \Phi\left(x_i^{\prime} \beta\right)+\left(1-y_i\right) \ln \left(1-\Phi\left(x_i^{\prime} \beta\right)\right)\right]
$$


In [4]:
#Define the Log-Likelihood Function 

def LogLikeProbit(betas, y, x):
    """
    Probit Log Likelihood function
    Very slow naive Python version
    Input:
        betas is a np.array of parameters
        y is a one dimensional np.array of endogenous data
        x is a 2 dimensional np.array of exogenous data
            First vertical column of X is assumed to be constant term,
            corresponding to betas[0]
    returns:
        negative of log likehood value (scalar)
    """
    result = 0
    #Sum operation
    for i in range(0, len(y)):
        #Get X'_i * Beta value
        xb = np.dot(x[i], betas)
        
        #compute both binary probabilities from xb     
        #Add to total log likelihood
        llf = y[i]*np.log(norm.cdf(xb)) + (1-y[i])*np.log(1 - norm.cdf(xb))
        result += llf
    return -result

In [5]:
#####################
#ACTUAL DATA
#####################
df =pd.read_stata('C:/Users/USUARIO/Dropbox/PHD/CERGE_EI/2nd year/Microeconometrics I/MLE_GMM_SME_MSM/MLE_GMM_SME_MSM/admission.dta')  

df



Unnamed: 0,admit,gre,gpa,rank
0,0.0,380.0,3.61,3.0
1,1.0,660.0,3.67,3.0
2,1.0,800.0,4.00,1.0
3,1.0,640.0,3.19,4.0
4,0.0,520.0,2.93,4.0
...,...,...,...,...
395,0.0,620.0,4.00,2.0
396,0.0,560.0,3.04,3.0
397,0.0,460.0,2.63,2.0
398,0.0,700.0,3.65,2.0


In [37]:
#get latent LHS variable
y = df.admit
n = len(y)

#prepend vector of ones to RHS variables matrix
#for constant term
const = np.ones(n)
x = np.column_stack((const, df.gre))

x_1 = df.gre

x

x_1

0      380.0
1      660.0
2      800.0
3      640.0
4      520.0
       ...  
395    620.0
396    560.0
397    460.0
398    700.0
399    600.0
Name: gre, Length: 400, dtype: float32

In [1]:
#####################
#ARTIFICIAL DATA
######################

"""

#sample size
n = 1000

#random generators
z1 = np.random.randn(n)
z2 = np.random.randn(n)

#create artificial exogenous variables 
x1 = 0.8*z1 + 0.2*z2
x2 = 0.2*z1 + 0.8*z2
#create error term
u = 2*np.random.randn(n)

#create endogenous variable from x1, x2 and u
ystar = 0.5 + 0.75*x1 - 0.75*x2 + u

#create latent binary variable from ystar
def create_dummy(data, cutoff):
    result = np.zeros(len(data))
    for i in range(0, len(data)):
        if data[i] >= cutoff:
            result[i] = 1
        else:
            result[i] = 0
    return result

#get latent LHS variable
y = create_dummy(ystar, 0.5)

#prepend vector of ones to RHS variables matrix
#for constant term
const = np.ones(n)
x = np.column_stack((const, np.column_stack((x1, x2))))
"""


'\n\n#sample size\nn = 1000\n\n#random generators\nz1 = np.random.randn(n)\nz2 = np.random.randn(n)\n\n#create artificial exogenous variables \nx1 = 0.8*z1 + 0.2*z2\nx2 = 0.2*z1 + 0.8*z2\n#create error term\nu = 2*np.random.randn(n)\n\n#create endogenous variable from x1, x2 and u\nystar = 0.5 + 0.75*x1 - 0.75*x2 + u\n\n#create latent binary variable from ystar\ndef create_dummy(data, cutoff):\n    result = np.zeros(len(data))\n    for i in range(0, len(data)):\n        if data[i] >= cutoff:\n            result[i] = 1\n        else:\n            result[i] = 0\n    return result\n\n#get latent LHS variable\ny = create_dummy(ystar, 0.5)\n\n#prepend vector of ones to RHS variables matrix\n#for constant term\nconst = np.ones(n)\nx = np.column_stack((const, np.column_stack((x1, x2))))\n'

In [7]:
from scipy.optimize import minimize


In [8]:
#create beta hat vector to maximize on
#will store the values of maximum likelihood beta parameters
#Arbitrarily initialized to all zeros
bhat = np.zeros(len(x[0]))

#unvectorized MLE estimation
probit_est = minimize(LogLikeProbit, bhat, args=(y,x), method='nelder-mead')

#print vector of maximized betahats
probit_est['x']

array([-1.76821242,  0.00217499])

In [9]:
import statsmodels.tools.numdiff as smt
import scipy as sc

#Get inverse hessian for Cramer Rao lower bound
b_estimates = probit_est['x']
Hessian = smt.approx_hess3(b_estimates, LogLikeProbit, args=(y,x))
invHessian = np.linalg.inv(Hessian)

#Standard Errors from C-R LB
#from diagonal elements of invHessian
SE = np.zeros(len(invHessian))
for i in range(0, len(invHessian)):
    SE[i] =  np.sqrt(invHessian[i,i])
    
#t and p values
t_statistics = (b_estimates/SE)
pval = (sc.stats.t.sf(np.abs(t_statistics), 999)*2)

print("Beta Hats: ", b_estimates)
print("SE: ", SE)
print("t stat: ", t_statistics)
print("P value: ", pval)

Beta Hats:  [-1.76821242  0.00217499]
SE:  [0.35917796 0.00059006]
t stat:  [-4.92294244  3.68602656]
P value:  [9.96900476e-07 2.40032153e-04]


In [10]:
#Using other routine
stats_probit = Probit(y, x).fit()
print(stats_probit.summary())


Optimization terminated successfully.
         Current function value: 0.607481
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:                  admit   No. Observations:                  400
Model:                         Probit   Df Residuals:                      398
Method:                           MLE   Df Model:                            1
Date:                Sun, 30 Oct 2022   Pseudo R-squ.:                 0.02798
Time:                        19:13:39   Log-Likelihood:                -242.99
converged:                       True   LL-Null:                       -249.99
Covariance Type:            nonrobust   LLR p-value:                 0.0001836
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.7682      0.359     -4.923      0.000      -2.472      -1.064
x1             0.0022      0.

### Maximum Simulated Likelihood

The subsimulator is:

$$
\begin{aligned}
&\widetilde{f}_{Y_i, X_i \mid U_s}\left(y_i, x_i \mid u_s ; \beta_0, \beta_1, \sigma^2\right)=\Lambda\left[\beta_0+\left(\beta_1+\sigma u_s\right) x_i\right]^{y_i}\left\{1-\Lambda\left[\beta_0+\left(\beta_1+\sigma u_s\right) x_i\right]\right\}^{1-y_i}
\end{aligned}
$$

400

In [71]:
#set the number of simulations
S = 10

#Define the Random Parameter Mixed Simulated Likelihood Function.
def mslf(S, β_0, β_1, σ, Y, X):
  #Set up the major variables that will be used to created a likelihood  
    Y = Y
    X = X
    
  #set of parameters we are hoping to find the MSL estimates of.
    β_0 = β_0
    β_1 = β_1  
    σ = σ   
    
    sim_avg_f=0

  #Simulation
    vector_subsim = []
    vector_sim = []
    #Establish the seed
    np.random.seed(1234)
    ##Take a random sample for u_i from Hu(u_i) (Cumulative distribution of u_i that is assumed normal)
    u_s = np.random.normal(loc=0, scale=1, size=n)
    #Let's create the loop to generate the mslf
    np.seterr(divide = 'ignore')
    for i in range(n):
        for j in range(S):
            θ = np.dot((β_0 + (β_1 + σ*u_s)),X)
            Λ = 1/(1+ np.exp(-θ))
            subsim = Y*np.log(Λ)+(1-Y)*np.log(1-Λ)  
            vector_subsim.append(subsim)
        sl = sum(vector_subsim)/S
        vector_sim.append(sl)
        
    print(vector_sim)
    


In [72]:
mslf(S, 3, 2, 1, y, x_1)

[0     -inf
1      NaN
2      NaN
3      NaN
4     -inf
      ... 
395   -inf
396   -inf
397   -inf
398   -inf
399   -inf
Name: admit, Length: 400, dtype: float32, 0     -inf
1      NaN
2      NaN
3      NaN
4     -inf
      ... 
395   -inf
396   -inf
397   -inf
398   -inf
399   -inf
Name: admit, Length: 400, dtype: float32, 0     -inf
1      NaN
2      NaN
3      NaN
4     -inf
      ... 
395   -inf
396   -inf
397   -inf
398   -inf
399   -inf
Name: admit, Length: 400, dtype: float32, 0     -inf
1      NaN
2      NaN
3      NaN
4     -inf
      ... 
395   -inf
396   -inf
397   -inf
398   -inf
399   -inf
Name: admit, Length: 400, dtype: float32, 0     -inf
1      NaN
2      NaN
3      NaN
4     -inf
      ... 
395   -inf
396   -inf
397   -inf
398   -inf
399   -inf
Name: admit, Length: 400, dtype: float32, 0     -inf
1      NaN
2      NaN
3      NaN
4     -inf
      ... 
395   -inf
396   -inf
397   -inf
398   -inf
399   -inf
Name: admit, Length: 400, dtype: float32, 0     -inf
1      NaN

0.52497918747894

0.52497918747894

In [65]:
Λ = np.exp(0.1)/(1+ np.exp(0.1))
A = 1/(1+ np.exp(-0.1))

In [None]:

    

mslf <- function(param){
  #Set up the major variables that will be used to created a likelihood
  choice<-Data$preference_liking
  endowment<-Data$InitialGood_Stage1
  
  #set of parameters we are hoping to find the MSL estimates of.
  lambda_m<-param[1]
  u1<-param[2]
  u2<-param[3]
  u3<-param[4]
  u4<-param[5]
  delt<-param[6]
  sd<-exp(param[7])
  

  sim_avg_f=0
  
  set.seed(10101)
  
  #create the for loop over which we generate the simulated likelihood function
  for(i in 1:num_draws){
    #first, generate a set of random normal variables for each individual
    #This will represent the underlying (unobserved) heterogeneity in our random parameter model.
    unobserved_noise<-rnorm(nrow(Data), 0, 1)
    
    #Draw lambda value for an individual, sampling from the mean value (lambda_temp) with noise e*sd.
    lambda<-lambda_m+unobserved_noise*sd
    
    #Given individual context, generate the KR structural utilities.
    #Good a represents the endowment, so we compute U(a|a).
    kr_utils_good_a=u1*(endowment==1)+u2*(endowment==2)+u3*(endowment==3)+u4*(endowment==4)
    #Good b represents the alternative good, so we compute U(b|a)
    kr_utils_good_b=(2*u2-lambda*u1)*(endowment==1)+(2*u1-lambda*u2)*(endowment==2)+
                    (2*u4-lambda*u3)*(endowment==3)+(2*u3-lambda*u4)*(endowment==4)
    
    #Construct the likelihood at the given draw
    sim_f=(exp(kr_utils_good_a)/(exp(kr_utils_good_a)+exp(kr_utils_good_b+delt)))*(choice==1)+
          (exp(kr_utils_good_b)/(exp(kr_utils_good_b)+exp(kr_utils_good_a+delt)))*(choice==-1)+
        (1- (exp(kr_utils_good_b)/(exp(kr_utils_good_b)+exp(kr_utils_good_a+delt)))-
           (exp(kr_utils_good_a)/(exp(kr_utils_good_a)+exp(kr_utils_good_b+delt))) )*(choice==0)
        
    sim_avg_f = sim_avg_f + sim_f/num_draws

  }
  log(sim_avg_f)
} 





