In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

import statsmodels.api as sm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

from scipy.stats import norm
from scipy.stats import multivariate_normal

In [24]:
# Load the simulated panel into memory: one row per (individual i, period t)
# holding the outcome y_t and the price p_t.
data = pd.DataFrame(data = np.genfromtxt('sim3.dat', delimiter='  '), columns=['i','t','y_t','p_t'])

# Set up the one-period lag: copy each row's (i, y_t, p_t), relabel it as
# belonging to period t+1, and left-join it back on (i, t).
# The explicit .copy() matters: without it `shift_data` is a slice of `data`
# and the column assignment below raises pandas' SettingWithCopyWarning.
shift_data = data[['i','y_t','p_t']].copy()
shift_data['t'] = data['t'] + 1
data = data.merge(shift_data, how='left', on=['i','t'], suffixes=('','-1'))

# Rows with no predecessor get NaN lags from the left join; treat them as 0.
# Note that fillna(0) replaces every NaN in the frame, not only the lag columns.
data = data.fillna(0) # the initial period is 0

# Part a


Because of state dependence, once we condition on $p_{it}$, $y_{it-1}$, and $\alpha_i$, the only randomness left in $U_{it}$ is the period-specific shock $\epsilon_{it}$, which is independent across periods (at least according to our model). As a result

$$Pr(\textbf{1}(U_{it} >0)| p_{it}, y_{it-1}, \alpha_i) \perp Pr(\textbf{1}(U_{it+1} >0)| p_{it+1}, y_{it}, \alpha_i) $$

so, since $y_{it} = \textbf{1}(U_{it} >0)$,

$$Pr(y_{it}| p_{it}, y_{it-1}, \alpha_i) \perp Pr( y_{it+1}| p_{it+1}, y_{it}, \alpha_i)$$


Thus we can write 

$$\int Pr( y_{i1}, \ldots, y_{iT} | p_{i1}, \ldots ,p_{iT}, y_{i0} , \alpha_i; \theta ) d \alpha'_i = $$

$$\int Pr( \textbf{1}(U_{i1} >0), \ldots , \textbf{1}(U_{iT} >0) | p_{i1}, \ldots ,p_{iT}, y_{i0}, \ldots, y_{i,T-1}, \alpha_i ; \theta ) d \alpha'_i = $$

$$\int Pr(\textbf{1}(U_{i1} >0)| p_{i1}, y_{i0}, \alpha_i) \cdots Pr(\textbf{1}(U_{iT} >0)| p_{iT}, y_{iT-1}, \alpha_i)  d \alpha'_i = $$



$$\int Pr(y_{i1} | p_{i1}, y_{i0}, \alpha_i) \cdots Pr(y_{iT} | p_{iT}, y_{iT-1}, \alpha_i)  d \alpha'_i $$

# Part b

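It would not be true without first conditioning on $\alpha_i$ (and only then integrating it out). If $\alpha_i$ is left in the error term, the composite error $\sigma_\alpha \alpha_i + \epsilon_{it}$ is correlated over time because of individual heterogeneity, so the terms inside the product are no longer independent of each other.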

# Part c

$$Pr(y_{it} | \alpha_i, y_{it-1}, p_{it}) =Pr(\textbf{1}(U_{it} >0)| p_{it}, y_{it-1})$$ according to our model


When $y_{it} = 1$ we know that $U_{it} >0$, so 

$$Pr(\textbf{1}(U_{it} >0)| p_{it}, y_{it-1}) = \dfrac{e^{\theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \sigma_\alpha \alpha_i } }{ 1 + e^{\theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \sigma_\alpha \alpha_i}}$$

When $y_{it} = 0 $ we know that $U_{it} \le 0$, so

$$ Pr(U_{it} \le 0 | p_{it}, y_{it-1}, \alpha_i) = \dfrac{1 }{ 1 + e^{\theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \sigma_\alpha \alpha_i}}$$

So,

$$Pr(y_{it} | \alpha_i, y_{it-1}, p_{it}) = y_{it} \dfrac{e^{\theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \sigma_\alpha \alpha_i } }{ 1 + e^{\theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \sigma_\alpha \alpha_i}} + (1-y_{it})  \dfrac{1 }{ 1 + e^{\theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \sigma_\alpha \alpha_i}}$$

# Part d

In [21]:
#set up useful global variables

NSIM = 1000   # number of simulated alpha draws per individual
SEED = 0      # fixed seed so the simulated likelihood (and the estimates) are reproducible
T = int(data.groupby('i').count().max()['t'])   # largest number of rows held by any single individual
I = len(data.i.unique())                        # number of distinct individuals

# One standard-normal draw per (simulation, individual), taken from a seeded
# generator so that re-running the notebook gives the same draws.
alpha = np.random.RandomState(SEED).normal(0, 1, (NSIM, I))

# The same draw is used in every period: add a leading axis and copy it T
# times, giving an array of shape (T, NSIM, I).
alpha = alpha.reshape( (1, NSIM, I) )
alpha = np.tile(alpha, (T, 1,1))

In [22]:
def shape_data(x):
    """Expand a flat panel vector into a (T, NSIM, I) array.

    ``x`` must hold I*T values laid out individual by individual (all T
    entries of the first individual, then the second, ...).  Every value is
    copied NSIM times along the middle axis so that the result satisfies
    ``out[t, s, i] == x[i*T + t]`` for every simulation index ``s``.
    Reads the module-level I, T and NSIM.
    """
    # one row per individual, one column per period
    panel = np.array([x]).reshape(I, T)
    # insert the simulation axis and replicate along it: (I, NSIM, T)
    stacked = np.repeat(panel[:, np.newaxis, :], NSIM, axis=1)
    # reverse the axis order: (T, NSIM, I)
    return stacked.transpose()

# sanity check of the reshaped outcome array; shape_data returns (T, NSIM, I)
print(shape_data(data['y_t']).shape)

(20, 1000, 100)


In [27]:
class part_d(GenericLikelihoodModel):
    """Simulated maximum likelihood for question 3 part d.

    The per-period choice probability is logistic in
    ``t0 + t1*p + t2*y_lag + sigma*alpha``.  The unobserved ``alpha`` is
    integrated out by simulation: for each individual, the product over
    periods of the per-period probabilities is averaged over the draws in
    ``sims``.

    Parameters
    ----------
    sims : ndarray
        Draws of alpha.  It is combined elementwise with the arrays returned
        by ``shape_data``, so it must broadcast against them, with periods on
        axis 0 and simulation draws on axis 1.
    *args, **kwds
        Forwarded unchanged to ``GenericLikelihoodModel``.  ``exog`` must have
        the price in column 0 and the lagged outcome in column 1.
    """

    def __init__(self, sims, *args, **kwds):
        super(part_d, self).__init__(*args, **kwds)
        # keep the draws on the instance so every likelihood evaluation
        # reuses the same simulated alphas
        self.sims = sims


    def nloglikeobs(self, params, v=False):
        """Return the negative simulated log-likelihood, summed over individuals.

        ``params`` is ``(theta_0, theta_1, theta_2, sigma)``.  ``v`` is unused
        and is kept only so the signature stays unchanged.
        """
        t0, t1, t2, sigma = params
        y = shape_data(self.endog)

        p = shape_data(self.exog.transpose()[0])
        y_lag = shape_data(self.exog.transpose()[1])
        alpha = self.sims

        # latent-utility index for every (period, simulation, individual)
        index = t0 + t1*p + t2*y_lag + sigma*alpha

        # Probability of the observed outcome.  Each branch is written as
        # 1/(1+exp(.)) rather than exp(x)/(1+exp(x)): when exp overflows the
        # latter evaluates inf/inf = nan, while this form gives the correct
        # limit (0 or 1).
        like = y/(1. + np.exp(-index)) + (1-y)/(1. + np.exp(index))

        # product over periods (axis 0), then average over the simulation draws
        like = like.prod(axis=0).mean(axis=0)
        like = np.log(like).sum(axis = 0)

        return - like


    def fit(self, start_params=None, maxiter=1000, maxfun=5000, **kwds):
        """Fit the model, using built-in starting values when none are supplied."""
        # `is None`, not `== None`: comparing an array of start values with
        # `==` is elementwise and cannot be used as an `if` condition
        if start_params is None:
            start_params = [-.798,.5,.5,.5]
        return super(part_d, self).fit(start_params=start_params,
                                       maxiter=maxiter, maxfun=maxfun, **kwds)
    

# Estimate part d.  Positional order is (alpha draws, endog, exog); the exog
# columns are price then lagged outcome, the order nloglikeobs unpacks them in.
model_d = part_d(
    alpha,
    data['y_t'],
    data[['p_t', 'y_t-1']],
)

result_d = model_d.fit()
print(result_d.summary(xname=['theta_0', 'theta_1', 'theta_2', 'sigma']))

Optimization terminated successfully.
         Current function value: 0.620078
         Iterations: 146
         Function evaluations: 256
                                part_d Results                                
Dep. Variable:                    y_t   Log-Likelihood:                -1240.2
Model:                         part_d   AIC:                             2484.
Method:            Maximum Likelihood   BIC:                             2496.
Date:                Wed, 07 Nov 2018                                         
Time:                        17:28:57                                         
No. Observations:                2000                                         
Df Residuals:                    1998                                         
Df Model:                           1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------

# Part e

The coefficients $\theta_2$ and $\sigma_\alpha$ are both significant. However, the estimate of $\sigma_\alpha$ is larger than that of $\theta_2$. As a result, individual heterogeneity may be more important than state dependence in explaining the correlation across time.

# Part f

By re-estimating the model (below) the coefficient on $\theta_2$ increases. This is because individual heterogeneity is now an omitted variable which is correlated with the previous state. As a result, excluding the heterogeneity causes an upward bias.

In [25]:
class part_f(GenericLikelihoodModel):
    """Pooled logit for question 3 part f (no individual heterogeneity).

    The choice probability is logistic in ``t0 + t1*p + t2*y_lag``.  ``exog``
    must have the price in column 0 and the lagged outcome in column 1.
    """

    def nloglikeobs(self, params, v=False):
        """Return the negative log-likelihood, summed over all observations.

        ``params`` is ``(theta_0, theta_1, theta_2)``.  ``v`` is unused; it
        used to trigger a debugging exception and is kept only so the
        signature stays unchanged.
        """
        t0, t1, t2 = params
        y = self.endog
        p,y_lag = self.exog.transpose()

        # latent-utility index for every observation
        index = t0 + t1*p + t2*y_lag

        # Probability of the observed outcome.  Each branch is written as
        # 1/(1+exp(.)) rather than exp(x)/(1+exp(x)): when exp overflows the
        # latter evaluates inf/inf = nan, while this form gives the correct
        # limit (0 or 1).
        like = y/(1. + np.exp(-index)) + (1-y)/(1. + np.exp(index))

        return - np.log(like).sum()


    def fit(self, start_params=None, maxiter=1000, maxfun=5000, **kwds):
        """Fit the model, using built-in starting values when none are supplied."""
        # `is None`, not `== None`: comparing an array of start values with
        # `==` is elementwise and cannot be used as an `if` condition
        if start_params is None:
            start_params = [-.948,.5,.5]
        return super(part_f, self).fit(start_params=start_params,
                                       maxiter=maxiter, maxfun=maxfun, **kwds)
    

# Estimate part f.  Positional order is (endog, exog); the exog columns are
# price then lagged outcome, the order nloglikeobs unpacks them in.
model_f = part_f(
    data['y_t'],
    data[['p_t', 'y_t-1']],
)

result_f = model_f.fit()
print(result_f.summary(xname=['theta_0', 'theta_1', 'theta_2']))

Optimization terminated successfully.
         Current function value: 0.646619
         Iterations: 94
         Function evaluations: 164
                                part_f Results                                
Dep. Variable:                    y_t   Log-Likelihood:                -1293.2
Model:                         part_f   AIC:                             2590.
Method:            Maximum Likelihood   BIC:                             2602.
Date:                Wed, 07 Nov 2018                                         
Time:                        17:16:39                                         
No. Observations:                2000                                         
Df Residuals:                    1998                                         
Df Model:                           1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------

# Part g

A crude way to test the null hypothesis would be using the following linear probability model

$$y_{it} = \theta_0 + \theta_1 p_{it} + \theta_2 y_{it-1} + \alpha_i  + \epsilon_{it}$$

Here $\alpha_i$ is a fixed effect to capture the individual-specific heterogeneity.

We could run a t-test on $\theta_2$ to test for state dependence.

The exclusion restriction would be that $\alpha_i$ and $y_{it-1}$ are uncorrelated. If $\alpha_i$ determined $y_{it-1}$, then the coefficient would be biased upward.