In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [6]:
data = pd.read_csv('data.csv')
print data.max()
print '\n'

#pre-processing to calculate outside good shares
shares = data[['Market_ID','Inside Good Share','PPO']].copy()
shares['PPO Share'] = data['Inside Good Share'] * data['PPO']
shares['HMO Share'] = data['Inside Good Share'] * 1 - data['PPO']

group_shares = shares.groupby('Market_ID').sum()

group_shares['Nest Market Size'] = group_shares[['PPO Share','HMO Share']].max(axis=1)
group_shares['Outside Good Share'] = 1 - group_shares['Inside Good Share']

data = pd.merge(data,group_shares[['Outside Good Share','Nest Market Size']], 
                right_index=True, left_on = 'Market_ID')

print data.mean()

#calculate ln(Inside Good Share)
data['ln(Inside Good Share)'] = np.log( data['Inside Good Share']/data['Nest Market Size'] ) 

Market_ID             600.000000
Plan_ID                16.000000
PPO                     1.000000
Network Score           0.920000
Satisfaction Score      0.925000
Premium                 3.090915
Inside Good Share       0.355610
dtype: float64


Market_ID             353.530303
Plan_ID                 8.570606
PPO                     0.494545
Network Score           0.871382
Satisfaction Score      0.842170
Premium                 2.465963
Inside Good Share       0.155441
Outside Good Share      0.134506
Nest Market Size        0.521274
dtype: float64


In [8]:
#calculate hausmann insturments
mkt_dum = pd.get_dummies(data['Market_ID'],prefix='mkt',drop_first=True)
plan_dum = pd.get_dummies(data['Plan_ID'],prefix='plan',drop_first=True)
hausman_instr = pd.concat([mkt_dum,plan_dum],axis=1)


#hausman_model = sm.OLS(data['Premium'],x_hausman)
#hausman_result = hausman_model.fit()
#data['Hausman'] = hausman_result.fittedvalues

In [12]:
#set up data for logit
y = data[['Inside Good Share','Outside Good Share']]
x =  data[['Network Score','Satisfaction Score','Premium','PPO']]

# add ln(inside good share) as regressor like formula
x_nested = data[['Network Score','Satisfaction Score','Premium','PPO','ln(Inside Good Share)']]

In [10]:
#first estimate using logit
class logit(GMM):
    
    def __init__(self, *args, **kwds):
        # set appropriate counts for moment conditions and parameters
        super(logit, self).__init__(*args, **kwds)

        
    def momcond(self, params):
        #unwrap stuff
        shares = np.array(self.endog).transpose()
        exog = np.array(self.exog)
        instr = np.array(self.instrument)
        
        lshare = np.log(shares[0]) -  np.log(shares[1])
        lshare = lshare.transpose()
       
        lshare_fit = np.matmul(exog,params) #linear equation    
        
        xi = lshare_fit - lshare
        g = instr * xi[:, np.newaxis]
        
        return g 

    
#set up initial est
beta_init = np.full(len(x.columns),1)

#set up model
model1 = logit(y , x, hausman_instr)

result1 = model1.fit(beta_init, maxiter=2, optim_method='nm', wargs=dict(centered=False))
print(result1.summary())

Optimization terminated successfully.
         Current function value: 0.000042
         Iterations: 290
         Function evaluations: 501
Optimization terminated successfully.
         Current function value: 0.598377
         Iterations: 187
         Function evaluations: 318
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                        1975.
Model:                                                   logit   Prob (Hansen J):             1.90e-143
Method:                                                    GMM                                         
Date:                                         Wed, 03 Oct 2018                                         
Time:                                                 19:58:54                                         
No. Observations:                                         3300                                  

In [13]:
#set up and run model
beta_nested = np.full(len(x_nested.columns),1)
model2 = logit(y , x_nested, hausman_instr)
result2 = model2.fit(beta_nested, maxiter=2, optim_method='nm', wargs=dict(centered=False))

print(result2.summary())

Optimization terminated successfully.
         Current function value: 0.000010
         Iterations: 607
         Function evaluations: 979
Optimization terminated successfully.
         Current function value: 0.513649
         Iterations: 275
         Function evaluations: 452
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                        1695.
Model:                                                   logit   Prob (Hansen J):             4.42e-103
Method:                                                    GMM                                         
Date:                                         Wed, 03 Oct 2018                                         
Time:                                                 19:59:57                                         
No. Observations:                                         3300                                  