In [2]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [72]:
# Load the plan-level data and show the column means as a quick sanity check.
data = pd.read_csv('data.csv')
print(data.mean())

# Pre-processing: the outside good's share in a market is whatever remains
# after summing the inside-good shares of every plan in that market.
shares = data[['Market_ID', 'Inside Good Share']]
group_shares = shares.groupby('Market_ID').sum()
group_shares['Outside Good Share'] = 1 - group_shares['Inside Good Share']

# Attach the market-level outside share to every plan row of that market.
data = pd.merge(data, group_shares[['Outside Good Share']],
                right_index=True, left_on='Market_ID')
print(data.mean())

# Hausman instruments.
# NOTE(review): this is only a constant placeholder column (every row is 1),
# not an actual Hausman instrument -- TODO replace with the real calculation.
data['Hausman'] = 1

Market_ID             353.530303
Plan_ID                 8.570606
PPO                     0.494545
Network Score           0.871382
Satisfaction Score      0.842170
Premium                 2.465963
Inside Good Share       0.155441
dtype: float64
Market_ID             353.530303
Plan_ID                 8.570606
PPO                     0.494545
Network Score           0.871382
Satisfaction Score      0.842170
Premium                 2.465963
Inside Good Share       0.155441
Outside Good Share      0.134506
dtype: float64


In [75]:
#first estimate using logit
class logit(GMM):
    """GMM model for a linear equation in the log ratio of two share columns.

    The dependent data is expected to carry two columns; the first is used as
    the numerator share and the second as the denominator share (in this
    notebook: inside-good share and outside-good share respectively).
    """

    def __init__(self, *args, **kwds):
        # Nothing model-specific is configured here; every argument is
        # forwarded untouched to the GMM constructor.
        super(logit, self).__init__(*args, **kwds)

    def momcond(self, params):
        """Return the moment conditions evaluated at ``params``.

        Each instrument column is multiplied, row by row, by the residual
        ``exog.dot(params) - (log(share_0) - log(share_1))``.
        """
        # Work on plain arrays; after the transpose each share is one row.
        share_rows = np.array(self.endog).transpose()
        regressors = np.array(self.exog)
        instruments = np.array(self.instrument)

        # Observed log share ratio.
        log_ratio = (np.log(share_rows[0]) - np.log(share_rows[1])).transpose()

        # Residual: fitted linear index minus the observed log share ratio.
        residual = np.matmul(regressors, params) - log_ratio

        # Broadcast the residual across the instrument columns.
        return instruments * residual[:, np.newaxis]

    
#set up data
# Dependent data: the two share columns, inside good first, outside good second.
y = data[['Inside Good Share', 'Outside Good Share']]
# Regressors of the linear equation.
x = data[['Network Score', 'Satisfaction Score', 'Premium']]
# Instruments: same as the regressors except that 'Hausman' replaces 'Premium'.
z = data[['Network Score', 'Satisfaction Score', 'Hausman']]

# Starting values: one entry per regressor, all set to 1.
beta_init = np.full(x.shape[1], 1)

# Build the model and estimate it with Nelder-Mead ('nm').
model1 = logit(y, x, z)
result1 = model1.fit(
    beta_init,
    maxiter=2,
    optim_method='nm',
    wargs={'centered': False},
)
print(result1.summary())

Optimization terminated successfully.
         Current function value: 0.000010
         Iterations: 49
         Function evaluations: 98
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 93
         Function evaluations: 170
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                    1.321e-07
Model:                                                   logit   Prob (Hansen J):                   nan
Method:                                                    GMM                                         
Date:                                         Mon, 01 Oct 2018                                         
Time:                                                 19:52:58                                         
No. Observations:                                         3300                                     

In [27]:
class logit(GMM):
    """GMM model for the log share ratio that uses the regressors as instruments.

    NOTE(review): this cell defines the name ``logit`` that another cell of
    this notebook defines as well; whichever cell is executed last silently
    replaces the other. Consider giving the two variants distinct names.

    The dependent data must carry two columns: the inside-good share first and
    the outside-good share second.
    """

    def __init__(self, *args, **kwds):
        # No extra configuration: all arguments are forwarded to GMM as-is.
        super(logit, self).__init__(*args, **kwds)

    def momcond(self, params):
        """Return the moment conditions evaluated at ``params``.

        The residual ``exog.dot(params) - (log(inside) - log(outside))`` is
        multiplied, row by row, with every column of ``exog``; the separate
        instrument matrix is not used by this variant.
        """
        # statsmodels keeps endog/exog on the model as NumPy arrays, so the
        # share columns have to be picked by position -- indexing with the
        # original DataFrame column labels raises an IndexError.
        shares = np.asarray(self.endog)
        exog = np.asarray(self.exog)

        # Observed log share ratio: column 0 = inside good, column 1 = outside good.
        lshare = np.log(shares[:, 0]) - np.log(shares[:, 1])
        # Fitted value of the linear equation.
        lshare_fit = np.matmul(exog, params)

        # Residual, broadcast across the regressor columns.
        xi = lshare_fit - lshare
        g = exog * xi[:, np.newaxis]
        return g