In [49]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [50]:
data = pd.read_csv('data.csv')

#pre-processing to calculate outside good shares
shares = data[['Market_ID','Inside Good Share','PPO']].copy()
shares['PPO Share'] = data['Inside Good Share'] * data['PPO']
shares['HMO Share'] = data['Inside Good Share'] * (1 - data['PPO'])

group_shares = shares.groupby('Market_ID').sum()
group_shares['Outside Good Share'] = 1 - group_shares['Inside Good Share']

data = pd.merge(data,group_shares[['Outside Good Share','PPO Share','HMO Share']], 
                right_index=True, left_on = 'Market_ID')

data['Nest Market Size'] = ( (data['Inside Good Share'] * data['PPO']) / data['PPO Share'] 
                            + (data['Inside Good Share'] * (1 - data['PPO'])) /data['HMO Share'])

#calculate ln(Inside Good Share)
data['ln(Inside Good Share)'] = np.log( data['Inside Good Share']/data['Nest Market Size'] ) 
data = data.fillna(0) #necessary for log(1), unsure why

print data.min()

Market_ID                1.000000
Plan_ID                  1.000000
PPO                      0.000000
Network Score            0.780000
Satisfaction Score       0.725000
Premium                  2.269728
Inside Good Share        0.037670
Outside Good Share       0.096070
PPO Share                0.000000
HMO Share                0.000000
Nest Market Size         0.000000
ln(Inside Good Share)   -2.428602
dtype: float64


In [51]:
#calculate hausmann insturments
mkt_dum = pd.get_dummies(data['Market_ID'],prefix='mkt',drop_first=True)
plan_dum = pd.get_dummies(data['Plan_ID'],prefix='plan',drop_first=True)
#hausman_instr = pd.concat([mkt_dum,plan_dum],axis=1)
hausman_instr = plan_dum

In [52]:
#set up data for logit
y = data[['Inside Good Share','Outside Good Share']]
x =  data[['Network Score','Satisfaction Score','PPO','Premium']]

# add ln(inside good share) as regressor like formula
x_nested = data[['Network Score','Satisfaction Score','PPO','Premium','ln(Inside Good Share)']]

In [53]:
#first estimate using logit
class logit(GMM):
    
    def __init__(self, *args, **kwds):
        # set appropriate counts for moment conditions and parameters
        super(logit, self).__init__(*args, **kwds)

        
    def momcond(self, params):
        #unwrap stuff
        shares = np.array(self.endog).transpose()
        exog = np.array(self.exog)
        instr = np.array(self.instrument)
        
        lshare = np.log(shares[0]) -  np.log(shares[1])
        lshare = lshare.transpose()
       
        lshare_fit = np.matmul(exog,params) #linear equation    
        
        xi = lshare_fit - lshare
        g = instr * xi[:, np.newaxis]
        
        return g 

    
#set up initial est
beta_init = np.full(len(x.columns),1)

#set up model
model1 = logit(y , x, hausman_instr)

result1 = model1.fit(beta_init, maxiter=2, optim_method='nm', wargs=dict(centered=False))
print(result1.summary())

Optimization terminated successfully.
         Current function value: 0.000008
         Iterations: 292
         Function evaluations: 497
Optimization terminated successfully.
         Current function value: 0.005149
         Iterations: 139
         Function evaluations: 249
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                        16.99
Model:                                                   logit   Prob (Hansen J):                 0.108
Method:                                                    GMM                                         
Date:                                         Tue, 09 Oct 2018                                         
Time:                                                 16:18:07                                         
No. Observations:                                         3300                                  

In [54]:
#set up and run model
beta_nested = np.full(len(x_nested.columns),1)
model2 = logit(y , x_nested, hausman_instr)
result2 = model2.fit(beta_nested, maxiter=2, optim_method='nm', wargs=dict(centered=False))

print(result2.summary())

Optimization terminated successfully.
         Current function value: 0.000008
         Iterations: 524
         Function evaluations: 850
Optimization terminated successfully.
         Current function value: 0.004920
         Iterations: 153
         Function evaluations: 267
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                        16.24
Model:                                                   logit   Prob (Hansen J):                0.0931
Method:                                                    GMM                                         
Date:                                         Tue, 09 Oct 2018                                         
Time:                                                 16:18:07                                         
No. Observations:                                         3300                                  

In [67]:
#compute exp(delta_j)
delta = data.copy()[['Plan_ID','Market_ID','PPO','Premium']]
x = np.array(x)
beta1 = result1.params
alpha1 = abs(beta1[3])
delta['exp_delta'] = np.exp(np.matmul(x,beta1))

#compute 1 + sum_j exp(delta_j)
sum_delta = delta.groupby('Market_ID').sum()
sum_delta['sum_exp_delta'] = 1 + sum_delta['exp_delta'] 

delta = pd.merge(delta, sum_delta[['sum_exp_delta']], 
                right_index=True, left_on = 'Market_ID')

#compute s_j
delta['fitted_share'] = delta['exp_delta']/delta['sum_exp_delta']

print delta.groupby('Market_ID').sum().mean()

print delta['fitted_share'].max(), delta['fitted_share'].min(), delta['fitted_share'].mean(), np.sqrt(delta['fitted_share'].var())

Plan_ID          47.138333
PPO               2.720000
Premium          13.562795
exp_delta         6.297526
sum_exp_delta    44.228315
fitted_share      0.843098
dtype: float64
0.3211705667781474 0.04175067681732296 0.153290506773 0.0527060652944428


In [39]:
#agregate elasticities
delta = delta.groupby('Plan_ID').mean()

shares = np.array(delta['fitted_share'])
price = np.array(delta['Premium'])

own_price = np.identity(len(shares))
cross_price = 1 - own_price

cross_elasticity = shares * alpha1 * price
own_elasticity  = -(1-shares) * alpha1 * price

elasticity =  cross_price*cross_elasticity + own_price *own_elasticity

print elasticity[1][1],elasticity[2][1],elasticity[3][1] #second index tells you wrt what good i.e. denom

-4.308596601690402 0.5554552982100271 0.5554552982100271


In [40]:
#compute exp(delta_j)
delta_nest = data.copy()[['Market_ID','Plan_ID','PPO','Premium']]
beta2 = result2.params[:-1]
alpha2 = abs(beta2[3])
sigma = abs(result2.params[-1]) #not sure about this, feel like it should be positive?
delta_nest['exp_delta'] = np.exp(np.matmul(x,beta2)/(1-sigma))

#compute Dg = sum_j|g exp(delta_j)
s1 = delta_nest[['Market_ID','exp_delta','PPO']].copy()
s1['PPO_delta'] = delta_nest['exp_delta'] * delta_nest['PPO']
s1['HMO_delta'] = delta_nest['exp_delta'] * (1 - delta_nest['PPO'])

group_s1 = s1.groupby('Market_ID').sum()
group_s1['PP0_delta_sigma'] = group_s1['PPO_delta']**(sigma)
group_s1['HMO_delta_sigma'] = group_s1['HMO_delta']**(sigma)
group_s1['sum_g'] = group_s1['PPO_delta']**(1-sigma) + group_s1['HMO_delta']**(1-sigma)

delta_nest = pd.merge(delta_nest,group_s1[['PPO_delta','HMO_delta','PP0_delta_sigma','sum_g']], 
                right_index=True, left_on = 'Market_ID')

#compute sum_g Dg^(1-sigma)
delta_nest['denom'] = ( (1 - delta_nest['PPO'])*delta_nest['HMO_delta']  +
                 delta_nest['PPO']*delta_nest['PPO_delta']) * (delta_nest['sum_g'])

delta_nest['fitted_share'] = delta_nest['exp_delta']/delta_nest['denom']

print delta_nest['fitted_share'].max(),delta_nest['fitted_share'].min(),delta_nest['fitted_share'].mean(), np.sqrt(delta_nest['fitted_share'].var())

0.5186716445245331 0.01170248845783048 0.0732291327029 0.07212266746307026


In [41]:
#fitted nest share
fitted_shares = delta_nest[['Market_ID','fitted_share','PPO']].copy()
fitted_shares['fitted_ppo'] = delta_nest['fitted_share'] * delta_nest['PPO']
fitted_shares['fitted_hmo'] = delta_nest['fitted_share'] * (1 - delta_nest['PPO'])

group_s2 = fitted_shares.groupby('Market_ID').sum()
delta_nest = pd.merge(delta_nest,group_s2[['fitted_ppo','fitted_hmo']], 
                right_index=True, left_on = 'Market_ID')

delta_nest['fitted_nest_share'] = ( (delta_nest['fitted_share'] * delta_nest['PPO']) / delta_nest['fitted_ppo'] 
                            + (delta_nest['fitted_share'] * (1 - delta_nest['PPO'])) /delta_nest['fitted_hmo'])

In [42]:
#aggregate elasticities
delta_nest = delta_nest.groupby('Plan_ID').mean()

#diagonal formula
nest_shares = np.array(delta_nest['fitted_nest_share'])
sharesn = np.array(delta_nest['fitted_share'])
prices = np.array(delta_nest['Premium'])
ppo = np.array([delta_nest['PPO']]) #this one is a matrix

#selector matrices
inside_nest = np.matmul(ppo.transpose(),ppo) + np.matmul((1-ppo.transpose()),(1-ppo))
outside_nest = 1 - inside_nest 
own_price = np.identity(len(sharesn))

#elasticity variables
inside_elasticity = np.matmul( np.array( [ (sigma/(1-sigma) * nest_shares + sharesn)/sharesn ] ).transpose(),
          np.array([alpha2*price*sharesn]))

own_elasticity = -alpha2*((1/(1-sigma)) - sigma/(1-sigma) * nest_shares - sharesn)*prices

outside_elasticity =  sharesn * alpha2 * prices

nest_elasticity = own_price*own_elasticity + inside_nest*inside_elasticity + outside_nest*outside_elasticity


print outside_nest
print nest_elasticity[1][1]
print nest_elasticity[0][1],nest_elasticity[2][1] #should be the same (outside nest)
print nest_elasticity[4][1],nest_elasticity[5][1] #should be different

[[0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0.]]
-4.381805375785701
0.33923356160990165 0.33923356160990165
0.4327571052033485 0.4321921035146931


In [43]:
print (shares - sharesn).mean()

0.08048610885751606


In [45]:
#solve for marginal costs
p_divide_q = (1/shares * np.ones( (len(shares), len(shares)) )).transpose() * price

print p_divide_q[0][1], prices[1]/shares[0]

inv_derivative = np.linalg.inv( elasticity * p_divide_q)

mc = np.array([prices]).transpose() - np.matmul(inv_derivative, np.array([shares]).transpose()) 

mc = mc.transpose()[0]

print mc

12.410684346595144 12.410684346595144
[2.59083602 2.35673823 2.60768873 2.52299844 2.33739461 2.35360695
 2.59488989 2.40818186 2.52999745 2.33440917 2.32182747 2.59993908
 2.51966688 2.40866557 2.59619547 2.36380635]


In [75]:
#simplified version to get things working

def profits(p):
    p[p < 0] = 0 #set things if necessary?
    xes =  data[['Plan_ID','Network Score','Satisfaction Score','PPO']].groupby('Plan_ID').mean()
    constant = np.exp(np.matmul(xes,beta1[:-1]))
    delta_js = constant*np.exp(alpha1*p)
    denom = 1+sum(delta_js)
    s_js = delta_js/denom
    return - np.matmul(s_js, np.array([prices-mc+.25]).transpose())

prices = np.array(delta_nest['Premium'])

sum(profits(prices))

-0.2511224321308708

In [76]:
import numpy as np
from scipy.optimize import minimize

res = minimize(profits, prices, method='nelder-mead',
     options={'xtol': 1e-8, 'disp': True})

print(res.x) #doesn't seem right

Optimization terminated successfully.
         Current function value: -0.252226
         Iterations: 285
         Function evaluations: 853
[1.00271692e-02 9.75547188e-02 1.11861770e-01 4.75887662e+00
 1.24817262e+01 7.04289160e+00 3.91753341e+00 8.20092864e+00
 9.78134182e+00 1.68702280e+01 3.86840732e-01 3.28303747e+00
 2.09218577e+01 3.69849983e+01 4.98921183e-02 1.37640899e+01]


In [None]:
#profits per enrollee
#outside good shares

In [None]:
#consumer surplus