In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
def comp_outside_good(data,name):
    """Attach each market's outside-good share to every row of ``data``.

    The outside-good share of a market is one minus the sum of the ``name``
    column over all rows of that market.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'Market_ID'`` column and the column ``name``.
        No other column is required.
    name : str
        Column holding the inside-good share of each row.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``data`` plus an ``'Outside Good Share'`` column.
        The frame passed in is not modified.
    """
    # only the market key and the share column are needed for the total
    group_shares = data[['Market_ID', name]].groupby('Market_ID').sum()
    group_shares['Outside Good Share'] = 1 - group_shares[name]

    # broadcast the market-level value back onto the individual rows
    return pd.merge(data, group_shares[['Outside Good Share']],
                    right_index=True, left_on='Market_ID')


def comp_nest_shares(x,name):
    """Add a ``'nest_size'`` column: the total share of each row's own nest.

    Rows are split into two nests by the 0/1 ``'PPO'`` column (PPO = 1,
    HMO = 0).  Within every ``'Market_ID'`` the ``name`` column is summed
    separately for the two nests, and each row receives the total of the
    nest it belongs to.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain ``'Market_ID'``, ``'PPO'`` and the column ``name``.
    name : str
        Column holding the share of each row.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``x`` plus ``'nest_size'``.  The input is not modified.
    """
    is_ppo = x['PPO']

    # split the share column into a PPO part and an HMO part
    by_nest = x[['Market_ID']].copy()
    by_nest['ppo_share'] = x[name] * is_ppo
    by_nest['hmo_share'] = x[name] * (1 - is_ppo)

    # market-level totals of each nest
    nest_totals = by_nest.groupby('Market_ID').sum()

    merged = pd.merge(x, nest_totals[['hmo_share', 'ppo_share']],
                      right_index=True, left_on='Market_ID')

    # keep only the total that matches the row's own nest
    merged['nest_size'] = ((1 - merged['PPO']) * merged['hmo_share']
                           + merged['PPO'] * merged['ppo_share'])
    return merged.drop(labels=['ppo_share', 'hmo_share'], axis=1)


# Load the raw data, then attach the outside-good share and the size of each
# row's nest, both computed from the 'Inside Good Share' column.
data = (pd.read_csv('data.csv')
          .pipe(comp_outside_good, 'Inside Good Share')
          .pipe(comp_nest_shares, 'Inside Good Share'))

# log of the plan's share within its own nest
data['ln(Within Nest Share)'] = np.log(data['Inside Good Share'] / data['nest_size'])

In [3]:
#first estimate using logit
class logit(GMM):
    """GMM model whose moments come from a linear equation for log share odds.

    ``endog`` is read as two columns of shares; the dependent variable is
    ``log(first column) - log(second column)``.  In this notebook the two
    columns are the inside-good and outside-good shares.
    """

    def __init__(self, *args, **kwds):
        # nothing model-specific to set up: defer entirely to GMM
        super(logit, self).__init__(*args, **kwds)

    def momcond(self, params):
        """Return the per-observation moment contributions.

        Each row of ``self.instrument`` is multiplied by the residual of the
        corresponding observation, where the residual is the fitted value
        ``exog . params`` minus the observed log share odds.
        """
        share_cols = np.array(self.endog).transpose()
        regressors = np.array(self.exog)
        instruments = np.array(self.instrument)

        # observed log odds: log of first share column minus log of second
        log_odds = np.log(share_cols[0]) - np.log(share_cols[1])

        # residual of the linear equation (fitted minus observed)
        resid = np.matmul(regressors, params) - log_odds

        # scale every instrument row by that observation's residual
        return instruments * resid[:, np.newaxis]

# Question 1 - Estimate the Model

In [4]:
#calculate Hausman instruments
# one dummy per plan (first level dropped); these are the instruments used below
plan_dum = pd.get_dummies(data['Plan_ID'], prefix='plan',drop_first=True)
# market dummies carry their own prefix so that their column names cannot
# collide with those of plan_dum if the two sets are ever concatenated
mkt_dum = pd.get_dummies(data['Market_ID'], prefix='mkt',drop_first=True)
# alternative instrument set (exogenous characteristics + market dummies),
# kept for reference only:
#hausman_instr =  pd.concat( [data[['Network Score','Satisfaction Score','PPO']],
#                               mkt_dum],axis=1)

hausman_instr = plan_dum

#set up data for logit: first column inside share, second column outside share
y = data[['Inside Good Share','Outside Good Share']]

# regressors: plan characteristics, premium and ln(within-nest share);
# the within-nest share term must stay last (its coefficient is read as sigma)
x_nested = data[['Network Score','Satisfaction Score','PPO',
                 'Premium','ln(Within Nest Share)']]

I use plan dummy variables as my instruments. Regressing the premium on these dummies gives a first stage of the form 

$$p_{jm} = p_j + \epsilon_{jm}$$ 

as is standard, where $\epsilon_{jm}$ is a market-specific shock for product $j$. I also estimated a model with the other exogenous characteristics (i.e. Network Score, Satisfaction Score, PPO) in the instrument set. However, the model coefficients were similar, so for simplicity I just use the dummy variables.


Additionally, I argue that these dummies are also valid instruments for the within-nest share:

$$ln(s_{jm|g}) = ln(s_{j|g}) + \epsilon_{jm}$$

It is plausible that the average within-nest share across markets is exogenous to the share in any given market. This would be true if roughly the same proportion of consumers prefer plan $j$ in each market (for its exogenous characteristics), but the set of plans available differs across markets (as is the case).

In [5]:
#set up and run model
# starting values: every coefficient set to 1, one per column of x_nested
beta_nested = np.full(x_nested.shape[1], 1)
model = logit(y, x_nested, hausman_instr)

# maxiter=2 -> the saved output shows two optimizer runs;
# 'nm' is presumably Nelder-Mead -- confirm against the statsmodels GMM docs
result = model.fit(
    beta_nested,
    maxiter=2,
    optim_method='nm',
    wargs={'centered': False},
)

print(result.summary())

Optimization terminated successfully.
         Current function value: 0.000006
         Iterations: 571
         Function evaluations: 928
Optimization terminated successfully.
         Current function value: 0.005680
         Iterations: 235
         Function evaluations: 380
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                        18.74
Model:                                                   logit   Prob (Hansen J):                0.0437
Method:                                                    GMM                                         
Date:                                         Sun, 14 Oct 2018                                         
Time:                                                 12:46:55                                         
No. Observations:                                         3300                                  

In [6]:
#calc unobservables
# Xi = observed log share odds minus the fitted linear index
data['Xi'] = (
    np.log(y['Inside Good Share'])
    - np.log(y['Outside Good Share'])
    - np.matmul(x_nested.values, result.params)
)

In [7]:
#compute market shares

def comp_shares(x,beta,sigma):
    """Compute fitted nested-logit market shares for every row of ``x``.

    With ``delta = characteristics . beta + Xi`` the fitted share is::

        exp(delta/(1-sigma)) / ( D_g**sigma * (D_PPO**(1-sigma) + D_HMO**(1-sigma) + 1) )

    where ``D_g`` is the sum of ``exp(delta/(1-sigma))`` over the rows of the
    same market and nest (PPO = 1 or HMO = 0) as the row itself.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain ``'Market_ID'``, ``'Plan_ID'``, ``'Network Score'``,
        ``'Satisfaction Score'``, ``'PPO'`` (0/1), ``'Premium'`` and ``'Xi'``.
    beta : array-like of length 4
        Coefficients, in the order Network Score, Satisfaction Score, PPO,
        Premium.
    sigma : float
        Nesting parameter.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Market_ID'``, ``'Plan_ID'``, ``'PPO'``, ``'fitted_share'``.
        The input frame is not modified.
    """
    x = x.copy()
    characs = np.array(x[['Network Score','Satisfaction Score','PPO','Premium']])
    x['exp_delta'] = np.exp((np.matmul(characs,beta) + x['Xi'])/(1-sigma))

    #compute Dg = sum_j|g exp(delta_j), separately for each nest
    nest_terms = x[['Market_ID']].copy()
    nest_terms['PPO_delta'] = x['exp_delta'] * x['PPO']
    nest_terms['HMO_delta'] = x['exp_delta'] * (1 - x['PPO'])
    nest_sums = nest_terms.groupby('Market_ID').sum()

    #compute sum_g Dg^(1-sigma); the "+ 1" is presumably the outside good's term
    nest_sums['sum_g'] = (nest_sums['PPO_delta']**(1-sigma)
                          + nest_sums['HMO_delta']**(1-sigma) + 1)

    x = pd.merge(x, nest_sums[['PPO_delta','HMO_delta','sum_g']],
                 right_index=True, left_on='Market_ID')

    # denominator: D_g**sigma of the row's own nest times the sum over nests
    x['denom'] = ((1 - x['PPO'])*x['HMO_delta']**sigma +
                  x['PPO']*x['PPO_delta']**sigma) * (x['sum_g'])
    x['fitted_share'] = x['exp_delta']/x['denom']
    return x[['Market_ID','Plan_ID','PPO','fitted_share']]

In [8]:
#initialize parameters and compute fitted shares
characs = data[['Market_ID','Plan_ID','Network Score',
                'Satisfaction Score','PPO','Premium','Xi']]
# every coefficient except the last one, which belongs to ln(Within Nest Share)
beta = result.params[:-1]
# find the Premium coefficient by name instead of a hard-coded position
alpha = abs(beta[list(x_nested.columns).index('Premium')])
# NOTE(review): abs() would mask a wrong-signed estimate of alpha or sigma --
# confirm the signs in the summary table above
sigma = abs(result.params[-1])

fitted_shares = comp_shares(characs,beta,sigma)

In [9]:
#compute total share of each nest
# comp_nest_shares adds a 'nest_size' column: the summed fitted share of the
# row's own nest (PPO or HMO) within its market
fitted_shares = comp_nest_shares(fitted_shares,'fitted_share')
# within-nest share: the plan's fitted share divided by its nest's total
fitted_shares['nest_shares'] = (fitted_shares['fitted_share']/
                                fitted_shares['nest_size'])

Characteristics of the fitted shares

In [10]:
# print() call form: same output on Python 2, and valid syntax on Python 3
print('Mean: %s' % fitted_shares['fitted_share'].mean())
print('Max: %s' % fitted_shares['fitted_share'].max())
print('Min: %s' % fitted_shares['fitted_share'].min())

Mean: 0.155440963636
Max: 0.35561000000000004
Min: 0.03767


# Question 2 - Compute Elasticities, Markups

## Elasticities

Below are the formulas for the elasticities in the Nested logit

Own price: $$-\alpha p_{jm} \left( \dfrac{1}{1-\sigma} -  \dfrac{\sigma}{1-\sigma}s_{jm|g} - s_{jm} \right) $$
Cross price, same nest (good $j$, price $k$): $$\alpha \dfrac{p_{km}}{s_{jm}} s_{km} \left(\dfrac{\sigma}{1-\sigma}s_{jm|g} + s_{jm} \right)$$
Cross price, different nest (good $j$, price $k$): $$\alpha s_{km} p_{km}$$

In [17]:
#aggregate elasticities
# Average shares, within-nest shares and premiums over markets, plan by plan.
# The plan-level table gets its own name so that `fitted_shares` keeps its
# market-level rows and this cell gives the same result when it is re-run.
plan_level = fitted_shares.copy()
plan_level['Premium'] = data['Premium']   # aligned on the shared row index
plan_means = plan_level.groupby('Plan_ID').mean()

#plan-level inputs to the elasticity formulas
nest_shares = np.array(plan_means['nest_shares'])
shares = np.array(plan_means['fitted_share'])
prices = np.array(plan_means['Premium'])
ppo = np.array([plan_means['PPO']]) #this one is a matrix (1 x number of plans)

#selector matrices (entries are 0/1): which formula applies to each (j, k) pair
own_price = np.identity(len(shares))
# 1 where j and k are different plans in the same nest
inside_nest = (np.matmul(ppo.transpose(),ppo) + 
               np.matmul((1-ppo.transpose()),(1-ppo)) - own_price)
# 1 where j and k are in different nests
outside_nest = (1 - inside_nest) - own_price


#elasticity variables
# same nest: element (j, k) = (sigma/(1-sigma)*s_j|g + s_j)/s_j * alpha*p_k*s_k
inside_elasticity = np.matmul( np.array( [ (sigma/(1-sigma)* nest_shares
                                            + shares)/shares ] ).transpose(),
          np.array([alpha*prices*shares]))

# own price: one value per plan, placed on the diagonal below
own_elasticity = -alpha*((1/(1-sigma)) - sigma/(1-sigma) * 
                         nest_shares - shares)*prices

# different nest: depends on the price-changing plan k only (broadcast over rows)
outside_elasticity =  shares * alpha * prices

# row j, column k: elasticity of plan j's share w.r.t. plan k's price
nest_elasticity = (own_price*own_elasticity + 
                   inside_nest*inside_elasticity + 
                   outside_nest*outside_elasticity)

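Below are the elasticities from the nested logit. Entry (row $j$, column $k$) is the elasticity of plan $j$'s share with respect to plan $k$'s price. Note that the indexing starts at 0, so row/column $i$ corresponds to the $(i+1)$-th plan in Plan_ID order.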

In [18]:
# print() call form: same output on Python 2, and valid syntax on Python 3
print(pd.DataFrame(nest_elasticity))

          0         1         2         3         4         5         6   \
0  -3.614758  0.289348  1.427780  1.066116  0.301170  0.388905  1.310656   
1   0.553277 -3.625294  0.596565  0.445453  0.907179  1.171454  0.547628   
2   1.326098  0.289348 -3.540520  1.067664  0.301170  0.388905  1.312559   
3   1.387476  0.289348  1.496032 -3.694212  0.301170  0.388905  1.373310   
4   0.553277  0.888736  0.596565  0.445453 -3.534876  1.194525  0.547628   
5   0.553277  0.843088  0.596565  0.445453  0.877533 -3.356309  0.547628   
6   1.335440  0.289348  1.439925  1.075185  0.301170  0.388905 -3.624992   
7   0.553277  0.871988  0.596565  0.445453  0.907614  1.172015  0.547628   
8   1.360937  0.289348  1.467417  1.095713  0.301170  0.388905  1.347042   
9   0.553277  0.910438  0.596565  0.445453  0.947634  1.223694  0.547628   
10  0.553277  0.875027  0.596565  0.445453  0.910777  1.176100  0.547628   
11  1.336768  0.289348  1.441356  1.076254  0.301170  0.388905  1.323119   
12  1.440995

We can see that the within-nest elasticities fall relative to the simple logit; this is because you are restricted to buying within the nest. Conversely, the between-nest elasticities go up.

## Markups

In [13]:
#solve for marginal costs
def comp_markup(shares,nest_shares,price_coef=None,nest_param=None):
    """Markup (price minus marginal cost) of each product, one per entry.

    Only own-price derivatives enter the calculation, so the derivative
    matrix is diagonal and the markup ``-s_j / (ds_j/dp_j)`` with
    ``ds_j/dp_j = -alpha*(1/(1-sigma) - sigma/(1-sigma)*s_j|g - s_j)*s_j``
    reduces to the closed form::

        1 / (alpha * (1/(1-sigma) - sigma/(1-sigma)*s_j|g - s_j))

    Using the closed form avoids building and inverting a dense N x N
    matrix, and it stays defined when a share is exactly zero (the matrix
    version was singular in that case).

    Parameters
    ----------
    shares : array-like, shape (N,)
        Market share of each product.
    nest_shares : array-like, shape (N,)
        Share of each product within its nest.
    price_coef : float, optional
        Price coefficient (alpha).  Defaults to the notebook-level ``alpha``.
    nest_param : float, optional
        Nesting parameter (sigma).  Defaults to the notebook-level ``sigma``.

    Returns
    -------
    numpy.ndarray, shape (N,)
        The markup of each product.
    """
    # fall back on the notebook globals so existing two-argument calls still work
    a = alpha if price_coef is None else price_coef
    s = float(sigma if nest_param is None else nest_param)

    shares = np.asarray(shares, dtype=float)
    nest_shares = np.asarray(nest_shares, dtype=float)

    # bracketed term of the own-price derivative
    own_term = 1.0/(1 - s) - s/(1 - s)*nest_shares - shares
    return 1.0/(a*own_term)



# Rebuild the market-level fitted shares and within-nest shares (the
# elasticity cell above works on plan-level averages instead).
fitted_shares =  comp_shares(characs,beta,sigma)
fitted_shares = comp_nest_shares(fitted_shares,'fitted_share')
fitted_shares['nest_shares'] = fitted_shares['fitted_share']/fitted_shares['nest_size']

nest_shares = np.array(fitted_shares['nest_shares']).squeeze()
shares = np.array(fitted_shares['fitted_share']).squeeze()
prices = np.array(data['Premium']).squeeze()

# assigned by position: assumes fitted_shares rows are in the same order as
# data -- TODO confirm
data['Markup'] = comp_markup(shares,nest_shares)
# print() call form: same output on Python 2, and valid syntax on Python 3
print(data[['Plan_ID','Markup']].groupby('Plan_ID').mean())

           Markup
Plan_ID          
1        0.731614
2        0.660008
3        0.755827
4        0.699635
5        0.675452
6        0.717732
7        0.735489
8        0.729162
9        0.696208
10       0.639947
11       0.682427
12       0.730343
13       0.695612
14       0.699386
15       0.761726
16       0.698719


Overall the markups are higher than with the pure logit model. This is because there is less competition within the nest.

In [16]:
# market-level average markup and number of plans offered in each market
mean_markup = data[['Market_ID','Markup']].groupby('Market_ID').mean()
# count() leaves the column named 'Plan_ID'; it holds the number of plans
no_firms = data[['Market_ID','Plan_ID']].groupby('Market_ID').count()


# regress the average markup on a constant and the number of plans
model_q2 = sm.OLS(mean_markup,sm.add_constant(no_firms))
result_q2 = model_q2.fit()
# print() call form: same output on Python 2, and valid syntax on Python 3
print(result_q2.summary())

                            OLS Regression Results                            
Dep. Variable:                 Markup   R-squared:                       0.749
Model:                            OLS   Adj. R-squared:                  0.749
Method:                 Least Squares   F-statistic:                     1784.
Date:                Sun, 14 Oct 2018   Prob (F-statistic):          1.30e-181
Time:                        12:51:22   Log-Likelihood:                 906.08
No. Observations:                 600   AIC:                            -1808.
Df Residuals:                     598   BIC:                            -1799.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0326      0.007    140.103      0.0

As we can see the Markup is decreasing in the number of firms in the market

# Question 3 - Marginal Costs against Plan Characteristics

In [15]:
# implied marginal cost (premium minus markup) regressed on a constant and
# the plan characteristics
model_q3 = sm.OLS(data['Premium'] - data['Markup'], 
                   sm.add_constant(data[['Network Score','Satisfaction Score','PPO']]))
result_q3 = model_q3.fit()
# print() call form: same output on Python 2, and valid syntax on Python 3
print(result_q3.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.383
Model:                            OLS   Adj. R-squared:                  0.383
Method:                 Least Squares   F-statistic:                     682.2
Date:                Sun, 14 Oct 2018   Prob (F-statistic):               0.00
Time:                        12:47:03   Log-Likelihood:                 2672.2
No. Observations:                3300   AIC:                            -5336.
Df Residuals:                    3296   BIC:                            -5312.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  1.5074      0