In [1]:
import pandas as pd
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
def comp_outside_good(data,name):
    #pre-processing to calculate outside good shares
    shares = data[['Market_ID',name]].copy()

    group_shares = shares.groupby('Market_ID').sum()
    group_shares['Outside Good Share'] = 1 - group_shares[name]

    data = pd.merge(data,group_shares[['Outside Good Share']], 
                right_index=True, left_on = 'Market_ID')
    return data


data = pd.read_csv('data.csv')
data = comp_outside_good(data,'Inside Good Share')

In [3]:
#first estimate using logit
class logit(GMM):
    
    def __init__(self, *args, **kwds):
        # set appropriate counts for moment conditions and parameters
        super(logit, self).__init__(*args, **kwds)

        
    def momcond(self, params):
        #unwrap stuff
        shares = np.array(self.endog).transpose()
        exog = np.array(self.exog)
        instr = np.array(self.instrument)
        
        lshare = np.log(shares[0]) -  np.log(shares[1])
        lshare = lshare.transpose()
       
        lshare_fit = np.matmul(exog,params) #linear equation    
        
        xi = lshare_fit - lshare
        g = instr * xi[:, np.newaxis]
        
        return g

# Question 1 - Estimates

I use dummy variables for the plan as my instrument. When regressing these dummy variables on plan, my model has the form 

$$p_{jm} = p_j + \epsilon_{jm}$$ 

as is standard where $\epsilon_{jm}$ is a market specific shock for product $j$. The idea is that the average price of plan $j$ varies exogenously to each market. I estimated a model with the other exogeneous characteristics (i.e. Network score, Satisfaction, PPO) in the instrument. However, the model coefficients where similar, so for simplicitiy I just use the dummy variables.

In [4]:
#calculate hausmann insturments
mkt_dum = pd.get_dummies(data['Market_ID'],prefix='mkt',drop_first=True)
plan_dum = pd.get_dummies(data['Plan_ID'],prefix='plan',drop_first=True)
hausman_instr = plan_dum

#set up x and y
y = data[['Inside Good Share','Outside Good Share']]
x =  data[['Network Score','Satisfaction Score','PPO','Premium']]

In [5]:
#set up initial est
beta_init = np.full(len(x.columns),1)

#set up model
model = logit(y , x, hausman_instr)

result = model.fit(beta_init, maxiter=2, optim_method='nm', 
                   wargs=dict(centered=False))
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.000008
         Iterations: 292
         Function evaluations: 497
Optimization terminated successfully.
         Current function value: 0.005149
         Iterations: 139
         Function evaluations: 249
                                             logit Results                                             
Dep. Variable:     ['Inside Good Share', 'Outside Good Share']   Hansen J:                        16.99
Model:                                                   logit   Prob (Hansen J):                 0.108
Method:                                                    GMM                                         
Date:                                         Sun, 14 Oct 2018                                         
Time:                                                 12:48:34                                         
No. Observations:                                         3300                                  

In [6]:
data['Xi'] = (np.log(y['Inside Good Share']) - np.log(y['Outside Good Share']) 
              - np.matmul(np.array(x),result.params))

# Question 2 - Elasticities and Markups

## Elasticities

Below are the formulas for elasticity

Own price : $$-\alpha (1-s_{jm}) p_{jm}$$
Cross price (good $j$, price $k$): $$-\alpha s_{km} p_{km}$$

The logit has a very inflexible substitution pattern. 

In [7]:
def comp_shares(x,p,alpha,beta,labels=False):
    #compute exp(delta_j)
    x = x.copy()
    x['exp_delta'] =  np.exp ( np.matmul(x[['Network Score',
                                            'Satisfaction Score','PPO']],
                                         beta) - alpha*p + x['Xi'])
    #print x['exp_delta']
    
    #compute 1 + sum_j exp(delta_j)
    sum_delta = x.groupby('Market_ID').sum()
    sum_delta['sum_exp_delta'] = 1 + sum_delta['exp_delta'] 
    
    x = pd.merge(x, sum_delta[['sum_exp_delta']], 
                right_index=True, left_on = 'Market_ID')

    #compute s_j
    x['fitted_share'] = x['exp_delta']/x['sum_exp_delta']
    
    if labels: 
        return x[['Plan_ID','Market_ID','fitted_share']]
    
    return np.array(x['fitted_share']).squeeze()
        

#set up parameters 
observ = data[['Plan_ID','Market_ID','Network Score','Satisfaction Score','PPO','Xi']]
prices = np.array( data['Premium'] )

beta = result.params[:-1]
alpha = abs(result.params[3])

shares = comp_shares(observ,prices,alpha,beta)

print shares - np.array(data['Inside Good Share']).squeeze()

[ 2.77555756e-17  5.55111512e-17  5.55111512e-17 ... -1.38777878e-17
  0.00000000e+00  0.00000000e+00]


In [8]:
def comp_elasticity(shares, prices, alpha):
    #set up matrix skeleton
    own_price = np.identity(len(shares))
    cross_price = 1 - own_price

    #actually calculate elasticity
    cross_elasticity = shares * alpha * prices
    own_elasticity  = -(1-shares) * alpha * prices

    return cross_price*cross_elasticity + own_price *own_elasticity


#agregate elasticities
elasticity = comp_elasticity(shares, prices, alpha)

#average elasticity
avg_shares =  np.array( comp_shares(observ,prices,alpha,beta,
                                    labels=True).groupby(
    'Plan_ID').mean()['fitted_share']).squeeze()
avg_price = np.array( data[['Plan_ID','Premium']].groupby(
    'Plan_ID').mean() ).squeeze()
avg_elasticity = comp_elasticity(avg_shares, avg_price, alpha)

Below are the model's implied elasticities

In [9]:
print pd.DataFrame(avg_elasticity)

          0         1         2         3         4         5         6   \
0  -4.300662  0.544704  1.123047  0.838574  0.566958  0.732122  1.030922   
1   1.041556 -4.319348  1.123047  0.838574  0.566958  0.732122  1.030922   
2   1.041556  0.544704 -4.253176  0.838574  0.566958  0.732122  1.030922   
3   1.041556  0.544704  1.123047 -4.365580  0.566958  0.732122  1.030922   
4   1.041556  0.544704  1.123047  0.838574 -4.257134  0.732122  1.030922   
5   1.041556  0.544704  1.123047  0.838574  0.566958 -4.123943  1.030922   
6   1.041556  0.544704  1.123047  0.838574  0.566958  0.732122 -4.319801   
7   1.041556  0.544704  1.123047  0.838574  0.566958  0.732122  1.030922   
8   1.041556  0.544704  1.123047  0.838574  0.566958  0.732122  1.030922   
9   1.041556  0.544704  1.123047  0.838574  0.566958  0.732122  1.030922   
10  1.041556  0.544704  1.123047  0.838574  0.566958  0.732122  1.030922   
11  1.041556  0.544704  1.123047  0.838574  0.566958  0.732122  1.030922   
12  1.041556

Note indexing starts at 0. We can see the elasticities are higher in the Logit than in the nested logit

## Markups

The following formula relates marginal costs with prices:

$$\hat{mc}_i = pi - (\dfrac{ \partial s_i} { \partial p_i})^{-1} s_i$$

We can use it to calculate the markup

In [10]:
#solve for marginal costs
def comp_markup(shares):
    shares_vector = np.array([shares])
    
    #set up matrix    
    own_price = np.identity(len(shares))

    #caclulate formula
    own_deriv  = - alpha * (1-shares)  * shares
    
    derivative = own_price *own_deriv
    #take inverse and calc markup
    inv_derivative = np.linalg.inv(derivative)

    markup = - np.matmul(inv_derivative, shares_vector.transpose()) 
    return markup.transpose()[0]


markup = comp_markup(shares)
mc = prices - markup

data['Markup'] = markup
print data[['Plan_ID','Markup']].groupby('Plan_ID').mean()

           Markup
Plan_ID          
1        0.605243
2        0.547706
3        0.616228
4        0.580313
5        0.550945
6        0.573316
7        0.604003
8        0.574877
9        0.581282
10       0.535394
11       0.555452
12       0.601756
13       0.571446
14       0.563932
15       0.616249
16       0.566054


Overall the markups are lower than with the nested logit. This follows because without the nest, there is more competition between firms

In [11]:
mean_markup = data[['Market_ID','Markup']].groupby('Market_ID').mean()
no_firms = data[['Market_ID','Plan_ID']].groupby('Market_ID').count()

model_q2 = sm.OLS(mean_markup,sm.add_constant(no_firms))
result_q2 = model_q2.fit()
print result_q2.summary()

                            OLS Regression Results                            
Dep. Variable:                 Markup   R-squared:                       0.908
Model:                            OLS   Adj. R-squared:                  0.908
Method:                 Least Squares   F-statistic:                     5907.
Date:                Sun, 14 Oct 2018   Prob (F-statistic):          3.98e-312
Time:                        12:48:45   Log-Likelihood:                 1807.3
No. Observations:                 600   AIC:                            -3611.
Df Residuals:                     598   BIC:                            -3602.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Plan_ID       -0.0219      0.000    -76.859      0.0

As we can see the Markup is decreasing in the number of firms in the market, like with the nested logit (see `nest_logit` output for more details...)

# Question 3 - Marginal Costs vs Plan Characteristics

In [12]:
model_q3 = sm.OLS(data['Premium'] - data['Markup'], 
                   sm.add_constant(data[['Network Score','Satisfaction Score','PPO']]))
result_q3 = model_q3.fit()
print result_q3.summary()

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.779
Model:                            OLS   Adj. R-squared:                  0.779
Method:                 Least Squares   F-statistic:                     3875.
Date:                Sun, 14 Oct 2018   Prob (F-statistic):               0.00
Time:                        12:48:47   Log-Likelihood:                 5512.2
No. Observations:                3300   AIC:                        -1.102e+04
Df Residuals:                    3296   BIC:                        -1.099e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  1.4255      0

# Question 4  - Counterfactuals

In [15]:
#set up function to calc FOC for Nash Bargainining
def comp_foc(p, costs, x, alpha, beta):
    shares = comp_shares(x,p,alpha, beta)
    new_markup = comp_markup(shares)
    return new_markup - (p - costs)


max_mkt = data.groupby('Market_ID').count().shape[0]
new_prices = [[]] * max_mkt

#numerically solve on a market by market basis
for i in range(1,max_mkt+1):
    
    #set up mkt level variables
    mkt_data = data[data['Market_ID'] == i]
    mkt_prices = np.array(mkt_data['Premium']).squeeze()
    mkt_markup = np.array(mkt_data['Markup']).squeeze()
    mkt_mc = mkt_prices - mkt_markup - .25 #the rebate reduces MC
    mkt_x = mkt_data[['Plan_ID','Market_ID','Network Score','Satisfaction Score','PPO','Xi']]
   
    #calculate FOCs
    mkt_new_prices = fsolve(comp_foc, mkt_prices, args= (mkt_mc, mkt_x, alpha, beta) )
    new_prices[i-1] = mkt_new_prices
    
#flatten result to 1d array
new_prices = np.array([ p for  mkt_new_prices in new_prices for p in  mkt_new_prices ])

#write to file
np.savetxt("prices2.csv", new_prices, delimiter=",")

In [27]:
#avoid caclulating everytime
new_prices = np.genfromtxt('prices2.csv', delimiter=',')

## Part 1 - Unisurance Rate

In [28]:
#outside good shares
new_data = data[['Market_ID']].copy()
new_data['New Inside Good'] =  comp_shares(observ, new_prices, alpha, beta)
new_data = comp_outside_good(new_data,'New Inside Good')

#compare the mean outside good before and after the rebate. It decreases.
print 'Outside Good (Before Rebate): %s'%data['Outside Good Share'].mean()
print 'Outside Good (After Rebate): %s'%new_data['Outside Good Share'].mean()

Outside Good (Before Rebate): 0.134505918182
Outside Good (After Rebate): 0.0866271205604


## Part 2 - Change in Profits

In [29]:
def industry_profits(x, p, alpha, beta, mc):
    """computes agregate profits"""
    shares = comp_shares(x, p, alpha, beta)
    return np.matmul(shares, np.array([prices - mc]).transpose())

#industry wide profits
print 'Industry Profits (Before Rebate): %s'%industry_profits(observ,
                                                              prices, 
                                                              alpha, beta , mc)[0]
print 'Industry Profits (After Rebate): %s'%industry_profits(observ,
                                                             new_prices,
                                                             alpha, beta , mc)[0]

#profits per enrollee, comparision
print 'Per Enrollee (Before Rebate): %s'%(prices - mc).mean()
print 'Per Enrollee (After Rebate): %s'%(new_prices + .25 - mc).mean()

Industry Profits (Before Rebate): 304.7568008664531
Industry Profits (After Rebate): 323.06702291696183
Per Enrollee (Before Rebate): 0.5774424394884409
Per Enrollee (After Rebate): 0.5844861247221862


## Part 3 - Change in Consumer Surplus

As in Train Ch. 3 

$$\Delta E(CS) = \dfrac{1}{\alpha} [ln(\sum_{j} e^{\delta'_{jm}} ) - ln(\sum_{j} e^{\delta_{jm}} )  ]$$

In [30]:
def comp_surplus(x,p1,p2,alpha,beta,labels=False):
    #compute exp(delta_j)
    x = x.copy()
    x['exp_delta1'] =  np.exp ( np.matmul(x[['Network Score',
                                             'Satisfaction Score','PPO']],
                                          beta) - alpha*p1)
    x['exp_delta2'] =  np.exp ( np.matmul(x[['Network Score',
                                             'Satisfaction Score','PPO']],
                                          beta) - alpha*p2) 
    #note the xi's cancel out in the formula
    
    #1/alpha *  ( sum(np.exp(delta1_j)) - sum(np.exp(delta0_j)) )
    utility_ratio = x['exp_delta2'].sum()/x['exp_delta1'].sum()
    return 1/alpha * np.log( utility_ratio )


print comp_surplus(x,prices,new_prices,alpha,beta)

0.24304126881360041
