In [1]:
import pandas as pd
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM

# Part 0 - Set up

In [2]:
def comp_outside_good(data,name):
    #pre-processing to calculate outside good shares
    shares = data[['Market_ID',name]].copy()

    group_shares = shares.groupby('Market_ID').sum()
    group_shares['Outside Good Share'] = 1 - group_shares[name]

    data = pd.merge(data,group_shares[['Outside Good Share']], 
                right_index=True, left_on = 'Market_ID')
    return data

data = pd.read_csv('data.csv')

In [3]:
#set up useful global variables 
NMKTS = data['Market_ID'].nunique()
NPLANS = data['Plan_ID'].nunique()
NOBS = data['Plan_ID'].count()
NSIM = 50

#initialize theta1 and theta2 based on estimates
theta1 = np.array([ 3.15820237,  1.65121504,  0.64728765, -1.08950616])
theta2 =  np.array([2.32968109, 2.16972611, 0.81662519])
xi = np.genfromtxt('xi.csv', delimiter=',')

v = np.genfromtxt('simulations.csv', delimiter=',').reshape(NSIM,3,3300)

#print global variables
print NMKTS,NPLANS,NOBS

600 16 3300


In [4]:
def cal_same_mkt(data):
    same_mkt = np.array([data['Market_ID']],dtype=np.float32 )
    same_mkt = (same_mkt.transpose()).dot( 1/same_mkt)
    same_mkt = np.equal(same_mkt,np.ones((NOBS,NOBS)) )
    same_mkt = same_mkt.astype(np.float32)
    return same_mkt

In [5]:
def cal_sim_s(data, v, xi, theta1, theta2):
    """calculate market share across each simulated consumer
    
    Note: we are not summing accross simulations when this is called
    
    Also note: this function takes different arguments than the 
    one from estimation"""
    
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO', 'Premium']])
    delta = xi + np.matmul(np.array(x),theta1)
    delta  = np.tile( delta  ,(NSIM,1))
    
    
    x = (x.transpose()[:-1])
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1, NOBS))
    
    #add to calcualte market shares
    sim_exp = pd.DataFrame( np.exp(delta + (theta2*v*x).sum(axis=1)).transpose() , 
                           index= data.index ) 
    
    #sum up between markets
    sim_exp['mkt_id'] = data['Market_ID']
    sum_exp = sim_exp.groupby('mkt_id').sum() 
    sum_exp = pd.merge(data.copy()[['Market_ID']], sum_exp, 
                       right_index=True, left_on = 'Market_ID')
    
    #format so I can broadcast
    sim_exp = np.array(sim_exp).transpose()[:-1]
    sum_exp = np.array(sum_exp).transpose()[1:] + 1
    
    return sim_exp/sum_exp


def cal_s(data, v, xi, theta1, theta2):
    """Calculate market share
    Calculates choice probability in each simulation, 
    then takes the sum"""
    
    shares = (1./NSIM)*cal_sim_s(data, v, xi, theta1, theta2)
    shares = (1./NSIM)*cal_sim_s(data, v, xi, theta1, theta2).sum(axis=0)
    return shares

sim_s = cal_s(data, v, xi, theta1, theta2)

# Part 2 - Elasticities

In [6]:

def cal_price_deriv(data, v, xi, theta1, theta2):
    """calculate own price derivative""" 
    alpha = abs(theta1[-1])
    same_mkt = cal_same_mkt(data)
    sim_shares = cal_sim_s(data, v, xi, theta1, theta2)
    cross_deriv = np.zeros((NOBS,NOBS))
    for sim_share in sim_shares:
        sim_share = sim_share.reshape((NOBS,1))
        cross_deriv = cross_deriv + sim_share.dot(sim_share.transpose())*same_mkt
        
    own = np.identity(NOBS)
    cross = (1 - own)
    own_deriv  = -(1-sim_shares) * sim_shares

    own_deriv = own_deriv.sum(axis=0)
    
    sim_deriv = 1./(NSIM) * alpha * (cross_deriv*cross +own_deriv*own )
    return sim_deriv


def cal_price_elast(data, v, xi, theta1, theta2):
    """calculate the elasticity using the price derivative matrix"""
    share = np.array([data['Inside Good Share']])
    price = np.array([data['Premium']]).transpose()
    deriv = cal_price_deriv(data, v, xi, theta1, theta2)
    return (price).dot(1/share)*deriv

In [7]:
def comp_avg_elasticity(data, v, xi, theta1, theta2):
    """figure out what the average elasticty between goods is"""
    elast = cal_price_elast(data, v, xi, theta1, theta2)
    
    #who cares about run time?
    avg_elasticity = np.zeros((NPLANS,NPLANS))
    same_mkt = cal_same_mkt(data)
    elast = elast
    
    
    plan_mkt = np.array(data[['Plan_ID']])
    for plan_j in range(1,NPLANS+1):
        for plan_k in range(1,NPLANS+1):
            data_k = data[ (data['Plan_ID'] == plan_k) ].index.values
            data_j = data[ (data['Plan_ID'] == plan_j)].index.values
            if len(data_k) >0 and len(data_j) > 0:
                lenobs = 0
                for ind_j in data_j:
                    for ind_k in data_k:
                        avg_elasticity[plan_j-1][plan_k-1] = (avg_elasticity[plan_j-1][plan_k-1]
                                                               + elast[ind_j][ind_k]) 
                        lenobs = lenobs + same_mkt[ind_j][ind_k]
                avg_elasticity[plan_j-1][plan_k-1] =  (avg_elasticity[plan_j-1][plan_k-1]
                                                        /lenobs)
                
    return avg_elasticity
                
avg_elasticity = comp_avg_elasticity(data, v, xi, theta1, theta2)
np.savetxt("elasticity.csv", avg_elasticity, delimiter=",")

Below are the average cross price elasticities calculated using the random coefficients among the 16 plans

In [8]:
print pd.DataFrame(avg_elasticity)

          0         1         2         3         4         5         6   \
0  -2.492264  0.466191  0.560306  0.563392  0.481553  0.493344  0.517168   
1   0.205594 -2.505202  0.236450  0.237618  0.303568  0.301846  0.236140   
2   0.604815  0.539695 -2.470747  0.586466  0.527039  0.528866  0.574588   
3   0.416995  0.395443  0.400046 -2.517787  0.386871  0.396935  0.441552   
4   0.240685  0.332397  0.236744  0.254490 -2.519823  0.314748  0.242538   
5   0.309590  0.425489  0.316905  0.343587  0.410343 -2.322832  0.308082   
6   0.502243  0.478123  0.508541  0.573823  0.476418  0.462339 -2.533480   
7   0.347041  0.492499  0.332557  0.327373  0.430056  0.444087  0.333033   
8   0.439049  0.387585  0.428355  0.488396  0.373164  0.383760  0.443882   
9   0.183570  0.241364  0.171521  0.190233  0.254180  0.246180  0.186626   
10  0.245003  0.344347  0.244190  0.263465  0.310051  0.326322  0.251096   
11  0.544921  0.481930  0.531515  0.573617  0.458386  0.468117  0.540137   
12  0.355177

## Markups

How does the markup vary with market structure?

In [9]:
def comp_markup(data, v, xi, theta1, theta2):
    """compute the markup implied by demand
    from the Nash Betrand equilibrium"""
    
    shares = np.array(data['Inside Good Share'])
    #caclulate formula
    own_deriv  = cal_price_deriv(data, v, xi, theta1, theta2)
    own_deriv = np.diag(own_deriv)
    
    #take inverse and calc markup
    inv_deriv = 1/own_deriv
    markup = - inv_deriv*shares
    return markup


data['Markup'] = comp_markup(data, v, xi, theta1, theta2)
data['Marginal Cost'] = data['Premium'] - data['Markup']
data['Unobs'] = xi

Below we calculate the average markup per plan

In [10]:
print data[['Plan_ID','Markup']].groupby('Plan_ID').mean()

           Markup
Plan_ID          
1        1.200935
2        1.066559
3        1.233132
4        1.140333
5        1.075550
6        1.136478
7        1.199869
8        1.150914
9        1.140497
10       1.037810
11       1.087632
12       1.198169
13       1.123580
14       1.115016
15       1.234100
16       1.116287


As we can see the more plans in a market, the lower the markup. This is the same as the previous homework

In [11]:
mean_markup = data[['Market_ID','Markup']].groupby('Market_ID').mean()
no_firms = data[['Market_ID','Plan_ID']].groupby('Market_ID').count()

model_q2 = sm.OLS(mean_markup,sm.add_constant(no_firms))
result_q2 = model_q2.fit()
print result_q2.summary()

                            OLS Regression Results                            
Dep. Variable:                 Markup   R-squared:                       0.867
Model:                            OLS   Adj. R-squared:                  0.867
Method:                 Least Squares   F-statistic:                     3908.
Date:                Mon, 12 Nov 2018   Prob (F-statistic):          2.02e-264
Time:                        10:39:11   Log-Likelihood:                 1090.5
No. Observations:                 600   AIC:                            -2177.
Df Residuals:                     598   BIC:                            -2168.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.4952      0.005    275.854      0.0

# Part 3 - Marginal Costs

The following regression will show the relationship between Marginal costs and the plan characteristics. We can se it is increasing in all except satisfaction score.

In [12]:
model_q3 = sm.OLS(data['Marginal Cost'], 
                   sm.add_constant(data[['Network Score','Satisfaction Score','PPO']]))
result_q3 = model_q3.fit()
print result_q3.summary()

                            OLS Regression Results                            
Dep. Variable:          Marginal Cost   R-squared:                       0.359
Model:                            OLS   Adj. R-squared:                  0.358
Method:                 Least Squares   F-statistic:                     614.2
Date:                Mon, 12 Nov 2018   Prob (F-statistic):          4.08e-317
Time:                        10:39:11   Log-Likelihood:                 3556.3
No. Observations:                3300   AIC:                            -7105.
Df Residuals:                    3296   BIC:                            -7080.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  1.3735      0

# Part 4 - Counterfactuals

The code below is designed to recompute the Nash Betrand Equilibrium in each market

In [13]:
def cal_mkt_sim_s(p, data, v, xi, theta1, theta2,nobs):
    """only calculate sims within the same market
    will use this when recalculating the FOCs"""
    
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO']])
    delta = xi + np.matmul(np.array(x),theta1[:-1]) + p*theta1[-1]
    delta  = np.tile( delta  ,(NSIM,1))
    
    x = x.transpose()
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1, nobs))
    
    #add to calcualte market shares
    sim_exp = np.exp(delta + (theta2*v*x).sum(axis=1)).transpose()
    
    return  (1./ (sim_exp.sum(axis=0) +1) ) * sim_exp


def cal_mkt_s(p, data, v, xi, theta1, theta2,nobs):
    """calc market share within the same market"""
    shares = (1./NSIM)*cal_mkt_sim_s(p, data, v, xi, theta1, theta2,nobs).sum(axis=1)

    return shares


def cal_mkt_deriv(p, data, v, xi, theta1, theta2 , nobs):
    """calculate price derivative, but only in the same market"""
    alpha = abs(theta1[-1])
    sim_shares = cal_mkt_sim_s(p, data, v, xi, theta1, theta2, nobs)
    own_deriv  = -(1-sim_shares) * sim_shares
    
    own_deriv = own_deriv.sum(axis=1)
    sim_deriv = 1./(NSIM) * alpha * (own_deriv)

    return sim_deriv


def comp_foc(p, data, v, xi, theta1, theta2, subs, nobs):
    """compute the first order condition (market by market)"""
    shares =  cal_mkt_s(p, data, v, xi, theta1, theta2,  nobs)
    #caclulate formula
    own_deriv  = cal_mkt_deriv(p, data, v, xi, theta1, theta2 , nobs)
    inv_deriv = 1/own_deriv
    markup = - inv_deriv*shares
    return markup - (p - data['Marginal Cost'] + subs)

In [14]:
#numerically solve on a market by market basis
new_prices = [[]] * NMKTS


for i in range(1,NMKTS+1):
    #set up mkt level variables
    mkt_data = data.copy()[data['Market_ID'] == i]
    
    mkt_data['Marginal Costs'] = (mkt_data.copy()['Marginal Cost'] ) #apply subsidy
    mkt_obs = mkt_data['Plan_ID'].count()
    mkt_prices = np.array(mkt_data['Premium']).squeeze()
    mkt_xi = mkt_data['Unobs']
    
    #get the right simulation draws
    first_ind = mkt_data.index.values[0]
    last_ind = mkt_data.index.values[-1] + 1
    mkt_v = v[:,:,first_ind:last_ind]
    
    #calculate FOCs
    mkt_new_prices = fsolve(comp_foc, mkt_prices, args= (mkt_data, mkt_v, mkt_xi,
                                                         theta1, theta2, .25, mkt_obs) )
    new_prices[i-1] = mkt_new_prices
    

#flatten result to 1d array
new_prices = np.array([ p for  mkt_new_prices in new_prices for p in  mkt_new_prices ])

#write to file
np.savetxt('prices_blp.csv', new_prices, delimiter=',')

In [15]:
#avoid caclulating everytime
new_prices = np.genfromtxt('prices_blp.csv', delimiter=',')

# Part 1 - Uninsurance rate

Below we calcualte how much the uninsurance rate delcined after the the subsidy

In [16]:
#outside good shares

cf_data = data.copy()
data = comp_outside_good(data,'Inside Good Share')

cf_data['Premium'] = new_prices
cf_data['New Inside Good'] =  cal_s(cf_data, v, xi, theta1, theta2)
cf_data = comp_outside_good(cf_data,'New Inside Good')

#compare the mean outside good before and after the rebate. It decreases.
print 'Outside Good (Before Rebate): %s'%data['Outside Good Share'].mean()
print 'Outside Good (After Rebate): %s'%cf_data['Outside Good Share'].mean()

Outside Good (Before Rebate): 0.134505918182
Outside Good (After Rebate): 0.118173319137


# Part 2 - Change in Profits
Below we ecalculate the change in profits per enrollee after the rebate

In [17]:
#profits per enrollee, comparision
print 'Per Enrollee (Before Rebate): %s'%(data['Premium'] - cf_data['Marginal Cost']).mean()
print 'Per Enrollee (After Rebate): %s'%(cf_data['Premium'] - cf_data['Marginal Cost'] + .25).mean()

Per Enrollee (Before Rebate): 1.14041233609
Per Enrollee (After Rebate): 1.1421989598


# Part 3 - Change in Consumer Surplus

Below we calculate the change in consumer surplus using the formula in Train. Specifically, we use:

$$\Delta E(CS_n)  = \dfrac{1}{\alpha} [ln(\sum_j ln( e^{\delta^1_{ji}} ) - ln(e^{\delta^0_{ji}}) $$

In [18]:
def comp_sim_exp(data, v, xi, theta1, theta2):
    """simulate utility in every market"""
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO', 'Premium']])
    delta = xi + np.matmul(np.array(x),theta1)
    delta  = np.tile( delta  ,(NSIM,1))
    
    x = (x.transpose()[:-1])
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1, NOBS))
    
    #add to calcualte market shares
    sim_exp = pd.DataFrame( np.exp(delta + (theta2*v*x).sum(axis=1)).transpose() , 
                           index= data.index )
    return sim_exp


def comp_exp(data, v, xi, theta1, theta2):
    """Calculate market share
    Calculates individual choice probability first, then take sum"""
    
    shares = (1./NSIM)*comp_sim_exp(data, v, xi, theta1, theta2 ).sum(axis=1)
    return shares

In [19]:
def comp_surplus(data, cf_data, v, xi, theta1, theta2 ):
    """ compute exp(delta_j) to compute the change in consumer surplus """
    
    alpha = abs(theta1[-1])
    exp = comp_exp(data, v, xi, theta1, theta2)
    
    cf_exp = comp_exp(cf_data, v, xi, theta1, theta2 )
    
    utility_ratio = cf_exp.sum()/exp.sum()
    return 1/alpha * np.log( utility_ratio )


print 'Change in consumer surplus: %s'%comp_surplus(data, cf_data, v, xi, theta1, theta2 )

Change in consumer surplus: 0.2493774901778668


## Surplus and market structure

In [20]:
def sum_by_mkt(inner, data):
    """function for summing utility by market"""
    inner['mkt_id'] = data['Market_ID']
    inner = inner.groupby('mkt_id').sum()
    return inner


def comp_surplus_mkt(data, cf_data, v, xi, theta1, theta2 ):
    """compute the change in consumer surplus on a 
    per market basis """
    alpha = abs(theta1[-1])
    
    #compute surplus change by market
    exp = comp_exp(data, v, xi, theta1, theta2 )
    exp = sum_by_mkt(exp, data)
    cf_exp = comp_exp(cf_data, v, xi, theta1, theta2 )
    cf_exp = sum_by_mkt(cf_exp, data)
    
    utility_ratio = np.array(cf_exp/exp)
    return 1/alpha * np.log( utility_ratio )
    
    

mkt_surplus = comp_surplus_mkt(data, cf_data, v, xi, theta1, theta2 )
no_firms = data[['Market_ID','Plan_ID']].groupby('Market_ID').count()

model_q4 = sm.OLS(mkt_surplus,sm.add_constant(no_firms))
result_q4 = model_q4.fit()
print result_q4.summary()

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.689
Model:                            OLS   Adj. R-squared:                  0.688
Method:                 Least Squares   F-statistic:                     1322.
Date:                Mon, 12 Nov 2018   Prob (F-statistic):          1.34e-153
Time:                        10:39:54   Log-Likelihood:                 2980.5
No. Observations:                 600   AIC:                            -5957.
Df Residuals:                     598   BIC:                            -5948.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2395      0.000   1031.209      0.0

Using the table above, we can see that surplus is increasing with the number of firms in each market. This is because when there are more firms, the rebate is passed more directly to consumers.

# Part 4

The answers are roughly the same between BLP and the logit model.