In [19]:
import pandas as pd
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM

In [20]:
def comp_outside_good(data,name):
    #pre-processing to calculate outside good shares
    shares = data[['Market_ID',name]].copy()

    group_shares = shares.groupby('Market_ID').sum()
    group_shares['Outside Good Share'] = 1 - group_shares[name]

    data = pd.merge(data,group_shares[['Outside Good Share']], 
                right_index=True, left_on = 'Market_ID')
    return data

data = pd.read_csv('data.csv')

In [21]:
#set up useful global variables 
NMKTS = data['Market_ID'].nunique()
NPLANS = data['Plan_ID'].nunique()
NOBS = data['Plan_ID'].count()
NSIM = 20

#initialize theta1 and theta2 based on estimates
theta1 = np.array([3.34952545,  1.95632696,  0.6506052 , -1.5642128])
theta2 = np.array([1.43279579, 1.25336653, 0.50565619])
xi = np.genfromtxt('xi.csv', delimiter=',')

v0 = np.genfromtxt('simulations.csv', delimiter=',')
v = np.tile(v0.reshape(NSIM,3,1) , (1,1,NOBS))

#print global variables
print NMKTS,NPLANS,NOBS

600 16 3300


In [22]:
def cal_sim_s(data, v, xi, theta1, theta2):
    """calculate market share for each simulated consumer"""
    
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO', 'Premium']])
    delta = xi + np.matmul(np.array(x),theta1)
    delta  = np.tile( delta  ,(NSIM,1))
    
    
    x = (x.transpose()[:-1])
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1, NOBS))
    
    #add to calcualte market shares
    sim_exp = pd.DataFrame( np.exp(delta + (theta2*v*x).sum(axis=1)).transpose() , 
                           index= data.index ) 
    
    #sum up between markets
    sim_exp['mkt_id'] = data['Market_ID']
    sum_exp = sim_exp.groupby('mkt_id').sum() 
    sum_exp = pd.merge(data.copy()[['Market_ID']], sum_exp, 
                       right_index=True, left_on = 'Market_ID')
    
    #format so I can broadcast
    sim_exp = np.array(sim_exp).transpose()[:-1]
    sum_exp = np.array(sum_exp).transpose()[1:] + 1
    
    return sim_exp/sum_exp

def cal_s(data, v, xi, theta1, theta2):
    """Calculate market share
    Calculates individual choice probability first, then take sum"""
    
    shares = (1./NSIM)*cal_sim_s(data, v, xi, theta1, theta2).sum(axis=0)
    return shares

sim_s = cal_s(data, v, xi, theta1, theta2)

# Part 2 - Elasticities

In [28]:
def cal_price_deriv(data, v, xi, theta1, theta2):
    alpha = abs(theta1[-1])
    
    sim_shares = cal_sim_s(data, v, xi, theta1, theta2)
    cross_deriv = np.zeros((NOBS,NOBS))
    for sim_share in sim_shares:
        sim_share = sim_share.reshape((NOBS,1))
        cross_deriv = cross_deriv + sim_share.dot(sim_share.transpose())
        
    own = np.identity(NOBS)
    cross = (1 - own)
    own_deriv  = -(1-sim_shares) * sim_shares
    own_deriv = own_deriv.sum(axis=0)
    
    sim_deriv = 1./(NSIM) * alpha * (cross_deriv*cross +own_deriv*own )
    return sim_deriv


def cal_price_elast(data, v, xi, theta1, theta2):

    share = np.array(data['Inside Good Share'])
    price = np.array([data['Premium']]).transpose()
    deriv = cal_price_deriv(data, v, xi, theta1, theta2)
    
    return share.dot(1/price)*deriv

elast = cal_price_elast(data, v, xi, theta1, theta2)

In [24]:
#need to think about getting average elasticity

def comp_avg_elasticity(data, v, xi, theta1, theta2):
    elast = cal_price_elast(data, v, xi, theta1, theta2)
    
    #who cares about run time?
    avg_elasticity = np.zeros((NPLANS,NPLANS))
    
    same_mkt = np.array([data['Market_ID']],dtype=np.float32 )
    same_mkt = (same_mkt.transpose()).dot( 1/same_mkt)
    same_mkt = np.equal(same_mkt,np.ones((NOBS,NOBS)) )
    same_mkt = same_mkt.astype(np.float32)
    
    elast = elast*same_mkt
    
    
    plan_mkt = np.array(data[['Plan_ID']])
    for plan_j in range(1,NPLANS+1):
        for plan_k in range(1,NPLANS+1):
            data_k = data[ (data['Plan_ID'] == plan_k) ].index.values
            data_j = data[ (data['Plan_ID'] == plan_j)].index.values
            if len(data_k) >0 and len(data_j) > 0:
                lenobs = 0
                for ind_j in data_j:
                    for ind_k in data_k:
                        avg_elasticity[plan_j-1][plan_k-1] = (avg_elasticity[plan_j-1][plan_k-1]
                                                               + elast[ind_j][ind_k]) 
                        lenobs = lenobs + same_mkt[ind_j][ind_k]
                avg_elasticity[plan_j-1][plan_k-1] =  (avg_elasticity[plan_j-1][plan_k-1]
                                                        /lenobs)
                
    return avg_elasticity
                
avg_elasticity = comp_avg_elasticity(data, v, xi, theta1, theta2)
np.savetxt("elasticity.csv", avg_elasticity, delimiter=",")
print pd.DataFrame(avg_elasticity)

          0         1         2         3         4         5         6   \
0  -2.404092  0.254645  0.614399  0.448612  0.310901  0.399155  0.476252   
1   0.254645 -1.545957  0.343568  0.256235  0.212894  0.281903  0.312730   
2   0.614399  0.343568 -2.525627  0.443360  0.335651  0.435968  0.523575   
3   0.448612  0.256235  0.443360 -2.086232  0.272076  0.379331  0.487807   
4   0.310901  0.212894  0.335651  0.272076 -1.608848  0.272553  0.316079   
5   0.399155  0.281903  0.435968  0.379331  0.272553 -1.962772  0.373036   
6   0.476252  0.312730  0.523575  0.487807  0.316079  0.373036 -2.386800   
7   0.435678  0.319254  0.405457  0.315309  0.262095  0.361852  0.381023   
8   0.469018  0.251067  0.508879  0.449548  0.248591  0.340284  0.476122   
9   0.242579  0.163055  0.247309  0.205309  0.188172  0.239659  0.251774   
10  0.335056  0.242694  0.357978  0.298883  0.198456  0.294005  0.338927   
11  0.559193  0.294964  0.547191  0.488212  0.292433  0.385220  0.520189   
12  0.385246

## Markups

How does the markup vary with market structure?

In [72]:
def comp_markup(data, v, xi, theta1, theta2):
    
    shares = np.array(data['Inside Good Share'])
    
    #caclulate formula
    own_deriv  = cal_price_deriv(data, v, xi, theta1, theta2)
    own_deriv = np.diag(own_deriv)
    
    
    #take inverse and calc markup
    inv_deriv = 1/own_deriv
    
    markup = - inv_deriv*shares
    return markup


data['Markup'] = comp_markup(data, v, xi, theta1, theta2)
data['Marginal Cost'] = data['Premium'] - data['Markup']
data['Unobs'] = xi

-0.21928086540972883


In [57]:
print data[['Plan_ID','Markup']].groupby('Plan_ID').mean()

           Markup
Plan_ID          
1        0.820045
2        0.732974
3        0.840634
4        0.781211
5        0.738904
6        0.777593
7        0.819460
8        0.785513
9        0.782057
10       0.713782
11       0.745086
12       0.817716
13       0.770790
14       0.765512
15       0.839426
16       0.765007


In [31]:
mean_markup = data[['Market_ID','Markup']].groupby('Market_ID').mean()
no_firms = data[['Market_ID','Plan_ID']].groupby('Market_ID').count()

model_q2 = sm.OLS(mean_markup,sm.add_constant(no_firms))
result_q2 = model_q2.fit()
print result_q2.summary()

                            OLS Regression Results                            
Dep. Variable:                 Markup   R-squared:                       0.890
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     4823.
Date:                Sat, 03 Nov 2018   Prob (F-statistic):          1.94e-288
Time:                        19:42:26   Log-Likelihood:                 1442.2
No. Observations:                 600   AIC:                            -2880.
Df Residuals:                     598   BIC:                            -2872.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9999      0.003    331.536      0.0

# Part 3 - Marginal Costs

In [32]:
model_q3 = sm.OLS(data['Marginal Cost'], 
                   sm.add_constant(data[['Network Score','Satisfaction Score','PPO']]))
result_q3 = model_q3.fit()
print result_q3.summary()

                            OLS Regression Results                            
Dep. Variable:          Marginal Cost   R-squared:                       0.679
Model:                            OLS   Adj. R-squared:                  0.678
Method:                 Least Squares   F-statistic:                     2321.
Date:                Sat, 03 Nov 2018   Prob (F-statistic):               0.00
Time:                        19:42:27   Log-Likelihood:                 5059.9
No. Observations:                3300   AIC:                        -1.011e+04
Df Residuals:                    3296   BIC:                        -1.009e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  1.4339      0

# Part 4 - Counterfactuals

In [82]:
def cal_mkt_sim_s(p, data, v, xi, theta1, theta2,nobs):
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO']])
    delta = xi + np.matmul(np.array(x),theta1[:-1]) + p*theta1[-1]
    delta  = np.tile( delta  ,(NSIM,1))
    
    x = x.transpose()
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1, nobs))
    
    #add to calcualte market shares
    sim_exp = np.exp(delta + (theta2*v*x).sum(axis=1)).transpose()
    
    return  (1./ (sim_exp.sum(axis=0) +1) ) * sim_exp


def cal_mkt_s(p, data, v, xi, theta1, theta2,nobs):
    shares = (1./NSIM)*cal_mkt_sim_s(p, data, v, xi, theta1, theta2,nobs).sum(axis=1)

    return shares


def cal_mkt_deriv(p, data, v, xi, theta1, theta2 , nobs):
    alpha = abs(theta1[-1])
    sim_shares = cal_mkt_sim_s(p, data, v, xi, theta1, theta2, nobs)
    own_deriv  = -(1-sim_shares) * sim_shares
    
    own_deriv = own_deriv.sum(axis=1)
    sim_deriv = 1./(NSIM) * alpha * (own_deriv)
    
    #print -0.2192
    #print sim_deriv

    return sim_deriv


def comp_foc(p, data, v, xi, theta1, theta2, subs, nobs):
    
    shares =  cal_mkt_s(p, data, v, xi, theta1, theta2,  nobs)
    #caclulate formula
    
    own_deriv  = cal_mkt_deriv(p, data, v, xi, theta1, theta2 , nobs)
    inv_deriv = 1/own_deriv
    
    markup = - inv_deriv*shares
    
    
    return markup - (p - data['Marginal Cost'] + subs)


mkt_data = data.copy()[data['Market_ID'] == i]
mkt_data['Marginal Costs'] = (mkt_data.copy()['Marginal Cost'] ) #apply subsidy
mkt_obs = mkt_data['Plan_ID'].count()
mkt_prices = np.array(mkt_data['Premium']).squeeze()
mkt_v = np.tile(v0.reshape(NSIM,3,1) , (1,1,mkt_obs))
mkt_xi = mkt_data['Unobs']


print comp_foc(mkt_prices, mkt_data, mkt_v, mkt_xi, theta1, theta2, 0, mkt_obs)

0   -0.000043
1   -0.000050
2   -0.000059
Name: Marginal Cost, dtype: float64


In [86]:
#numerically solve on a market by market basis
new_prices = [[]] * NMKTS


for i in range(1,NMKTS+1):
    #set up mkt level variables
    mkt_data = data.copy()[data['Market_ID'] == i]
    mkt_data['Marginal Costs'] = (mkt_data.copy()['Marginal Cost'] ) #apply subsidy
    mkt_obs = mkt_data['Plan_ID'].count()
    mkt_prices = np.array(mkt_data['Premium']).squeeze()
    mkt_v = np.tile(v0.reshape(NSIM,3,1) , (1,1,mkt_obs))
    mkt_xi = mkt_data['Unobs']
    
    #calculate FOCs
    mkt_new_prices = fsolve(comp_foc, mkt_prices, args= (mkt_data, mkt_v, mkt_xi,
                                                         theta1, theta2, .25, mkt_obs) )
    new_prices[i-1] = mkt_new_prices
    

#flatten result to 1d array
new_prices = np.array([ p for  mkt_new_prices in new_prices for p in  mkt_new_prices ])

#write to file
np.savetxt('prices_blp.csv', new_prices, delimiter=',')

In [87]:
#avoid caclulating everytime
new_prices = np.genfromtxt('prices_blp.csv', delimiter=',')

# Part 1 - Uninsurance rate

In [90]:
#outside good shares

cf_data = data.copy()
data = comp_outside_good(data,'Inside Good Share')

cf_data['Premium'] = new_prices
cf_data['New Inside Good'] =  cal_s(cf_data, v, xi, theta1, theta2)
cf_data = comp_outside_good(cf_data,'New Inside Good')

#compare the mean outside good before and after the rebate. It decreases.
print 'Outside Good (Before Rebate): %s'%data['Outside Good Share'].mean()
print 'Outside Good (After Rebate): %s'%cf_data['Outside Good Share'].mean()

Outside Good (Before Rebate): 0.134505918182
Outside Good (After Rebate): 0.1067712938


# Part 2 - Change in Profits

In [91]:
#profits per enrollee, comparision
print 'Per Enrollee (Before Rebate): %s'%(data['Premium'] - cf_data['Marginal Cost']).mean()
print 'Per Enrollee (After Rebate): %s'%(cf_data['Premium'] - cf_data['Marginal Cost'] + .25).mean()

Per Enrollee (Before Rebate): 0.780581746388
Per Enrollee (After Rebate): 0.78325433363


# Part 3 - Change in Consumer Surplus

In [92]:
def comp_sim_exp(data, v, xi, theta1, theta2 , nobs):

    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO', 'Premium']])
    delta = xi + np.matmul(np.array(x),theta1)
    delta  = np.tile( delta  ,(NSIM,1))
    
    x = (x.transpose()[:-1])
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1, nobs))
    
    #add to calcualte market shares
    sim_exp = pd.DataFrame( np.exp(delta + (theta2*v*x).sum(axis=1)).transpose() , 
                           index= data.index )
    return sim_exp


def comp_exp(data, v, xi, theta1, theta2 , nobs):
    """Calculate market share
    Calculates individual choice probability first, then take sum"""
    
    shares = (1./NSIM)*comp_sim_exp(data, v, xi, theta1, theta2, nobs).sum(axis=0)
    return shares

exp = comp_sim_exp(data, v, xi, theta1, theta2 , NOBS)

In [93]:
def comp_surplus(data, cf_data, v, xi, theta1, theta2 , nobs):
    #compute exp(delta_j)
    
    alpha = abs(theta1[-1])
    exp = comp_exp(data, v, xi, theta1, theta2 , nobs)
    
    cf_exp = comp_exp(cf_data, v, xi, theta1, theta2 , nobs)
    
    utility_ratio = cf_exp.sum()/exp.sum()
    return 1/alpha * np.log( utility_ratio )


print comp_surplus(data, cf_data, v, xi, theta1, theta2 , NOBS)

0.24748710215045042
