In [1]:
import pandas as pd
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM

In [2]:
def comp_outside_good(data,name):
    #pre-processing to calculate outside good shares
    shares = data[['Market_ID',name]].copy()

    group_shares = shares.groupby('Market_ID').sum()
    group_shares['Outside Good Share'] = 1 - group_shares[name]

    data = pd.merge(data,group_shares[['Outside Good Share']], 
                right_index=True, left_on = 'Market_ID')
    return data

data = pd.read_csv('data.csv')
data = comp_outside_good(data,'Inside Good Share')

In [3]:
def setup_data(data):
    """simplify setting up data correctly"""
    #calculate hausmann insturments
    mkt_dum = pd.get_dummies(data['Market_ID'],prefix='mkt',drop_first=True)
    plan_dum = pd.get_dummies(data['Plan_ID'],prefix='plan',drop_first=True)
    exog = np.array( data[['Network Score','Satisfaction Score', 'PPO']])
    
    #concat hausman instr with exog variables
    #hausman_instr = np.concatenate( (exog, np.array(plan_dum)), axis =1 )
    
    hausman_instr = plan_dum
    
    #set up x and y
    y = data[['Inside Good Share','Outside Good Share']]
    x =  data[['Network Score','Satisfaction Score','PPO','Premium']]
    return x,y,hausman_instr

x,y,z =  setup_data(data)

In [13]:
#set up useful global variables 
NMKTS = data['Market_ID'].nunique()
NPLANS = data['Plan_ID'].nunique()
NOBS = data['Plan_ID'].count()
NSIM = 20

theta2 = np.array([3,2,1]) # initialize theta2 for testing purposes
delta = np.ones(NOBS)*(-2)

#print global variables
print NMKTS,NPLANS,NOBS

600 16 3300


In [34]:
#set up v
v0 = np.genfromtxt('simulations.csv', delimiter=',')
v = np.tile(v0.reshape(NSIM,3,1) , (1,1,NOBS))

In [15]:
def cal_sim_s(data, v, delta, theta2):
    """calculate market share for each simulated consumer"""
    
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO']]).transpose()
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1,3300))
    delta  = np.tile( delta  ,(NSIM,1))
    
    #add to calcualte market shares
    sim_exp = pd.DataFrame( np.exp(delta + (theta2*v*x).sum(axis=1)).transpose() ) 
    
    #sum up between markets
    sim_exp['mkt_id'] = data['Market_ID']
    sum_exp = sim_exp.groupby('mkt_id').sum() 
    sum_exp = pd.merge(data.copy()[['Market_ID']], sum_exp, 
                       right_index=True, left_on = 'Market_ID')
    
    #format so I can broadcast
    sim_exp = np.array(sim_exp).transpose()[:-1]
    sum_exp = np.array(sum_exp).transpose()[1:] + 1
    
    return sim_exp/sum_exp

def cal_s(data, v, delta, theta2):
    """Calculate market share
    Calculates individual choice probability first, then take sum"""
    
    shares = (1./NSIM)*cal_sim_s(data, v, delta, theta2).sum(axis=0)
    return shares


s = cal_s(data, v, delta, theta2)

In [16]:
def cal_delta(data, v, theta2, error = 1e-3, maxiter = 500):
    """Calculate mean utility via contraction mapping"""

    niter = 0
    
    #initialize loop parameters
    delta = np.zeros(NOBS)
    s = cal_s(data, v, delta, theta2)
    diff = np.log(data['Inside Good Share']) - np.log(s)
    
    
    while ((abs(diff).max() > 1e-6) #this is easier to converge
           and (abs(diff).mean() > error) 
           and niter < maxiter):
        
        s = cal_s(data, v, delta, theta2)
        diff = np.log(data['Inside Good Share']) - np.log(s)

        if np.isnan(diff).sum():
            raise Exception('nan in diffs')
            
        delta += diff
        niter += 1

    return delta


delta = cal_delta(data, v, theta2)
print delta.shape
print delta.mean()

(3300,)
2.6566766164


In [17]:
theta1 = np.array([3.56297634,  2.0886027 ,  0.88453571, -2.09580927])
theta2 = np.array([0.21248441, 0.07349874,  1.16047725])

# Part 2 - Elasticities

In [21]:
delta = cal_delta(data, v, theta2)

def cal_price_deriv(data, v, delta, theta1, theta2):

    sim_shares = cal_sim_s(data, v, delta, theta2)
    
    #print sim_shares[1][2]
    sim_shares1 = sim_shares.reshape((NSIM,NOBS,1))
    sim_shares1 = np.tile(sim_shares1,(1,1,NOBS))
    
    print sim_shares1.shape
    
    sim_shares2 = sim_shares.reshape((NSIM,1,NOBS))
    sim_shares2 = np.tile(sim_shares2,(1,NOBS,1))
    
    sim_derivs = sim_shares1*sim_shares2
    print sim_derivs.shape
    
    #sim_derivs = sim_derivs.reshape((NSIM,))
    #alpha = abs(theta1[-1])
    #constant_deriv = alpha*data['Inside Good Share']/data['Premium']
    #shares = (1./NSIM)*(sim_exp/sum_exp).sum(axis=0)
    
    #compute sj * sk in each simulation
    
    #compute (1-sj)sj in each simulation
    
    return sim_shares.sum(axis=0)


#deriv = cal_price_deriv(data, v, delta, theta1, theta2)
#print deriv.mean(), deriv.min(), deriv.max(), deriv.shape

In [45]:
def cal_price_deriv(data, v, delta, theta1, theta2):
    
    alpha = theta1[-1]
    
    sim_shares = cal_sim_s(data, v, delta, theta2)
    cross_deriv = np.zeros((NOBS,NOBS))
    for sim_share in sim_shares:
        sim_share = sim_share.reshape((NOBS,1))
        cross_deriv = cross_deriv + sim_share.dot(sim_share.transpose())
        
    own = np.identity(NOBS)
    cross = (1 - own)
    own_deriv  = -(1-sim_shares) * sim_shares
    own_deriv = own_deriv.sum(axis=0)
    
    share = np.array(data['Inside Good Share'])
    price = np.array([data['Premium']]).transpose()
    
    share.dot(1/price)
    
    sim_deriv = share.dot(1/price) * 1./(NSIM*NSIM) * (cross_deriv*cross +own_deriv*own )
    return sim_deriv

deriv = cal_price_deriv(data, v, delta, theta1, theta2)

print deriv

[[-1.44434335  0.53505077  0.47741104 ...  0.13903751  0.23267281
   0.21160912]
 [ 0.53505077 -1.91813266  0.72645931 ...  0.21158369  0.35470662
   0.32201726]
 [ 0.47741104  0.72645931 -1.82411033 ...  0.34877944  0.2056258
   0.53170395]
 ...
 [ 0.13903751  0.21158369  0.34877944 ... -0.79708811  0.07119705
   0.12396949]
 [ 0.23267281  0.35470662  0.2056258  ...  0.07119705 -0.89393626
   0.10825663]
 [ 0.21160912  0.32201726  0.53170395 ...  0.12396949  0.10825663
  -1.14909503]]


In [95]:
#need to think about getting average elasticity

def comp_avg_elasticity(data, v, delta, theta1, theta2):
    elast = cal_price_deriv(data, v, delta, theta1, theta2)
    
    #who cares about run time?
    avg_elasticity = np.zeros((NPLANS,NPLANS))
    
    same_mkt = np.array([data['Market_ID']],dtype=np.float32 )
    same_mkt = (same_mkt.transpose()).dot( 1/same_mkt)
    same_mkt = np.equal(same_mkt,np.ones((NOBS,NOBS)) )
    same_mkt = same_mkt.astype(np.float32)
    
    elast = elast*same_mkt
    
    
    plan_mkt = np.array(data[['Plan_ID']])
    for plan_j in range(1,NPLANS+1):
        for plan_k in range(1,NPLANS+1):
            data_k = data[ (data['Plan_ID'] == plan_k) ].index.values
            data_j = data[ (data['Plan_ID'] == plan_j)].index.values
            if len(data_k) >0 and len(data_j) > 0:
                lenobs = 0
                for ind_j in data_j:
                    for ind_k in data_k:
                        avg_elasticity[plan_j-1][plan_k-1] = (avg_elasticity[plan_j-1][plan_k-1]
                                                               + elast[ind_j][ind_k]) 
                        lenobs = lenobs + same_mkt[ind_j][ind_k]
                avg_elasticity[plan_j-1][plan_k-1] =  (avg_elasticity[plan_j-1][plan_k-1]
                                                        /lenobs)
                
    return avg_elasticity
                
avg_elasticity = comp_avg_elasticity(data, v, delta, theta1, theta2)
np.savetxt("elasticity.csv", avg_elasticity, delimiter=",")
print avg_elasticity

[[-1.49732294  0.12728626  0.41411709  0.3095944   0.1547665   0.19754667
   0.32067288  0.21322011  0.32678837  0.12193248  0.16520995  0.37841886
   0.26466345  0.17790976  0.3734235   0.16959571]
 [ 0.12728626 -0.97735135  0.16983297  0.12965351  0.14514568  0.18799143
   0.15438375  0.21077964  0.12676017  0.11262117  0.16367497  0.14683126
   0.12068152  0.1752993   0.16399192  0.17361678]
 [ 0.41411709  0.16983297 -1.57401919  0.31138511  0.16701607  0.21493282
   0.35596671  0.19919582  0.34985842  0.12428661  0.17683102  0.37231915
   0.35281195  0.18890545  0.39982836  0.20715252]
 [ 0.3095944   0.12965351  0.31138511 -1.29677679  0.13771118  0.19184232
   0.33664351  0.16112531  0.31228311  0.10450438  0.1511304   0.33852241
   0.23736973  0.14399064  0.3507113   0.15561161]
 [ 0.1547665   0.14514568  0.16701607  0.13771118 -1.01576912  0.1846128
   0.15649563  0.17806099  0.12636066  0.12903282  0.13680736  0.14563459
   0.13895067  0.19364788  0.17484246  0.20545552]
 [ 0.1

In [96]:
print avg_elasticity

[[-1.49732294  0.12728626  0.41411709  0.3095944   0.1547665   0.19754667
   0.32067288  0.21322011  0.32678837  0.12193248  0.16520995  0.37841886
   0.26466345  0.17790976  0.3734235   0.16959571]
 [ 0.12728626 -0.97735135  0.16983297  0.12965351  0.14514568  0.18799143
   0.15438375  0.21077964  0.12676017  0.11262117  0.16367497  0.14683126
   0.12068152  0.1752993   0.16399192  0.17361678]
 [ 0.41411709  0.16983297 -1.57401919  0.31138511  0.16701607  0.21493282
   0.35596671  0.19919582  0.34985842  0.12428661  0.17683102  0.37231915
   0.35281195  0.18890545  0.39982836  0.20715252]
 [ 0.3095944   0.12965351  0.31138511 -1.29677679  0.13771118  0.19184232
   0.33664351  0.16112531  0.31228311  0.10450438  0.1511304   0.33852241
   0.23736973  0.14399064  0.3507113   0.15561161]
 [ 0.1547665   0.14514568  0.16701607  0.13771118 -1.01576912  0.1846128
   0.15649563  0.17806099  0.12636066  0.12903282  0.13680736  0.14563459
   0.13895067  0.19364788  0.17484246  0.20545552]
 [ 0.1

# Part 3 - Marginal Costs

# Part 4 - Counterfactuals

In [None]:
#will need to compute the change in delta based on the difference in old and new prices
#multiply by alpha
#i.e. delta_new = delta_old - alpha*(p_new - p_old)
#cal_s(data, v, delta, theta2)