In [1]:
import pandas as pd
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
def comp_outside_good(data,name):
    """Append an 'Outside Good Share' column to ``data``.

    The values in column ``name`` are summed within each 'Market_ID'; the
    outside good share of a market is one minus that sum. The market-level
    value is then joined back onto every row of that market.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Market_ID' and ``name``.
    name : str
        Column holding the inside good shares.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the extra column
        'Outside Good Share'.
    """
    # total inside share per market, indexed by Market_ID
    market_totals = data[['Market_ID',name]].copy().groupby('Market_ID').sum()
    market_totals['Outside Good Share'] = 1 - market_totals[name]

    # broadcast the market-level value back to the row level
    outside_share = market_totals[['Outside Good Share']]
    return pd.merge(data, outside_share,
                    right_index=True, left_on = 'Market_ID')


# Load the raw data set from data.csv (path is relative to the notebook's
# working directory), then append the 'Outside Good Share' column: one minus
# the sum of 'Inside Good Share' within each Market_ID.
data = pd.read_csv('data.csv')
data = comp_outside_good(data,'Inside Good Share')

In [3]:
#calculate Hausman instruments
# Market and plan indicator columns; drop_first=True omits the first level of
# each so the dummies are not collinear with a constant.
# NOTE(review): the plan dummies are assigned as the "Hausman" instrument set
# below and mkt_dum is not used in the cells visible here -- confirm that plan
# fixed effects (rather than e.g. prices in other markets) are intended.
mkt_dum = pd.get_dummies(data['Market_ID'],prefix='mkt',drop_first=True)
plan_dum = pd.get_dummies(data['Plan_ID'],prefix='plan',drop_first=True)
hausman_instr = plan_dum

#set up x and y
# y: inside and outside good share columns
# x: the three plan characteristics plus Premium
y = data[['Inside Good Share','Outside Good Share']]
x =  data[['Network Score','Satisfaction Score','PPO','Premium']]

# Calculating $\delta$

In [101]:
#set up useful global variables
NMKTS = data['Market_ID'].nunique()
NPLANS = data['Plan_ID'].nunique()
# One observation per row of data. len() is used instead of
# data['Plan_ID'].count() because count() skips missing values, which would
# make v and delta shorter than the characteristics taken from data in cal_s.
NOBS = len(data)
NSIM = 20   # number of simulated consumers
SEED = 0    # fixed seed so the simulation draws are reproducible on re-run


#set up variables for testing purposes
# A dedicated RandomState keeps the draws reproducible without touching
# numpy's global random state.
rng = np.random.RandomState(SEED)
v = rng.normal(size=(NSIM,3,NOBS)) #initialize v: one draw per (simulation, characteristic, observation)
theta2 = np.array([3,2,1]) # initialize theta2 for testing purposes
delta = np.ones(NOBS)*(-2)

#print global variables
# %-formatting prints the same text under Python 2 and Python 3
print('%s %s %s' % (NMKTS, NPLANS, NOBS))

600 16 3300


In [105]:
def cal_s(data, delta, theta2, v):
    """Calculate simulated market shares.

    For every simulation draw r and observation j the utility index is
        delta_j + sum_k theta2_k * v[r, k, j] * x[k, j]
    where x holds the columns 'Network Score', 'Satisfaction Score' and 'PPO'.
    The individual choice probability is exp(index) divided by one plus the
    sum of exp(index) over all observations in the same market (the 1 is the
    outside good). The share is the average of these probabilities over draws.

    All sizes are taken from ``v`` instead of module-level constants, and
    market ids are matched by position, so the result does not depend on the
    index of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns 'Market_ID', 'Network Score',
        'Satisfaction Score' and 'PPO'; one row per observation.
    delta : array-like, length nobs
        Mean utility of each observation.
    theta2 : array-like, length 3
        Scale applied to the random draw for each characteristic.
    v : array-like, shape (nsim, 3, nobs)
        Simulation draws.

    Returns
    -------
    numpy.ndarray, shape (nobs,)
        Simulated share of each observation within its market.

    Raises
    ------
    ValueError
        If the shapes of the inputs are not mutually consistent.
    """
    v = np.asarray(v, dtype=float)
    if v.ndim != 3:
        raise ValueError('v must have shape (nsim, ncoef, nobs)')
    nsim, ncoef, nobs = v.shape

    # characteristics as (ncoef, nobs) so they broadcast against v
    x = np.asarray(data[['Network Score','Satisfaction Score','PPO']], dtype=float).T
    coef = np.asarray(theta2, dtype=float).reshape(-1, 1)
    delta = np.asarray(delta, dtype=float).ravel()
    if x.shape != (ncoef, nobs) or coef.shape[0] != ncoef or delta.shape[0] != nobs:
        raise ValueError('data, delta, theta2 and v have inconsistent shapes')

    # exp(utility) for every (draw, observation)
    mu = (coef * v * x).sum(axis=1)
    sim_exp = np.exp(delta + mu)

    # sum exp(utility) within each market, separately for every draw;
    # mkt_idx maps each observation to the position of its market
    markets, mkt_idx = np.unique(np.asarray(data['Market_ID']), return_inverse=True)
    mkt_tot = np.zeros((len(markets), nsim))
    np.add.at(mkt_tot, mkt_idx, sim_exp.T)

    # expand the market totals back to observation level; +1 is the outside good
    sum_exp = 1. + mkt_tot[mkt_idx].T

    # average the choice probabilities over the simulation draws
    return (sim_exp / sum_exp).mean(axis=0)


# Smoke test: simulate shares at the test values of delta, theta2 and v
s = cal_s(data, delta, theta2, v)

# %-formatting prints the same text under Python 2 and Python 3
print('%s %s %s %s' % (s.mean(), s.min(), s.max(), s.shape))

(3300,)
0.14130278997238038 0.00512337974030539 0.5121225474022729 (3300,)


In [None]:
def cal_delta(v, x, sigma, error = 1e-2, max_iter = 10000):
    """Calculate mean utility via contraction mapping

    Repeats the update
        delta <- delta + ln(observed share) - ln(cal_s(x, delta, sigma, v))
    until the change is small. The observed shares are read from the
    'Inside Good Share' column of ``x``. The iteration starts from the plain
    logit value ln(inside share) - ln(outside share), where the outside share
    of a market is one minus the sum of its inside shares.

    Parameters
    ----------
    v : array-like, shape (nsim, 3, nobs)
        Simulation draws, passed through to ``cal_s``.
    x : pandas.DataFrame
        The frame ``cal_s`` takes as its first argument; it must also
        contain the column 'Inside Good Share'.
    sigma : array-like, length 3
        Non-linear parameters, passed to ``cal_s`` as ``theta2``.
    error : float, optional
        Tolerance on the absolute update. Iteration continues while both the
        maximum and the mean absolute update exceed it (since max >= mean this
        is governed by the mean).
    max_iter : int, optional
        Upper bound on the number of iterations.

    Returns
    -------
    numpy.ndarray, shape (nobs,)
        Mean utilities at which simulated and observed shares agree up to
        the tolerance.

    Raises
    ------
    Exception
        If the update contains NaN.
    RuntimeError
        If the tolerance is not reached within ``max_iter`` iterations.
    """
    ln_s_jt = np.log(np.asarray(x['Inside Good Share'], dtype=float))

    # logit starting value: ln(s_j) - ln(s_0), s_0 = 1 - sum of inside shares
    inside_total = x.groupby('Market_ID')['Inside Good Share'].transform('sum')
    delta = ln_s_jt - np.log(1. - np.asarray(inside_total, dtype=float))

    niter = 0

    while True:
        s = cal_s(x, delta, sigma, v)
        diff = ln_s_jt - np.log(s)

        if np.isnan(diff).sum():
            raise Exception('nan in diffs')

        delta += diff
        niter += 1

        # same stopping rule as the loop condition this replaces
        if not ((abs(diff).max() > error) and (abs(diff).mean() > error)):
            break
        if niter >= max_iter:
            raise RuntimeError('contraction mapping did not converge in %s iterations' % max_iter)

    print('contraction mapping finished in %s iterations'%niter)

    return delta

In [None]:
def _cal_theta1_and_xi(delta):
    """Calculate theta1 and xi with F.O.C"""
    X1, Z, Z_X1, LinvW = self.X1, self.Z, self.Z_X1, self.LinvW
        
    # Z'δ
    Z_delta = Z.T @ delta.flatten()

    #\[ \theta_1 = (\tilde{X}'ZW^{-1}Z'\tilde{X})^{-1}\tilde{X}'ZW^{-1}Z'\delta \]
    # θ1 from FOC
    theta1 = self.theta1 = solve(Z_X1.T @ cho_solve(LinvW, Z_X1),
                             Z_X1.T @ cho_solve(LinvW, Z_delta))

    xi = delta.flatten() - X1 @ theta1

return theta1, xi

# Calculating $\theta_1$ , $\theta_2$

$\theta_1$ = $(\alpha, \beta)$ and $\theta_2$ = $\Sigma$ as in Nevo

As per Nevo (2000), I must solve for $\theta_1$ as a function of $\theta_2$. This way I can do GMM only over $\theta_2$, the non-linear parameters.