In [4]:
import pandas as pd
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM

In [5]:
def comp_outside_good(data,name):
    #pre-processing to calculate outside good shares
    shares = data[['Market_ID',name]].copy()

    group_shares = shares.groupby('Market_ID').sum()
    group_shares['Outside Good Share'] = 1 - group_shares[name]

    data = pd.merge(data,group_shares[['Outside Good Share']], 
                right_index=True, left_on = 'Market_ID')
    return data

data = pd.read_csv('data.csv')
data = comp_outside_good(data,'Inside Good Share')

In [6]:
#calculate hausmann insturments
mkt_dum = pd.get_dummies(data['Market_ID'],prefix='mkt',drop_first=True)
plan_dum = pd.get_dummies(data['Plan_ID'],prefix='plan',drop_first=True)
hausman_instr = plan_dum

#set up x and y
y = data[['Inside Good Share','Outside Good Share']]
x =  data[['Network Score','Satisfaction Score','PPO','Premium']]

# Calculating $\delta$, $\theta_1$, $\xi_{jt}$

Where $\theta_1$ = $(\alpha,\beta)$

In [7]:
#set up useful global variables 
NMKTS = data['Market_ID'].nunique()
NPLANS = data['Plan_ID'].nunique()
NOBS = data['Plan_ID'].count()
NSIM = 20


#set up variables for testing purposes
v = np.random.normal(size=(NSIM,3,NOBS)) #initilize v
theta2 = np.array([3,2,1]) # initialize theta2 for testing purposes
delta = np.ones(NOBS)*(-2)

#print global variables
print NMKTS,NPLANS,NOBS

600 16 3300


In [8]:
def cal_s(data, delta, theta2, v):
    """Calculate market share
    Calculates individual choice probability first, then take sum"""
    
    #copy x and delta for simulations using tiling
    x =  np.array(data.copy()[['Network Score','Satisfaction Score','PPO']]).transpose()
    x  = np.tile(x,(NSIM,1,1))
    theta2 = np.tile( np.array([theta2]).transpose()  ,(NSIM,1,3300))
    delta  = np.tile( delta  ,(NSIM,1))
    
    #add to calcualte market shares
    sim_exp = pd.DataFrame( np.exp(delta + (theta2*v*x).sum(axis=1)).transpose() ) 
    
    #sum up between markets
    sim_exp['mkt_id'] = data['Market_ID']
    sum_exp = sim_exp.groupby('mkt_id').sum() 
    sum_exp = pd.merge(data.copy()[['Market_ID']], sum_exp, 
                       right_index=True, left_on = 'Market_ID')
    
    #format so I can broadcast
    sim_exp = np.array(sim_exp).transpose()[:-1]
    sum_exp = np.array(sum_exp).transpose()[1:] + 1
    
    shares = (1./NSIM)*(sim_exp/sum_exp).sum(axis=0)
    return shares


s = cal_s(data, delta, theta2, v)

print s.mean(), s.min(), s.max(), s.shape

0.14153897501497079 0.007304715780626936 0.41106840513086723 (3300,)


In [31]:
def cal_delta(v, data, theta2, error = 1e-3, maxiter = 500):
    """Calculate mean utility via contraction mapping"""

    niter = 0
    
    #initialize loop parameters
    delta = np.zeros(NOBS)
    s = cal_s(data, delta, theta2, v)
    diff = np.log(data['Inside Good Share']) - np.log(s)
    
    
    while ((abs(diff).max() > 1e-6) #this is easier to converge
           and (abs(diff).mean() > error) 
           and niter < maxiter):
        
        s = cal_s(data, delta, theta2, v)
        diff = np.log(data['Inside Good Share']) - np.log(s)

        if np.isnan(diff).sum():
            raise Exception('nan in diffs')
            
        delta += diff
        niter += 1

    print 'contraction mapping finished in %s iterations'%niter
    return delta


delta = cal_delta(v, data, theta2)
print delta.shape
print delta.mean()

contraction mapping finished in 60 iterations
(3300,)
-1.38897021979


In [53]:
#initialize theta1 i.e. alpha and beta
theta1 = np.array([3,2,1,-2])

def cal_xi(x, delta, theta1):
    """Calculate xi with F.O.C"""
    xi = delta - np.matmul(np.array(x),theta1)
    return  xi


xi = cal_xi( x, delta,  theta1)
print xi.max(), xi.min(), xi.mean()

2.3413866387945883 -5.694802053604215 -1.2500750993


# Calculating  $\theta_2$


Use GMM over $\theta_2$, the non-linear parameters. 

In [43]:
def cal_theta1(X, Z, T, delta):
    """  calculate theta 1 using FOCs (X1'Z T Z'X )^-1 X1'Z T Z' delta """
    
    #build up to main equation
    XtZ = X.transpose().dot(Z)
    ZtX = Z.transpose().dot(X)
        
    first_exp = np.linalg.inv( XtZ.dot(T).dot(ZtX))
    second_exp = XtZ.dot(T).dot(Z.transpose()).dot(delta)
    theta1 = first_exp.dot(second_exp)
      
    return theta1


Z = np.array(hausman_instr)
T =  np.linalg.inv( Z.transpose().dot(Z) )
print cal_theta1(x, Z ,T ,delta)

[ 4.87267722  3.41441127  1.46799071 -3.7460133 ]


In [58]:
def gmm_objective(theta2_init, v, data, x, Z, T):
    delta = cal_delta(v, data, theta2_init)
    theta1 = cal_theta1(x, Z ,T, delta)
    xi = cal_xi( x, delta,  theta1)
    return xi.dot(Z).dot(T).dot(Z.transpose()).dot(xi)
    #Z'ww'Z of theta2


def calc_theta2(theta2, x, Z, T, delta):
    theta2 = minimize(gmm_objective, theta2_init, args=(v, data, x, Z, T))

print gmm_objective(np.array([1,3,1]),v, data,x,Z,T)

print calc_theta2(np.array([1,3,1]), x, Z, T, delta)

contraction mapping finished in 54 iterations
10.51735926026008
contraction mapping finished in 60 iterations
contraction mapping finished in 60 iterations
contraction mapping finished in 60 iterations
contraction mapping finished in 60 iterations
contraction mapping finished in 60 iterations
contraction mapping finished in 49 iterations
contraction mapping finished in 49 iterations
contraction mapping finished in 49 iterations
contraction mapping finished in 49 iterations
contraction mapping finished in 49 iterations
contraction mapping finished in 18 iterations
contraction mapping finished in 18 iterations
contraction mapping finished in 18 iterations
contraction mapping finished in 18 iterations
contraction mapping finished in 18 iterations
contraction mapping finished in 17 iterations
contraction mapping finished in 17 iterations
contraction mapping finished in 17 iterations
contraction mapping finished in 17 iterations
contraction mapping finished in 17 iterations
contraction mapp

KeyboardInterrupt: 

In [34]:
def calc_theta( data, v, x, Z, theta1_init= theta1, theta2_init=theta2):
    delta = cal_delta(v, data, theta2_init)
    
    for i in range(2):
        
        X1 , Z = np.array(x), np.array(Z) 
        
        #on first step, use consistent approximation of T
        if i==0:
            
            
        #on second step use estimated T using xi
        if i==1:
            xi = cal_xi(x, delta, theta1, theta2)
            xi =np.array([xi]).transpose()
            T =  np.linalg.inv( Z.transpose().dot(xi).dot(xi.transpose()).dot(Z) )
    
    
    delta = cal_delta(v, data, theta2)
    xi = cal_xi(x, delta, theta1, theta2)
    theta1 = cal_theta1(x, z, delta, theta1_init, theta2_init)
    calc_theta2(theta1, theta2_init, data, v )
    
    
    return theta1, theta2


calc_theta2( data, v, theta1_init = theta1, theta2_init=theta2)

contraction mapping finished in 60 iterations


NameError: global name 'z' is not defined