In [90]:
import numpy as np
import pandas as pd
from scipy.stats import norm, chi2

In [None]:
# #preallocate image matrices for choices
# #This pertains to estimating covariance matrices of the error differences
# #See Train book on discrete choice analysis p 113
# #"This matrix can be used to transform the covariance matrix of
# #errors into the covariance matrix of error differences: ~Ωi = MiΩMi.T .
# temp = np.identity(Jm-1)
# M = np.empty((Jm, Jm-1, 12))
# for i in range(1, Jm+1):
#     M[i-1] = np.concatenate((temp[:,0:i-1], -1*np.ones((Jm-1,1)), temp[:, i-1:]), axis=1)

# #Matrices for only the chosen options
# Mi=M[y-1]



In [2]:
def DivisiveNormalization(theta, data):
    denom = theta[0] + np.multiply(theta[1], np.linalg.norm(data, theta[2], 1))
    v=np.divide(data.T, denom)
    
    return v

In [3]:
def calcPiProbitQuad(Mi, v):
    
    MiT=np.transpose(Mi, axes=(0,2,1))
    T=v.shape[0]
    [x, w] = np.polynomial.hermite.hermgauss(100)

    #I honestly don't really know how tensordot works, but these lines of code return the correct values
    c = np.tensordot(MiT,v, axes=([1,0]))
    cT=np.transpose(c, axes=(0,2,1))
    vi = cT.diagonal() #This matches vi in MATLAB for s=1, trials 8,10,14
    
    #first part of equation in ProbaChoice.m, line 242
    z1=np.multiply(-2**0.5, vi)

    #second part of equation in ProbaChoice.m, line 242
    z2=np.multiply(-2**0.5, x)

    #These values have been validated
    zz = [z1-ele for ele in z2]

    aa=np.prod(norm.cdf(zz), axis=1)
    #Pi have been validated
    Pi=np.divide(np.sum(np.multiply(w.reshape(100,1), aa), axis=0), np.pi**0.5)
    
    return Pi
    

In [25]:
def calcPiAll(theta, data):
    
    v=DivisiveNormalization(theta, data)
    
    probs = np.empty(data.shape)
    #get the size of the choice array. Choice arrays must be the same size
    Jm=data.shape[1]
    temp = np.identity(Jm-1)
    M = np.empty((Jm, Jm-1, Jm))


    for i in range(Jm):
        M[i] = np.concatenate((temp[:,0:i], -1*np.ones((Jm-1,1)), temp[:, i:]), axis=1)
    
    for i in range(Jm):
        y=np.array([i]*data.shape[0])
        #print(y)
        
        #Matrices for only the chosen options
        Mi=M[y]
        #print(Mi)
        
        pi=calcPiProbitQuad(Mi,v)
        probs[:,i]=pi.T
        #print("pi is {}".format(pi))
    return probs

In [33]:
# ###CONFIRMATORY ANALYSIS TO TEST MATCH WITH WEBB DATA
# #create a vector of the WTP values. These values are from data(1).X, cells 8, 10, and 14
# d = np.array([
#              [4, 2.33, 1.875, 1.8, 1.5, 1.495, 1.335, 1.275, 1.125, 1.09, 1, 0.925],
#              [2.125, 2.125, 2.025, 2.0, 1.875, 1.495, 1.485, 1.335, 1.275, 1.075, 1.0, 0.625],
#              [4.0, 2.17,  2.0, 2.0, 1.875, 1.875, 1.5, 1.485, 1.335, 1.275, 1.09, 1.075],
#              ])

# # #These are the chosen options for s=1, on trials 8, 10,14
# y=np.array([4,3,2])

# # #Choice set size for trials 8,10,14 for subject 1
# Jm=12

# # #sigma, omega(w), beta
# theta = [0.0000, 0.2376, 0.9739]
# temp = np.identity(Jm-1)
# M = np.empty((Jm, Jm-1, 12))
# for i in range(1, Jm+1):
#     M[i-1] = np.concatenate((temp[:,0:i-1], -1*np.ones((Jm-1,1)), temp[:, i-1:]), axis=1)

# #Matrices for only the chosen options
# Mi=M[y-1]

# #This result has been spot checked against the values returned by the MATLAB code for data(1).X, cells 8, 10, and 14 
# v=DivisiveNormalization(theta=theta, data=d)
# pi=calcPiProbitQuad(Mi,v)

# print("probs for chosen options only: {}".format(pi))
# print(pi) 
# #[0.08327671 0.10499576 0.09305649]

# probs=calcPiAll(theta=theta,data=d)
# print("probs for all options: {}".format(probs))

probs for chosen options only: [0.08327671 0.10499576 0.09305649]
[0.08327671 0.10499576 0.09305649]
probs for all only: [[0.22598222 0.10790416 0.08645194 0.08327671 0.07151711 0.0713334
  0.06565703 0.06362727 0.05877966 0.05769393 0.05497855 0.05279801]
 [0.11051148 0.11051148 0.10499576 0.10365229 0.09714261 0.07936819
  0.07893925 0.07273064 0.0703627  0.06292494 0.06030962 0.04855103]
 [0.19882952 0.09305649 0.08619875 0.08619875 0.08142488 0.08142488
  0.06838878 0.06790556 0.06322691 0.06143192 0.0561619  0.05575166]]


# Simulation of user choices
We simulate 100,000 trials of each of the 3 choice sets and use the values yielded by the `DivisiveNormalization` method + a random noise vector and check that the choice probabilities are roughly in line with the analytic probabilities from `calcPiProbitQuad`.

In [None]:
#def chose_item_dn(d, num_it=1000, theta = [0.0000, 0.2376, 0.9739])
freq_chosen = np.array([0., 0., 0.])
num_it = 100000
v = DivisiveNormalization(theta=theta, data=d)
# the following covariance matrix has the structure
# [ 1    0.5    ...    0.5 ]
# [ 0.5    1    ...    0.5 ]
# [ 0.5   ...    1    0.5  ]
# [ 0.5   0.5   ...    1   ]
cov = np.ones((12, 12)) * 0.5
cov[np.arange(12), np.arange(12)] = 1
mean = np.zeros(12)
for i in range(num_it):
    eps = np.random.multivariate_normal(mean, cov, size=3).T
    u = v + eps
    item_chosen = (u.argmax(axis=0) == (y-1)).astype(float)
    freq_chosen += item_chosen / num_it
    
print(freq_chosen)

###Steps to Computing a Power Analysis Given an Experimental Design and value of theta
1. Read in scores into correct np array format
2. Chose the item given its normalized value 
3. Calculate the probability of the chosen item

In [35]:
#Load data from Bollen et al., 2010
choice = pd.read_csv('/Users/amywinecoff/Documents/CITP/Research/Github/AgentChoiceSim/co1_wide.csv')  

#for now, remove the conditions with 5 options so I can figure out the code for a fixed set size
choice = choice[~choice['condition'].isin(['Top5', 'Top5_NR'])]

score_cols = [c for c in choice.columns if 'score' in c]
choice_set_vals = choice[score_cols]
choice_set_vals.head()

Unnamed: 0,score1,score2,score3,score4,score5,score6,score7,score8,score9,score10,score11,score12,score13,score14,score15,score16,score17,score18,score19,score20
0,39,38,38,38,38,38.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0
1,42,42,42,42,41,38.0,37.0,36.0,35.0,35.0,34.0,33.0,33.0,33.0,32.0,32.0,31.0,31.0,30.0,30.0
2,41,40,40,40,40,39.0,39.0,38.0,38.0,38.0,37.0,37.0,37.0,37.0,36.0,36.0,36.0,36.0,35.0,35.0
3,36,36,35,35,35,34.0,33.0,33.0,33.0,33.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,31.0
5,41,40,40,40,39,39.0,39.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0


In [75]:
d = choice_set_vals.values
#sigma, omega(w), beta
#theta_h1 = [0.0000, 0.2376, 0.9739]
probs=calcPiAll(theta=t, data=data)

In [71]:
#def chose_item_dn(d, theta = [0.0000, 0.2376, 0.9739]):
def chose_item(theta, data):
    probs=calcPiAll(theta=t, data=data)
    num_subj = data.shape[0]
    Jm = data.shape[1]

    v = DivisiveNormalization(theta=theta, data=data)
    # the following covariance matrix has the structure
    # [ 1    0.5    ...    0.5 ]
    # [ 0.5    1    ...    0.5 ]
    # [ 0.5   ...    1    0.5  ]
    # [ 0.5   0.5   ...    1   ]


    cov = np.ones((Jm, Jm)) * 0.5
    cov[np.arange(Jm), np.arange(Jm)] = 1
    mean = np.zeros(Jm)
    #for i in range(num_it):
    eps = np.random.multivariate_normal(mean, cov, size=num_subj).T
    u = v + eps
    item_chosen = u.argmax(axis=0)
    
    return item_chosen


In [108]:
def calcModelLL(theta, data, null_theta=None):
    """Calculates the log likelikihood given theta values for a DN model. If a null model is being tested,
    it will chose the item based on the alternative model, then calculate the probability of that choice, and the 
    log-likelihood given both the alternative model and the null model
    """
    
    probs=calcPiAll(theta=theta, data=data)
    item_chosen = chose_item(theta=theta, data=data) 
    chosen_probs=probs[np.arange(len(probs)), item_chosen]
    LL = sum(np.log(chosen_probs))
    
    if null_theta:
        null_probs=calcPiAll(theta=null_theta, data=data)
        print ("probs = {}".format(probs))
        print("null probs = {}".format(null_probs))
        
        null_chosen_probs = null_probs[np.arange(len(null_probs)), item_chosen]
        null_LL = sum(np.log(null_chosen_probs))
    else:
        null_LL = None
    
    return LL, null_LL
    

In [112]:
d = np.array([
             [4, 2.33, 1.875, 1.8, 1.5, 1.495, 1.335, 1.275, 1.125, 1.09, 1, 0.925],              
             [2.125, 2.125, 2.025, 2.0, 1.875, 1.495, 1.485, 1.335, 1.275, 1.075, 1.0, 0.625],
             [4.0, 2.17,  2.0, 2.0, 1.875, 1.875, 1.5, 1.485, 1.335, 1.275, 1.09, 1.075],
            ])
#omega allowed to vary. Set to value in Webb et al., 2020
theta_h1 = [1.0, 0.117, 1.0]
#This is the null model that tests that omega != 0
theta_h0 = [theta_h1[0], 0, theta_h1[2]]

LLs = calcModelLL(theta=theta_h1, data=d, null_theta=theta_h0)
#LL_h1 = calcModelLL(theta=theta_h1, data=d)

print("LL for H0 model: {}".format(LLs[1]))
print("LL for H1 model: {}".format(LLs[0]))
#print(LL_h1)#-362.68216377703664
LR = 2*(LLs[0]-LLs[1])
print(LR)

probs = [[0.32985897 0.11449658 0.08209691 0.07756162 0.06143207 0.06118892
  0.05382024 0.05125535 0.04528739 0.04398234 0.04077111 0.0382485 ]
 [0.12380303 0.12380303 0.11493993 0.11280617 0.10261233 0.0761239
  0.07551136 0.06680006 0.0635569  0.05367533 0.05031713 0.03605083]
 [0.28918523 0.09370881 0.08321797 0.08321797 0.07613528 0.07613528
  0.05780435 0.05715537 0.05099314 0.04868912 0.04212757 0.04162992]]
null probs = [[9.22470487e-01 4.18771039e-02 1.21180406e-02 9.67530341e-03
  3.69071153e-03 3.62868785e-03 2.07640104e-03 1.67066330e-03
  9.51148813e-04 8.30564401e-04 5.81853928e-04 4.29034489e-04]
 [2.01554075e-01 2.01554075e-01 1.62888511e-01 1.54208737e-01
  1.16177500e-01 4.43032576e-02 4.30970980e-02 2.80738795e-02
  2.34664057e-02 1.24843355e-02 9.71954358e-03 2.47258265e-03]
 [9.08367381e-01 2.59686382e-02 1.61349974e-02 1.61349974e-02
  1.11460319e-02 1.11460319e-02 3.29765966e-03 3.12981537e-03
  1.82768064e-03 1.46205606e-03 7.13187583e-04 6.71522502e-04]]
LL for

In [106]:
d = np.array([
             [4, 2.33, 1.875, 1.8, 1.5, 1.495, 1.335, 1.275, 1.125, 1.09, 1, 0.925],              
             [2.125, 2.125, 2.025, 2.0, 1.875, 1.495, 1.485, 1.335, 1.275, 1.075, 1.0, 0.625],
             [4.0, 2.17,  2.0, 2.0, 1.875, 1.875, 1.5, 1.485, 1.335, 1.275, 1.09, 1.075],
            ])
#omega allowed to vary. Set to value in Webb et al., 2020
theta_h1 = [1.0, 0.117, 1.0]
#This is the null model that tests that omega != 0
theta_h0 = [theta_h1[0], 0, theta_h1[2]]

LL_h0 = calcModelLL(theta=theta_h0, data=d)
LL_h1 = calcModelLL(theta=theta_h1, data=d)

print("LL for H0 model: {}".format(LL_h0))
print("LL for H1 model: {}".format(LL_h1))
#print(LL_h1)#-362.68216377703664
LR = 2*(LL_h1-LL_h0)
print(LR)

LL for H0 model: -1.9914955662023088
LL for H1 model: -6.730723593084446
-9.478456053764274


In [104]:
LR = 2*(LL_h1-LL_h0)
print(LR)

-4.933738252024667


In [93]:
p=1 - chi2.cdf(LR, 1)

1.9731752898266564e-09

In [None]:
# save as Python
#!jupyter nbconvert --to script DivisiveNormalization.ipynb