In [1]:
import pandas as pd
import numpy as np
import numpy.matlib
import matplotlib.pyplot as plt
import pdb
import scipy
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from scipy.optimize import basinhopping
from tqdm.notebook import tqdm
import math
from utils import *

In [2]:
""" 
Obtaining data from a given expt
"""
# Stimulus table: its 'Name' and 'Tones' columns are looked up in a later cell.
csv_test = pd.read_csv(
    '../auditory_categorization_longLow/important_things_not_included_in_assets/allTrials.csv'
)

# Session log of one participant: its 'Name', 'corrAns' and 'test_resp.keys'
# columns are read in a later cell.
csv_data = pd.read_csv(
    'auditory_categorization_lc_online_data/auditory_categorization_v2_119865_2021-06-11_19h04.17_d1d63a32-cae7-11eb-abb1-ac1f6b405aea/5fad0bb914cb0035f917d619_categorization_task_longLow_2021-03-29_23h07.31.656.csv'
)
                       

In [3]:
n_tones = 3
n_trials = csv_data.shape[0] - 47

# Get tones and values of keys pressed.
# The stimulus table gives, for each stimulus name, a bracketed comma-separated
# string of tone values; only its first 800 rows are used.
test_columns = list(csv_test.columns)
test_tones_name = test_columns.index('Name')
test_tones_col_idx = test_columns.index('Tones')
df_names = csv_test.iloc[0:800, test_tones_name].values
df_tones = csv_test.iloc[0:800, test_tones_col_idx].values

tones_array_orig = np.zeros((n_trials, n_tones))
tones_array_idxs_keep = []

# Session rows 46 .. 46+803 are scanned; rows whose 'Name' is not a string are skipped.
# NOTE(review): the 46 / 47 / 804 / 800 offsets are specific to this session file - confirm
# before reusing the cell on another participant.
for i_wav in range(804):
    wav_name = csv_data['Name'][i_wav + 46]
    if not isinstance(wav_name, str):
        continue
    matching_rows = np.where(wav_name == df_names)[0]
    # First matching stimulus; strip the surrounding brackets before splitting on commas.
    tones_text = df_tones[matching_rows][0]
    tones_array_orig[i_wav, :] = np.array(tones_text[1:-1].split(',')).astype(float)
    tones_array_idxs_keep.append(i_wav)

# Keep only the rows that carried a stimulus name, for tones, correct answers and key presses alike.
df_tones = np.copy(tones_array_orig[tones_array_idxs_keep, :])
df_corrans = np.copy(csv_data['corrAns'][46:csv_data.shape[0]])[tones_array_idxs_keep]
df_keys = np.copy(csv_data['test_resp.keys'][46:csv_data.shape[0]])[tones_array_idxs_keep]

In [4]:
"""
Find no response cases in the expt
"""
# A trial counts as "no response" when the recorded key is neither 'h' nor 'l'.
not_high = np.flatnonzero(df_keys != 'h')
not_low = np.flatnonzero(df_keys != 'l')
no_response = np.intersect1d(not_high, not_low)
print("Did not respond to: ",no_response)

# Convert keys ['l','h'] to [0,1]: every entry starts at 0 and 'h' entries are set to 1.
corrans_num_orig = np.zeros_like(df_corrans)
corrans_num_orig[df_corrans == 'h'] = 1

keys_num_orig = np.zeros_like(df_keys)
keys_num_orig[df_keys == 'h'] = 1

# Only the first 800 kept trials are analysed.
corrans_num, keys_num, tones_array = corrans_num_orig[:800], keys_num_orig[:800], df_tones[:800]

# Overall accuracy, then accuracy within each correct-answer category.
n_matching = np.sum(keys_num==corrans_num)
high_hits = np.sum((keys_num)*(corrans_num))
low_hits = np.sum((1-keys_num)*(1-corrans_num))
print("Got correct: ", n_matching/len(tones_array))
print("Got high correct: ", high_hits/np.sum(corrans_num))
print("Got low correct: ", low_hits/np.sum(1-corrans_num))

"""
Subsample the long context expt with a high category bias
"""
def subsampleLongContext(trial_behaviour_full, corrans_full, trial_tones_full, rng=None):
    """
    Balance the two categories by randomly subsampling the more frequent one.

    Trials whose correct answer is 1 ("high") and 0 ("low") are located in
    `corrans_full`. The larger of the two groups is subsampled without
    replacement down to the size of the smaller one, so the returned trials
    contain equally many high and low trials. (Previously the low group was
    always the one subsampled, which raised a ValueError whenever the high
    group was the larger one.)

    Parameters
    ----------
    trial_behaviour_full : array-like, one entry per trial
        Observer responses.
    corrans_full : array-like, one entry per trial
        Correct answers coded as 1 (high) / 0 (low).
    trial_tones_full : 2-D array-like, one row per trial
        Tones presented on each trial.
    rng : numpy.random.Generator, optional
        Source of randomness for a reproducible draw. When omitted the global
        `np.random` state is used, exactly as before.

    Returns
    -------
    trial_tones_expt, trial_behaviour_expt
        Tones and responses of the kept trials: all kept high trials first,
        followed by the kept low trials.
    """
    trial_behaviour_full = np.asarray(trial_behaviour_full)
    corrans_full = np.asarray(corrans_full)
    trial_tones_full = np.asarray(trial_tones_full)

    idxHigh = np.flatnonzero(corrans_full == 1)
    idxLow = np.flatnonzero(corrans_full == 0)

    choose = np.random.choice if rng is None else rng.choice
    if len(idxLow) >= len(idxHigh):
        # Original code path: low is the (weakly) larger category.
        idxLow = choose(idxLow, size=len(idxHigh), replace=False)
    else:
        # High is the larger category: subsample it instead of failing.
        idxHigh = choose(idxHigh, size=len(idxLow), replace=False)

    idxToKeep = np.concatenate((idxHigh, idxLow))
    trial_behaviour_expt = trial_behaviour_full[idxToKeep]
    trial_tones_expt = trial_tones_full[idxToKeep, :]
    return trial_tones_expt, trial_behaviour_expt

Did not respond to:  []
Got correct:  0.82125
Got high correct:  0.6779661016949152
Got low correct:  0.8812056737588653


In [5]:
# this has been changed to check how values change with observer responses

# Candidate true tones (integers 90 .. 2999).
expt_tones = np.arange(90, 3000, 1)

# The latent-tone grid and the perceptual-tone grid share the same spacing
# (0.6 up to, but excluding, 4.7 in steps of 0.1) but are kept as separate arrays.
log_freq_seq_array = np.arange(0.6, 4.7, 0.1)
log_freq_percept = np.arange(0.6, 4.7, 0.1)

# Drop the trials flagged in `no_response`, then select tones, responses and
# correct answers for the remaining trials.
all_trial_idxs = np.arange(len(tones_array))
idxs_with_response = np.delete(all_trial_idxs, no_response)
trialTones = tones_array[idxs_with_response, :]
trialBehaviour, corrAns = keys_num[idxs_with_response], corrans_num[idxs_with_response]


In [None]:
"""
New optimization algorithm: uses scipy.optimize.fmin. 
Crude grid initially and then find minimum using the function.
"""

# Candidate values for each model parameter; single-element arrays pin that parameter.
guess_low_mean = np.array([2.55])
guess_high_mean = np.array([2.85])
guess_sigma = np.arange(0.05, 1, 0.15)
guess_sensory_sigma = np.array([0.24])
guess_p_back = np.arange(0.05, 1, 0.1)
guess_p_low = np.arange(0.1, 1.05, 0.1)

# Constraining guesses of means of low and high distributions based on observed behaviour in the entire dataset.
for iSubsampled in range(4):
    # Each pass draws a fresh category-balanced subsample of the trials.
    trialTones_subsampled, trialBehaviour_subsampled = subsampleLongContext(
        trial_behaviour_full=trialBehaviour,
        corrans_full=corrAns,
        trial_tones_full=trialTones,
    )
    print(trialTones_subsampled.shape, trialBehaviour_subsampled.shape)

    # One cell per parameter combination, axes ordered as
    # (low mean, high mean, sigma, sensory sigma, p_back, p_low).
    neg_ll_array = np.zeros((len(guess_low_mean), len(guess_high_mean), len(guess_sigma),
                             len(guess_sensory_sigma), len(guess_p_back), len(guess_p_low)))

    for lm, low_mean in enumerate(guess_low_mean):
        for hm, high_mean in enumerate(guess_high_mean):
            for s in tqdm(range(len(guess_sigma))):
                for ss, sensory_sigma in enumerate(guess_sensory_sigma):
                    for pb in tqdm(range(len(guess_p_back)), leave=False, desc="prob back"):
                        for pl, p_low in enumerate(guess_p_low):
                            params = [low_mean, high_mean, guess_sigma[s],
                                      sensory_sigma, guess_p_back[pb], p_low]
                            neg_ll_array[lm, hm, s, ss, pb, pl] = MLE(
                                params,
                                trial_tones=trialTones_subsampled,
                                trial_behaviour=trialBehaviour_subsampled,
                            )

    # Parameter values at the smallest grid entry; ties yield one column per tied cell.
    idxs = np.where(neg_ll_array == np.amin(neg_ll_array))
    grids = (guess_low_mean, guess_high_mean, guess_sigma,
             guess_sensory_sigma, guess_p_back, guess_p_low)
    best_thetas = np.array([grid[idx] for grid, idx in zip(grids, idxs)])

    print(best_thetas)

In [None]:
# define mle function
def MLE_fmin(params, trial_tones=None, trial_behaviour=None):
    """
    Negative log-likelihood of the observed choices for one parameter vector.

    Parameters
    ----------
    params : sequence of 6 floats
        (log_prior_low_mean, log_prior_high_mean, log_prior_sigma,
        sigma_sensory, prob_back, prob_low), in that order.
    trial_tones : array-like, one row of three tones per trial, optional
        Tones presented on each trial; log10 is applied to them here.
        Defaults to the notebook-level `trialTones`.
    trial_behaviour : array-like, one entry per trial, optional
        Observer responses; truthy means the "high" choice.
        Defaults to the notebook-level `trialBehaviour`.

    Returns
    -------
    float
        Sum over trials of -log(p(observed choice) + 1e-7), or `np.inf` when a
        trial's log-likelihood is not finite, so that an optimiser treats the
        parameter vector as infeasible.

    Notes
    -----
    Reads the notebook-level grids `log_freq_seq_array` and `log_freq_percept`
    and the helpers `posterior_array` and `Tones3dgrid`, none of which are
    defined in this cell.
    """
    # The data used to be read from globals `trial_tones` / `trial_behaviour`,
    # which no cell of this notebook defines; fall back to the arrays it does define.
    if trial_tones is None:
        trial_tones = trialTones
    if trial_behaviour is None:
        trial_behaviour = trialBehaviour

    log_prior_low_mean, log_prior_high_mean, log_prior_sigma, sigma_sensory, prob_back, prob_low = \
    params[:6] # inputs are guesses at our parameters

    _,_,LikelihoodLatentTonegivenHigh,LikelihoodLatentTonegivenLow,_,_ = \
    posterior_array(log_freq_seq_array, len(trial_tones[0]), p_back=prob_back, p_low=prob_low,\
                    log_prior=[log_prior_low_mean,log_prior_high_mean,log_prior_sigma])

    n_percept = len(log_freq_percept)
    LikelihoodPerceptgivenHigh = np.zeros((n_percept, n_percept, n_percept))
    LikelihoodPerceptgivenLow = np.zeros((n_percept, n_percept, n_percept))

    # Accumulate over every latent tone triplet, weighting the grid returned by
    # Tones3dgrid by the latent likelihood under each category.
    # np.ndindex visits the triplets in the same order as three nested loops.
    n_latent = len(log_freq_seq_array)
    for itrue1, itrue2, itrue3 in np.ndindex(n_latent, n_latent, n_latent):
        probPerceptgivenLatentTones = Tones3dgrid([log_freq_seq_array[itrue1],\
                                                   log_freq_seq_array[itrue2],\
                                                   log_freq_seq_array[itrue3]],sigma=sigma_sensory)
        LikelihoodPerceptgivenHigh \
        += probPerceptgivenLatentTones * LikelihoodLatentTonegivenHigh[itrue1,itrue2,itrue3]
        LikelihoodPerceptgivenLow \
        += probPerceptgivenLatentTones * LikelihoodLatentTonegivenLow[itrue1,itrue2,itrue3]

    probHighgivenPercept = LikelihoodPerceptgivenHigh*(1-prob_low)/\
    (LikelihoodPerceptgivenHigh*(1-prob_low) + LikelihoodPerceptgivenLow*(prob_low))

    # Grid cells where "high" is the more probable category; the same for every trial.
    choosesHighgivenPercept = probHighgivenPercept > 0.5

    epsilon = 0.0000001 # keeps log() away from zero
    neg_ll = 0
    for i_trial in range(len(trial_tones)):
        input_array_mat = Tones3dgrid(np.array([np.log10(trial_tones[i_trial][0]),\
                                               np.log10(trial_tones[i_trial][1]),
                                               np.log10(trial_tones[i_trial][2])]),sigma=sigma_sensory)
        probability_high0 = np.sum(np.multiply(choosesHighgivenPercept,input_array_mat))

        if trial_behaviour[i_trial]:
            log_likelihood = np.log(probability_high0 + epsilon) # if high dist is chosen by observer
        else:
            log_likelihood = np.log(1 - probability_high0 + epsilon) # if low dist is chosen by observer

        if not np.isfinite(log_likelihood):
            # This used to drop into pdb, which hangs an unattended optimiser run.
            print(params, np.inf)
            return np.inf
        neg_ll += -log_likelihood
    print(params, neg_ll)
    return(neg_ll)

# Downhill-simplex search started from a hand-picked point, ordered as MLE_fmin unpacks it:
# (low mean, high mean, prior sigma, sensory sigma, prob_back, prob_low).
initial_params = [2.55,2.85,0.05,0.15,0.65,0.65]

# Despite its name, `minimum_nll` receives the parameter vector returned by fmin,
# not the value of the objective.
minimum_nll = scipy.optimize.fmin(
    MLE_fmin,
    initial_params,
    xtol=0.01,
    ftol=0.01,
    maxiter=10000,
    maxfun=10000,
)

print(minimum_nll)

In [None]:

    
def _mean_by_tone_position(values, tones, tone_values):
    """
    Average `values` over the trials showing each tone at each position.

    Returns an array of shape (tones.shape[1], len(tone_values)); entry [p, k]
    is the mean of `values` over the trials whose tone at position p equals
    tone_values[k], or NaN when no trial has that tone at that position.
    """
    values = np.asarray(values, dtype=float).ravel()
    means = np.full((tones.shape[1], len(tone_values)), np.nan)
    for i_position in range(tones.shape[1]):
        for i_tone, tone in enumerate(tone_values):
            shown_here = tones[:, i_position] == tone
            if shown_here.any():
                means[i_position, i_tone] = values[shown_here].mean()
    return means


# This cell used to read `trial_tones` / `trial_behaviour`, which no cell of this
# notebook defines; it now uses the arrays built earlier (trials with a response).
unique_tones = np.unique(trialTones)
log_unique_tones = np.log10(unique_tones)

# Observed behaviour: fraction of "high" responses per tone, for each tone position.
tone1_prob_behaviour, tone2_prob_behaviour, tone3_prob_behaviour = \
    _mean_by_tone_position(trialBehaviour, trialTones, unique_tones)

influence1, = plt.plot(log_unique_tones, tone1_prob_behaviour, label = 'Influence of Tone 1')
influence2, = plt.plot(log_unique_tones, tone2_prob_behaviour, label = 'Influence of Tone 2')
influence3, = plt.plot(log_unique_tones, tone3_prob_behaviour, label = 'Influence of Tone 3')
influence, = plt.plot(log_unique_tones, (tone1_prob_behaviour+tone2_prob_behaviour+tone3_prob_behaviour)/3,
                       'k', label = 'Average Influence')

# Model prediction for a fixed parameter vector, on the same trials.
# NOTE(review): the grid-search cell stores MLE's return value directly in an array
# element, whereas a pair is unpacked here - confirm which signature utils.MLE has.
_, probability_high = MLE([2.55,2.85,0.05,0.15,0.45,0.6],
                          trial_tones=trialTones,
                          trial_behaviour=trialBehaviour)

# presumably probability_high holds one value per trial - TODO confirm
tone1_prob_behaviour, tone2_prob_behaviour, tone3_prob_behaviour = \
    _mean_by_tone_position(probability_high, trialTones, unique_tones)

mnll_influence, = plt.plot(log_unique_tones,
                           (tone1_prob_behaviour+tone2_prob_behaviour+tone3_prob_behaviour)/3,'k.',
                          label = 'p(B_H|T) given fmin parameters')

plt.xlim([1.9,3.6])
plt.ylim([-0.2,1.1])
plt.xlabel('log10(Tones)')
plt.ylabel('p(B_H|T)')
# The curves carry labels, so show them.
plt.legend();
# plt.savefig('figures/long_context/experimenter=nate_categorization_task_long_2021-01-18_16h48.56.916.png')