In [1]:
import numpy as np
import pandas as pd
from sympy.solvers import solve
from sympy import Symbol
import scipy.stats as sstats

from utils import SimulateData

In [70]:
def generate_exgauss_sampler_from_fit(data,
                                      default_sample_size=100000):
    """Fit an ex-Gaussian to ``data`` and return a sampler of non-negative draws.

    The fit uses ``scipy.stats.exponnorm.fit``. The returned function draws
    the sum of an exponential and a normal variate plus ``loc`` and re-draws
    every value below zero until none remain, so its output follows the
    fitted distribution truncated at zero.

    Parameters
    ----------
    data : array-like
        Observations the ex-Gaussian is fitted to.
    default_sample_size : int, optional
        Number of draws the returned sampler produces when called without
        an explicit ``sample_size``.

    Returns
    -------
    callable
        ``sample_exgauss(sample_size, beta, scale, loc)`` returning a NumPy
        array of ``sample_size`` values, all ``>= 0``. ``beta``, ``scale``
        and ``loc`` default to the fitted values.
    """
    FIT_K, FIT_LOC, FIT_SCALE = sstats.exponnorm.fit(data)
    # scipy's shape parameter is K = 1 / (scale * lambda); convert it to the
    # mean of the exponential component, beta = 1 / lambda.
    FIT_LAMBDA = 1/(FIT_K*FIT_SCALE)
    FIT_BETA = 1/FIT_LAMBDA

    def sample_exgauss(sample_size=default_sample_size,
                       beta=FIT_BETA, scale=FIT_SCALE, loc=FIT_LOC):
        """Draw ``sample_size`` ex-Gaussian values, rejecting negatives."""

        def draw(size):
            # Exponential first, then normal: same draw order as a plain
            # exponential + normal sum so seeded runs stay reproducible.
            exp_out = np.random.exponential(scale=beta, size=size)
            norm_out = np.random.normal(scale=scale, size=size)
            return (exp_out + norm_out) + loc

        out = draw(sample_size)
        negative = out < 0
        n_negatives = np.sum(negative)
        # Iterative rejection sampling. A recursive re-draw would add one
        # stack frame per round and can exceed the recursion limit when
        # most of the distribution's mass lies below zero.
        while n_negatives > 0:
            out[negative] = draw(n_negatives)
            negative = out < 0
            n_negatives = np.sum(negative)
        return out

    return sample_exgauss

In [71]:
# NOTE(review): SSD0_RTs is only assigned in the "GET ABCD INFO" cell further
# down (In [10]); on a fresh kernel that cell must be run before this one.
sampler = generate_exgauss_sampler_from_fit(SSD0_RTs)

In [72]:
samples = sampler(1000000)

In [73]:
samples[samples < 0]

array([], dtype=float64)

In [57]:
# Show draws that are exactly zero (the sampler only re-draws values < 0).
# NOTE(review): the recorded output comes from execution 57, before the
# sampler definition was last run (In [70]); re-run to confirm it still holds.
samples[samples == 0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [56]:
len(samples)

1000000

In [10]:
# GET ABCD INFO
# Load the trial-level ABCD data (path is relative to the notebook).
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

# Distinct, non-missing SSDDur values up to 550, in ascending order.
SSDs = sorted(
    ssd for ssd in abcd_data.SSDDur.unique()
    if not pd.isna(ssd) and ssd <= 550
)

# Per-subject mean choice accuracy on rows with correct_stop == 0.0, one
# Series per SSD. The column is selected *before* aggregating so that other
# (possibly non-numeric) columns are never averaged.
accuracy_columns = []
for ssd in SSDs:
    curr_means = (
        abcd_data.query("SSDDur == @ssd and correct_stop == 0.0")
        .groupby('NARGUID')['choice_accuracy']
        .mean()
    )
    curr_means.name = ssd
    accuracy_columns.append(curr_means)

# Per-subject mean choice accuracy on go trials, stored under the label -1.
go_accs = (
    abcd_data.query(
        "trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0']"
    )
    .groupby('NARGUID')['choice_accuracy']
    .mean()
)
go_accs.name = -1
accuracy_columns.append(go_accs)

# Assemble the subject x condition table in one concat (axis passed by
# keyword; growing a frame inside the loop is quadratic).
acc_per_SSD = pd.concat(accuracy_columns, axis=1, sort=True)

# For each SSD, solve p * guess_mean + (1 - p) * go_mean == observed mean,
# where guess_mean is the group mean accuracy at SSD 0 and go_mean the group
# mean accuracy on go trials.
p = Symbol('p')
mean_acc = acc_per_SSD.mean()  # column means, computed once
guess_mean = mean_acc.loc[0.0]
go_mean = mean_acc.loc[-1]
p_guess_per_SSD = []
for ssd in SSDs:
    curr_mean = mean_acc.loc[ssd]
    solution = solve(p*guess_mean + (1-p)*go_mean - curr_mean, p)
    assert len(solution) == 1
    p_guess_per_SSD.append(solution[0])

print(p_guess_per_SSD)
# stop_rt_adjusted values at SSD 0 with correct_stop == 0.0 are used to fit
# the ex-Gaussian sampler.
SSD0_RTs = abcd_data.query(
    "SSDDur == 0.0 and correct_stop==0.0"
    ).stop_rt_adjusted.values
sample_exgauss = generate_exgauss_sampler_from_fit(SSD0_RTs)

# One simulator per model variant; each key is also written to the output
# frame's 'simulation' column.
simulator_dict = dict(
    vanilla=SimulateData(),
    guesses=SimulateData(guesses=True),
    graded_mu_go_log=SimulateData(mu_go_grader='log'),
    graded_mu_go_linear=SimulateData(mu_go_grader='linear'),
)

# Empty frame per variant (same keys, same order as the simulators).
group_data_dict = {variant: pd.DataFrame() for variant in simulator_dict}

# Settings handed to SimulateData.simulate.
params = dict(
    n_trials_stop=1000,
    n_trials_go=1000,
    SSDs=SSDs,
    guess_function=sample_exgauss,
    p_guess_stop=p_guess_per_SSD,
)

# Only the 'guesses' variant is simulated here.
for sim_key in ('guesses',):
    simulator = simulator_dict[sim_key]
    data = simulator.simulate(params)
    data['simulation'] = sim_key
#     data.to_csv('%s/individual_%s.csv' % (args.out_dir, sim_key))

[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
yes
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
{0.0: 1000.0, 50.0: 841.0, 100.0: 605.0, 150.0: 451.0, 200.0: 291.0, 250.0: 206.0, 300.0: 148.0, 350.0: 86.0, 400.0: 66.0, 450.0: 37.0, 500.0: 30.0, 550.0: 9.0}


In [11]:
data

Unnamed: 0,condition,SSD,trial_idx,mu_go,mu_stop,accum_go,accum_stop,process_go,process_stop,block,goRT,stopRT,simulation
0,stop,50.0,0,0.2,0.6,,,[],[],0,,688.173581,guesses
1,stop,50.0,1,0.2,0.6,,,[],[],0,,684.762458,guesses
2,stop,50.0,2,0.2,0.6,,,[],[],0,,299.840332,guesses
3,stop,50.0,3,0.2,0.6,,,[],[],0,,836.079608,guesses
4,stop,50.0,4,0.2,0.6,,,[],[],0,,183.100805,guesses
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11995,go,,995,0.2,0.6,101.745857,0.0,"[0, 0, 0, 0.44133700774542284, 1.8156672518203...",[],0,684.0,,guesses
11996,go,,996,0.2,0.6,100.870597,0.0,"[0, 0.13085407730374804, 3.0805312322340885, 1...",[],0,760.0,,guesses
11997,go,,997,0.2,0.6,100.755438,0.0,"[0.10283707391950415, 0.8494489035770241, 0.80...",[],0,480.0,,guesses
11998,go,,998,0.2,0.6,100.789571,0.0,"[0, 0.2321899597883101, 0.2392811163456252, 1....",[],0,541.0,,guesses


In [8]:
# NOTE(review): the recorded output (13) is from execution 8, before the
# In [10] run whose printed p_guess_per_SSD has 12 entries; re-run to refresh.
len(p_guess_per_SSD)

13

In [9]:
acc_per_SSD

Unnamed: 0,0.0,50.0,100.0,150.0,200.0,250.0,300.0,350.0,400.0,450.0,500.0,550.0,-1.0
003RTV85,,,,,1.000000,1.000000,0.90,0.8,0.8,0.666667,,,0.936842
00CY2MDM,0.818182,0.833333,0.60,0.666667,0.666667,,,,,,,,0.852113
00HEV6HB,0.312500,0.500000,0.00,1.000000,1.000000,1.000000,1.00,1.0,,,,,0.851211
00LJVZK2,0.428571,0.666667,0.00,1.000000,0.500000,1.000000,0.00,0.5,0.0,1.000000,1.0,,0.682692
00NPMHND,,,,,,,,,1.0,1.000000,1.0,0.5,0.854626
...,...,...,...,...,...,...,...,...,...,...,...,...,...
hkfm1ruj,,0.000000,0.50,,1.000000,1.000000,0.00,0.5,1.0,1.000000,1.0,1.0,0.912409
hx1ru4hv,0.600000,0.666667,0.25,0.250000,0.750000,0.750000,0.00,,,,,,0.117450
jf8w3pw6,,1.000000,1.00,0.714286,0.875000,1.000000,1.00,1.0,,,,,0.945017
x8k59,,0.000000,0.50,0.500000,0.500000,0.833333,0.75,1.0,1.0,,,,0.937716


In [31]:
def get_p_resp_per_SSD(data, ssds=None):
    """Return, per SSD, the fraction of rows in ``data`` with correct_stop == 0.0.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to summarise (e.g. one subject's group from ``groupby``); must
        have ``SSDDur`` and ``correct_stop`` columns.
    ssds : iterable, optional
        SSD values to report. Defaults to the notebook-level ``SSDs`` list.

    Returns
    -------
    dict
        Maps each SSD to ``(# rows with correct_stop == 0.0) / (# rows)``
        among the rows of ``data`` at that SSD, or ``np.nan`` when ``data``
        has no rows at that SSD.
    """
    if ssds is None:
        ssds = SSDs
    out_dict = {}
    for ssd in ssds:
        # Filter the frame that was passed in, not the full global table,
        # so groupby(...).apply yields per-group rates.
        at_ssd = data['SSDDur'] == ssd
        n_trials = int(at_ssd.sum())
        if n_trials == 0:
            out_dict[ssd] = np.nan
        else:
            n_responses = int((data.loc[at_ssd, 'correct_stop'] == 0.0).sum())
            out_dict[ssd] = n_responses / n_trials
    return out_dict


In [32]:
ssd_resp_df = abcd_data.groupby('NARGUID').apply(get_p_resp_per_SSD)

KeyboardInterrupt: 

In [None]:
ssd_resp_df

In [None]:
# df['b'].apply(pd.Series)