In [1]:
import numpy as np
import pandas as pd
from sympy.solvers import solve
from sympy import Symbol
import scipy.stats as sstats

from utils import SimulateData

In [70]:
def generate_exgauss_sampler_from_fit(data,
                                      default_sample_size=100000):
    """Fit an ex-Gaussian to ``data`` and return a sampler of non-negative draws.

    Parameters
    ----------
    data : array-like
        Observations passed to ``scipy.stats.exponnorm.fit``.
    default_sample_size : int, optional
        Number of samples the returned function draws when called without
        an explicit ``sample_size``.

    Returns
    -------
    callable
        ``sample_exgauss(sample_size, beta, scale, loc)``; ``beta``, ``scale``
        and ``loc`` default to the fitted values. It returns an array of
        exponential + normal + ``loc`` draws in which every value below zero
        has been redrawn until none remain.
    """
    FIT_K, FIT_LOC, FIT_SCALE = sstats.exponnorm.fit(data)
    # scipy parameterises exponnorm with K = 1 / (scale * lambda), so the mean
    # of the exponential component is beta = 1 / lambda = K * scale.
    FIT_BETA = FIT_K * FIT_SCALE

    def sample_exgauss(sample_size=default_sample_size,
                       beta=FIT_BETA, scale=FIT_SCALE, loc=FIT_LOC):
        def draw(n):
            exp_out = np.random.exponential(scale=beta, size=n)
            norm_out = np.random.normal(scale=scale, size=n)
            return (exp_out + norm_out) + loc

        out = draw(sample_size)
        negative = out < 0
        n_negatives = int(np.count_nonzero(negative))
        # Redraw only the negative entries, iteratively. The previous version
        # recursed once per redraw round, which could exhaust the recursion
        # limit when negatives are common.
        # NOTE: this loops forever if the parameters make a non-negative draw
        # impossible (e.g. beta == 0, scale == 0 and loc < 0).
        while n_negatives > 0:
            out[negative] = draw(n_negatives)
            negative = out < 0
            n_negatives = int(np.count_nonzero(negative))
        return out

    return sample_exgauss

In [71]:
# Build a sampler from an ex-Gaussian fit to SSD0_RTs.
# NOTE(review): SSD0_RTs is assigned in a later cell of this notebook, so on a
# fresh kernel this cell fails unless that cell has been run first.
sampler = generate_exgauss_sampler_from_fit(SSD0_RTs)

In [72]:
# Draw 1,000,000 values from the fitted sampler.
samples = sampler(1000000)

In [73]:
# Sanity check: show any negative samples (expected to be empty).
samples[samples < 0]

array([], dtype=float64)

In [57]:
# Show samples that are exactly zero.
# NOTE(review): this cell's execution count (57) is lower than that of the
# sampler definition (70), so the recorded output may come from an earlier
# version of the sampler - re-run to confirm.
samples[samples == 0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [56]:
# Confirm how many samples were drawn.
len(samples)

1000000

In [10]:
# GET ABCD INFO
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

# SSD values to analyse: drop NaN (i == i is False for NaN) and values > 550.
SSDs = [i for i in abcd_data.SSDDur.unique() if i == i and i <= 550]
SSDs.sort()

# Per-NARGUID mean choice_accuracy on rows with correct_stop == 0.0, one
# Series per SSD. The column is selected before averaging so that unrelated
# (possibly non-numeric) columns are never aggregated.
acc_columns = []
for ssd in SSDs:
    curr_means = abcd_data.query(
        "SSDDur == %s and correct_stop==0.0" % ssd
    ).groupby('NARGUID')['choice_accuracy'].mean()
    curr_means.name = ssd
    acc_columns.append(curr_means)

# Per-NARGUID mean choice_accuracy on go trials, stored under the label -1.
go_accs = abcd_data.query(
        "trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0']"
    ).groupby('NARGUID')['choice_accuracy'].mean()
go_accs.name = -1
acc_columns.append(go_accs)

# Concatenate once, with `axis` passed by keyword: positional `axis` is
# rejected by recent pandas, and concatenating inside the loop is quadratic.
acc_per_SSD = pd.concat(acc_columns, axis=1, sort=True)

# For each SSD, solve p*guess_mean + (1-p)*go_mean == mean accuracy at that
# SSD for the mixing weight p. Accuracy at SSD 0.0 is used as guess_mean and
# go-trial accuracy as go_mean.
p = Symbol('p')
mean_acc = acc_per_SSD.mean()  # computed once instead of on every iteration
guess_mean = mean_acc.loc[0.0]
go_mean = mean_acc.loc[-1]  # .loc keeps this a label lookup, not positional
p_guess_per_SSD = []
for ssd in SSDs:
    curr_mean = mean_acc.loc[ssd]
    solution = solve(p*guess_mean + (1-p)*go_mean - curr_mean, p)
    assert len(solution) == 1
    p_guess_per_SSD.append(solution[0])

print(p_guess_per_SSD)

# Fit the guess-RT sampler to stop_rt_adjusted on SSD 0.0 rows with
# correct_stop == 0.0.
SSD0_RTs = abcd_data.query(
    "SSDDur == 0.0 and correct_stop==0.0"
    ).stop_rt_adjusted.values
sample_exgauss = generate_exgauss_sampler_from_fit(SSD0_RTs)

simulator_dict = {
    'vanilla': SimulateData(),
    'guesses': SimulateData(guesses=True),
    'graded_mu_go_log': SimulateData(mu_go_grader='log'),
    'graded_mu_go_linear': SimulateData(mu_go_grader='linear')
}

# One empty frame per simulator, keyed identically to simulator_dict.
group_data_dict = {sim_name: pd.DataFrame() for sim_name in simulator_dict}

params = {
    'n_trials_stop': 1000,
    'n_trials_go': 1000,
    'SSDs': SSDs,
    'guess_function': sample_exgauss,
    'p_guess_stop': p_guess_per_SSD,
}

# Only the 'guesses' simulator is exercised in this cell.
for sim_key in ['guesses']:
    data = simulator_dict[sim_key].simulate(params)
    data['simulation'] = sim_key
#     data.to_csv('%s/individual_%s.csv' % (args.out_dir, sim_key))

[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
yes
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
{0.0: 1000.0, 50.0: 841.0, 100.0: 605.0, 150.0: 451.0, 200.0: 291.0, 250.0: 206.0, 300.0: 148.0, 350.0: 86.0, 400.0: 66.0, 450.0: 37.0, 500.0: 30.0, 550.0: 9.0}


In [82]:
def get_p_resp_per_SSD(data, ssds=None):
    """Return, for each SSD, the fraction of rows with ``correct_stop == 0.0``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``SSDDur`` and ``correct_stop`` columns. It is not modified.
    ssds : iterable, optional
        SSD values to report. Defaults to the notebook-level ``SSDs`` list,
        which keeps single-argument calls (e.g. through ``groupby().apply``)
        working as before.

    Returns
    -------
    dict
        Maps each SSD to ``(# rows with correct_stop == 0.0) / (# rows)`` among
        rows where ``SSDDur`` equals that SSD, or to ``np.nan`` when no row has
        that SSD.
    """
    if ssds is None:
        ssds = SSDs
    out_dict = {}
    for ssd in ssds:
        # Boolean masks avoid building and parsing a query string per SSD and
        # make a defensive copy of the input unnecessary.
        curr_data = data[data['SSDDur'] == ssd]
        if len(curr_data) == 0:
            out_dict[ssd] = np.nan
        else:
            n_resp = int((curr_data['correct_stop'] == 0.0).sum())
            out_dict[ssd] = n_resp / len(curr_data)
    return out_dict


In [93]:
# Proportion of rows with correct_stop == 0.0 at each SSD, computed over all
# rows of abcd_data pooled together.
pd.Series(get_p_resp_per_SSD(abcd_data))

0.0      0.513487
50.0     0.393573
100.0    0.374880
150.0    0.398952
200.0    0.432870
250.0    0.487436
300.0    0.518291
350.0    0.560510
400.0    0.574502
450.0    0.604146
500.0    0.593429
550.0    0.599864
dtype: float64

In [84]:
# Compute the per-SSD proportions separately for each NARGUID group; the
# result is a Series holding one dict per group.
ssd_resp_dict = abcd_data.groupby('NARGUID').apply(get_p_resp_per_SSD)
# Expand each dict into a row, giving one column per SSD.
# NOTE(review): Series.apply(pd.Series) is typically slow on large inputs;
# consider building the frame from the list of dicts if this becomes a bottleneck.
ssd_resp_df = ssd_resp_dict.apply(pd.Series)

In [95]:
# Average the per-NARGUID proportions column-wise (one value per SSD) and
# display the result as a one-column frame.
pd.DataFrame(ssd_resp_df.mean())

Unnamed: 0,0
0.0,0.344903
50.0,0.210295
100.0,0.240136
150.0,0.306626
200.0,0.383921
250.0,0.474563
300.0,0.53908
350.0,0.601356
400.0,0.625307
450.0,0.659756


In [96]:
# Number of distinct NARGUID values in the dataset.
len(abcd_data.NARGUID.unique())

6793

# TESTING GUESS SIMULATION

In [104]:
import numpy as np
import pandas as pd
import argparse
from sympy.solvers import solve
from sympy import Symbol
import scipy.stats as sstats

from utils import SimulateData

In [105]:
def get_args(argv=None):
    """Build the option namespace for the ABCD simulations.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse. When omitted, an empty list is parsed so
        that only the defaults are used and the notebook kernel's own
        command-line arguments are ignored.

    Returns
    -------
    argparse.Namespace
        Namespace with ``n_trials`` (int), ``abcd_dir`` and ``out_dir``.
    """
    parser = argparse.ArgumentParser(description='ABCD data simulations')
    # type=int so that a supplied value is not left as a string.
    parser.add_argument('--n_trials', default=1500, type=int,
                        help='number of trials to simulate')
    parser.add_argument('--abcd_dir', default='./abcd_data',
                        help='location of ABCD data')
    parser.add_argument('--out_dir', default='./simulated_data',
                        help='location to save simulated data')
    args = parser.parse_args([] if argv is None else argv)
    return args


def generate_exgauss_sampler_from_fit(data,
                                      default_sample_size=100000):
    """Fit an ex-Gaussian to ``data`` and return a sampler of non-negative draws.

    Parameters
    ----------
    data : array-like
        Observations passed to ``scipy.stats.exponnorm.fit``.
    default_sample_size : int, optional
        Number of samples the returned function draws when called without
        an explicit ``sample_size``.

    Returns
    -------
    callable
        ``sample_exgauss(sample_size, beta, scale, loc)``; ``beta``, ``scale``
        and ``loc`` default to the fitted values. It returns an array of
        exponential + normal + ``loc`` draws. Values below zero are redrawn
        until none remain, so this definition behaves like the earlier
        definition of the same name in this notebook instead of silently
        replacing it with a sampler that can return negative values.
    """
    FIT_K, FIT_LOC, FIT_SCALE = sstats.exponnorm.fit(data)
    # scipy parameterises exponnorm with K = 1 / (scale * lambda), so the mean
    # of the exponential component is beta = 1 / lambda = K * scale.
    FIT_BETA = FIT_K * FIT_SCALE

    def sample_exgauss(sample_size=default_sample_size,
                       beta=FIT_BETA, scale=FIT_SCALE, loc=FIT_LOC):
        def draw(n):
            exp_out = np.random.exponential(scale=beta, size=n)
            norm_out = np.random.normal(scale=scale, size=n)
            return (exp_out + norm_out) + loc

        out = draw(sample_size)
        negative = out < 0
        n_negatives = int(np.count_nonzero(negative))
        # Redraw only the negative entries, iteratively rather than recursively.
        # NOTE: this loops forever if the parameters make a non-negative draw
        # impossible (e.g. beta == 0, scale == 0 and loc < 0).
        while n_negatives > 0:
            out[negative] = draw(n_negatives)
            negative = out < 0
            n_negatives = int(np.count_nonzero(negative))
        return out

    return sample_exgauss

In [108]:
print('getting args')
args = get_args()
print('analyzing ABCD info')
# GET ABCD INFO
abcd_data = pd.read_csv('%s/minimal_abcd_no_issue_3.csv' % args.abcd_dir)

# SSD values to analyse: drop NaN (i == i is False for NaN) and values > 550.
SSDs = [i for i in abcd_data.SSDDur.unique() if i == i and i <= 550]
SSDs.sort()

# Per-NARGUID mean choice_accuracy on rows with correct_stop == 0.0, one
# Series per SSD. The column is selected before averaging so that unrelated
# (possibly non-numeric) columns are never aggregated.
acc_columns = []
for ssd in SSDs:
    curr_means = abcd_data.query(
        "SSDDur == %s and correct_stop==0.0" % ssd
    ).groupby('NARGUID')['choice_accuracy'].mean()
    curr_means.name = ssd
    acc_columns.append(curr_means)

# Per-NARGUID mean choice_accuracy on go trials, stored under the label -1.
go_accs = abcd_data.query(
        "trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0']"
    ).groupby('NARGUID')['choice_accuracy'].mean()
go_accs.name = -1
acc_columns.append(go_accs)

# Concatenate once, with `axis` passed by keyword: positional `axis` is
# rejected by recent pandas, and concatenating inside the loop is quadratic.
acc_per_SSD = pd.concat(acc_columns, axis=1, sort=True)

# For each SSD, solve p*guess_mean + (1-p)*go_mean == mean accuracy at that
# SSD for the mixing weight p. Accuracy at SSD 0.0 is used as guess_mean and
# go-trial accuracy as go_mean.
p = Symbol('p')
mean_acc = acc_per_SSD.mean()  # computed once instead of on every iteration
guess_mean = mean_acc.loc[0.0]
go_mean = mean_acc.loc[-1]  # .loc keeps this a label lookup, not positional
p_guess_per_SSD = []
for ssd in SSDs:
    curr_mean = mean_acc.loc[ssd]
    solution = solve(p*guess_mean + (1-p)*go_mean - curr_mean, p)
    assert len(solution) == 1
    p_guess_per_SSD.append(solution[0])
print(p_guess_per_SSD)

# Fit the guess-RT sampler to stop_rt_adjusted on SSD 0.0 rows with
# correct_stop == 0.0.
SSD0_RTs = abcd_data.query(
    "SSDDur == 0.0 and correct_stop==0.0"
    ).stop_rt_adjusted.values
sample_exgauss = generate_exgauss_sampler_from_fit(SSD0_RTs)

simulator_dict = {
    'vanilla': SimulateData(),
    'guesses': SimulateData(guesses=True),
    'graded_mu_go_log': SimulateData(mu_go_grader='log'),
    'graded_mu_go_linear': SimulateData(mu_go_grader='linear')
}

# One empty frame per simulator, keyed identically to simulator_dict.
group_data_dict = {sim_name: pd.DataFrame() for sim_name in simulator_dict}

params = {
    'n_trials_stop': args.n_trials,
    'n_trials_go': args.n_trials,
    'SSDs': SSDs,
    'guess_function': sample_exgauss,
    'p_guess_stop': p_guess_per_SSD,
}

# Only the 'guesses' simulator is exercised in this cell.
for sim_key in ['guesses']:
    print(sim_key)
    data = simulator_dict[sim_key].simulate(params)
    data['simulation'] = sim_key
    # NOTE: the save below is commented out, so nothing is written to disk
    # even though 'saving...' is printed.
    print('saving...')
#         data.to_csv('%s/individual_%s.csv' % (args.out_dir, sim_key))

getting args
analyzing ABCD info
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
guesses
yes
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
[1.00000000000000, 0.841267812256568, 0.604685133548690, 0.450800885199760, 0.291362614587630, 0.205845982758886, 0.147778040621424, 0.0857490996232271, 0.0658960847076383, 0.0370359052900360, 0.0303846371645836, 0.00860486486964472]
{0.0: 1500.0, 50.0: 1262.0, 100.0: 907.0, 150.0: 676.0, 200.0: 437.0, 250.0: 309.0, 300.0: 222.0, 350.0: 129.0, 400.0: 99.0, 450.0: 56.0, 500.0: 46.0, 550.0: 13.0}
saving...


In [111]:
# Inspect the simulated trials whose SSD is 50.0.
data.query("SSD==50.0")

Unnamed: 0,condition,SSD,trial_idx,mu_go,mu_stop,accum_go,accum_stop,process_go,process_stop,block,goRT,stopRT,simulation
0,stop,50.0,0,0.2,0.6,,,[],[],0,,202.202577,guesses
1,stop,50.0,1,0.2,0.6,,,[],[],0,,275.200977,guesses
2,stop,50.0,2,0.2,0.6,,,[],[],0,,456.770620,guesses
3,stop,50.0,3,0.2,0.6,,,[],[],0,,253.327825,guesses
4,stop,50.0,4,0.2,0.6,,,[],[],0,,521.525767,guesses
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,stop,50.0,1495,0.2,0.6,10.679133,100.273711,"[0.5673035899106609, 0, 0.30826128569107714, 0...","[0, 0.750977158291771, 0.8981248892668345, 0, ...",0,,,guesses
1496,stop,50.0,1496,0.2,0.6,24.882351,101.599535,"[0.9843643655053149, 0.33593119173464125, 0.89...","[0, 0, 0, 3.012509470790272, 2.677342463771506...",0,,,guesses
1497,stop,50.0,1497,0.2,0.6,61.536959,104.759231,"[0.3401571146813035, 1.289355657176547, 2.2310...","[0, 0, 0, 0.5157108511110892, 1.22796319720649...",0,,,guesses
1498,stop,50.0,1498,0.2,0.6,101.554378,96.640610,"[0.09304239665300443, 0.6200415867752247, 0.23...","[0.7805308822739458, 0, 0, 0, 2.18724898538441...",0,,360.000000,guesses


In [110]:
# Display the sorted SSD values that were passed to the simulation.
SSDs

[0.0,
 50.0,
 100.0,
 150.0,
 200.0,
 250.0,
 300.0,
 350.0,
 400.0,
 450.0,
 500.0,
 550.0]