In [1]:
import numpy as np
import pandas as pd
from sympy.solvers import solve
from sympy import Symbol
import scipy.stats as sstats

from utils import SimulateData

In [2]:
# Simulate go trials only, for a single stop-signal delay (SSD) value.
n_trials = 1000
SSD = 700
simulator = SimulateData()
# Start from the parameter dict the simulator builds for an empty override
# dict, then request n_trials stop trials and n_trials go trials.
# NOTE(review): the underscore-prefixed methods are private helpers of
# SimulateData; what they do exactly is defined in utils -- confirm there.
params = simulator._init_params({})
params['n_trials_stop'] = n_trials
params['n_trials_go'] = n_trials

# Overwrite mu_go with whatever _log_mu_go returns for the current mu_go and
# SSD (presumably a log-graded drift rate -- verify against utils).
params['mu_go'] = simulator._log_mu_go(params['mu_go'], SSD)
simulator._set_n_trials(params)
simulator._set_n_guesses(params)  # no guessing is happening

# Simulate go trials into a freshly initialised data dict and keep the RTs.
data_dict = simulator._simulate_go_trials(simulator._init_data_dict(),
                                          params)
goRTs = data_dict['RT']
# goRTs is the same object as data_dict['RT']; assuming it is a list or
# ndarray, this sorts it in place, so data_dict['RT'] is reordered too.
goRTs.sort()

In [None]:
def generate_exgauss_sampler_from_fit(data,
                                      default_sample_size=100000):
    """Fit an ex-Gaussian to ``data`` and return a non-negative sampler.

    Parameters
    ----------
    data : array-like of float
        Observations to fit. Non-finite entries (NaN, +/-inf) are dropped
        before fitting because ``scipy.stats.exponnorm.fit`` cannot use them.
    default_sample_size : int
        Sample size used by the returned sampler when none is given.

    Returns
    -------
    callable
        ``sample_exgauss(sample_size, beta, scale, loc)``. It draws
        ``exponential(beta) + normal(0, scale) + loc`` and redraws every
        negative value until none remain. ``beta``, ``scale`` and ``loc``
        default to the fitted values.

    Raises
    ------
    ValueError
        If ``data`` contains no finite values.
    """
    data = np.asarray(data, dtype=float)
    data = data[np.isfinite(data)]
    if data.size == 0:
        raise ValueError('cannot fit an ex-Gaussian: data has no finite values')

    FIT_K, FIT_LOC, FIT_SCALE = sstats.exponnorm.fit(data)
    # scipy parameterises exponnorm with K = 1 / (sigma * lambda), so the mean
    # of the exponential component (beta = 1 / lambda) is K * sigma.
    FIT_BETA = FIT_K * FIT_SCALE

    def sample_exgauss(sample_size=default_sample_size,
                       beta=FIT_BETA, scale=FIT_SCALE, loc=FIT_LOC):
        def _draw(size):
            exp_out = np.random.exponential(scale=beta, size=size)
            norm_out = np.random.normal(scale=scale, size=size)
            return (exp_out + norm_out) + loc

        out = _draw(sample_size)
        negative = out < 0
        n_negatives = np.sum(negative)
        # Rejection sampling done iteratively: redraw only the negative
        # entries each round. A loop (rather than recursion) cannot exhaust
        # the interpreter's recursion limit when most draws are negative.
        while n_negatives > 0:
            out[negative] = _draw(n_negatives)
            negative = out < 0
            n_negatives = np.sum(negative)
        return out

    return sample_exgauss

In [None]:
# GET ABCD INFO
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

# Keep the non-NaN SSDs (NaN != NaN) up to 550, in ascending order.
SSDs = abcd_data.SSDDur.unique()
SSDs = [i for i in SSDs if i == i and i <= 550]
SSDs.sort()

# One Series per SSD: each NARGUID's mean choice_accuracy over rows at that
# SSD with correct_stop == 0.0. Selecting the column before .mean() avoids
# averaging unrelated (possibly non-numeric) columns, which raises in
# pandas >= 2.0.
acc_columns = []
for ssd in SSDs:
    curr_means = abcd_data.query(
        "SSDDur == %s and correct_stop==0.0" % ssd
    ).groupby('NARGUID')['choice_accuracy'].mean()
    curr_means.name = ssd
    acc_columns.append(curr_means)

# Per-NARGUID go-trial accuracy, stored under the sentinel column label -1.
go_accs = abcd_data.query(
        "trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0']"
    ).groupby('NARGUID')['choice_accuracy'].mean()
go_accs.name = -1
acc_columns.append(go_accs)

# Build the table once; `axis` must be passed by keyword in pandas >= 2.0.
acc_per_SSD = pd.concat(acc_columns, axis=1, sort=True)

# Treat the mean accuracy at each SSD as a mixture of two accuracies:
#     curr_mean = p * guess_mean + (1 - p) * go_mean
# where guess_mean is the mean accuracy at SSD 0.0 and go_mean the mean
# go-trial accuracy (column -1), and solve for the mixing weight p.
# NOTE(review): nothing here constrains p to [0, 1]; confirm downstream
# consumers tolerate values outside that range.
p = Symbol('p')
mean_acc = acc_per_SSD.mean()  # column means; computed once, not per SSD
guess_mean = mean_acc[0.0]
go_mean = mean_acc[-1]
p_guess_per_SSD = []
for ssd in SSDs:
    curr_mean = mean_acc[ssd]
    solution = solve(p*guess_mean + (1-p)*go_mean - curr_mean, p)
    # The equation is linear in p, so exactly one root is expected.
    assert len(solution) == 1
    p_guess_per_SSD.append(solution[0])

print(p_guess_per_SSD)
# RTs of rows at SSD 0.0 with correct_stop == 0.0; these are used to fit the
# ex-Gaussian sampler that is handed to the simulator as 'guess_function'.
SSD0_RTs = abcd_data.query(
    "SSDDur == 0.0 and correct_stop==0.0"
    ).stop_rt_adjusted.values
sample_exgauss = generate_exgauss_sampler_from_fit(SSD0_RTs)

# One simulator per model variant, keyed by variant name.
simulator_dict = {
    'vanilla': SimulateData(),
    'guesses': SimulateData(guesses=True),
    'graded_mu_go_log': SimulateData(mu_go_grader='log'),
    'graded_mu_go_linear': SimulateData(mu_go_grader='linear')
}

# Empty per-variant frames. NOTE(review): not used anywhere in the visible
# notebook -- confirm whether this is still needed.
group_data_dict = {
    'vanilla': pd.DataFrame(),
    'guesses': pd.DataFrame(),
    'graded_mu_go_log': pd.DataFrame(),
    'graded_mu_go_linear': pd.DataFrame(),
}

# p_guess_stop holds one entry per value in SSDs, in the same order.
params = {
    'n_trials_stop': 1000,
    'n_trials_go': 1000,
    'SSDs': SSDs,
    'guess_function': sample_exgauss,
    'p_guess_stop': p_guess_per_SSD,
}

# Only the 'guesses' variant is simulated here; the result is tagged with
# the variant name and left in `data` (saving is disabled below).
for sim_key in ['guesses']:
    data = simulator_dict[sim_key].simulate(params)
    data['simulation'] = sim_key
#     data.to_csv('%s/individual_%s.csv' % (args.out_dir, sim_key))

In [None]:
def get_p_resp_per_SSD(data, ssds=None):
    """Return the proportion of rows with a response at each SSD.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``SSDDur`` and ``correct_stop``. It is only
        read, never modified.
    ssds : iterable of numbers, optional
        SSD values to report. Defaults to the notebook-level ``SSDs`` list so
        that existing single-argument callers (e.g. ``groupby().apply``)
        behave as before.

    Returns
    -------
    dict
        Maps each SSD to ``(# rows at that SSD with correct_stop == 0.0) /
        (# rows at that SSD)``, or ``np.nan`` when ``data`` has no rows at
        that SSD.
    """
    if ssds is None:
        ssds = SSDs
    ssd_column = data['SSDDur']
    # Computed once; comparisons against NaN are False, so rows with a
    # missing correct_stop are never counted as responses.
    responded = data['correct_stop'] == 0.0
    out_dict = {}
    for ssd in ssds:
        at_ssd = ssd_column == ssd
        n_at_ssd = int(at_ssd.sum())
        if n_at_ssd == 0:
            out_dict[ssd] = np.nan
        else:
            out_dict[ssd] = int((at_ssd & responded).sum()) / n_at_ssd
    return out_dict


In [None]:
# Per-SSD response proportion over all rows of abcd_data (every NARGUID
# pooled), shown as a Series indexed by SSD.
pd.Series(get_p_resp_per_SSD(abcd_data))

In [None]:
# Apply get_p_resp_per_SSD to each NARGUID group; despite its name,
# ssd_resp_dict is the object returned by groupby().apply (one dict per
# NARGUID). Expanding each dict with pd.Series then yields one column per SSD.
ssd_resp_dict = abcd_data.groupby('NARGUID').apply(get_p_resp_per_SSD)
ssd_resp_df = ssd_resp_dict.apply(pd.Series)

In [None]:
# Number of distinct NARGUID values in abcd_data (unique() keeps NaN as a
# value, so a missing NARGUID would be counted once).
len(abcd_data.NARGUID.unique())

# TESTING SIMULATION

In [None]:
import numpy as np
import pandas as pd
import argparse
from sympy.solvers import solve
from sympy import Symbol
import scipy.stats as sstats

from utils import SimulateData

In [None]:
def get_args(argv=None):
    """Parse the simulation options.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse. Defaults to an empty list, so only the
        defaults are returned; this keeps the function usable inside a
        notebook, where ``sys.argv`` holds the kernel's own arguments.

    Returns
    -------
    argparse.Namespace
        With attributes ``n_trials`` (int), ``abcd_dir`` (str) and
        ``out_dir`` (str).
    """
    parser = argparse.ArgumentParser(description='ABCD data simulations')
    # type=int so a value supplied as text is converted like the default.
    parser.add_argument('--n_trials', default=1500, type=int)
    parser.add_argument('--abcd_dir', default='./abcd_data',
                        help='location of ABCD data')
    parser.add_argument('--out_dir', default='./simulated_data',
                        help='location to save simulated data')
    args = parser.parse_args([] if argv is None else argv)
    return args


def generate_exgauss_sampler_from_fit(data,
                                      default_sample_size=100000):
    """Fit an ex-Gaussian to ``data`` and return a non-negative sampler.

    This definition replaces any earlier one with the same name when the
    notebook is run top to bottom, so it rejects negative draws as well:
    values produced by the sampler are never below zero.

    Parameters
    ----------
    data : array-like of float
        Observations to fit. Non-finite entries (NaN, +/-inf) are dropped
        before fitting because ``scipy.stats.exponnorm.fit`` cannot use them.
    default_sample_size : int
        Sample size used by the returned sampler when none is given.

    Returns
    -------
    callable
        ``sample_exgauss(sample_size, beta, scale, loc)``. It draws
        ``exponential(beta) + normal(0, scale) + loc`` and redraws every
        negative value until none remain. ``beta``, ``scale`` and ``loc``
        default to the fitted values.

    Raises
    ------
    ValueError
        If ``data`` contains no finite values.
    """
    data = np.asarray(data, dtype=float)
    data = data[np.isfinite(data)]
    if data.size == 0:
        raise ValueError('cannot fit an ex-Gaussian: data has no finite values')

    FIT_K, FIT_LOC, FIT_SCALE = sstats.exponnorm.fit(data)
    # scipy parameterises exponnorm with K = 1 / (sigma * lambda), so the mean
    # of the exponential component (beta = 1 / lambda) is K * sigma.
    FIT_BETA = FIT_K * FIT_SCALE

    def sample_exgauss(sample_size=default_sample_size,
                       beta=FIT_BETA, scale=FIT_SCALE, loc=FIT_LOC):
        def _draw(size):
            exp_out = np.random.exponential(scale=beta, size=size)
            norm_out = np.random.normal(scale=scale, size=size)
            return (exp_out + norm_out) + loc

        out = _draw(sample_size)
        negative = out < 0
        n_negatives = np.sum(negative)
        # Redraw only the negative entries, round by round, until every
        # value is >= 0.
        while n_negatives > 0:
            out[negative] = _draw(n_negatives)
            negative = out < 0
            n_negatives = np.sum(negative)
        return out

    return sample_exgauss

In [None]:
print('getting args')
args = get_args()
print('analyzing ABCD info')
# GET ABCD INFO
abcd_data = pd.read_csv('%s/minimal_abcd_no_issue_3.csv' % args.abcd_dir)

# Keep every non-NaN SSD (NaN != NaN), in ascending order.
SSDs = abcd_data.SSDDur.unique()
SSDs = [i for i in SSDs if i == i]
SSDs.sort()

# One Series per SSD: each NARGUID's mean choice_accuracy over rows at that
# SSD with correct_stop == 0.0. Selecting the column before .mean() avoids
# averaging unrelated (possibly non-numeric) columns, which raises in
# pandas >= 2.0.
acc_columns = []
for ssd in SSDs:
    curr_means = abcd_data.query(
        "SSDDur == %s and correct_stop==0.0" % ssd
    ).groupby('NARGUID')['choice_accuracy'].mean()
    curr_means.name = ssd
    acc_columns.append(curr_means)

# Per-NARGUID go-trial accuracy, stored under the sentinel column label -1.
go_accs = abcd_data.query(
        "trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0']"
    ).groupby('NARGUID')['choice_accuracy'].mean()
go_accs.name = -1
acc_columns.append(go_accs)

# Build the table once; `axis` must be passed by keyword in pandas >= 2.0.
acc_per_SSD = pd.concat(acc_columns, axis=1, sort=True)

# Treat the mean accuracy at each SSD as a mixture of two accuracies:
#     curr_mean = p * guess_mean + (1 - p) * go_mean
# where guess_mean is the mean accuracy at SSD 0.0 and go_mean the mean
# go-trial accuracy (column -1), and solve for the mixing weight p.
# NOTE(review): nothing here constrains p to [0, 1]; confirm downstream
# consumers tolerate values outside that range.
p = Symbol('p')
mean_acc = acc_per_SSD.mean()  # column means; computed once, not per SSD
guess_mean = mean_acc[0.0]
go_mean = mean_acc[-1]
p_guess_per_SSD = []
for ssd in SSDs:
    curr_mean = mean_acc[ssd]
    solution = solve(p*guess_mean + (1-p)*go_mean - curr_mean, p)
    # The equation is linear in p, so exactly one root is expected.
    assert len(solution) == 1
    p_guess_per_SSD.append(solution[0])
print(p_guess_per_SSD)

# RTs of rows at SSD 0.0 with correct_stop == 0.0; these are used to fit the
# ex-Gaussian sampler that is handed to the simulator as 'guess_function'.
SSD0_RTs = abcd_data.query(
    "SSDDur == 0.0 and correct_stop==0.0"
    ).stop_rt_adjusted.values
sample_exgauss = generate_exgauss_sampler_from_fit(SSD0_RTs)

# One simulator per model variant, keyed by variant name.
simulator_dict = {
    'vanilla': SimulateData(),
    'guesses': SimulateData(guesses=True),
    'graded_mu_go_log': SimulateData(mu_go_grader='log'),
    'graded_mu_go_linear': SimulateData(mu_go_grader='linear')
}

# Empty per-variant frames. NOTE(review): not used anywhere in the visible
# notebook -- confirm whether this is still needed.
group_data_dict = {
    'vanilla': pd.DataFrame(),
    'guesses': pd.DataFrame(),
    'graded_mu_go_log': pd.DataFrame(),
    'graded_mu_go_linear': pd.DataFrame(),
}

# Small test run: 10 trials of each type at five hard-coded SSDs.
# NOTE(review): 'p_guess_stop' has one entry per SSD found in abcd_data,
# while 'SSDs' here is a separate five-element list -- confirm the simulator
# does not need the two to line up for the variant being run.
params = {
    'n_trials_stop': 10,
    'n_trials_go': 10,
    'SSDs': [550, 600, 650, 700, 750],
    'guess_function': sample_exgauss,
    'p_guess_stop': p_guess_per_SSD,
}

# Only the 'graded_mu_go_log' variant is simulated. 'saving...' is printed,
# but nothing is written because the to_csv call below is commented out.
for sim_key in ['graded_mu_go_log']:
    print(sim_key)
    data = simulator_dict[sim_key].simulate(params)
    data['simulation'] = sim_key
    print('saving...')
#         data.to_csv('%s/individual_%s.csv' % (args.out_dir, sim_key))

In [None]:
# Display the output of the most recent simulate() call.
data

In [None]:
# Display the sorted list of non-NaN SSD values found in abcd_data.
SSDs

In [None]:
# Display the simulated data once more (same object as shown two cells up).
data