In [1]:
import numpy as np
import pandas as pd
from sympy.solvers import solve
from sympy import Symbol
import scipy.stats as sstats

from utils import SimulateData

# __Preprocessing__

## 1. Drop subjects affected by Issue 3

In [16]:
# Load abcd_data/minimal_abcd.csv, using the file's first column as the DataFrame index.
abcd_data_w_issue_3 = pd.read_csv('abcd_data/minimal_abcd.csv', index_col=0)

  mask |= (ar1 == a)


In [22]:
# Rows flagged as "issue 3": SSD of exactly 50 with a stop RT strictly between 0 and 50.
stop_rt = abcd_data_w_issue_3['stop_rt_adjusted']
is_issue_3_trial = (
    (stop_rt < 50)
    & (stop_rt > 0)
    & (abcd_data_w_issue_3['SSDDur'] == 50)
)

# Unique subject IDs (NARGUID) that have at least one flagged row.
issue_3_people = abcd_data_w_issue_3.loc[is_issue_3_trial, 'NARGUID'].unique()

print('n affected:', len(issue_3_people))

197

In [25]:
# Proportion of subjects affected by issue 3. The denominator is derived from
# the loaded table rather than hard-coded, so it stays correct if the input
# file changes.
# NOTE(review): the original used the literal 7231 — confirm it equals the
# number of unique NARGUIDs in minimal_abcd.csv.
n_subjects_total = abcd_data_w_issue_3['NARGUID'].nunique()
print('p affect:', len(issue_3_people) / n_subjects_total)

0.027243811367722307

In [32]:
# Keep only rows belonging to subjects that are NOT in issue_3_people.
# .copy() detaches the result from the source frame.
has_issue_3 = abcd_data_w_issue_3['NARGUID'].isin(issue_3_people)
abcd_data = abcd_data_w_issue_3.loc[~has_issue_3].copy()
print('n remaining:', abcd_data['NARGUID'].nunique())

In [33]:
# Persist the filtered table; index=False means the DataFrame index is not written as a column.
abcd_data.to_csv('abcd_data/minimal_abcd_no_issue_3.csv', index=False)

## 2. Generate choice accuracy column

In [93]:
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

# Map raw response codes to floats: numeric strings become their float value,
# and the two arrow-key tokens become 1.0 (left) and 2.0 (right).
resp_replace = {code: float(code) for code in ('2.0', '1.0', '3.0', '4.0')}
resp_replace.update({
    '{LEFTARROW}': 1.0,
    '{RIGHTARROW}': 2.0,
})

In [94]:
# build finger press column
# Start with an all-missing column and fill it from the response columns in
# priority order; combine_first only fills entries that are still missing.
# (np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.)
abcd_data['finger_press'] = np.nan

# stop responses
for col in ['SSD.RESP', 'StopSignal.RESP']:
    # Normalise raw codes (numeric strings / arrow-key tokens) to floats.
    abcd_data[col] = abcd_data[col].replace(resp_replace).astype(float)
    abcd_data['finger_press'] = abcd_data['finger_press'].combine_first(abcd_data[col])

# Sanity check: at least one failed stop trial has a recorded finger press.
assert abcd_data.loc[(abcd_data['trial_type'] == 'StopTrial') & (abcd_data['correct_stop'] == 0)]['finger_press'].notnull().any()

# go trial responses
abcd_data['Fix.RESP'] = abcd_data['Fix.RESP'].replace(resp_replace).astype(float)
for col in ['Go.RESP', 'Fix.RESP']:
    abcd_data['finger_press'] = abcd_data['finger_press'].combine_first(abcd_data[col])

# remap
# Collapse codes 1.0 -> 4.0 and 2.0 -> 3.0 so that finger_press uses only the
# 3.0 / 4.0 coding. Assign the result back instead of calling
# replace(..., inplace=True) on a column selection: under pandas
# Copy-on-Write the in-place form modifies a temporary and leaves the frame
# unchanged.
finger_remap = {1.0: 4.0, 2.0: 3.0}
abcd_data['finger_press'] = abcd_data['finger_press'].replace(finger_remap)

In [95]:
# choice column
# 1.0 when the pressed finger equals correct_stimulus_mapping_1, 0.0 when a
# finger was pressed but does not match, NaN when no press was recorded.
pressed = abcd_data['finger_press'].notnull()
matches_mapping = abcd_data['finger_press'] == abcd_data['correct_stimulus_mapping_1']
abcd_data['choice_accuracy'] = matches_mapping.astype(float).where(pressed)

In [96]:
# Display the table to inspect the newly added finger_press and choice_accuracy columns.
abcd_data

Unnamed: 0,NARGUID,go_rt_adjusted,stop_rt_adjusted,trial_type,SSDDur,correct_go_response,correct_stop,SSD.RESP,Fix.RESP,StopSignal.RESP,Go.RESP,TrialNum,correct_stimulus_mapping_1,correct_stimulus_mapping_2,finger_press,choice_accuracy
0,00CY2MDM,1207.0,,GoTrial,,1.0,,,4.0,,,1,4.0,1.0,4.0,1.0
1,00CY2MDM,0.0,,GoTrial,,omission,,,,,,2,3.0,2.0,,
2,00CY2MDM,,0.0,StopTrial,50.0,,1.0,,,,,3,3.0,2.0,,
3,00CY2MDM,342.0,,GoTrial,,1.0,,,,,3.0,4,3.0,2.0,3.0,1.0
4,00CY2MDM,275.0,,GoTrial,,1.0,,,,,4.0,5,4.0,1.0,4.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2532235,ZVGAMFG7,965.0,,GoTrial,,1.0,,,,,1.0,356,4.0,1.0,4.0,1.0
2532236,ZVGAMFG7,961.0,,GoTrial,,1.0,,,,,2.0,357,3.0,2.0,3.0,1.0
2532237,ZVGAMFG7,1086.0,,GoTrial,,1.0,,,1.0,,,358,4.0,1.0,4.0,1.0
2532238,ZVGAMFG7,,916.0,StopTrial,500.0,,0.0,,1.0,,,359,4.0,1.0,4.0,1.0


In [97]:
# Overwrite the same CSV this section loaded from, now including the
# finger_press and choice_accuracy columns (index not written).
abcd_data.to_csv('abcd_data/minimal_abcd_no_issue_3.csv', index=False)

# __Metrics for Simulation__

## 1. Get SSD distributions per subject

In [34]:
# Reload the preprocessed table from disk so this section does not depend on earlier in-memory state.
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [35]:
# For each subject, the relative frequency of every SSDDur value, returned as
# a long-format frame with the frequency in a 'proportion' column.
SSD_dist = (
    abcd_data
    .groupby('NARGUID')['SSDDur']
    .value_counts(normalize=True)
    .rename('proportion')
    .reset_index()
)

In [37]:
# Save the per-subject SSD proportions (index not written).
SSD_dist.to_csv('abcd_data/SSD_dist_by_subj.csv', index=False)

## 2. P(guess|SSD) for mixture distributions

In [98]:
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

# Sorted list of the distinct SSD values; missing values (rows with no SSD) are dropped.
SSDs = sorted(abcd_data.SSDDur.dropna().unique())

In [118]:
# Per-subject mean choice accuracy on failed stop trials (correct_stop == 0),
# one Series per SSD. The Series are collected in a list and concatenated once
# rather than growing a DataFrame inside the loop.
per_ssd_means = []
for ssd in SSDs:
    curr_means = (
        abcd_data
        .query("SSDDur == @ssd and correct_stop == 0.0")
        # Select the column before aggregating: averaging every column would
        # also hit string columns, which raises on pandas >= 2.0.
        .groupby('NARGUID')['choice_accuracy']
        .mean()
    )
    curr_means.name = ssd
    per_ssd_means.append(curr_means)

# Per-subject mean choice accuracy on go trials, stored under the column label -1.
go_accs = (
    abcd_data
    .query("trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0', 'omission']")
    .groupby('NARGUID')['choice_accuracy']
    .mean()
)
go_accs.name = -1

# Subjects x (SSDs..., -1). `axis` must be passed by keyword: positional use
# was deprecated and is rejected by pandas 2.x.
acc_per_SSD = pd.concat(per_ssd_means + [go_accs], axis=1, sort=True)

In [122]:
# Solve, for every SSD, the two-component mixture
#     curr_mean = p * guess_mean + (1 - p) * go_mean
# for p, where guess_mean is the across-subject mean accuracy at SSD 0.0 and
# go_mean is the across-subject mean accuracy in the go-trial column (-1).
p = Symbol('p')

# Column means are loop-invariant: compute them once instead of on every iteration.
mean_acc = acc_per_SSD.mean()
guess_mean = mean_acc[0.0]
go_mean = mean_acc[-1]

p_guess_per_SSD = {}
for ssd in SSDs:
    curr_mean = mean_acc[ssd]
    solution = solve(p*guess_mean + (1-p)*go_mean - curr_mean, p)
    # The equation is linear in p, so exactly one root is expected
    # (this fails if guess_mean == go_mean).
    assert len(solution) == 1
    # Store a plain float rather than a sympy object so the resulting frame
    # has a numeric dtype instead of object.
    p_guess_per_SSD[ssd] = float(solution[0])

# Single-row frame (row label 'p_guess') with one column per SSD.
p_guess_df = pd.DataFrame(p_guess_per_SSD, index=['p_guess'])
p_guess_df.to_csv('abcd_data/p_guess_per_ssd.csv')

In [127]:
# Inspect the column labels of p_guess_df (one column per SSD value).
p_guess_df.columns

Float64Index([  0.0,  50.0, 100.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0,
              450.0, 500.0, 550.0, 600.0, 650.0, 700.0, 750.0, 800.0, 850.0,
              900.0],
             dtype='float64')

In [121]:
# Display the rows selected by the same go-trial filter used to compute go_accs.
abcd_data.query(
        "trial_type == 'GoTrial' and correct_go_response in ['1.0', '0.0', 'omission']"
    )

Unnamed: 0,NARGUID,go_rt_adjusted,stop_rt_adjusted,trial_type,SSDDur,correct_go_response,correct_stop,SSD.RESP,Fix.RESP,StopSignal.RESP,Go.RESP,TrialNum,correct_stimulus_mapping_1,correct_stimulus_mapping_2,finger_press,choice_accuracy
0,00CY2MDM,1207.0,,GoTrial,,1.0,,,4.0,,,1,4.0,1.0,4.0,1.0
1,00CY2MDM,0.0,,GoTrial,,omission,,,,,,2,3.0,2.0,,
3,00CY2MDM,342.0,,GoTrial,,1.0,,,,,3.0,4,3.0,2.0,3.0,1.0
4,00CY2MDM,275.0,,GoTrial,,1.0,,,,,4.0,5,4.0,1.0,4.0,1.0
6,00CY2MDM,326.0,,GoTrial,,0.0,,,,,3.0,7,4.0,1.0,3.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2532234,ZVGAMFG7,0.0,,GoTrial,,omission,,,,,,355,3.0,2.0,,
2532235,ZVGAMFG7,965.0,,GoTrial,,1.0,,,,,1.0,356,4.0,1.0,4.0,1.0
2532236,ZVGAMFG7,961.0,,GoTrial,,1.0,,,,,2.0,357,3.0,2.0,3.0,1.0
2532237,ZVGAMFG7,1086.0,,GoTrial,,1.0,,,1.0,,,358,4.0,1.0,4.0,1.0


## 3. Inhibition function (p(respond|SSD))

In [123]:
def get_p_resp_per_SSD(data, SSDs):
    """Compute the proportion of responded (failed-stop) rows at each SSD.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``SSDDur`` and ``correct_stop``.
    SSDs : iterable
        SSD values to evaluate.

    Returns
    -------
    dict
        Maps each value in ``SSDs`` to the fraction of rows with that
        ``SSDDur`` whose ``correct_stop`` equals 0.0, or ``np.nan`` when
        ``data`` has no rows at that SSD.
    """
    stop_outcome = data['correct_stop']
    p_resp = {}
    for ssd in SSDs:
        at_ssd = stop_outcome[data['SSDDur'] == ssd]
        n_trials = len(at_ssd)
        if n_trials == 0:
            p_resp[ssd] = np.nan
            continue
        n_responded = int((at_ssd == 0.0).sum())
        p_resp[ssd] = n_responded / n_trials
    return p_resp

In [124]:
abcd_data = pd.read_csv('abcd_data/minimal_abcd_no_issue_3.csv')

# Distinct SSD values in order of first appearance, with missing values dropped.
SSDs = list(abcd_data.SSDDur.dropna().unique())

# One {ssd: p(respond)} dict per subject ...
ssd_resp_dict = abcd_data.groupby('NARGUID').apply(get_p_resp_per_SSD, SSDs=SSDs)
# ... expanded into a subjects x SSD frame.
ssd_resp_df = ssd_resp_dict.apply(pd.Series)

In [125]:
# Group-level inhibition function: mean across subjects of p(respond) at each
# SSD, in long format with columns 'SSD' and 'p_respond'.
# rename / rename_axis return new objects, so the axis name is not set in place
# on an Index that may be shared with ssd_resp_df.columns.
abcd_inhib_func = (
    ssd_resp_df.mean()
    .rename('p_respond')
    .rename_axis('SSD')
    .reset_index()
)
# Constant label identifying where these values came from.
abcd_inhib_func['underlying distribution'] = 'ABCD data'

In [126]:
# Save the group-level inhibition function (index not written).
abcd_inhib_func.to_csv('abcd_data/abcd_inhib_func.csv', index=False)