In [1]:
%matplotlib inline

# Assuming this notebook lives two levels below the project root, move up to the project root:
%cd ../..

/home/olivia/Code/neural_link_SV_iDE


In [2]:
import numpy as np
import pandas as pd
import os
import seaborn as sns

import matplotlib.pyplot as plt


# Replace with directory you have downloaded NARPS data from:
DATA_DIR = '/home/olivia/Data/NARPS/event_tsvs/'


# Plot styling used by the figures in this notebook.
sns.set(style="ticks", palette="muted", color_codes=True, font_scale=1.8)

# Every directory entry whose name contains 'sub' is treated as an events file.
files = [entry for entry in os.listdir(DATA_DIR) if 'sub' in entry]

# One row per events file, ordered by filename, plus bookkeeping columns that
# start at a neutral default and are updated while the files are read.
meta_df = (
    pd.DataFrame({'Filename': files})
    .sort_values(by='Filename')
    .reset_index(drop=True)
    .assign(**{
        'Participant': 0,
        'Run': 0,
        'Trials Designated NoResp': 0,
        'Trials with RT == 0': 0,
        'Behaviour in File': True,
    })
)

def squash_response(row):
    """Collapse a response label into a binary accept code.

    Despite the parameter name, this is applied to individual values of the
    ``participant_response`` column rather than to whole rows.

    Parameters
    ----------
    row : object
        A single response value.

    Returns
    -------
    int or object
        1 if the value contains 'accept', 0 if it contains 'reject'.
        Anything else is returned unchanged, including values that do not
        support a membership test (e.g. NaN from an empty cell, or None).
    """
    try:
        if 'accept' in row:
            return 1
        if 'reject' in row:
            return 0
    except TypeError:
        # Not a string/container (e.g. float NaN): there is nothing to
        # match, so pass the value through instead of crashing the apply().
        pass
    return row

def check_RT_response_data(row):
    """Decide whether a single trial row is usable.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['RT']``; ``row['participant_response']`` is
        optional.

    Returns
    -------
    bool
        False when the response field is missing, when it equals 'NoResp',
        or when ``RT`` equals 0; True otherwise.
    """
    try:
        response = row['participant_response']
    except KeyError:
        # No response field at all: the trial cannot be used.
        return False

    if response == 'NoResp':
        return False

    # A reaction time of exactly zero marks an unusable trial.
    return not (row['RT'] == 0)

FileNotFoundError: [Errno 2] No such file or directory: '/home/olivia/Data/NARPS/event_tsvs/'

In [None]:
# Per-run frames are collected in a list and concatenated once after the
# loop. This keeps the cell re-runnable on a live kernel (no leftover ps_df
# to delete first) and avoids re-copying the growing frame on every iteration.
run_frames = []

prev_p = 0
run_check = 0

# For each filename...
for index, row in meta_df.iterrows():
    file = row['Filename']

    # Check the file conforms to what we think it should look like, e.g.
    # sub-001_task-MGT_run-01_events.tsv
    # There are 4 runs per "sub" and the NARPS site says:
    # "119 healthy participants completed the experiment (n=60 from the equal
    # indifference group and n=59 from the equal range group). Nine
    # participants were excluded prior to fMRI analysis based on
    # pre-registered exclusion criteria: Five did not show a significant
    # effect of both gains and loses on their choices (Bayesian logistic
    # regression, p < 0.05; reflecting a lack of understanding of the task)
    # and four missed over 10% of trials (in one or more runs). Data of two
    # additional participants is currently under QA. Thus, at least 108
    # participants will be included in the final dataset sent to the analysis
    # teams (n=54 from the equal indifference group and n=54 from the equal
    # range group)."

    # Check that the filenames have a certain structure:
    file_list = file.split('_')
    assert file_list[0].split('-')[0] == 'sub', file
    p = int(file_list[0].split('-')[1])
    assert p > 0, file
    assert p <= 128, file

    assert file_list[2].split('-')[0] == 'run', file
    run = int(file_list[2].split('-')[1])
    assert run > 0, file
    assert run <= 4, file

    # Check if all participants have all 4 runs, and print who doesn't.
    # Relies on meta_df being sorted by filename so that the runs of one
    # participant are adjacent.
    if prev_p != p:
        if run_check and run_check != 4:
            print(prev_p, run_check)
        run_check = 0
    prev_p = p
    run_check += 1

    # Open the file:
    p_run_df = pd.read_csv(os.path.join(DATA_DIR, file), delimiter='\t')

    # Create required columns for participant ID, run, and trial number:
    p_run_df['ID'] = p  # just a number
    p_run_df['participant_id'] = file_list[0]  # the IDs the original data used
    p_run_df['run'] = run

    # The 0-based row position becomes a 1-based 'trial' column.
    p_run_df = p_run_df.reset_index().rename(index=str, columns={"index": "trial"})
    p_run_df['trial'] += 1

    # Risk: sqrt(gain*gain*prob_gain*(1 - prob_gain) + loss*loss*prob_loss*(1 - prob_loss))
    # based on Canessa et al 2013 https://doi.org/10.1523/JNEUROSCI.0497-13.2013
    # With both probabilities equal to 0.5 this reduces to 0.5 * sqrt(gain^2 + loss^2).
    p_run_df['risk'] = np.sqrt(p_run_df['gain']**2 + p_run_df['loss']**2) * 0.5

    p_run_df['use'] = p_run_df.apply(check_RT_response_data, axis=1)

    meta_df.loc[index, 'Participant'] = p
    meta_df.loc[index, 'Run'] = run

    # Find the files with RT set to 0 and note that in meta_df.
    # Counting with .sum() (rather than value_counts()[True]) yields 0 instead
    # of a KeyError when no row matches.
    rt_is_zero = p_run_df['RT'] == 0
    has_response = 'participant_response' in p_run_df.columns
    if rt_is_zero.any():
        meta_df.loc[index, 'Trials with RT == 0'] = int(rt_is_zero.sum())
        if has_response:
            meta_df.loc[index, 'Trials Designated NoResp'] = int(
                (p_run_df['participant_response'] == 'NoResp').sum()
            )
        else:
            meta_df.loc[index, 'Behaviour in File'] = False

    # Create accept column for participant:
    if has_response:
        p_run_df['accept'] = p_run_df['participant_response'].apply(squash_response)
    else:
        p_run_df['accept'] = 'NoResp'

    run_frames.append(p_run_df)

# The in-loop check only fires when the participant changes, so the last
# participant has to be checked here.
if run_check and run_check != 4:
    print(prev_p, run_check)

# Create big dataframe for all participants.
# (pd.concat raises ValueError if no event files were found.)
ps_df = pd.concat(run_frames, sort=False)

# Attach the participant-level columns; participant_id becomes the index.
participants_df = pd.read_csv(os.path.join(DATA_DIR, 'participants.tsv'), delimiter='\t')
ps_df = ps_df.set_index('participant_id').join(participants_df.set_index('participant_id'))

In [None]:
# Preview the first ten rows of the per-file metadata table.
meta_df.head(10)

In [None]:
# Metadata rows (one per run file) for participant 48.
meta_df[meta_df['Participant'] == 48]

In [None]:
# Files flagged as having no participant_response column. The flag is only
# set for files that also contain at least one RT == 0 trial.
meta_df[meta_df['Behaviour in File'] == False]

In [None]:
# All trials of participant 48 in the combined (uncleaned) trial table.
ps_df[ps_df['ID'] == 48]

In [None]:
# Number of distinct participant IDs in the combined trial table.
len(ps_df['ID'].unique())

In [None]:
# Summary statistics of the combined (uncleaned) trial table.
ps_df.describe()

In [None]:
# Remove the trials where something went wrong. 'use' is False for trials
# marked 'NoResp', trials from files without a participant_response column,
# and trials with RT == 0 (see check_RT_response_data).
clean_ps_df = ps_df[ps_df['use']]

In [None]:
# Summary statistics after dropping the unusable trials.
clean_ps_df.describe()

In [None]:
# Display the cleaned trial table.
clean_ps_df

In [None]:
# Per-participant summary statistics of the gain and loss columns.
clean_ps_df.groupby('ID')[['gain', 'loss']].describe()
# The per-participant value ranges shown here can be used to work out which
# condition each participant is in.

In [None]:
# Split the usable trials by the value of the 'group' column.
equalIndif_df = clean_ps_df[clean_ps_df['group'] == 'equalIndifference']
equalRange_df = clean_ps_df[clean_ps_df['group'] == 'equalRange']

In [None]:

# Bar chart of how often each integer value of `gain` occurs, one panel per
# group, sharing the y axis.
fig, axs = plt.subplots(1, 2, sharey=True, figsize=(16, 4))
variable = 'gain'

# (group frame, panel title, x-axis limits) for each panel, left to right.
panels = [
    (equalIndif_df, 'Equal Indifference', [9, 41]),
    (equalRange_df, 'Equal Range', [4.2, 20.8]),
]

for ax, (group_df, panel_title, xlim) in zip(axs, panels):
    # counts[v] is the number of rows whose `variable` equals v.
    counts = np.bincount(group_df[variable])
    ax.bar(range(len(counts)), counts, width=1, align='center')
    ax.xaxis.set_minor_locator(plt.MultipleLocator(1))
    ax.xaxis.set_major_locator(plt.MultipleLocator(2))
    ax.set_xlim(xlim)
    ax.set_title(panel_title)

title = fig.suptitle(variable.capitalize())

# shift subplots down:
title.set_y(0.95)
fig.subplots_adjust(top=0.75)
sns.despine(right=True)

In [None]:
# Bar chart of how often each integer value of `loss` occurs, one panel per
# group, sharing the y axis.
fig, axs = plt.subplots(1, 2, sharey=True, figsize=(16, 4))
variable = 'loss'

# (group frame, panel title, x-axis limits) for each panel, left to right.
panels = [
    (equalIndif_df, 'Equal Indifference', [4.2, 20.8]),
    (equalRange_df, 'Equal Range', [4.2, 20.8]),
]

for ax, (group_df, panel_title, xlim) in zip(axs, panels):
    # counts[v] is the number of rows whose `variable` equals v.
    counts = np.bincount(group_df[variable])
    ax.bar(range(len(counts)), counts, width=1, align='center')
    ax.xaxis.set_minor_locator(plt.MultipleLocator(1))
    ax.xaxis.set_major_locator(plt.MultipleLocator(2))
    ax.set_xlim(xlim)
    ax.set_title(panel_title)

title = fig.suptitle(variable.capitalize())

# shift subplots down:
title.set_y(0.95)
fig.subplots_adjust(top=0.75)
sns.despine(right=True)

plt.show()

In [None]:
sns.set(font_scale=1.2)

f, axes = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
sns.despine(right=True)

# Raw-count histograms of `risk` (no KDE, no normalisation), one panel per
# group on a shared x axis.
for ax, group_df, panel_title in (
    (axes[0], equalRange_df, 'Equal Range'),
    (axes[1], equalIndif_df, 'Equal Indifference'),
):
    sns.distplot(group_df['risk'], ax=ax, norm_hist=False, kde=False)
    ax.set_title(panel_title)
plt.show()

In [None]:
sns.set(font_scale=1.8)

f, axes = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
sns.despine(right=True)

# Distribution of RT with distplot's default settings:
# equalRange on the top panel, equalIndifference on the bottom panel.
for ax, group_df in zip(axes, (equalRange_df, equalIndif_df)):
    sns.distplot(group_df['RT'], ax=ax)
plt.show()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(20, 10))
sns.despine(right=True)

# RT per value of the binary `accept` column; each violin is split by group
# and marked with quartile lines.
group_palette = {"equalRange": "y", "equalIndifference": "b"}
sns.violinplot(
    data=clean_ps_df, x="accept", y="RT", hue='group',
    split=True, inner="quart", palette=group_palette, ax=ax,
)
plt.show()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(20, 10))
sns.despine(right=True)

# RT per original `participant_response` label; each violin is split by
# group and marked with quartile lines.
group_palette = {"equalRange": "y", "equalIndifference": "b"}
sns.violinplot(
    data=clean_ps_df, x="participant_response", y="RT", hue='group',
    split=True, inner="quart", palette=group_palette, ax=ax,
)
plt.show()

In [None]:
# Number of usable trials per value of the binary `accept` column, all groups.
sns.set(font_scale=1.8)
sns.set_style('ticks')
sns.countplot(x='accept', data=clean_ps_df)
plt.show()

In [None]:
# Number of usable trials per value of `accept`, equalRange group only.
sns.set(font_scale=1.8)
sns.set_style('ticks')
sns.countplot(x='accept', data=equalRange_df)
plt.show()

In [None]:
# Number of usable trials per value of `accept`, equalIndifference group only.
sns.set(font_scale=1.8)
sns.set_style('ticks')
sns.countplot(x='accept', data=equalIndif_df)
plt.show()

In [None]:
# Number of usable trials per original `participant_response` label,
# equalRange group only (smaller font so the labels fit).
sns.set(font_scale=1)
sns.set_style('ticks')
sns.countplot(x='participant_response', data=equalRange_df)
plt.show()

In [None]:
# Number of usable trials per original `participant_response` label,
# equalIndifference group only (smaller font so the labels fit).
sns.set(font_scale=1)
sns.set_style('ticks')
sns.countplot(x='participant_response', data=equalIndif_df)
plt.show()

In [None]:
# Write the cleaned trial table and the per-file metadata to CSV, with paths
# relative to the current working directory.
# NOTE(review): clean_ps_df is indexed by participant_id (set when joining
# participants.tsv), so index=False leaves that column out of participants.csv;
# the numeric 'ID' column is still written. Confirm this is intended.
clean_ps_df.to_csv('./data/participants.csv', index=False)
meta_df.to_csv('./data/file_details.csv', index=False)

In [None]:
# Display the participant-level table read from participants.tsv.
participants_df

In [None]:
# First rows of the combined (uncleaned) trial table.
ps_df.head()

In [None]:
# All trials of participant 13, taken from the uncleaned table.
subject_13 = ps_df[ps_df['ID'] == 13]

In [None]:
# Summary of participant 13's `accept` column.
subject_13['accept'].describe()

In [None]:
# Display participant 13's trials.
subject_13

In [None]:
# One row per participant ID (its first occurrence) with the group label.
# .copy() makes this an independent frame, so adding columns to it later
# does not trigger pandas' SettingWithCopyWarning.
group_assignments = ps_df.drop_duplicates(subset='ID')[['group', 'ID']].copy()

In [None]:
def check_group(row):
    """Check a participant's group label against the parity of their ID.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['ID']`` (an integer) and ``row['group']``.

    Returns
    -------
    bool
        True if an odd ID is labelled 'equalIndifference' or an even ID is
        labelled 'equalRange'; False otherwise.
    """
    expected_group = 'equalIndifference' if row['ID'] % 2 else 'equalRange'
    # bool() keeps the return type a plain Python bool even if the comparison
    # yields a NumPy boolean.
    return bool(row['group'] == expected_group)

# Flag, per participant, whether the group label matches the ID-parity rule
# implemented in check_group.
group_assignments['consistent'] = group_assignments.apply(check_group, axis=1)
    

In [None]:
# Inspect the assignment table rows at positions 30 to 49.
group_assignments.iloc[30:50]

In [None]:
# Participants whose group label does not match the ID-parity rule.
# (The original line misspelled the frame name, which raised NameError.)
group_assignments[group_assignments['consistent'] == False]

In [None]:
# One-column boolean frame: True where `consistent` is False.
group_assignments[['consistent']] == False

In [None]:
# All trials of participant 87, taken from the uncleaned table.
subject_87 = ps_df[ps_df['ID'] == 87]

In [None]:
# Display participant 87's trials.
subject_87