# Imports

In [1]:
import pandas as pd; import seaborn as sb; import warnings; import scipy; import re; 
import os; from analysis_helpers import *; import itertools; from scipy import stats
import random; import pandas as pd; import numpy as np; from sklearn import datasets, linear_model; 
from sklearn.linear_model import LinearRegression; import statsmodels.api as sm
from scipy import stats; from itertools import groupby; from operator import itemgetter
warnings.filterwarnings('ignore')
%matplotlib inline

# Check A:  behavioral data files

At the beginning of each output chunk, a statement reports the number of participants loaded and the set of unique run counts found across participants' presentation and memory behavioral files (for example, if all participants have 8 presentation and 8 memory runs, then this value will be '8'; if some participants have 8 files and some have 7 -- i.e., are missing a file -- then the numbers '7' and '8' will display; etc).

In [2]:
# Load every participant's behavioral files, experiment by experiment and group by group,
# then report how many distinct runs each participant has for each trial type.
experiment_dirs = ['../sustained_attention_experiment/data', '../variable_attention_experiment/data']

for data_dir in experiment_dirs:

    for group in ['group1', 'group2']:

        group_dir = data_dir + '/' + group

        # one aggregated dataframe per participant directory ('.DS_Store' entries are skipped);
        # sum_pd is not defined in this notebook -- presumably it comes from analysis_helpers
        sub_list = [sum_pd(group_dir + '/' + sub_dir)
                    for sub_dir in os.listdir(group_dir)
                    if sub_dir != '.DS_Store']

        # header identifying which experiment / group the report below belongs to
        print('', data_dir, group, '', sep='\n')

        print('We have loaded data from ' + str(len(sub_list)) + ' unique subjects')

        # stack all participants into a single dataframe
        concatenated = pd.concat(sub_list)

        # number of distinct runs per participant and trial type (Presentation and Memory)
        unique_runs = concatenated.groupby(['Subject','Trial Type'])['Run'].nunique()

        print()
        print('Below, we can see the number of unique runs loaded for each subject, for each trial type.')
        print()

        # summary line: how many different run counts exist overall, and which ones
        run_count_summary = ('The set of all numbers of runs from all participants contains '
                             + str(unique_runs.nunique()) + ' unique value: ' + str(unique_runs.unique()))
        print(run_count_summary)
        print()
        print(unique_runs)


../sustained_attention_experiment/data
group1

We have loaded data from 30 unique subjects

Below, we can see the number of unique runs loaded for each subject, for each trial type.

The set of all numbers of runs from all participants contains 1 unique value: [8]

Subject  Trial Type  
0        Memory          8
         Presentation    8
2        Memory          8
         Presentation    8
6        Memory          8
         Presentation    8
7        Memory          8
         Presentation    8
8        Memory          8
         Presentation    8
9        Memory          8
         Presentation    8
10       Memory          8
         Presentation    8
11       Memory          8
         Presentation    8
12       Memory          8
         Presentation    8
13       Memory          8
         Presentation    8
14       Memory          8
         Presentation    8
15       Memory          8
         Presentation    8
16       Memory          8
         Presentation    8
17       

# Check B1: check attention level assignments

In [3]:
# Load the labeled behavioral data used by all of the checks below.
# NOTE(review): later cells compare 'Attention Level' against the literal string 'None';
# some pandas versions may parse 'None' as NaN by default in read_csv -- confirm the
# label survives loading in the environment used to run this notebook.
data = pd.read_csv('../parsed_data/full_behavioral.csv')

### Check B1a: Number of images at each attention level, in each memory run

This cell outputs the unique counts for each type of non-novel image shown in each memory run (Fully Attended, Side, Category, and None) and for the novel images shown in each memory run.

All participants should see five of each non-novel image type, and 20 novel images.

In [4]:
# Count rows for every participant / run / trial type / attention level combination.
attn_grp = data.groupby(['UniqueID','Run','Trial Type','Attention Level'], as_index=False).count()

# Split the per-combination counts into novel and non-novel attention levels;
# the 'Subject' column holds the row count for each combination after .count().
is_novel = attn_grp['Attention Level'] == 'Novel'
non_novel_counts = attn_grp.loc[~is_novel, 'Subject'].unique()
novel_counts = attn_grp.loc[is_novel, 'Subject'].unique()

print('unique number of each non-novel image type: ' + str(non_novel_counts))
print('unique number of novel images: ' + str(novel_counts))

unique number of each non-novel image type: [5]
unique number of novel images: [20]


### Check B1b: Novel images equal proportion faces and places


This cell outputs the unique counts for novel images shown in each category (Faces and Places). All participants should see 10 of each image type.

In [29]:
# Count rows for every participant / run / trial type / attention level / image category combination.
attn_grp = data.groupby(['UniqueID','Run','Trial Type','Attention Level','Category'], as_index=False).count()

# Keep only the novel images; the 'Subject' column holds the row count per combination.
novel_grp = attn_grp[attn_grp['Attention Level']=='Novel']

# Report the unique per-run counts separately for every category present in the data,
# each under its own label, so that both categories are actually checked
# (filtering on 'Place' twice would leave the other category unverified).
for category in sorted(novel_grp['Category'].unique()):
    category_counts = novel_grp[novel_grp['Category']==category]['Subject'].unique()
    print('unique number of novel ' + str(category) + ' images: ' + str(category_counts))

unique number of each non-novel image type: [10]
unique number of novel images: [10]


### Check B1c: Check category consistency, sustained exp
###### Full and Category images from each run are the same image category
###### Side and None images from each run are the same image category

In [6]:
# Restrict to the sustained attention experiment and count rows for every
# participant / run / attention level / image category combination.
data_match = data[data['Experiment']=='/sustain']
match_grp  = data_match.groupby(['UniqueID','Run','Attention Level','Category'],as_index=False).count()

# Pairs of attention levels whose images must share a single category within a run.
paired_levels = [('Full', 'Category'), ('Side', 'None')]

# Running total of detected mismatches (the same name is used for every increment
# and for the final check, so a mismatch is counted instead of raising NameError).
counts = 0

for s in match_grp['UniqueID'].unique():
    for r in match_grp['Run'].unique():

        d = match_grp[(match_grp['UniqueID']==s)&(match_grp['Run']==r)]

        # this participant has no rows for this run: nothing to compare
        if d.empty:
            continue

        for first, second in paired_levels:
            # Compare sets of categories rather than calling .item(): a level that spans
            # several categories, or is missing entirely, is then reported as a mismatch
            # instead of aborting the whole check with a ValueError.
            first_cats  = set(d[d['Attention Level']==first]['Category'])
            second_cats = set(d[d['Attention Level']==second]['Category'])

            if not (len(first_cats) == 1 and first_cats == second_cats):
                print(first + ' and ' + second + ' Face/Place mismatch')
                counts += 1

if counts ==  0:
    print('No mismatches detected.')

No mismatches detected.


# Check B2: Randomly select runs from random participants to spot check manually

In [7]:
# TODO (not yet implemented in this cell):
# randomly select two participants
# randomly select a run from each participant's data
# output randomly selected runs to html file for manual check

In [8]:
# pick one participant from each experiment and group

# Check Attention Cuing

See if participants in sustained attn experiment saw the same cue in back-to-back blocks.

See if participants in variable attn experiment saw same cue in back-to-back trials.

In both experiments, there are instances of repeated cues in the first test group but not the second (see output below).

In [9]:
# Add a 'combined_cue' column to `data` (in place) holding the cued category followed by
# the cued side -- assumes both columns contain strings, so `+` concatenates them.
data['combined_cue'] = data['Cued Category'] + data['Cued Side']

In [10]:
# Sustained attention experiment: for each group, count how often a participant's
# combined cue is identical in two neighbouring entries of their per-run cue list.
for exp in ['/sustain']:

    for group in [1,2]:

        in_group = (data['Experiment']==exp) & (data['Group']==group)
        repeat_instances = 0

        for sub in data[in_group]['UniqueID'].unique():

            # one row per (Run, combined_cue) pair for this participant;
            # groupby returns them sorted by Run, then by cue
            d = data[data['UniqueID']==sub].groupby(['Run','combined_cue'],as_index=False).count()
            cue_list = list(d['combined_cue'])

            # tally adjacent pairs that carry the same cue
            repeat_instances += sum(1 for previous, current in zip(cue_list, cue_list[1:])
                                    if current == previous)

        print(exp+'ed attn, group '+str(group)+': '+str(repeat_instances)+' instances of repeated cues in back to back blocks')

/sustained attn, group 1: 29 instances of repeated cues in back to back blocks
/sustained attn, group 2: 0 instances of repeated cues in back to back blocks


In [11]:
# Variable attention experiment: for each group, count how often a participant's
# combined cue is identical on two neighbouring trials within the same run.
for exp in ['/variabl']:

    for group in [1,2]:

        in_group = (data['Experiment']==exp) & (data['Group']==group)
        repeat_instances = 0

        for sub in data[in_group]['UniqueID'].unique():

            # one row per (Run, Trial, combined_cue) for this participant;
            # groupby returns them sorted by Run, then Trial
            d = data[data['UniqueID']==sub].groupby(['Run','Trial','combined_cue'],as_index=False).count()

            for r in d['Run'].unique():
                cue_list = list(d[d['Run']==r]['combined_cue'])

                # tally adjacent trials within this run that carry the same cue
                repeat_instances += sum(1 for previous, current in zip(cue_list, cue_list[1:])
                                        if current == previous)

        print(exp+'e attn, group '+str(group)+': '+str(repeat_instances)+' instances of repeated cues in back to back trials')

/variable attn, group 1: 360 instances of repeated cues in back to back trials
/variable attn, group 2: 0 instances of repeated cues in back to back trials


# Check out memory reaction times

In [12]:
# Largest recorded familiarity reaction time within each experiment / group.
data.groupby(['Experiment','Group'])['Familiarity Reaction Time (s)'].max()

Experiment  Group
/sustain    1        3.017
            2        2.012
/variabl    1        3.034
            2        2.003
Name: Familiarity Reaction Time (s), dtype: float64

### Proportion of memory trials with reaction times above 2.012 seconds

3.9% of memory trial responses from Group 1 were recorded after 2.012 seconds

In [13]:
# Which groups contain memory trials with a familiarity reaction time above 2.012 s?
# (Per the recorded output: all are in Group 1.)

data[(data['Familiarity Reaction Time (s)']>2.012)&(data['Trial Type']=='Memory')]['Group'].unique()

array([1])

In [14]:
# Display the memory trials whose familiarity reaction time exceeds 2.012 s.
# NOTE(review): this renders the whole filtered frame; consider .head() if only a preview is needed.
data[(data['Familiarity Reaction Time (s)']>2.012)&(data['Trial Type']=='Memory')]

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Subject,Trial Type,Run,Cued Composite,Uncued Composite,Cued Face,Cued Place,...,Rating History,Stimulus Onset,Stimulus End,Attention Probe,Trial,UniqueID,Group,Experiment,Attention Reaction Time (s).1,combined_cue
200,200,200,110,10,Memory,2,,,,,...,,1.538592e+09,1.538592e+09,,0.0,0,1,/sustain,,
482,482,82,12,13,Memory,0,,,,,...,,1.539182e+09,1.539182e+09,,2.0,1,1,/sustain,,
507,507,107,37,13,Memory,0,,,,,...,,1.539182e+09,1.539182e+09,,27.0,1,1,/sustain,,
516,516,116,46,13,Memory,0,,,,,...,,1.539182e+09,1.539182e+09,,36.0,1,1,/sustain,,
520,520,120,60,13,Memory,1,,,,,...,,1.539182e+09,1.539182e+09,,0.0,1,1,/sustain,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35838,11838,238,148,20,Memory,2,,,,,...,,1.554835e+09,1.554835e+09,,38.0,89,1,/variabl,,
35869,11869,269,339,20,Memory,6,,,,,...,,1.554836e+09,1.554836e+09,,29.0,89,1,/variabl,,
35967,11967,367,217,20,Memory,4,,,,,...,,1.554835e+09,1.554835e+09,,7.0,89,1,/variabl,,
35982,11982,382,232,20,Memory,4,,,,,...,,1.554835e+09,1.554835e+09,,22.0,89,1,/variabl,,


In [15]:
# Shape of the memory trials that have a recorded rating and a reaction time above 2.012 s
# (the row count is the numerator of the proportion computed below).
data[(data['Familiarity Reaction Time (s)']>2.012)&(data['Trial Type']=='Memory') & ~(data['Familiarity Rating'].isna())].shape

(736, 34)

In [16]:
# Shape of the Group 1 memory trials that have a recorded rating
# (the row count is the denominator of the proportion computed below).
data[~(data['Familiarity Rating'].isna()) & (data['Trial Type']=='Memory') & (data['Group']==1)].shape

(18661, 34)

In [17]:
# Proportion of Group 1 memory-trial responses (trials with a recorded rating) whose
# familiarity reaction time exceeds 2.012 s. Computed from `data` rather than from
# hand-copied row counts (736/18661) so the value cannot go stale if the dataset changes.
grp1_rated_memory = data[~(data['Familiarity Rating'].isna()) & (data['Trial Type']=='Memory') & (data['Group']==1)]
float((grp1_rated_memory['Familiarity Reaction Time (s)']>2.012).sum() / len(grp1_rated_memory))

0.03944054445099405

### Trials with no response recorded

All memory trials, across both experiments, with a Familiarity reaction time of zero have no button press recorded for that trial (nan).

(The default for no button press is `response: nan`, `response time: 0`)

In [18]:
# Ratings found on trials whose familiarity reaction time is exactly 0.
data[data['Familiarity Reaction Time (s)']==0]['Familiarity Rating'].unique()

array([nan])

All trials where no button press was recorded have a response time of 0 (the default), except for 22 trials in the Variable attention experiment, group 1.

These trials have no button press recorded but they have the maximum response time recorded (~3 secs). 

In [28]:
# Per experiment and group, count the memory trials that have no rating but a non-zero reaction time.
data[(data['Familiarity Rating'].isna()) & (data['Familiarity Reaction Time (s)']!=0) & (data['Trial Type']=='Memory')].groupby(['Experiment','Group']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Subject,Trial Type,Run,Cued Composite,Uncued Composite,Cued Face,Cued Place,...,Pre Invalid Cue,Attention Button,Rating History,Stimulus Onset,Stimulus End,Attention Probe,Trial,UniqueID,Attention Reaction Time (s).1,combined_cue
Experiment,Group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
/variabl,1,22,22,22,22,22,22,0,0,0,0,...,0,0,0,22,22,0,22,22,0,0


In [32]:
# Show the identifying columns of the memory trials that have no rating but a non-zero reaction time.
data[(data['Familiarity Rating'].isna()) & (data['Familiarity Reaction Time (s)']!=0) & (data['Trial Type']=='Memory')][['Experiment','Group','UniqueID','Run','Trial','Familiarity Rating','Familiarity Reaction Time (s)']]

Unnamed: 0,Experiment,Group,UniqueID,Run,Trial,Familiarity Rating,Familiarity Reaction Time (s)
24083,/variabl,1,60,0,3.0,,3.019
25291,/variabl,1,63,0,11.0,,3.013
25317,/variabl,1,63,0,37.0,,3.018
27761,/variabl,1,69,3,1.0,,3.001
27792,/variabl,1,69,3,32.0,,3.001
27891,/variabl,1,69,7,11.0,,3.0
27914,/variabl,1,69,7,34.0,,3.032
27919,/variabl,1,69,7,39.0,,3.033
28122,/variabl,1,70,1,2.0,,3.001
28248,/variabl,1,70,6,8.0,,3.001


In [33]:
# Shape of the same selection (memory trials with no rating but a non-zero reaction time).
data[(data['Familiarity Rating'].isna()) & (data['Familiarity Reaction Time (s)']!=0) & (data['Trial Type']=='Memory')][['Experiment','Group','UniqueID','Run','Trial','Familiarity Rating','Familiarity Reaction Time (s)']].shape

(22, 7)

In [34]:
# Shape of all memory trials from the '/variabl' experiment, Group 1 (denominator for the proportion below).
data[(data['Experiment']=='/variabl') & (data['Group']==1) & (data['Trial Type']=='Memory')][['Experiment','Group','UniqueID','Run','Trial','Familiarity Rating','Familiarity Reaction Time (s)']].shape

(9600, 7)

In [35]:
# Proportion of '/variabl' Group 1 memory trials that have no rating but a non-zero
# familiarity reaction time. Computed from `data` rather than from hand-copied
# row counts (22/9600) so the value cannot go stale if the dataset changes.
var_grp1_memory = data[(data['Experiment']=='/variabl') & (data['Group']==1) & (data['Trial Type']=='Memory')]
no_press_nonzero_rt = (var_grp1_memory['Familiarity Rating'].isna()) & (var_grp1_memory['Familiarity Reaction Time (s)']!=0)
float(no_press_nonzero_rt.sum() / len(var_grp1_memory))

0.0022916666666666667

In [39]:
# Show the current working directory.
# NOTE(review): looks like leftover debugging; the recorded output exposes an absolute
# local path -- consider removing this cell or clearing its output before sharing.
os.getcwd()

'/Users/student/Desktop/AM_RT_and_gaze/attention-memory-task/data_analysis_code'