In [6]:
# Pandas is used for data manipulation
import pandas as pd
import numpy as np

# Load the activation-ROI and deactivation-ROI voxel tables (semicolon-separated CSV)
dataROI_in = pd.read_csv('icse14_roi_3mmVoxel.csv', sep=';', decimal='.')
datadeact = pd.read_csv('icse14_roi_deact.csv', sep=';', decimal='.')

# Keep only rows with 68 < scan < 969 and renumber the rows from 0 afterwards.
# Scans up to 68 are warm-up; the late scans cannot be assigned to a task.
# NOTE(review): the strict "<" also drops scan 969 itself -- confirm this is intended.
dataROI_in = dataROI_in[(dataROI_in.scan > 68) & (dataROI_in.scan < 969)].reset_index(drop=True)
datadeact = datadeact[(datadeact.scan > 68) & (datadeact.scan < 969)].reset_index(drop=True)

# Columns that are not voxel signals and therefore must not be z-transformed.
# They are re-attached from dataROI_in at the end of this cell.
meta_cols = ['scan', 'trial', 'task', 'snippet', 'response']

# Activation table: 'proband' stays (it is the grouping key below); every
# remaining column gets the '_act' suffix.
dataROI = dataROI_in.drop(columns=meta_cols).add_suffix('_act')

# Deactivation table: additionally drop the 'Unnamed: 974' column and 'proband'
# (the activation table already carries it), then suffix with '_deact'.
datadeact = datadeact.drop(columns=meta_cols + ['Unnamed: 974', 'proband']).add_suffix('_deact')

# Side-by-side join; both frames were re-indexed from 0 above, so rows pair up by position
data = pd.concat([dataROI, datadeact], axis=1)

# === z transformation, separately for every proband ==========================
per_proband = data.groupby(['proband_act'])
z_data = per_proband.transform(lambda voxel: (voxel - voxel.mean()) / voxel.std())

# === outlier handling ========================================================
# Blank every value that lies more than 4 standard deviations from its column
# mean, then fill the gaps by interpolating along the rows.
rescaled = (z_data - z_data.mean()) / z_data.std()
z_data = z_data.mask(rescaled.abs() > 4).interpolate()

# === re-attach the columns that were excluded from the transformation ========
for meta in ['proband'] + meta_cols:
    z_data[meta] = dataROI_in[meta]

# `data` and `z_data` are the same object from here on
data = z_data


In [12]:
# Show the first five rows of `data` as a quick visual check
data.head(5)

Unnamed: 0,149-108-79_act,149-111-79_act,149-108-82_act,152-108-70_act,152-111-70_act,152-108-73_act,152-111-73_act,152-105-76_act,152-108-76_act,152-105-79_act,...,aggr_6_groups0,aggr_6_groups1,aggr_6_groups2,aggr_6_groups3,aggr_6_groups4,aggr_6_groups5,aggr_6_groups6,aggr_6_groups7,aggr_6_groups8,aggr_6_groups9
0,0.52381,0.982681,0.430325,0.755873,1.122842,1.256206,1.271991,0.497764,0.841114,0.348136,...,fifth,second,fourth,first,first,fourth,sixth,fifth,third,fourth
1,-0.075237,-0.051955,0.421203,-1.574324,-1.717728,-1.264377,-1.490963,-0.389934,-0.721982,0.472517,...,fifth,second,fourth,first,first,fourth,sixth,fifth,third,fourth
2,0.651193,0.26667,0.206841,0.164485,0.411769,0.976141,0.96229,0.484564,0.910585,-0.025006,...,fifth,second,fourth,first,first,fourth,sixth,fifth,third,fourth
3,-0.426402,-0.764385,-0.185395,-0.732239,-0.649256,-0.641007,-0.457465,-0.719934,-0.425152,-0.398148,...,fifth,second,fourth,first,first,fourth,sixth,fifth,third,fourth
4,-1.734664,-1.07227,-0.459048,-1.172566,-0.064762,-1.821495,-0.659293,-2.884729,-2.90716,-1.17164,...,fifth,second,fourth,first,first,fourth,sixth,fifth,third,fourth


In [13]:
def split_list(alist, wanted_parts=1):
    """Cut a sequence into consecutive slices of (nearly) equal length.

    alist:        the sequence to split (anything supporting len() and slicing)
    wanted_parts: how many slices to produce

    Returns a list with `wanted_parts` slices that together cover `alist` in
    order. Slice boundaries are computed with integer division, so when the
    length is not divisible the later slices are the longer ones.
    """
    total = len(alist)
    chunks = []
    for part in range(wanted_parts):
        start = part * total // wanted_parts
        stop = (part + 1) * total // wanted_parts
        chunks.append(alist[start:stop])
    return chunks


In [14]:
import numpy as np
import random
# Number of random groupings that are generated for every group count
numVariants = 10
# Snippet names that get partitioned into groups (shuffled in place later on)
tasks = ['Swap','Median','Factorial','CrossSum','FindLargestOfThree','ReverseArray','Maximum','DecimalToBinary','CountSameChars','Substring','ReverseString','Power']

# Seed the global random generator so that the random.shuffle(tasks) calls in
# the grouping cell yield the same groupings on every "Restart & Run All".
RANDOM_SEED = 42
random.seed(RANDOM_SEED)


In [15]:
# Ordinal names given to the task groups, in the order split_list returns them
GROUP_LABELS = ['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
# One labelled window = the 15 rows preceding the first comprehension ('C') row
# that is found, plus 60 rows starting at that row (75 labels in total).
ROWS_BEFORE_C = 15
ROWS_FROM_C = 60


def build_group_labels(frame, groups):
    """Build the list of group labels for one random task grouping.

    frame:  DataFrame indexed 0..len-1 that has 'snippet' and 'task' columns
    groups: list of lists of snippet names; group i is labelled GROUP_LABELS[i]
            (at most len(GROUP_LABELS) groups are supported)

    The rows are scanned from top to bottom. Whenever a row has task == 'C' and
    a snippet that belongs to one of the groups, 75 copies of that group's
    label are appended and the scan resumes 60 rows further down.

    The labels are appended one window after the other, so entry j of the
    result describes row j only if the windows tile the table without gaps.
    NOTE(review): this presumes every trial spans exactly 75 rows with the
    'C' phase starting at offset 15 -- confirm against the data.

    Returns the list of labels (it may be shorter than the frame).
    """
    label_of = {}
    for group_no, members in enumerate(groups):
        for snippet in members:
            label_of[snippet] = GROUP_LABELS[group_no]

    labels = []
    n_rows = len(frame)
    row = 0
    while row < n_rows:
        label = label_of.get(frame.at[row, 'snippet'])
        if label is not None and frame.at[row, 'task'] == 'C':
            labels.extend([label] * (ROWS_BEFORE_C + ROWS_FROM_C))
            # Jump past the window with a dedicated step instead of re-using
            # the scan index as an inner loop variable; that way no further
            # group test is evaluated against a row other than the matched one.
            row += ROWS_FROM_C
        else:
            row += 1
    return labels


# For 2, 3, 4 and 6 groups create `numVariants` random groupings each and store
# them as columns aggr_<groups>_groups<variant>.
# The columns are added to `data` in place (no re-binding) because `z_data`
# refers to the same DataFrame and is the name that is exported to CSV.
for n_groups in (2, 3, 4, 6):
    for variant in range(numVariants):
        # in-place shuffle: every variant starts from the previous order
        random.shuffle(tasks)
        groups = split_list(tasks, n_groups)
        colName = 'aggr_{}_groups{}'.format(n_groups, variant)
        data[colName] = pd.Series(build_group_labels(data, groups))

In [17]:
# Write the final table to disk (semicolon-separated, without the index column).
# `z_data` is the same DataFrame object as `data` (`data = z_data` in the first
# cell) and the aggr_*_groups* columns were assigned onto it in place, so they
# are part of this export.
z_data.to_csv('data_task_groups_roi_deact.csv',sep=';',decimal='.', index=False)

In [16]:
# Show the first five rows again; the aggr_*_groups* columns assigned by the
# grouping loops appear at the right-hand end of the table
data.head(5)

Unnamed: 0,149-108-79_act,149-111-79_act,149-108-82_act,152-108-70_act,152-111-70_act,152-108-73_act,152-111-73_act,152-105-76_act,152-108-76_act,152-105-79_act,...,aggr_6_groups0,aggr_6_groups1,aggr_6_groups2,aggr_6_groups3,aggr_6_groups4,aggr_6_groups5,aggr_6_groups6,aggr_6_groups7,aggr_6_groups8,aggr_6_groups9
0,0.52381,0.982681,0.430325,0.755873,1.122842,1.256206,1.271991,0.497764,0.841114,0.348136,...,first,third,first,fifth,first,third,sixth,second,fifth,fourth
1,-0.075237,-0.051955,0.421203,-1.574324,-1.717728,-1.264377,-1.490963,-0.389934,-0.721982,0.472517,...,first,third,first,fifth,first,third,sixth,second,fifth,fourth
2,0.651193,0.26667,0.206841,0.164485,0.411769,0.976141,0.96229,0.484564,0.910585,-0.025006,...,first,third,first,fifth,first,third,sixth,second,fifth,fourth
3,-0.426402,-0.764385,-0.185395,-0.732239,-0.649256,-0.641007,-0.457465,-0.719934,-0.425152,-0.398148,...,first,third,first,fifth,first,third,sixth,second,fifth,fourth
4,-1.734664,-1.07227,-0.459048,-1.172566,-0.064762,-1.821495,-0.659293,-2.884729,-2.90716,-1.17164,...,first,third,first,fifth,first,third,sixth,second,fifth,fourth
