In [15]:
import pandas as pd
import os

In [16]:
# Subject / run to inspect interactively.
sub = '001'
run = '1'

# Load the raw log and derive the event columns in one chain:
#   trial_type      - 'house' / 'face' depending on which substring occurs in ImgPath
#                     (None when neither 'houses' nor 'faces' is present)
#   onset, end_time - rounded to 2 decimals
#   duration        - img_dur rounded to 1 decimal
df = (
    pd.read_csv(f'{sub}/{sub}_run{run}.csv')
    .assign(
        trial_type=lambda d: d['ImgPath'].apply(
            lambda p: 'house' if 'houses' in p else ('face' if 'faces' in p else None)
        ),
        onset=lambda d: d['onset'].round(2),
        duration=lambda d: d['img_dur'].round(1),
        end_time=lambda d: d['end_time'].round(2),
    )
    .loc[:, ['trial_type', 'onset', 'end_time', 'duration']]  # keep only desired cols
)

df

Unnamed: 0,trial_type,onset,end_time,duration
0,house,15.00,15.75,0.3
1,house,15.75,16.50,0.3
2,house,16.50,17.25,0.3
3,house,17.25,18.00,0.3
4,house,18.00,18.75,0.3
...,...,...,...,...
235,face,356.25,357.00,0.3
236,face,357.00,357.75,0.3
237,face,357.75,358.50,0.3
238,face,358.50,359.25,0.3


In [18]:
# Collapse the trial table 'df' into 'new_df': one row per chunk of 20 consecutive rows.
def _summarise_block(trials):
    """Return trial_type, rounded onset and rounded total duration for one chunk of rows."""
    first_onset = trials['onset'].iloc[0]
    last_end = trials['end_time'].iloc[-1]
    return pd.Series({
        'trial_type': trials['trial_type'].unique()[0],
        'onset': round(first_onset),
        'duration': round(last_end - first_onset),
    })

block_id = df.index // 20  # rows 0-19 -> 0, rows 20-39 -> 1, ...
new_df = df.groupby(block_id).apply(_summarise_block).reset_index(drop=True)

new_df


Unnamed: 0,trial_type,onset,duration
0,house,15,15
1,face,45,15
2,house,75,15
3,face,105,15
4,house,135,15
5,face,165,15
6,house,195,15
7,face,225,15
8,house,255,15
9,face,285,15


In [3]:
# Build one block-level events file per subject and localizer run.
# loop over all subjects
for i in range(1, 6):

    # zero-pad the subject number: 1 -> '001', ..., 5 -> '005'
    sub = str(i).zfill(3)

    # every run of this subject is saved into the same folder
    save_sub_path = f'cleaned_data/{sub}'

    # loop over all runs 1 & 2
    for run in range(1, 3):

        # load the raw trial log
        df = pd.read_csv(f'{sub}/{sub}_run{run}.csv')

        # clean df: label each trial by the substring found in its image path
        # (None when neither 'houses' nor 'faces' occurs) and round the timings
        df['trial_type'] = df['ImgPath'].apply(
            lambda x: 'house' if 'houses' in x else 'face' if 'faces' in x else None
        )
        df['onset'] = df['onset'].round(2)
        df['duration'] = df['img_dur'].round(1)
        df['end_time'] = df['end_time'].round(2)
        df = df[['trial_type', 'onset', 'end_time', 'duration']]  # keep only desired cols

        # block design df:
        # rows are grouped purely by position (index // 20), i.e. rows 0-19 form the
        # first block, rows 20-39 the second, and so on; a shorter final chunk is kept.
        # NOTE(review): assumes every block is exactly 20 consecutive rows of a single
        # trial_type - a missing or extra trial would shift all later blocks; confirm.
        blocked_df = df.groupby(df.index // 20).apply(lambda x: pd.Series({
            'trial_type': x['trial_type'].unique()[0],                       # label of the block's first trial
            'onset': round(x['onset'].iloc[0]),                              # first trial onset, whole seconds
            'duration': round(x['end_time'].iloc[-1] - x['onset'].iloc[0])   # first onset -> last end
        })).reset_index(drop=True)

        # save cleaned df as csv (exist_ok replaces the separate existence check)
        os.makedirs(save_sub_path, exist_ok=True)

        # index=False: the index is only the 0..n-1 row counter; writing it would add an
        # unnamed leading column that reads back as 'Unnamed: 0'
        blocked_df.to_csv(
            f'{save_sub_path}/sub-{sub}_task-localizer_run-{run}_events.csv',
            index=False,
        )


In [3]:
# Convert the raw face-task logs of every subject and run into events TSV files.
# loop over all subjects
for i in range(1, 6):

    # zero-pad the subject number: 1 -> '001', ..., 5 -> '005'
    sub = str(i).zfill(3)

    # the raw file prefix depends on the parity of the SUBJECT number (not the run):
    # odd-numbered subjects read RUNv1 files, even-numbered subjects read RUNv2 files
    version = 'RUNv1' if i % 2 == 1 else 'RUNv2'

    # every run of this subject is saved into the same folder
    save_sub_path = f'data/{sub}'

    # loop over all runs 1-6
    for run in range(1, 7):

        df = pd.read_csv(f'raw_data/{sub}/{version}_{sub}_run{run}.csv')

        # clean df
        # onset: start time rounded to whole seconds, stored as int
        df['onset'] = df['global_time_start'].round().astype(int)
        # duration is measured from the ROUNDED onset to the unrounded end_time
        df['duration'] = df['end_time'] - df['onset']

        # keep only desired cols; onset and duration come first because the file uses
        # BIDS events naming and the BIDS validator expects them as the first two columns
        df = df[['onset', 'duration', 'event_type']].rename(columns={'event_type': 'trial_type'})

        # save cleaned df as TSV (exist_ok replaces the separate existence check)
        os.makedirs(save_sub_path, exist_ok=True)

        # index=False: otherwise the 0..n-1 row counter is written as an unnamed first column
        df.to_csv(
            f'{save_sub_path}/sub-{sub}_task-face_run-{run}_events.tsv',
            sep='\t',
            index=False,
        )
