## Restructure Attention Shift to have trials, sub-blocks and blocks

This script works with files of the form `_events_temp1.tsv` with columns
`onset`, `duration`, `sample`, `trial_type`, `value`, `event_code`, and `cond_code`.
Each `key` is of the form `sub-xxx_run-y`, which
uniquely specifies an event file in the dataset.

**Transformations:**
1. Delete the `trial_type` and the `value` column.
2. Rename `repetition_type` as `rep_status` and `trigger` as `value`.
3. Insert a column called `trial` with the trial number. Trial anchors are `show_face_initial`
and `show_cross`.  The excluded tags are `setup_left_sym` and `setup_right_sym`.
4. The `show_cross` value column should be 1.
5. Insert new column `rep_lag`.
6. Reorder the columns as `onset`, `duration`, `sample`, `event_type`, `face_type`,
`rep_status`, `rep_lag`, `value`, and `stim_file`.
7. Save as `*_events_temp2.tsv`

In [3]:
from hed.tools.hed_logger import HedLogger
from hed.util.io_util import get_file_list, make_file_dict
from hed.util.data_util import get_new_dataframe

def set_anchor_blocks(df, col_name, anchor_mask):
    """Add a column holding the running count of anchor rows.

    Each row receives the number of anchor rows seen up to and including
    that row, so rows that precede the first anchor receive 0.

    Parameters:
        df (DataFrame): Modified in place; gains (or overwrites) the column col_name.
        col_name (str): Name of the column that receives the running count.
        anchor_mask (Series): Boolean series with one entry per row of df; True marks an anchor.

    Returns:
        int: Total number of anchor rows in anchor_mask.
    """
    # A cumulative sum yields the running count directly. It avoids
    # Series.iteritems (removed in pandas 2.0) and does not treat index labels
    # as list positions, so it also works when the index is not 0..n-1.
    block_numbers = anchor_mask.astype(int).cumsum().to_numpy()
    # Assign positionally (as the list-based version did) rather than by index label.
    df[col_name] = block_numbers
    return int(block_numbers[-1]) if len(block_numbers) else 0

# Set up the logger that accumulates status messages per file key
status = HedLogger()

# Make the dictionary of the _events_temp1.tsv files.
# Raw string: the path contains backslashes that must not be read as escapes.
bids_root_path = r'G:\AttentionShift\AttentionShiftWorking'
bids_files = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events_temp1")
# NOTE(review): name_indices=(0, 2) presumably builds the sub-xxx_run-y key from
# the file name pieces -- confirm against make_file_dict.
bids_dict = make_file_dict(bids_files, name_indices=(0, 2))

# Event codes (compared as strings) that start a new trial and a new sub-block.
trial_anchors = ['3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']
sub_anchors = ['1', '2']
for key, file in bids_dict.items():
    df = get_new_dataframe(file)

    # Number the trials: each trial-anchor event increments the trial count
    trial_mask = df['event_code'].map(str).isin(trial_anchors)
    trial_count = set_anchor_blocks(df, 'trial', trial_mask)
    status.add(key, f"Added trial column: total trials {trial_count}")

    # Number the sub-blocks in the same way using the sub-block anchors
    sub_mask = df['event_code'].map(str).isin(sub_anchors)
    sub_count = set_anchor_blocks(df, 'sub_block', sub_mask)
    status.add(key, f"Added sub_block column: total sub blocks {sub_count}")

    # Number the condition blocks: a new block starts whenever cond_code changes
    # from the previous row. cond_counts tallies the blocks per cond_code
    # (index is cond_code - 1, so cond_code is expected to be 1, 2 or 3).
    cond_code = 0
    total_cond = 0
    df['cond_block'] = 0
    cond_counts = [0, 0, 0]
    for ind, row in df.iterrows():
        if row['cond_code'] != cond_code:
            total_cond += 1
            cond_counts[row['cond_code'] - 1] += 1
            cond_code = row['cond_code']
        df.loc[ind, 'cond_block'] = total_cond
    status.add(key, f"{total_cond} condition blocks, cond block counts: {str(cond_counts)}")

    # Time since the previous event; diff() leaves the first row NaN, so the
    # first row is set to its own onset.
    df['time_df'] = df['onset'].diff()
    df.loc[0, 'time_df'] = df.loc[0, 'onset']
    status.add(key, "Added the time_df column giving difference in onsets of successive events")

    # Report gaps longer than 1 second, skipping events with code 202 (the pauses)
    gap_mask1 = df['time_df'] > 1.0
    gap_mask2 = df['event_code'].map(str) != '202'
    df_gaps = df.loc[gap_mask1 & gap_mask2, :]
    status.add(key, f"Total of {sum(gap_mask1)} gaps of which {sum(~gap_mask2)} are pauses")
    cond_counts = [0, 0, 0]
    for ind, row in df_gaps.iterrows():
        status.add(key, f"Event:{ind} Gap:{round(row['time_df'])}s event_code:{row['event_code']} "
        f"cond_code:{row['cond_code']} sub_block:{row['sub_block']}")
        cond_counts[row['cond_code'] - 1] += 1
    status.add(key, f"Total cond_code counts for individual blocks: {str(cond_counts)}")

    # Summarize each condition block: its row index range, its cond_code, and the
    # spread (max - min) of the sub_block and trial numbers it contains
    for i in range(total_cond):
        sub_blocks = df.loc[df['cond_block'] == (i + 1), 'sub_block']
        sub_count = sub_blocks.max() - sub_blocks.min()
        sub_index = sub_blocks.index
        trials =  df.loc[df['cond_block'] == (i + 1), 'trial']
        trial_count = trials.max() - trials.min()
        cond_code = df.loc[df['cond_block'] == (i + 1), 'cond_code']
        status.add(key, f"Block {i + 1} [{sub_index.min()}, {sub_index.max()}] (cond_code {cond_code.iloc[0]}):"
                  f"{sub_count} sub blocks  {trial_count} trials")

    # Replace the trailing "_temp1.tsv" (10 characters) with "_temp2.tsv"
    filename = file[:-10] + "_temp2.tsv"
    df.to_csv(filename, sep='\t', index=False)
    status.add(key, "Saved as _events_temp2.tsv")

In [4]:
# Print the status messages accumulated for each file key
status.print_log()

sub-001_run-01
	Added trial column: total trials 4793
	Added sub_block column: total sub blocks 479
	5 condition blocks, cond block counts: [2, 2, 1]
	Added the time_df column giving difference in onsets of successive events
	Total of 26 gaps of which 6 are pauses
	Event:0 Gap:198s event_code:1 cond_code:1 sub_block:1
	Event:295 Gap:43s event_code:2 cond_code:2 sub_block:25
	Event:584 Gap:38s event_code:1 cond_code:3 sub_block:49
	Event:879 Gap:42s event_code:2 cond_code:3 sub_block:73
	Event:1173 Gap:49s event_code:1 cond_code:3 sub_block:97
	Event:1469 Gap:30s event_code:2 cond_code:3 sub_block:121
	Event:1765 Gap:33s event_code:1 cond_code:3 sub_block:145
	Event:2061 Gap:73s event_code:2 cond_code:3 sub_block:169
	Event:2355 Gap:54s event_code:1 cond_code:3 sub_block:193
	Event:2650 Gap:56s event_code:2 cond_code:3 sub_block:216
	Event:2945 Gap:49s event_code:1 cond_code:3 sub_block:240
	Event:3242 Gap:15s event_code:2 cond_code:3 sub_block:264
	Event:3539 Gap:18s event_code:1 cond_