## Restructure the WH-MEEG events

This script summarizes the event structures in the W-H-MEEG dataset
for consistency and potential remapping.  The script assumes that
a previous restructuring of the initial events in `*_events.tsv`
has been accomplished using `wh_fix_initial_events.ipynb` and that the result has
been dumped to `*_events_temp1.tsv`.  This script performs various
transformations and dumps the result to `*_events_temp2.tsv`.

**Transformations:**
1. Delete the `value`, `trial_type`, and `response_time` columns.
2. Rename the `repetition_type` column as `rep_status`.
3. Rename the `trigger` column as `value`.
4. Insert a column called `trial` with the trial number. Trial anchors are `show_face_initial`
and `show_cross`.  The excluded tags are `setup_left_sym` and `setup_right_sym`.
5. The `value` column should be 1 when `event_type` is `show_cross`.
6. The `event_type` column should be `double_press` when the value column is 4352.
7. Insert new column `rep_lag` and calculate based on the `rep_status` column values and the `stim_file` values.
8. Reorder the columns as `onset`, `duration`, `sample`, `event_type`, `face_type`,
`rep_status`, `rep_lag`, `trial`, `value`, and `stim_file`.
9. Save as `*_events_temp2.tsv`.

In [1]:
from hed.tools.io_utils import get_file_list, make_file_dict
from hed.tools.data_utils import get_new_dataframe
from hed.tools.hed_logger import HedLogger

# Locate every *_events_temp1.tsv file under the working root and key them for logging.
bids_root_path = 'G:/WH_working2'
bids_files = get_file_list(bids_root_path, extensions=['.tsv'], name_suffix='_events_temp1')
file_dict = make_file_dict(bids_files, indices=(0, -3))
srate = 1100  # not referenced in this cell; kept in case later cells rely on it
final_order = ['onset', 'duration', 'sample', 'event_type', 'face_type', 'rep_status',
               'rep_lag', 'trial', 'value', 'stim_file']
# Event types that start a new trial, and setup event types that belong to no trial.
trial_anchors = ['show_cross', 'show_face_initial']
trial_excluded = ['setup_left_sym', 'setup_right_sym']

print(f"BIDS form of the events: {len(file_dict)}")
status = HedLogger()
for key, file in file_dict.items():
    df = get_new_dataframe(file)
    df.drop(columns=['value', 'trial_type', 'response_time'], inplace=True)
    status.add(key, "Drop the value, trial_type, and response_time columns")

    df.rename(columns={'repetition_type': 'rep_status', 'trigger': 'value'}, inplace=True)
    status.add(key, "Rename repetition_type column as rep_status and trigger column as value")

    # The trial number is the count of anchor events seen so far (0 before the first anchor).
    # The column is cast to object so that it can also hold the 'n/a' marker.
    anchor_mask = df['event_type'].isin(trial_anchors)
    df['trial'] = anchor_mask.cumsum().astype(object)
    exclude_mask = df['event_type'].isin(trial_excluded)
    df.loc[exclude_mask, 'trial'] = 'n/a'
    status.add(key, "Set the trial numbers")

    show_cross_mask = df['event_type'].map(str) == 'show_cross'
    df.loc[show_cross_mask, 'value'] = 1
    status.add(key, "Set value columns to 1 when event_type is show_cross")

    double_press_mask = df['value'].map(str) == '4352'
    df.loc[double_press_mask, 'event_type'] = 'double_press'
    status.add(key, "Set event_type columns to double_press when value is 4352")

    df['rep_lag'] = 'n/a'
    immediate_mask = df['rep_status'].map(str) == 'immediate_repeat'
    df.loc[immediate_mask, 'rep_lag'] = 1
    status.add(key, "Set the rep_lag to 1 for rep_status equals immediate_repeat")

    # stim_dict remembers the trial in which each stimulus file first appeared;
    # a delayed repeat's lag is its trial number minus that first trial number.
    stim_dict = {}
    for ind in df.index:
        stim_file = df.loc[ind, 'stim_file']
        if stim_file == 'n/a':
            continue
        if stim_file not in stim_dict:
            stim_dict[stim_file] = df.loc[ind, 'trial']
        elif df.loc[ind, 'rep_status'] == 'delayed_repeat':
            df.loc[ind, 'rep_lag'] = df.loc[ind, 'trial'] - stim_dict[stim_file]
    status.add(key, "Create and fill in the rep_lag column when rep_status is delayed_repeat")

    # Validate the columns before reordering.  reindex() silently drops columns that are
    # not in final_order and adds empty ones for those that are missing, so a file with
    # an unexpected layout is skipped rather than written out in a corrupted form.
    col_list = list(df)
    if len(col_list) != len(final_order):
        status.add(key, f"ERROR {key} dataframe has wrong number of columns {len(col_list)}", also_print=True)
        continue
    unexpected = [item for item in col_list if item not in final_order]
    for item in unexpected:
        status.add(key, f"ERROR dataframe column {item} should not be there", also_print=True)
    if unexpected:
        continue
    df = df.reindex(columns=final_order)
    status.add(key, "Reorder the columns")

    # Strip the trailing '_temp1.tsv' (10 characters) and append the new suffix.
    filename = file[:-10] + "_temp2.tsv"
    df.to_csv(filename, sep='\t', index=False)
    status.add(key, "Save the file as _events_temp2.tsv")


BIDS form of the events: 108


In [2]:
# Print the messages that the previous cell accumulated in `status` (sample output follows).
status.print_log()

sub-002_run-1
	Dropped the value, trial_type, and response_time columns
	Renamed repetition_type column as rep_status and trigger column as value
	Set the trial numbers
	Set value columns to 1 when event_type is show_cross
	Set event_type columns to double_press when value is 4352
	Set the rep_lag to 1 for rep_status equals immediate_repeat
	Create and fill in the rep_lag column when rep_status is delayed_repeat
	Reorder the columns
	Save the file as _events_temp2.tsv
sub-002_run-2
	Dropped the value, trial_type, and response_time columns
	Renamed repetition_type column as rep_status and trigger column as value
	Set the trial numbers
	Set value columns to 1 when event_type is show_cross
	Set event_type columns to double_press when value is 4352
	Set the rep_lag to 1 for rep_status equals immediate_repeat
	Create and fill in the rep_lag column when rep_status is delayed_repeat
	Reorder the columns
	Save the file as _events_temp2.tsv
sub-002_run-3
	Dropped the value, trial_type, and resp