## Restructure Attention Shift to have trials, sub-blocks and blocks

This script reads the `_eventstemp2.tsv` files and adds `trial`, `sub_block`,
and `focus_modality` columns to each one.
It then saves the modified dataframe to the corresponding `_eventstemp3.tsv` file.
Finally, it logs a report of the block and sub-block structure of each file
so that errors can be detected. Blocks are not stored as a column; they are
inferred from changes in `cond_code` and appear only in this report.

The original data has some problematic event encoding.
This script and `attention_shift_04_fix_cond_codes.ipynb` were executed
iteratively until all errors had been corrected.


In [1]:
import os
import datetime
import pandas as pd
from hed.tools import BidsTabularDictionary, get_file_list, get_new_dataframe, HedLogger

# Dataset-specific settings -- edit these when pointing the notebook at another dataset.
bids_root_path = '/XXX/AttentionShiftWorking'
exclude_dirs = ['sourcedata', 'stimuli', 'code']
entities = ('sub', 'run')
skip_cols = ['onset', 'duration', 'sample']  # not referenced by the cells shown in this notebook
log_name = 'attention_shift_05_add_trials_log'

# Logger for this run; the log file path is later joined onto bids_root_path.
log_file_name = f"code/curation_logs/{log_name}.txt"
logger = HedLogger(name=log_name)

# Gather the *_eventstemp2.tsv files under the dataset root (skipping exclude_dirs)
# and wrap them in a dictionary built from the chosen entities.
bids_files = get_file_list(
    bids_root_path,
    extensions=[".tsv"],
    name_suffix="_eventstemp2",
    exclude_dirs=exclude_dirs,
)
bids_dict = BidsTabularDictionary("Event files", bids_files, entities=entities)

In [2]:
# Event codes that open a new sub-block and event codes that open a new trial.
block_markers = ['1', '2']
trial_markers = ['3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']

for key, file in bids_dict.iter_files():
    filename = file.file_path
    df = get_new_dataframe(filename)

    # An empty file has no first row to seed the block report below;
    # record it as an error and move on rather than failing part-way through.
    if df.empty:
        logger.add(key, f"{filename} has no events, so no columns were added", level="ERROR")
        continue

    # Convert the code columns to strings once. Doing this on the whole column works
    # whether the reader produced numeric or string values, whereas calling
    # .astype(str) on an individual cell fails when the cell is a plain Python str.
    event_codes = df['event_code'].astype(str).tolist()
    cond_codes = df['cond_code'].astype(str).tolist()

    ## Set the sub block, trial and focus_modality columns
    sub_block_total = 0
    trial_total = 0
    shift_modality = 'n/a'   # last modality cued while cond_code was 3
    sub_block_col = []
    trial_col = []
    focus_col = []
    for event_code, cond_code in zip(event_codes, cond_codes):
        # A block marker starts a new sub-block; the marker row belongs to the new one.
        if event_code in block_markers:
            sub_block_total += 1
        sub_block_col.append(sub_block_total)

        # A trial marker starts a new trial; the marker row belongs to the new one.
        if event_code in trial_markers:
            trial_total += 1
        trial_col.append(trial_total)

        # cond_code 1 and 2 fix the focus modality directly. For cond_code 3 the
        # modality follows the most recent block marker seen while cond_code was 3.
        if cond_code == '1':
            focus_col.append('auditory')
        elif cond_code == '2':
            focus_col.append('visual')
        elif cond_code != '3':
            focus_col.append('n/a')
        else:
            if event_code == '1':
                shift_modality = 'auditory'
            elif event_code == '2':
                shift_modality = 'visual'
            focus_col.append(shift_modality)

    # Assign each new column in one step instead of writing cell by cell.
    df['sub_block'] = sub_block_col
    df['trial'] = trial_col
    df['focus_modality'] = focus_col

    logger.add(key, f"Added columns with {sub_block_total} sub_blocks and {trial_total} trials and shift_modality" )
    # Replace the trailing "2.tsv" of the input name with "3.tsv" for the output file.
    filename = filename[:-5] + "3.tsv"
    df.to_csv(filename, sep='\t', index=False)
    logger.add(key, f"Saved the file to {filename}")

    # Produce a log of the counts of the blocks and trials.
    # A block ends on the row before cond_code changes, but it is only reported
    # once the next block marker is reached. Row labels are assumed to be
    # consecutive integers starting at 0, as the original indexing also required.
    start_trial = 1
    start_sub_block = 1
    start_row = 0
    end_row = -1
    end_trial = 0
    end_sub_block = 0
    change_block = False
    current_block = 1
    last_cond = pd.to_numeric(df.loc[0, 'cond_code'])
    current_cond = last_cond
    for idx in df.index:
        cond = pd.to_numeric(df.at[idx, 'cond_code'])
        if cond != last_cond:
            # cond_code changed: remember where the previous block ended.
            end_row = idx - 1
            end_trial = pd.to_numeric(df.loc[end_row, 'trial'])
            end_sub_block = pd.to_numeric(df.loc[end_row, 'sub_block'])
            change_block = True
            current_cond = last_cond

        if change_block and str(df.at[idx, 'event_code']) in block_markers:
            sub_blocks = end_sub_block - start_sub_block + 1
            trials = end_trial - start_trial + 1
            start_trial = end_trial + 1
            start_sub_block = end_sub_block + 1
            logger.add(key,
                   f" Block {current_block} {start_row}:{end_row + 1} (cond_code {current_cond}): "
                   f"{sub_blocks} sub-blocks {trials} trials")

            change_block = False
            current_block = current_block + 1
            start_row = end_row + 1
        last_cond = cond

    # Report the final block; the running totals equal the values in the last row.
    sub_blocks = sub_block_total - start_sub_block + 1
    trials = trial_total - start_trial + 1
    logger.add(key,
               f" Block {current_block} {start_row}:{len(df.index)} (cond_code {last_cond}): "
               f"{sub_blocks} sub-blocks {trials} trials")

# Print the full log followed by a summary of the ERROR-level entries.
log_string = "\n\nLog output:\n" + logger.get_log_string()
error_string = "\n\nERROR Summary:\n" + logger.get_log_string(level="ERROR")
print(log_string)
print(error_string)

# Write the same text to the log file under the dataset root.
save_path = os.path.join(bids_root_path, log_file_name)
# Create the log directory if needed so a missing folder does not discard the run's log.
log_dir = os.path.dirname(save_path)
if log_dir:
    os.makedirs(log_dir, exist_ok=True)
# Use an explicit encoding so the log does not depend on the platform default.
with open(save_path, "w", encoding="utf-8") as fp:
    fp.write(f"{log_file_name} {datetime.datetime.now()}\n")
    fp.write(log_string)
    fp.write(error_string)



Log output:
attention_shift_05_add_trials_log: Level None
sub-001_run-01:
	[ Added columns with 479 sub_blocks and 4793 trials and shift_modality]
	[ Saved the file to G:\AttentionShift\AttentionShiftWorking\sub-001\eeg\sub-001_task-AuditoryVisualShift_run-01_eventstemp3.tsv]
	[  Block 1 0:294 (cond_code 1): 24 sub-blocks 240 trials]
	[  Block 2 294:583 (cond_code 2): 24 sub-blocks 240 trials]
	[  Block 3 583:4131 (cond_code 3): 287 sub-blocks 2875 trials]
	[  Block 4 4131:4992 (cond_code 1): 72 sub-blocks 718 trials]
	[  Block 5 4992:5856 (cond_code 2): 72 sub-blocks 720 trials]
sub-002_run-01:
	[ Added columns with 480 sub_blocks and 4795 trials and shift_modality]
	[ Saved the file to G:\AttentionShift\AttentionShiftWorking\sub-002\eeg\sub-002_task-AuditoryVisualShift_run-01_eventstemp3.tsv]
	[  Block 1 0:289 (cond_code 1): 24 sub-blocks 240 trials]
	[  Block 2 289:578 (cond_code 2): 24 sub-blocks 240 trials]
	[  Block 3 578:4144 (cond_code 3): 288 sub-blocks 2878 trials]
	[  Bloc