## Produce a consolidated events file for BCIT Basic Guard Duty

A copy of the EEG.event structure is dumped to the dataset as `events_temp.tsv` files.
The `bcit_basic_guard_duty_01_initial_summary.ipynb` has already been run.
Manual review has confirmed that, for each session, either both versions of the event files
have the same number of events, or the session had two runs and, due to a bug in `ess2bids`,
the `events.tsv` file for run 1 is actually a copy of the `events.tsv` file for run 2.

This notebook creates an `_events_temp1.tsv` file for each `_events_temp.tsv` file as follows:
1. Read the eeg events.
2. Remove unwanted columns, add `onset` and `duration` columns, and rename `latency` to `sample` and `type` to `value`.
3. Reorder the columns to the `final_column_order`.
4. Replace all empty string and NaN entries with n/a.
5. Save as `_events_temp1.tsv`.

In [1]:
import os
from hed.tools import BidsTsvDictionary, HedLogger
from hed.util import get_file_list, get_new_dataframe, get_value_dict, replace_values

# Variables to set for the specific dataset
bids_root_path = 'F:/ARLBidsStart/BasicGuardDutyWorking'
exclude_dirs = ['sourcedata', 'stimuli', 'code']
entities = ('sub', 'ses', 'run')
eeg_drop_columns = ['urevent', 'usertags', 'imageid']
final_column_order = ['onset', 'duration', 'sample', 'value', 'gid']
log_file_name = 'bcit_basic_guard_duty_02_initial_combination_log.json'
sampling_rate_file = os.path.realpath(os.path.join(bids_root_path, 'code/samplingRates.tsv'))

# File-name endings of the input (dumped EEG.event) files and of the files this cell writes.
# They are named here so that the slicing below does not rely on hand-counted lengths.
eeg_name_suffix = "_events_temp"
eeg_file_ending = eeg_name_suffix + ".tsv"
out_file_ending = "_events_temp1.tsv"

# Construct the dictionary of EEG.event files and load the sampling-rate lookup
files_eeg = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix=eeg_name_suffix,
                          exclude_dirs=exclude_dirs)
eeg_dict = BidsTsvDictionary("EEG event files", files_eeg, entities=entities)
sampling_dict = get_value_dict(sampling_rate_file)

# Set up the logger and convert each EEG.event file into its _events_temp1.tsv version
logger = HedLogger()
for key, _file, _rowcount, _column_count in eeg_dict.iter_tsv_info():
    filename = eeg_dict.get_file_path(key)
    df_eeg = get_new_dataframe(filename)
    logger.add(key, f"Created a dataframe for {filename}")

    # The sampling rate is looked up by the file's base name with the
    # "_events_temp.tsv" ending removed.
    basename = os.path.basename(filename)
    sampling_rate = float(sampling_dict[basename[:-len(eeg_file_ending)]])
    logger.add(key, f"Looked up sampling rate of {sampling_rate}")

    df_eeg.drop(columns=eeg_drop_columns, inplace=True)
    logger.add(key, f"Dropped {eeg_drop_columns} drop_columns")

    # Latency is treated as a 1-based sample index: sample 1 maps to onset 0
    # (in seconds, presuming the sampling rate is in Hz).
    df_eeg['onset'] = (df_eeg['latency'] - 1.0) / sampling_rate
    logger.add(key, "Calculate onset from latency and add an onset column")
    df_eeg['duration'] = 'n/a'
    logger.add(key, "Add a duration column")
    df_eeg = df_eeg.rename(columns={"latency": "sample", "type": "value"})
    logger.add(key, "Rename the latency column as sample and type column as value")

    # reindex keeps only the listed columns, in the listed order; any listed
    # column that is missing from the frame is created and filled with NaN.
    df_eeg = df_eeg.reindex(columns=final_column_order)
    logger.add(key, f"Reordered the columns as {final_column_order}")

    # NOTE(review): the values replaced are a single space and the string 'NaN';
    # confirm whether truly empty strings ('') should be replaced as well.
    num_replaced = replace_values(df_eeg, values=[' ', 'NaN'], replace_value='n/a')
    logger.add(key, f"Replaced {num_replaced} blank or NaN values with n/a")

    # Write next to the input file, swapping the input ending for the output ending.
    filename_out = filename[:-len(eeg_file_ending)] + out_file_ending
    df_eeg.to_csv(filename_out, sep='\t', index=False)
    logger.add(key, f"Saved as {filename_out}")

# Output and save the log
logger.print_log()
save_path = os.path.join(bids_root_path, 'code')
logger.save_log(save_path, log_name=log_file_name)

# Output errors
print("\nERROR Summary:")
logger.print_log(level="ERROR")

sub-01_ses-01_run-1:
	[ Created a dataframe for F:\ARLBidsStart\BasicGuardDutyWorking\sub-01\ses-01\eeg\sub-01_ses-01_task-GuardDuty_run-1_events_temp.tsv]
	[ Looked up sampling rate of 1024.0]
	[ Dropped ['urevent', 'usertags', 'imageid'] drop_columns]
	[ Calculate onset from latency and add an onset column]
	[ Add a duration column]
	[ Rename the latency column as sample and type column as value]
	[ Reordered the columns as ['onset', 'duration', 'sample', 'value', 'gid']]
	[ Replaced 0 blank or NaN values with n/a]
	[ Saved as F:\ARLBidsStart\BasicGuardDutyWorking\sub-01\ses-01\eeg\sub-01_ses-01_task-GuardDuty_run-1_events_temp1.tsv]
sub-02_ses-01_run-1:
	[ Created a dataframe for F:\ARLBidsStart\BasicGuardDutyWorking\sub-02\ses-01\eeg\sub-02_ses-01_task-GuardDuty_run-1_events_temp.tsv]
	[ Looked up sampling rate of 1024.0]
	[ Dropped ['urevent', 'usertags', 'imageid'] drop_columns]
	[ Calculate onset from latency and add an onset column]
	[ Add a duration column]
	[ Rename the laten