## Produce a consolidated events file for BCIT Calibration Driving

A copy of the EEG.event structure is dumped to the dataset as `eventstemp.tsv` files.
The `bcit_calibration_driving_01_initial_summary.ipynb` has already been run.
Manual review has confirmed that, for every session, one of two cases holds: either both
versions of the event files have the same number of events, or the session had two runs and,
due to a bug in `ess2bids`, the `events.tsv` file for run 1 is actually a copy of the
`events.tsv` file from run 2.

This notebook creates an `_eventstemp1.tsv` file for each `_eventstemp.tsv` file:
1. Read the eeg events.
2. Remove unwanted columns, add `onset` and `duration` columns, and rename `latency` to `sample` and `type` to `value`.
3. Reorder the columns to the `final_column_order`.
4. Replace all empty string and NaN entries with n/a.
5. Save as `_eventstemp1.tsv`.

In [1]:
import os
import datetime
from hed.tools import BidsTabularDictionary, get_file_list, get_new_dataframe, get_value_dict, HedLogger, replace_values

# ---- Dataset-specific settings (edit these for each dataset) ----

# Locations
bids_root_path = '/XXX/CalibrationDrivingWorking'
sampling_rate_file = os.path.realpath(
    os.path.join(bids_root_path, 'code/samplingRates.tsv')
)

# File discovery: directories to skip and the entities used to build keys
exclude_dirs = ['sourcedata', 'stimuli', 'code']
entities = ('sub', 'ses', 'run')

# Column handling: what to drop from the EEG.event dump and the output layout
eeg_drop_columns = ['urevent', 'usertags']
final_column_order = ['onset', 'duration', 'sample', 'value']

# ---- Logging ----
# The log path is relative to bids_root_path; it is joined when the log is saved.
log_name = 'bcit_calibration_driving_02_initial_combination_log'
log_file_name = f"code/curation_logs/{log_name}.txt"
logger = HedLogger(name=log_name)

# ---- Inputs ----
# Collect every *_eventstemp.tsv file under the dataset root (outside the
# excluded directories) and index the files by their entity values.
files_eeg = get_file_list(
    bids_root_path,
    extensions=[".tsv"],
    name_suffix="_eventstemp",
    exclude_dirs=exclude_dirs,
)
eeg_dict = BidsTabularDictionary("EEG event files", files_eeg, entities=entities)

# Lookup table read from the sampling-rate file; indexed later by file-name stem.
sampling_dict = get_value_dict(sampling_rate_file)

# Convert each EEG.event dump (*_eventstemp.tsv) into a table with the columns
# in final_column_order and write it next to the input as *_eventstemp1.tsv.
# Every step is recorded in the logger under the file's key.
# Only `key` is used from each iter_tsv_info() tuple; the other items are
# unpacked to match the tuple shape but are not needed here.
for key, file, rowcount, column_count in eeg_dict.iter_tsv_info():
    filename = eeg_dict.get_file_path(key)
    df_eeg = get_new_dataframe(filename)
    basename = os.path.basename(filename)
    logger.add(key, f"Created a dataframe for {basename}")

    # The sampling-rate table is indexed by the file name with its
    # "_eventstemp.tsv" suffix (15 characters) stripped.
    samplingRate = float(sampling_dict[basename[:-len("_eventstemp.tsv")]])
    logger.add(key, f"Looked up sampling rate of {samplingRate}")

    df_eeg.drop(columns=eeg_drop_columns, inplace=True)
    logger.add(key, f"Dropped {str(eeg_drop_columns)} drop_columns")

    # onset = (latency - 1) / sampling rate.
    # NOTE(review): the -1 presumably converts a 1-based sample index to a
    # 0-based offset before scaling to seconds -- confirm against the source data.
    df_eeg['onset'] = df_eeg['latency'].subtract(1.0).divide(samplingRate)
    logger.add(key, "Calculate onset from latency and add an onset column")

    df_eeg['duration'] = 'n/a'
    logger.add(key, "Add a duration column")

    df_eeg = df_eeg.rename(columns={"latency": "sample", "type": "value"})
    logger.add(key, "Rename the latency column as sample and type as value")

    # reindex both orders the columns and discards any column not listed.
    df_eeg = df_eeg.reindex(columns=final_column_order)
    logger.add(key, f"Reordered the columns as {str(final_column_order)}")

    num_replaced = replace_values(df_eeg, values=[' ', 'NaN'], replace_value='n/a')
    logger.add(key, f"Replaced {num_replaced} blank or NaN values with n/a")

    # Insert "1" before the ".tsv" extension: *_eventstemp.tsv -> *_eventstemp1.tsv
    filename_out = filename[:-4] + "1.tsv"
    df_eeg.to_csv(filename_out, sep='\t', index=False)
    # Log the name of the file actually written, not the input file.
    logger.add(key, f"Saved as {os.path.basename(filename_out)}")

# Print the full log and the ERROR-level summary, then save both to the log file.
log_string = "\n\nLog output:\n" + logger.get_log_string()
error_string = "\n\nERROR Summary:\n" + logger.get_log_string(level="ERROR")
print(log_string)
print(error_string)

save_path = os.path.join(bids_root_path, log_file_name)
# Create the log directory if needed so a missing directory cannot lose the
# log after all files have already been processed.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
# Explicit encoding keeps the log file identical across platforms.
with open(save_path, "w", encoding="utf-8") as fp:
    # Header line: relative log path plus the time the log was written.
    fp.write(f"{log_file_name} {datetime.datetime.now()}\n")
    fp.write(log_string)
    fp.write(error_string)



Log output:
bcit_calibration_driving_02_initial_combination_log: Level None
sub-01_ses-01_run-1:
	[ Created a dataframe for sub-01_ses-01_task-Drive_run-1_eventstemp.tsv]
	[ Looked up sampling rate of 2048.0]
	[ Dropped ['urevent', 'usertags'] drop_columns]
	[ Calculate onset from latency and add an onset column]
	[ Add a duration column]
	[ Rename the latency column as sample and type as value]
	[ Reordered the columns as ['onset', 'duration', 'sample', 'value']]
	[ Replaced 0 blank or NaN values with n/a]
	[ Saved as sub-01_ses-01_task-Drive_run-1_eventstemp.tsv]
sub-02_ses-01_run-1:
	[ Created a dataframe for sub-02_ses-01_task-Drive_run-1_eventstemp.tsv]
	[ Looked up sampling rate of 2048.0]
	[ Dropped ['urevent', 'usertags'] drop_columns]
	[ Calculate onset from latency and add an onset column]
	[ Add a duration column]
	[ Rename the latency column as sample and type as value]
	[ Reordered the columns as ['onset', 'duration', 'sample', 'value']]
	[ Replaced 0 blank or NaN values