## Merging of event files for two runs in the same session.


This script starts with the data that was uploaded to OpenNeuro as ds002893.
A copy of the EEG.event structure is dumped to the dataset as `_events_temp.tsv`
files.

This file performs the following checks:
1. There are the same number of event files of each type.
2. The BIDS `_events.tsv` files have the expected columns.
3. The EEG `_events_temp.tsv` files have the expected columns.
4. The corresponding BIDS and EEG event files have the same number of events.

If these checks are satisfied, then the program proceeds to:
1. Remove the columns `trial_type`, `value`, `response_time`, `stim_file`, and `HED` from the BIDS events.
2. Remove the columns `urevent`, `type`, and `usertags` from the EEG events.
3. Combine the remaining columns into a single dataframe.
4. Make sure that the `sample`, `sample_offset` and `latency` columns are equal.
5. Drop the `sample_offset` and `latency` columns.
6. Save as `_events_temp1.tsv`.
7. Correct sub-031 run-01, which had an incorrect `cond_code` of 0 instead of one of the codes 1, 2, or 3, by reading
the corrected codes from a file.

In [1]:
import pandas as pd
from hed.tools.io_utils import get_file_list, make_file_dict
from hed.tools.data_utils import get_new_dataframe
from hed.tools.hed_logger import HedLogger

# Set up the logger used to collect status messages for the run.
status = HedLogger()

# Make the dictionary of the BIDS events.tsv files.
# NOTE(review): only the BIDS dictionary is built here; no `eeg_dict` is created in
# this cell even though the commented-out checks below refer to one.
# The path is a raw string so that the Windows backslashes are never parsed as escape
# sequences (a non-raw '\A' is an invalid escape and triggers a warning).
bids_root_path = r'G:\AttentionShift\AttentionShiftWorking1'
bids_files = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events")
# presumably indices=(0, 2) selects the sub-XXX and run-XX filename entities to form
# keys such as 'sub-004_run-01' -- TODO confirm against make_file_dict.
bids_dict = make_file_dict(bids_files, indices=(0, 2))
fatal_error = False

## Subject 004
key = 'sub-004_run-01'
srate = 250.0293378038558
last_pt = 34560

# NOTE(review): the assignments below repeat the same key and srate and immediately
# overwrite last_pt (34560 -> 34304); confirm which last_pt value is intended.
key = 'sub-004_run-01'
srate = 250.0293378038558
last_pt = 34304
last_event = 289
#
#
# as_01_initial_combination.ipynb
# # Check to make sure that they have the same keys
# status.add("file-level-checks", f"BIDS events files: {len(bids_dict)} EEG files: {len(eeg_dict)}", also_print=True)
# key_check = set(bids_dict.keys()).symmetric_difference(set(eeg_dict.keys()))
# if key_check:
#     status.add("missing files", f"ERROR [{str(key_check)}] events are not in both BIDS and EEG.set files",
#                also_print=True)
#     fatal_error = True
#
# # Check columns for BIDS events files
# bids_cols = ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
# bids_skip_cols = ['trial_type', 'value', 'response_time', 'stim_file', 'HED']
# bids_col_set = set(bids_cols)
# eeg_cols = ['sample_offset', 'event_code', 'cond_code', 'type', 'latency', 'urevent', 'usertags']
# eeg_skip_cols = ['urevent', 'usertags', 'type']
# eeg_col_set = set(eeg_cols)
# for key, file in bids_dict.items():
#     df_bids = get_new_dataframe(file)
#     bids_cols_missing = bids_col_set.difference(set(df_bids.columns))
#     if bids_cols_missing:
#         status.add(key, f"ERROR {key}: {str(list(bids_cols_missing))} bids event columns are missing", also_print=True)
#         fatal_error = True
#
#     df_eeg = get_new_dataframe(eeg_dict[key])
#     eeg_cols_missing = eeg_col_set.difference(set(df_eeg.columns))
#     if eeg_cols_missing:
#         status.add(key, f"ERROR {key}: {str(list(eeg_cols_missing))} EEG event columns are missing", also_print=True)
#         fatal_error = True
#
#     # Now check the number of rows in the two versions agree
#     if len(df_bids.index) != len(df_eeg.index):
#         status.add(key, f"ERROR {key}: BIDS file has {len(df_bids.index)} events and EEG file "
#                    f"has {len(df_eeg.index)} events,", also_print=True)
#         fatal_error = True
#
#     if fatal_error:
#         continue
#
#     # Drop the extra columns and reorder the BIDS dataset
#     bids_extra_cols = set(df_bids.columns).difference(bids_col_set)
#     if bids_extra_cols:
#         df_bids.drop(columns=bids_extra_cols, inplace=True)
#         status.add(key, f"Dropped BIDS columns {str(bids_extra_cols)}")
#     df_bids = df_bids[bids_cols]
#     status.add(key, f"Reordered BIDS columns as {str(bids_cols)}")
#     df_bids.drop(columns=bids_skip_cols, inplace=True)
#     status.add(key, f"Dropped BIDS skip columns {str(bids_skip_cols)}")
#
#     # Drop the extra columns and reorder the EEG dataset
#     eeg_extra_cols = set(df_eeg.columns).difference(eeg_col_set)
#     if eeg_extra_cols:
#         df_eeg.drop(columns=eeg_extra_cols, inplace=True)
#         status.add(key, f"Dropped EEG columns {str(eeg_extra_cols)}")
#     df_eeg = df_eeg[eeg_cols]
#     status.add(key, f"Reordered EEG columns as {str(eeg_cols)}")
#     df_eeg.drop(columns=eeg_skip_cols, inplace=True)
#     status.add(key, f"Dropped EEG skip columns {str(eeg_skip_cols)}")
#
#     # Combine the two versions of the events file.
#     df_out = pd.concat([df_bids, df_eeg], axis=1)
#     status.add(key, f"Concatenated the BIDS and EEG event files for processing")
#
#     # Make sure that the sample and sample_offset columns are equal
#     offset_diff = sum(df_out['sample'].map(str) != df_out['sample_offset'].map(str))
#     latency_diff = sum(df_out['sample'].map(str) != df_out['latency'].map(str))
#     if offset_diff + latency_diff:
#         status.add(key, f"ERROR {key}: sample col has {offset_diff} sample_offset and "
#                         f"{latency_diff} latency differences,", also_print=True)
#         fatal_error = True
#         continue
#     df_out.drop(columns=['sample_offset', 'latency'], inplace=True)
#     status.add(key, f"Dropped the sample_offset and latency columns")
#     filename = file[:-4] + "_temp1.tsv"
#     df_out.to_csv(filename, sep='\t', index=False)
#     status.add(key, f"Saved as _events_temp1.tsv")
#
# if fatal_error:
#     print("WARNING: This script had fatal data errors which should be corrected before proceeding")
#     raise KeyboardInterrupt
#
#

BIDS events files: 55 EEG files: 55


In [2]:
# key = 'sub-007_run-01'
# filename = bids_dict[key]
# filename = filename[:-4] + "_temp1.tsv"
# df = get_new_dataframe(filename)
# df.drop(index=[0, 1, 2, 3], inplace=True)
# status.add(key, f"Dropped rows 0, 1, 2, 3, to get rid of bad 255 events at beginning of file.")
# df.to_csv(filename, sep='\t', index=False)
# status.add(key, f"Saved as _events_temp1.tsv")
#

In [3]:
# import os
# from hed.tools.data_utils import get_new_dataframe
# bids_file = \
#     'G:/AttentionShift/AttentionShiftWorking/sub-031/eeg/sub-031_task-AuditoryVisualShift_run-01_events_temp1.tsv'
# correction_file = '../../../data/attention_shift_data/sub-031_run-01_code_corrections.tsv'
# print(os.path.abspath(correction_file))
#
# df = get_new_dataframe(bids_file)
# corrections = get_new_dataframe(correction_file)
# df['event_code'] = list(corrections['event_code'])
# df['cond_code'] = list(corrections['cond_code'])
# df.to_csv(bids_file, sep='\t', index=False)
# status.add('sub-031_run-01', f"Replaced the event_code and cond_code columns with values from correction file")
#

E:\HEDPython\hed-python\hedcode\data\attention_shift_data\sub-031_run-01_code_corrections.tsv


In [4]:
# status.print_log()

file-level-checks
	BIDS events files: 55 EEG files: 55
sub-001_run-01
	Reordered BIDS columns as ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
	Dropped BIDS skip columns ['trial_type', 'value', 'response_time', 'stim_file', 'HED']
	Reordered EEG columns as ['sample_offset', 'event_code', 'cond_code', 'type', 'latency', 'urevent', 'usertags']
	Dropped EEG skip columns ['urevent', 'usertags', 'type']
	Concatenated the BIDS and EEG event files for processing
	Dropped the sample_offset and latency columns
	Saved as _events_temp1.tsv
sub-002_run-01
	Reordered BIDS columns as ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
	Dropped BIDS skip columns ['trial_type', 'value', 'response_time', 'stim_file', 'HED']
	Reordered EEG columns as ['sample_offset', 'event_code', 'cond_code', 'type', 'latency', 'urevent', 'usertags']
	Dropped EEG skip columns ['urevent', 'usertags', 'type']
	Concatenated the BIDS and