## Preliminary column summary of events

This script does a preliminary summary of the contents of the events files.
The summary includes printing out the column names of each event file so
that they can be manually checked for differences.

The script assumes that the data is in BIDS format and that each BIDS events
file with suffix `_events.tsv` has a corresponding events file with
suffix `_events_temp.tsv` that was previously dumped from the `EEG.set` files.

In [1]:
import os
from hed.util import get_file_list, make_file_dict

# Root directory of the dataset to summarize. A raw string keeps the Windows
# backslashes literal; in a plain string '\A' is an invalid escape sequence
# that newer Python versions warn about.
bids_root_path = r'G:\AttentionShift\AttentionShiftWorking'

# Passed to make_file_dict to build the dictionary keys. Judging from the
# printed keys (e.g. sub-020_run-01), these presumably select the
# underscore-separated pieces of the file name -- confirm against make_file_dict.
name_indices = (0, 2)

# Columns left out of the column summaries (used further down as skip_cols
# for ColumnSummary.make_combined_dicts).
bids_skip = ['onset',  'sample', 'HED']
eeg_skip = ['latency', 'sample_offset', 'urevent', 'usertags']

# Directory handed to get_file_list as exclude_dirs for the BIDS listing.
exclude_dir = os.path.join(bids_root_path, 'sourcedata')

# Construct the dictionary for BIDS style event files (*_events.tsv)
print(f"Summarizing {bids_root_path}...")
files_bids = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events", exclude_dirs=[exclude_dir])
bids_dict = make_file_dict(files_bids, name_indices=name_indices)
print(f"\n{len(bids_dict)} BIDS style event files")
for key, value in bids_dict.items():
    print(f"{key}: {os.path.basename(value)}")

# Construct the dictionary for EEG.event files (*_events_temp.tsv)
# NOTE(review): unlike the BIDS listing above, no exclude_dirs is passed here,
# so the sourcedata directory is also searched -- confirm this is intended.
files_eeg = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events_temp")
eeg_dict = make_file_dict(files_eeg, name_indices=name_indices)
print(f"\n{len(eeg_dict)} EEG.event style event files")
for key, value in eeg_dict.items():
    print(f"{key}: {os.path.basename(value)}")

Summarizing G:\AttentionShift\AttentionShiftWorking...

52 BIDS style event files
sub-020_run-01: sub-020_task-AuditoryVisualShift_run-01_events.tsv
sub-021_run-01: sub-021_task-AuditoryVisualShift_run-01_events.tsv
sub-022_run-01: sub-022_task-AuditoryVisualShift_run-01_events.tsv
sub-001_run-01: sub-001_task-AuditoryVisualShift_run-01_events.tsv
sub-002_run-01: sub-002_task-AuditoryVisualShift_run-01_events.tsv
sub-003_run-01: sub-003_task-AuditoryVisualShift_run-01_events.tsv
sub-004_run-01: sub-004_task-AuditoryVisualShift_run-01_events.tsv
sub-005_run-01: sub-005_task-AuditoryVisualShift_run-01_events.tsv
sub-006_run-01: sub-006_task-AuditoryVisualShift_run-01_events.tsv
sub-007_run-01: sub-007_task-AuditoryVisualShift_run-01_events.tsv
sub-008_run-01: sub-008_task-AuditoryVisualShift_run-01_events.tsv
sub-009_run-01: sub-009_task-AuditoryVisualShift_run-01_events.tsv
sub-010_run-01: sub-010_task-AuditoryVisualShift_run-01_events.tsv
sub-011_run-01: sub-011_task-AuditoryVisualShif

In [2]:
# Compare the key sets of the two dictionaries. A key that appears in only one
# of them marks an events file that has no counterpart of the other kind.
print("Verifying that both dictionaries have the same keys")
keys_bids = set(bids_dict.keys())
keys_eeg = set(eeg_dict.keys())

# Sort the differences before printing: the iteration order of a set of strings
# can change from one kernel session to the next, which would make the printed
# lists differ between otherwise identical runs.
list_bids = sorted(keys_bids.difference(keys_eeg))
list_eeg = sorted(keys_eeg.difference(keys_bids))
print(f"Bids extra keys {list_bids}")
print(f"EEG extra keys {list_eeg}")

Verifying that both dictionaries have the same keys
Bids extra keys ['sub-022_run-01', 'sub-020_run-01', 'sub-021_run-01']
EEG extra keys []


In [3]:
from hed.util import get_new_dataframe

# For each kind of events file, print one line per file with its key, its
# number of rows, and its column names, and record the row count per key in
# bids_count_dict / eeg_count_dict.
bids_count_dict = {}
eeg_count_dict = {}
for title, file_dict, count_dict in (
    ("\nBIDS style event file columns:", bids_dict, bids_count_dict),
    ("\nEEG.event style event file columns:", eeg_dict, eeg_count_dict),
):
    print(title)
    for key, file in file_dict.items():
        df = get_new_dataframe(file)
        num_events = len(df.index)
        count_dict[key] = num_events
        print(f"{key} [{num_events} events]: {list(df.columns.values)}")


BIDS style event file columns:
sub-020_run-01 [109 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-021_run-01 [130 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-022_run-01 [67 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-001_run-01 [5856 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-002_run-01 [5874 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-003_run-01 [5867 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-004_run-01 [5862 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HED']
sub-005_run-01 [5769 events]: ['onset', 'duration', 'sample', 'trial_type', 'response_time', 'stim_file', 'value', 'HE

In [4]:
from hed.tools import ColumnSummary
# Summarize the BIDS style event files, leaving out the columns in bids_skip.
# make_combined_dicts returns a pair; only its first element is printed here.
print('\nBIDS events summary:')
bids_dicts_all, bids_dicts = ColumnSummary.make_combined_dicts(
    bids_dict,
    skip_cols=bids_skip,
)
bids_dicts_all.print()

# Same summary for the EEG.event style files, leaving out the columns in eeg_skip.
print('\nEEG.event events summary:')
eeg_dicts_all, eeg_dicts = ColumnSummary.make_combined_dicts(
    eeg_dict,
    skip_cols=eeg_skip,
)
eeg_dicts_all.print()


BIDS events summary:
Summary for column dictionary :
  Categorical columns (7):
    Unnamed: 8 (1 distinct values):
      : 11772
    Unnamed: 9 (1 distinct values):
      : 11772
    duration (2 distinct values):
      : 12924
      n/a: 274525
    response_time (2 distinct values):
      : 12924
      n/a: 274525
    stim_file (2 distinct values):
      : 12924
      n/a: 274525
    trial_type (5 distinct values):
      0: 6373
      1: 58185
      2: 54048
      3: 168840
      []: 3
    value (54 distinct values):
      1: 240
      10: 96
      11: 3179
      110: 96
      111: 766
      112: 766
      113: 384
      114: 382
      12: 3173
      1201: 5075
      13: 4909
      14: 4907
      15: 18089
      16: 18090
      17: 192
      18: 192
      19: 96
      199: 306
      2: 240
      201: 764
      202: 929
      21: 2242
      212: 3
      22: 2245
      2201: 4545
      2255: 3
      23: 4484
      24: 4489
      25: 17927
      26: 17923
      28: 2
      3: 192
      