## Checks that the event codes are consistent with conditions

This script cross-checks the event codes for consistency after the initial `_events_temp1.tsv`
files have been produced by the `attention_shift_02_initial_combination.ipynb` notebook.

### Checking for forbidden codes
       Codes 1 and 2 can appear anywhere.
       Codes 3 through 6 should appear only in the focus condition.
       Codes 7 through 14 should appear only in the shift condition.
       Codes 199, 201, 202, and 255 are not related to condition.

The conclusions of running this script:
* sub_005_run_01 has 5 shift event codes in a focus condition.
* sub_008_run_01 has 2874 shift event codes in a focus condition.
* sub_015_run_01 has 239 focus event codes in a shift condition.
* sub_031_run_01 has 6067 cond_code values of 0.
* sub_036_run_02 has 721 focus event codes in a shift condition.

In [1]:
from hed.tools import BidsTsvDictionary, BidsTsvSummary, HedLogger
from hed.util import get_file_list, get_new_dataframe

# Set up the logger that collects per-file messages from the checks.
status = HedLogger()

# Locate the `_events_temp1.tsv` files under the dataset root (skipping the
# excluded directories), index them by the `sub` and `run` entities, and
# summarize the values in their columns.
# The path is a raw string so the Windows backslashes stay literal: '\A' is
# not a valid escape sequence in an ordinary string literal.
bids_root_path = r'G:\AttentionShift\AttentionShiftWorking'
exclude_dirs = ['sourcedata']
entities = ('sub', 'run')
# Columns left out of the value summary.
skip_cols = ['onset', 'duration', 'sample']

bids_files = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events_temp1",
                           exclude_dirs=exclude_dirs)
bids_dict = BidsTsvDictionary(bids_files, entities=entities)
bids_sum_all, bids_sum = BidsTsvSummary.make_combined_dicts(bids_dict, skip_cols=skip_cols)
bids_sum_all.print('\nBIDS events summary')


BIDS events summary
  Categorical columns (3):
    cond_code (4 distinct values):
      0: 6067
      1: 58184
      2: 54044
      3: 168840
    event_code (16 distinct values):
      1: 11703
      10: 4702
      11: 37548
      12: 37524
      13: 18778
      14: 18779
      2: 11701
      201: 29028
      202: 927
      3: 9296
      4: 9301
      5: 37171
      6: 37167
      7: 9406
      8: 9408
      9: 4696
    type (51 distinct values):
      1: 240
      10: 96
      11: 3179
      110: 96
      111: 766
      112: 766
      113: 384
      114: 382
      12: 3173
      1201: 5075
      13: 4909
      14: 4907
      15: 18089
      16: 18090
      17: 192
      18: 192
      19: 96
      2: 240
      201: 764
      202: 927
      21: 2242
      212: 3
      22: 2245
      2201: 4545
      23: 4484
      24: 4489
      25: 17927
      26: 17923
      28: 2
      3: 192
      31: 6810
      310: 4510
      311: 36014
      312: 35989
      313: 18009
      314: 18014
      32:

In [2]:
print("Isolating the bad codes:")

# Event codes that belong to each condition (compared as strings).
focus_event_codes = ['3', '4', '5', '6']
shift_event_codes = ['7', '8', '9', '10', '11', '12', '13', '14']

for key, file, _rowcount, _columns in bids_dict.iter_tsv_info():
    df_bids = get_new_dataframe(file.file_path)

    # Convert each code column to strings once and reuse the result for every mask.
    # NOTE(review): presumably the string conversion makes the comparison
    # independent of whether the column was read as numbers or text -- confirm.
    cond_codes = df_bids['cond_code'].map(str)
    event_codes = df_bids['event_code'].map(str)

    focus_cond_mask = cond_codes.isin(['1', '2'])
    shift_cond_mask = cond_codes.isin(['3'])
    bad_cond_mask = cond_codes.isin(['0'])
    focus_event_mask = event_codes.isin(focus_event_codes)
    shift_event_mask = event_codes.isin(shift_event_codes)
    pulse_code_mask = event_codes.isin(['199'])

    # Each entry pairs a row mask with the text reported when the mask selects
    # at least one row. The order here is the order the messages are logged in.
    checks = [
        (focus_cond_mask & shift_event_mask, "shift event codes in a focus condition"),
        (shift_cond_mask & focus_event_mask, "focus event codes in a shift condition"),
        (bad_cond_mask, "cond_code values of 0"),
        (pulse_code_mask, "event_code values of 199"),
        (pulse_code_mask & bad_cond_mask, "event_code values of 199 with cond_code 0"),
        (event_codes.isin(['255']), "event_code values of 255"),
        (event_codes.isin(['202']), "event_code values of 202"),
    ]
    for mask, description in checks:
        # Series.sum() counts the True rows without a Python-level loop;
        # int() gives a plain integer for the message.
        count = int(mask.sum())
        if count:
            status.add(key, f"{key} has {count} {description}")

Isolating the bad codes:


In [3]:
# Print the messages collected by the consistency checks, listed under each file key.
status.print_log()

sub_001_run_01
	sub_001_run_01 has 6 event_code values of 202
sub_002_run_01
	sub_002_run_01 has 20 event_code values of 202
sub_003_run_01
	sub_003_run_01 has 20 event_code values of 202
sub_004_run_01
	sub_004_run_01 has 1 event_code values of 202
sub_004_run_02
	sub_004_run_02 has 19 event_code values of 202
sub_005_run_01
	sub_005_run_01 has 5 shift event codes in a focus condition
	sub_005_run_01 has 6 event_code values of 202
sub_006_run_01
	sub_006_run_01 has 20 event_code values of 202
sub_007_run_01
	sub_007_run_01 has 18 event_code values of 202
sub_008_run_01
	sub_008_run_01 has 2874 shift event codes in a focus condition
	sub_008_run_01 has 11 event_code values of 202
sub_009_run_01
	sub_009_run_01 has 26 event_code values of 202
sub_010_run_01
	sub_010_run_01 has 18 event_code values of 202
sub_011_run_01
	sub_011_run_01 has 21 event_code values of 202
sub_012_run_01
	sub_012_run_01 has 22 event_code values of 202
sub_013_run_01
	sub_013_run_01 has 20 event_code values of 