## Stage 1 checks

This script is run after preliminary checks and corrections.
The working event files are of the form: events_temp2.tsv.
The extra columns have been removed. The event files now have
columns `event_code` and `cond_code`.


### Get a list of event files

In [1]:
from hed.tools.io_utils import get_file_list
# Root directory of the dataset whose event files are checked.
bids_root_path = "G:/AttentionShift/AttentionShiftExperiments"
# Working event files: requested as .tsv files with the `_events_temp2` name
# suffix (presumably searched under the root -- confirm against get_file_list).
event_files_bids = get_file_list(
    bids_root_path,
    extensions=['.tsv'],
    name_suffix='_events_temp2',
)
# Columns passed as `skip_cols` when the column summaries are built.
bids_skip = ["onset", "duration", "sample"]

ModuleNotFoundError: No module named 'werkzeug'

In [13]:
import os
from hed.tools.col_dict import ColumnDict

# Build one ColumnDict per BIDS event file (stored in bids_dicts under a
# "<subject>_<run>" key) and fold each into bids_dicts_all, then print the
# combined summary.
bids_dicts_all = ColumnDict(skip_cols=bids_skip, name=f"{bids_root_path} BIDS" )
bids_dicts = {}
for file in event_files_bids:
    orig_dict = ColumnDict(skip_cols=bids_skip, name=f"{file} BIDS")
    orig_dict.update(file)
    base = os.path.basename(file)
    pieces = base.split('_')
    # The working files end in `_events_temp2.tsv`, so pieces[-2] is always
    # 'events'.  Using it as the run part gave every file of a subject the
    # same key (later runs overwrote earlier ones) and never matched the
    # "sub-XXX_run-YY" keys used by the EEG comparisons.  Take the run from
    # the `run-` piece of the file name; fall back to the old position only
    # when the name has no such piece.
    run = next((piece for piece in pieces if piece.startswith('run-')), pieces[-2])
    key = f"{pieces[0]}_{run}"
    bids_dicts[key] = orig_dict
    bids_dicts_all.update_dict(orig_dict)
print('BIDS events summary:')
bids_dicts_all.print()

BIDS events summary:
Summary for column dictionary G:/AttentionShift/AttentionShiftExperiments BIDS:
  Categorical columns (3):
    response_time (1 distinct values):
      n/a: 287442
    trial_type (4 distinct values):
      0: 6373
      1: 58184
      2: 54045
      3: 168840
    value (52 distinct values):
      1: 240
      2: 240
      3: 192
      4: 192
      5: 772
      6: 769
      7: 192
      8: 192
      9: 96
      10: 96
      11: 3179
      12: 3173
      13: 4909
      14: 4907
      15: 18089
      16: 18090
      17: 192
      18: 192
      19: 96
      21: 2242
      22: 2245
      23: 4484
      24: 4489
      25: 17927
      26: 17923
      28: 2
      31: 6810
      32: 6809
      33: 96
      34: 96
      35: 383
      36: 385
      37: 9022
      38: 9022
      39: 4504
      110: 96
      111: 766
      112: 766
      113: 384
      114: 382
      199: 306
      201: 764
      202: 928
      212: 3
      310: 4510
      311: 36014
      312: 35989
      313:

In [14]:
# from hed.tools.col_dict import ColumnDict
# eeg_dicts_all = ColumnDict(skip_cols=eeg_skip, name=f"{bids_root_path} EEG" )
# eeg_dicts = {}
# for key, file in eeg_file_dict.items():
#     eeg_dict = ColumnDict(skip_cols=eeg_skip, name=f"{file} EEG")
#     eeg_dict.update(file)
#     eeg_dicts[key] = eeg_dict
#     eeg_dicts_all.update_dict(eeg_dict)
# print('EEGLAB events summary:')
# eeg_dicts_all.print()

EEGLAB events summary:
Summary for column dictionary G:/AttentionShift/AttentionShiftExperiments EEG:
  Categorical columns (3):
    cond_code (4 distinct values):
      0: 6373
      1: 58184
      2: 54045
      3: 168840
    event_code (17 distinct values):
      1: 11703
      2: 11701
      3: 9296
      4: 9301
      5: 37171
      6: 37167
      7: 9406
      8: 9408
      9: 4696
      10: 4702
      11: 37548
      12: 37524
      13: 18778
      14: 18779
      199: 306
      201: 29028
      202: 928
    type (52 distinct values):
      1: 240
      2: 240
      3: 192
      4: 192
      5: 772
      6: 769
      7: 192
      8: 192
      9: 96
      10: 96
      11: 3179
      12: 3173
      13: 4909
      14: 4907
      15: 18089
      16: 18090
      17: 192
      18: 192
      19: 96
      21: 2242
      22: 2245
      23: 4484
      24: 4489
      25: 17927
      26: 17923
      28: 2
      31: 6810
      32: 6809
      33: 96
      34: 96
      35: 383
      36: 385
  

### Verification of relationships:

These relationships are checked for each event file:
1. EEG `cond_code` == BIDS `trial_type`
2. EEG `type` == BIDS `value`


In [15]:
# print("Checking cond_code == trial_type and type == value")
# for key, file_eeg in eeg_file_dict.items():
#     # Get the column dictionaries for corresponding files
#     eeg_dict = eeg_dicts[key]
#     bids_dict = bids_dicts[key]
#     eeg_type_dict = eeg_dict.categorical_info['type']
#     eeg_cond_dict = eeg_dict.categorical_info['cond_code']
#     bids_value_dict = bids_dict.categorical_info['value']
#     bids_trial_type_dict = bids_dict.categorical_info['trial_type']
#
#     # Check number of values match for cond_code == trial_type
#     for key1, count_eeg in eeg_cond_dict.items():
#         count_orig = bids_trial_type_dict[key1]
#         if count_eeg != count_orig:
#             print(f"EEG key {key} cond_code {count_eeg} != orig trial_type {count_orig}")
#
#     # Check number of values match for type == value
#     for key1, count_eeg in eeg_type_dict.items():
#         count_orig = bids_value_dict[key1]
#         if count_eeg != count_orig:
#             print(f"EEG key {key} type {count_eeg} != orig value {count_orig}")
#

Checking cond_code == trial_type and type == value


### Check EEG relationships

1. The string concatenation of EEG `cond_code` and `event_code` == EEG `type`,
unless `cond_code` == 0 or `event_code` == 202.
2. Any events with EEG `event_code` == 255 are listed.

In [16]:
# for key, file in eeg_file_dict.items():
#     df_eeg = get_new_dataframe(file)
#     df_eeg.drop(['sample_offset', 'latency', 'urevent', 'usertags'], axis=1, inplace=True)
#     df_eeg['new_col'] = df_eeg['cond_code'].map(str) + df_eeg['event_code'].map(str)
#     code_255_col = df_eeg['event_code'].map(str) == '255'
#     trial_col = df_eeg['cond_code'].map(str) != '0'
#     pause_col = df_eeg['event_code'].map(str) != '202'
#     type_col = df_eeg['type'].map(str) != '202'
#     comp_col = df_eeg['new_col'].map(str) != df_eeg["type"].map(str)
#     x = comp_col & trial_col & pause_col
#     y = (type_col & ~pause_col) | (~type_col & pause_col)
#     print(f"{key}: has {sum(x)} event_code and {sum(y)} 202 type disagreements :")
#     for index, value in x.iteritems():
#         if value:
#             row = df_eeg.loc[index]
#             print(f"Key {key} index {index}: event_code:{row['event_code']} type:{row['type']} cond_code:{row['cond_code']}")
#
#     for index, value in y.iteritems():
#         if value:
#             row = df_eeg.loc[index]
#             print(f"Key {key} index {index}: event_code:{row['event_code']} type:{row['type']}")
#
#     for index, value in code_255_col.iteritems():
#         if value:
#             row = df_eeg.loc[index]
#             print(f"Key {key} index {index}: event_code:{row['event_code']} type:{row['type']}")

sub-001_run-01: has 0 event_code and 0 202 type disagreements :
sub-002_run-01: has 0 event_code and 0 202 type disagreements :
sub-003_run-01: has 0 event_code and 0 202 type disagreements :
sub-004_run-01: has 0 event_code and 0 202 type disagreements :
sub-004_run-02: has 0 event_code and 0 202 type disagreements :
sub-005_run-01: has 0 event_code and 0 202 type disagreements :
sub-006_run-01: has 0 event_code and 0 202 type disagreements :
sub-007_run-01: has 0 event_code and 0 202 type disagreements :
sub-008_run-01: has 0 event_code and 0 202 type disagreements :
sub-009_run-01: has 0 event_code and 0 202 type disagreements :
sub-010_run-01: has 0 event_code and 0 202 type disagreements :
sub-011_run-01: has 0 event_code and 0 202 type disagreements :
sub-012_run-01: has 0 event_code and 0 202 type disagreements :
sub-013_run-01: has 0 event_code and 0 202 type disagreements :
sub-014_run-01: has 0 event_code and 0 202 type disagreements :
sub-015_run-01: has 0 event_code and 0 2

### Verify event codes agree with headIT
They do except for code 255.

Find out which files have a cond_code of 0

In [19]:
# for key, file in eeg_file_dict.items():
#     df_eeg = get_new_dataframe(file)
#     trial_col = df_eeg['cond_code'].map(str) == '0'
#     num_zeros = sum(trial_col)
#     if num_zeros:
#         print(f"{key}: has {sum(trial_col)} cond_code value 0")

sub-020_run-01: has 109 cond_code value 0
sub-021_run-01: has 130 cond_code value 0
sub-022_run-01: has 67 cond_code value 0
sub-031_run-01: has 6067 cond_code value 0


#### Find unique key column combinations

Use the `KeyTemplate` class to find the unique value combinations
of `cond_code` and `event_code` and check that they are properly used.

Note:  Codes 1 through 6 should be in the nonshifted condition.
       Codes 7 through 14 should be in the shift condition.

       Codes 199, 201, 202, and 255 are not related to condition.



In [18]:
# from hed.tools.key_template import KeyTemplate
# key_columns = ['cond_code', 'event_code']
# template = KeyTemplate(key_columns)
# for file in event_files_eeg:
#     template.update(file)
# template.resort()
# template.print()

Counts for key [['cond_code', 'event_code']]:
[0, 1]	240
[0, 2]	240
[0, 3]	192
[0, 4]	192
[0, 5]	772
[0, 6]	769
[0, 7]	192
[0, 8]	192
[0, 9]	96
[0, 10]	96
[0, 11]	768
[0, 12]	766
[0, 13]	385
[0, 14]	383
[0, 199]	306
[0, 201]	764
[0, 202]	20
[1, 1]	2411
[1, 2]	2407
[1, 3]	4524
[1, 4]	4524
[1, 5]	18089
[1, 6]	18090
[1, 7]	192
[1, 8]	192
[1, 9]	96
[1, 10]	96
[1, 11]	766
[1, 12]	766
[1, 13]	384
[1, 14]	382
[1, 201]	5075
[1, 202]	190
[2, 1]	2242
[2, 2]	2245
[2, 3]	4484
[2, 4]	4489
[2, 5]	17927
[2, 6]	17923
[2, 8]	2
[2, 12]	3
[2, 201]	4545
[2, 202]	185
[3, 1]	6810
[3, 2]	6809
[3, 3]	96
[3, 4]	96
[3, 5]	383
[3, 6]	385
[3, 7]	9022
[3, 8]	9022
[3, 9]	4504
[3, 10]	4510
[3, 11]	36014
[3, 12]	35989
[3, 13]	18009
[3, 14]	18014
[3, 201]	18644
[3, 202]	533
