## Summarize and check events.

This script summarizes the event structures in the W-H-MEEG dataset
and checks the remapping. This script works on the `*_events_temp2.tsv` files.


In [5]:
from hed.tools.io_utils import get_file_list, make_file_dict
from hed.tools.data_utils import get_new_dataframe
from hed.tools.hed_logger import HedLogger

# Location of the local dataset copy and of the tab-separated value map.
bids_root_path = 'G:/WH_working2'
map_path = '../../../data/wakeman_henson_data/wh_map.tsv'

# Gather the `.tsv` files under the root whose names end in `_events_temp2`,
# then arrange them in a dictionary (how keys are derived from the file names
# is controlled by `indices` -- see make_file_dict).
bids_files = get_file_list(
    bids_root_path,
    extensions=[".tsv"],
    name_suffix="_events_temp2",
)
file_dict = make_file_dict(bids_files, indices=(0, -3))

# Column ordering for the event files (presumably the final layout; it is not
# referenced by the code visible in this script).
final_order = [
    'onset',
    'duration',
    'sample',
    'event_type',
    'face_type',
    'rep_status',
    'rep_lag',
    'trial',
    'value',
    'stim_file',
]

# Columns handed to the summary step as `skip_cols`.
skip_columns = ['onset', 'duration', 'sample']

# Collects the messages reported by the checks in this script.
status = HedLogger()

def stringify_columns(df, columns=None):
    """Convert columns of a dataframe to strings, modifying it in place.

    Args:
        df: Dataframe whose columns are converted.
        columns: Names of the columns to convert. When None or empty, every
            column of ``df`` is converted. Names that are not columns of
            ``df`` are ignored.

    Returns:
        None. ``df`` itself is updated.
    """
    present = list(df.columns)
    targets = columns if columns else present
    for name in (candidate for candidate in targets if candidate in present):
        df[name] = df[name].astype(str)

def get_missing_columns(df, columns):
    """Return the requested column names that the dataframe does not have.

    Args:
        df: Object exposing a ``columns`` attribute (e.g. a dataframe).
        columns: Iterable of column names to look for.

    Returns:
        A list of the names in ``columns`` that are absent from ``df.columns``,
        in the order they were given.
    """
    available = list(df.columns)
    return [name for name in columns if name not in available]


# Build a lookup from each `value` entry of the map file to the index label of
# the map row that defines it. All map columns are converted to strings first
# so that later comparisons are string-to-string.
map_dict = {}
map_df = get_new_dataframe(map_path)
stringify_columns(map_df)

for row_label, map_row in map_df.iterrows():
    code = map_row['value']
    if code not in map_dict:
        map_dict[code] = row_label
        continue
    # A value may only be defined once; the first definition is kept.
    status.add('map_dict', f"ERROR {code} duplicated in map dictionary", also_print=True)

# Check the consistency of every events file against the map:
#   * each row's `value` must be one of the values defined in the map, and
#   * the row's event_type, face_type and rep_status must equal those of the
#     map row for that value.
# Problems are recorded in `status` under the file's key (and printed).
map_cols = list(map_df.columns)
checked_cols = ('event_type', 'face_type', 'rep_status')

for key, file in file_dict.items():
    df = get_new_dataframe(file)

    # A file lacking one of the compared columns cannot be checked row by row
    # (indexing the row would raise KeyError), so report it and move on.
    missing = get_missing_columns(df, ('value',) + checked_cols)
    if missing:
        status.add(key, f"ERROR {key} is missing columns {missing}", also_print=True)
        continue

    # Compare as strings, matching the stringified map dataframe.
    stringify_columns(df, columns=map_cols)
    for ind, row in df.iterrows():
        value = row['value']
        if value not in map_dict:
            status.add(key, f"ERROR {key} has invalid value {value} in row {ind}", also_print=True)
            continue

        expected = map_df.loc[map_dict[value]]
        for col in checked_cols:
            if row[col] != expected[col]:
                status.add(key, f"ERROR {key} has invalid {col} {row[col]} in row {ind}", also_print=True)
                # Only the first mismatching column of a row is reported.
                break

In [7]:
# Output the messages collected in `status` by the earlier checks.
status.print_log()

from hed.tools.map_utils import make_combined_dicts

# Summarize the columns of all event files, leaving out the columns listed in
# `skip_columns`, and print the combined summary.
print('\nBIDS events summary:')
bids_dicts_all, bids_dicts = make_combined_dicts(
    file_dict,
    skip_cols=skip_columns,
)
bids_dicts_all.print()


BIDS events summary:
Summary for column dictionary :
  Categorical columns (7):
    event_type (9 distinct values):
      double_press: 14
      left_press: 6911
      right_press: 8507
      setup_left_sym: 48
      setup_right_sym: 60
      show_circle: 15929
      show_cross: 15922
      show_face: 15821
      show_face_initial: 108
    face_type (4 distinct values):
      famous_face: 5304
      n/a: 47391
      scrambled_face: 5305
      unfamiliar_face: 5320
    rep_lag (13 distinct values):
      1: 3888
      10: 819
      11: 913
      12: 751
      13: 444
      14: 171
      15: 52
      16: 3
      6: 10
      7: 69
      8: 224
      9: 485
      n/a: 55491
    rep_status (4 distinct values):
      delayed_repeat: 3941
      first_show: 8100
      immediate_repeat: 3888
      n/a: 47391
    stim_file (453 distinct values):
      circle.bmp: 15929
      cross.bmp: 15922
      f001.bmp: 34
      f002.bmp: 36
      f003.bmp: 36
      f004.bmp: 34
      f005.bmp: 33
      f00