## Script checks stim_file
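This script builds a dictionary of the stimulus files referenced in the `stim_file` column
of the dataset's `_events.tsv` files, then **permanently deletes** every `.bmp` file in the
dataset's `stimuli` directory that is not referenced. Run it only on a copy of the dataset
or on one that can be restored.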


In [1]:
import os
from hed.tools.io_util import get_file_list, make_file_dict
from hed.tools.data_util import get_new_dataframe
from hed.tools.hed_logger import HedLogger

# Collect the set of stimulus file names referenced by the dataset's event files.
status = HedLogger()
dataset = "eeg_ds003654s"
# The dataset is located relative to the parent of the current working directory.
bids_root_path =  os.path.join(os.path.dirname(os.path.abspath('')), os.path.join('../datasets/', dataset))
bids_root_path = os.path.abspath(bids_root_path)
status.add(dataset, f"Bids root path: {bids_root_path}", also_print=True)
# Only the *_events.tsv files are of interest here.
bids_files = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events")
file_dict = make_file_dict(bids_files, indices=(0, -2))


status.add(dataset, f"\nBIDS event files: {len(file_dict)}", also_print=True)

total_events = 0
# stim_dict is used as a set: its keys are the distinct stim_file values seen.
stim_dict = {}
for key, file in file_dict.items():
    df = get_new_dataframe(file)
    # Series.iteritems() was removed in pandas 2.0; only the distinct values
    # are needed, so iterate over unique() rather than over every event row.
    for value in df['stim_file'].unique():
        stim_dict[value] = True
    total_events = total_events + len(df.index)
status.add(dataset, f"Total events: {total_events} unique stimuli: {len(stim_dict)}", also_print=True)

# Now get the stimuli files and remove those that aren't used:

Bids root path: D:\Research\HED\hed-examples\datasets\eeg_ds003654s

BIDS event files: 6
Total events: 3362 unique stimuli: 346


In [2]:
# WARNING: this cell permanently deletes files from the dataset's stimuli directory.
stimuli_path = os.path.abspath(os.path.join(bids_root_path, 'stimuli'))
status.add(dataset, f"Now removing unused files from {stimuli_path}", also_print=True)

# stim_file entries may be paths relative to the stimuli directory, so compare
# base names on both sides; str() guards against non-string (missing) entries.
used_names = {os.path.basename(str(name)) for name in stim_dict}

remove_count = 0
stim_files = get_file_list(stimuli_path, extensions=[".bmp"])
if not used_names:
    # With nothing referenced, every stimulus would look unused. That almost
    # certainly means the event files were not found, so delete nothing.
    status.add(dataset, "No stim_file entries were found in the event files; skipping removal",
               also_print=True)
else:
    for file in stim_files:
        basename = os.path.basename(file)
        if basename in used_names:
            continue
        # Log before deleting so a failed removal still leaves a record of the file.
        status.add(dataset, f"Removing {basename}: {file}", also_print=True)
        os.remove(file)
        remove_count += 1
status.add(dataset, f"Removed {remove_count} out of {len(stim_files)} files", also_print=True)

status.print_log()

Now removing unused files from D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli
Removing f001.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f001.bmp
Removing f002.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f002.bmp
Removing f003.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f003.bmp
Removing f007.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f007.bmp
Removing f008.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f008.bmp
Removing f014.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f014.bmp
Removing f016.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f016.bmp
Removing f018.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f018.bmp
Removing f019.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f019.bmp
Removing f029.bmp: D:\Research\HED\hed-examples\datasets\eeg_ds003654s\stimuli\f029.bmp
Removing f030.bmp: D:\Researc