In [7]:
import os
from collections import Counter

import numpy as np
import pandas as pd

In [2]:
def load_image_labels(metadata_path: str, things_map_path: str) -> dict:
    """Map each training image path to a high-level category label.

    Reads the metadata dictionary stored in ``metadata_path`` and the
    tab-separated category table in ``things_map_path``. For every training
    image, the text before the first underscore of its
    ``train_img_concepts_THINGS`` entry is parsed as a 1-based row number into
    the table. The image is labelled with the first column whose value in that
    row equals 1, or with ``'miscellaneous'`` when no column does.

    Args:
        metadata_path: Path to a ``.npy`` file holding a pickled dict with the
            keys ``'train_img_files'``, ``'train_img_concepts'`` and
            ``'train_img_concepts_THINGS'`` (parallel sequences).
        things_map_path: Path to a TSV file whose column names are used as the
            category labels. Rows are presumably ordered by THINGS concept
            number -- TODO confirm against the dataset documentation.

    Returns:
        Dict mapping ``os.path.join(concept, file_name)`` to the label string.

    Raises:
        IndexError: If a concept's numeric prefix is not a valid 1-based row
            number of the category table.
    """
    print(f"[INFO] Loading image labels from {metadata_path}")
    print(f"[INFO] Loading high-level image labels from {things_map_path}")
    # NOTE: allow_pickle=True unpickles arbitrary objects and can execute code
    # while loading; only point this at metadata files from a trusted source.
    meta = np.load(metadata_path, allow_pickle=True).item()
    things_map = pd.read_csv(things_map_path, delimiter="\t")
    files = meta['train_img_files']
    concepts = meta['train_img_concepts']
    things_concepts = meta['train_img_concepts_THINGS']

    n_rows = len(things_map)
    # All images of a concept share one table row, so resolve each concept's
    # label once instead of re-slicing the table for every image.
    label_by_concept = {}

    path_to_label = {}
    for things_concept, concept, fname in zip(things_concepts, concepts, files):
        high_concept = label_by_concept.get(things_concept)
        if high_concept is None:
            row_number = int(things_concept.split("_")[0])
            # Reject 0 / negative numbers explicitly: `iloc` would otherwise
            # wrap around and silently read a row from the end of the table.
            if not 1 <= row_number <= n_rows:
                raise IndexError(
                    f"concept {things_concept!r} refers to row {row_number}, "
                    f"but the category table has {n_rows} rows"
                )
            row = things_map.iloc[row_number - 1]
            # Decide the fallback with the same predicate used for the lookup,
            # so rows holding values other than 0/1 (e.g. NaN) cannot pass the
            # check and then fail on an empty selection.
            flagged = things_map.columns[(row == 1).to_numpy()]
            high_concept = str(flagged[0]) if len(flagged) else 'miscellaneous'
            label_by_concept[things_concept] = high_concept
        path_to_label[os.path.join(concept, fname)] = high_concept

    return path_to_label

In [5]:
# Directory holding the THINGS-EEG image-set files. Set the
# THINGS_EEG_IMAGE_SET_DIR environment variable to run the notebook against a
# copy of the data stored elsewhere; the default is the original location.
IMAGE_SET_DIR = os.environ.get(
    "THINGS_EEG_IMAGE_SET_DIR",
    r'/scratch/vjh9526/bdml_2025/project/datasets/THINGS-EEG/image_set',
)
# Pickled metadata dict read by load_image_labels.
METADATA_PATH = os.path.join(IMAGE_SET_DIR, 'image_metadata.npy')
# Tab-separated category table read by load_image_labels.
THINGS_MAP_PATH = os.path.join(IMAGE_SET_DIR, 'category27_top-down.tsv')

In [8]:
# Build the image-path -> high-level-category lookup for the training images.
maps = load_image_labels(
    metadata_path=METADATA_PATH,
    things_map_path=THINGS_MAP_PATH,
)

[INFO] Loading image labels from /scratch/vjh9526/bdml_2025/project/datasets/THINGS-EEG/image_set/image_metadata.npy
[INFO] Loading high-level image labels from /scratch/vjh9526/bdml_2025/project/datasets/THINGS-EEG/image_set/category27_top-down.tsv


In [10]:
# Sanity check: number of image paths and number of labels in the mapping.
tuple(len(view) for view in (maps.keys(), maps.values()))

(16540, 16540)

In [13]:
# Count how many images fall into each high-level category.
# Counter accepts any iterable, so the dict view is passed directly
# instead of first copying it into a list.
counter = Counter(maps.values())
print(counter)

Counter({'miscellaneous': 8790, 'animal': 1370, 'food': 1220, 'container': 1190, 'clothing': 800, 'tool': 440, 'fruit': 380, 'plant': 320, 'body part': 310, 'furniture': 270, 'musical instrument': 260, 'toy': 220, 'vegetable': 210, 'vehicle': 190, 'sports equipment': 180, 'weapon': 160, 'dessert': 90, 'kitchen appliance': 90, 'electronic device': 50})


In [14]:
# Percentage of images that ended up with the fallback 'miscellaneous' label.
100 * counter['miscellaneous'] / len(maps)

53.14389359129383