In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm 

# Read the tab-separated concept table (the "extra 53 concepts" file).
path = "./Animacy/things_concepts.tsv"
df = pd.read_csv(path, sep="\t")

# Root folder of the image dataset; later cells list its sub-directories.
image_path = "/projects/archiv/DataStore_Boyanova/ExpAtt_EEG/Image_dataset/Images"

# Every column from the third one onward is used as a concept label.
extra_concepts = df.columns[2:].to_numpy()

# Boolean matrix: True where a cell of the concept columns equals 1.
# The last row of the table is left out.
concept_values = df.iloc[:-1, 2:].to_numpy()
concept_mat = np.isin(concept_values, 1)

# Definition text for the same rows (last row left out as well).
defenitions = df["Definition (from WordNet, Google, or Wikipedia)"][:-1].values

In [4]:
# For each group, the order its members must take within the positions they
# already occupy in the category list.
REORDER_RULES = {
    "camera": ["camera_lens", "camera1", "camera2"],
    "chicken": ["chicken_wire", "chicken1", "chicken2"],
    "crystal": ["crystal_ball", "crystal1", "crystal2"],
    "hot": ["hot_chocolate", "hot_tub", "hot-air_balloon", "hot-water_bottle"],
    "ice": ["ice_cream", "ice_cube", "ice_pack", "ice-cream_cone"],
    "pepper": ["pepper_mill", "pepper1", "pepper2"],
}


def reorder_categories_inplace(categories_os):
    """
    Rearranges selected category names according to REORDER_RULES.
    ------
    For every rule, the positions currently held by that rule's names are
    refilled (lowest position first) with the names that are actually present,
    taken in the rule's order. All other entries keep their position.

    Despite the name, the input is not modified: the work is done on a copy.

    Args:
        categories_os (iterable): Category names, e.g. a sorted directory listing.

    Returns:
        list: A new list with the affected names rearranged.
    """
    result = list(categories_os)  # work on a copy, leave the caller's object alone

    for wanted_order in REORDER_RULES.values():
        # positions (ascending) currently occupied by any name from this rule
        slots = [pos for pos, name in enumerate(result) if name in wanted_order]
        if len(slots) == 0:
            continue

        # names of this rule that exist in the list, in the rule's order
        present = [name for name in wanted_order if name in result]

        # refill the occupied positions with the names in the wanted order
        for pos, name in zip(slots, present):
            result[pos] = name

    return result

In [7]:
# Category folder names: sorted directory listing, then the special-case
# reordering, stored as a NumPy array.
# NOTE(review): a later cell indexes concept_mat by position in this array, so
# any extra entry in image_path would shift that alignment — confirm the folder
# holds only category directories.
categories_os = np.array(
    reorder_categories_inplace(sorted(os.listdir(image_path)))
)

# Concept labels that place a category into each group; a later cell compares
# them against the concept column names held in `extra_concepts`.
an1 = ["mammal", "farm animal"]
an2 = ["insect"]
# Fixed: "vegtable" was misspelled and so could not match a column named
# "vegetable"; the duplicated "fruit" entry was dropped.
# NOTE(review): assumes the concept table spells the column "vegetable" —
# confirm against things_concepts.tsv.
inan1 = ["plant", "fruit", "vegetable"]
inan2 = ["vehicle", "tool", "garden tool", "school supply", "weapon"]

In [18]:
# One result list per group; a category may end up in more than one of them
# because every group is checked independently.
mammals, insects, plants, objects = [], [], [], []

# Each result list paired with the concept labels that qualify a category for it.
group_rules = [
    (mammals, an1),
    (insects, an2),
    (plants, inan1),
    (objects, inan2),
]

for cat_id, cat in enumerate(categories_os):
    # concept labels flagged for this category (row cat_id of the matrix)
    cat_concepts = extra_concepts[np.isin(concept_mat[cat_id], 1)]

    for members, labels in group_rules:
        if any(item in cat_concepts for item in labels):
            members.append(cat)

In [31]:
def get_category_paths(
    category_list,
    image_path="/projects/archiv/DataStore_Boyanova/ExpAtt_EEG/Image_dataset/Images",
):
    """
    Collects the image file names found in each listed category folder.
    ------
    Args:
        category_list (iterable of str): Names of sub-folders of `image_path`.
        image_path (str): Root directory containing the category folders.
            Defaults to the dataset location used throughout this notebook.

    Returns:
        list of str: Entry names exactly as returned by os.listdir (bare names,
            not joined with their directory). Categories appear in the order
            of `category_list`; names within a category are sorted.

    Raises:
        OSError: Propagated from os.listdir when a category folder cannot be
            listed (e.g. it does not exist).
    """
    all_cat_imgs = []

    for cat in category_list:
        data_path = os.path.join(image_path, cat)
        # os.listdir gives entries in arbitrary order; sort them so the result
        # (and anything pickled from it) is reproducible across runs/machines.
        all_cat_imgs.extend(sorted(os.listdir(data_path)))

    return all_cat_imgs

import pickle
def dump_data(data, filename):
    """
    Writes `data` to `filename` as a pickle.
    ------
    The file is opened in binary write mode (an existing file is overwritten)
    and the highest pickle protocol available is used.

    Args:
        data (any): Object to serialize.
        filename (str): Destination path of the pickle file.

    Returns:
        None
    """
    with open(filename, mode="wb") as out_file:
        pickle.dump(data, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [32]:
# List the image files of every group (same call order as the group tuple).
mammal_paths, insect_paths, plant_paths, object_paths = (
    get_category_paths(group) for group in (mammals, insects, plants, objects)
)

# Group name -> category names.
categories = dict(
    mammals=mammals,
    insects=insects,
    objects=objects,
    plants=plants,
)

# Group name -> image file names collected for that group.
category_paths = dict(
    mammals=mammal_paths,
    insects=insect_paths,
    objects=object_paths,
    plants=plant_paths,
)

# Persist both mappings as pickles.
dump_data(categories, "./Animacy/categories.pkl")
dump_data(category_paths, "./Animacy/category_paths.pkl")