In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm 

# Load the concept table (tab-separated). The original note calls this the
# "extra 53 concepts" table.
path = "./Animacy/things_concepts.tsv"
df = pd.read_csv(path, sep="\t")

# Extract the pieces used by the later cells.
# Root folder whose sub-folders are listed as image categories further down.
image_path = "/projects/archiv/DataStore_Boyanova/ExpAtt_EEG/Image_dataset/Images"
# Column headers from the third column onward, used as concept names.
extra_concepts = df.columns[2:].to_numpy()
# Boolean matrix: True where a cell equals 1. The last row of the table is
# dropped here — presumably a non-concept trailer row; TODO confirm.
concept_mat = np.isin(df.iloc[0:-1, 2:].to_numpy(), 1)
# Definition text per row, with the same last row dropped.
# NOTE(review): the name is misspelled ("defenitions"); left unchanged because
# other cells may reference it.
defenitions = df["Definition (from WordNet, Google, or Wikipedia)"][:-1].values

In [2]:
# Manual ordering overrides: for each group, the listed names must appear in
# exactly this relative order in the final category list.
REORDER_RULES = {
    "camera": ["camera_lens", "camera1", "camera2"],
    "chicken": ["chicken_wire", "chicken1", "chicken2"],
    "crystal": ["crystal_ball", "crystal1", "crystal2"],
    "hot": ["hot_chocolate", "hot_tub", "hot-air_balloon", "hot-water_bottle"],
    "ice": ["ice_cream", "ice_cube", "ice_pack", "ice-cream_cone"],
    "pepper": ["pepper_mill", "pepper1", "pepper2"],
}


def reorder_categories_inplace(categories_os):
    """Return a copy of ``categories_os`` with the REORDER_RULES groups reordered.

    For every rule, the positions currently occupied by any of the rule's
    names are kept, and the names found there are redistributed over those
    same positions in the order the rule prescribes. All other entries stay
    where they are.

    Despite the name, the input is not modified; a new list is returned.

    Args:
        categories_os (iterable of str): Category names, e.g. a sorted
            directory listing.

    Returns:
        list: Reordered copy of the input.
    """
    categories = list(categories_os)  # work on a copy, never on the caller's object

    for desired_order in REORDER_RULES.values():
        # Rank of each name inside this rule, used as the sort key below.
        rank = {name: pos for pos, name in enumerate(desired_order)}

        # Positions (ascending) currently holding one of the rule's names.
        slots = [i for i, c in enumerate(categories) if c in rank]
        if not slots:
            continue

        # Sort the entries actually present rather than rebuilding them from
        # the rule: this keeps duplicates intact instead of dropping or
        # overwriting entries when a name occurs more than once.
        members = sorted((categories[i] for i in slots), key=rank.__getitem__)

        for i, name in zip(slots, members):
            categories[i] = name

    return categories

In [3]:
# Category folder names: sorted directory listing, then the manual order
# overrides, stored as a NumPy array.
categories_os = np.array(
    reorder_categories_inplace(sorted(os.listdir(image_path)))
)

# Concept names that assign a category to each group. They are compared
# against the concept-table column headers (`extra_concepts`) in the grouping
# loop, so every name must match a header exactly.
# NOTE(review): "farm_animal" uses an underscore while "garden tool" and
# "school supply" use spaces — confirm against the TSV headers.
an1 = ["mammal", "insect", "farm_animal", "bird"]
# Fixed: "vegtable" was a misspelling of "vegetable" (so vegetables could never
# match), and "fruit" was listed twice.
inan1 = ["plant", "fruit", "vegetable"]
inan2 = ["vehicle", "tool", "garden tool", "school supply", "weapon"]
inan3 = ["food"]

In [4]:
# Rows of `concept_mat` are matched to image folders purely by position, so a
# length mismatch means every assignment below would be shifted or wrong.
if len(categories_os) != len(concept_mat):
    raise ValueError(
        f"{len(categories_os)} image folders but {len(concept_mat)} concept rows; "
        "folders and concept rows are matched by position and must line up"
    )

animals = []
animals_extra = []  # one concept label per entry of `animals`, same order
plants = []
objects = []
food = []

for cat_id, cat in enumerate(categories_os):
    # `concept_mat` is already a boolean mask, so the row selects the concept
    # names directly.
    cat_concepts = extra_concepts[concept_mat[cat_id]]

    # The four checks are independent: one category may land in several groups.
    if any(item in cat_concepts for item in an1):
        animals.append(cat)
        # Keep the established choice (second concept of the row) but fall back
        # to the only concept when the row has just one, instead of raising
        # IndexError and leaving `animals` / `animals_extra` out of step.
        if len(cat_concepts) > 1:
            animals_extra.append(cat_concepts[1])
        else:
            animals_extra.append(cat_concepts[0])

    if any(item in cat_concepts for item in inan1):
        plants.append(cat)

    if any(item in cat_concepts for item in inan2):
        objects.append(cat)

    if any(item in cat_concepts for item in inan3):
        food.append(cat)

In [6]:
def get_category_paths(
    category_list,
    image_path="/projects/archiv/DataStore_Boyanova/ExpAtt_EEG/Image_dataset/Images",
):
    """
    Collects the paths of all entries inside the given category folders.
    ------
    Args:
        category_list (iterable of str): Names of category sub-folders.
        image_path (str): Root folder containing one sub-folder per category.
            Defaults to the project's image dataset location.

    Returns:
        list of str: Paths ``image_path/<category>/<entry>``, grouped by
        category in the order given and sorted by entry name within each
        category.

    Raises:
        OSError: If a category folder does not exist or cannot be listed.
    """
    all_cat_imgs = []

    for cat in category_list:
        data_path = os.path.join(image_path, cat)
        # os.listdir returns entries in arbitrary order; sort so the result
        # (and anything pickled from it) is reproducible across runs/machines.
        all_cat_imgs.extend(
            os.path.join(data_path, f) for f in sorted(os.listdir(data_path))
        )

    return all_cat_imgs

import pickle
def dump_data(data, filename):
    """
    Pickles an object and writes it to disk.
    ------
    Args:
        data (any): Object to serialize.
        filename (str): Destination file; it is opened in binary write mode,
            so an existing file is overwritten.

    Returns:
        None
    """
    with open(filename, 'wb') as handle:
        # Highest available protocol, same as passing it to pickle.dump.
        writer = pickle.Pickler(handle, pickle.HIGHEST_PROTOCOL)
        writer.dump(data)

In [9]:
# File paths per group, listed from the image folders.
animal_paths, plant_paths, object_paths, food_paths = (
    get_category_paths(members)
    for members in (animals, plants, objects, food)
)

# Group name -> member category names; "animal_type" carries the extra concept
# label recorded for each entry of "animals".
categories = dict(
    animals=animals,
    animal_type=animals_extra,
    objects=objects,
    plants=plants,
    food=food,
)

# Group name -> file paths of every image in that group.
category_paths = dict(
    animals=animal_paths,
    objects=object_paths,
    plants=plant_paths,
    food=food_paths,
)

# Persist both mappings next to the concept table.
dump_data(categories, "./Animacy/categories.pkl")
dump_data(category_paths, "./Animacy/category_paths.pkl")

In [8]:
from pathlib import Path
import shutil

# Copy every image of each group into a sibling folder "Images_<group>" that
# mirrors the <category>/<file> layout found under "Images".
base_dir = Path("/projects/archiv/DataStore_Boyanova/ExpAtt_EEG/Image_dataset")

for category, paths in category_paths.items():
    # One output tree per group, e.g. base_dir / "Images_animals".
    target_root = base_dir / f"Images_{category}"
    
    for src in paths:
        src_path = Path(src)
        
        # Get subpath after "Images/"; relative_to raises ValueError if the
        # source does not live under base_dir / "Images".
        relative_subpath = src_path.relative_to(base_dir / "Images")
        
        dst_path = target_root / relative_subpath
        
        # Create parent directories if needed
        dst_path.parent.mkdir(parents=True, exist_ok=True)
        
        # Copy file (copy2 also attempts to preserve file metadata); an
        # existing destination file is overwritten on re-run.
        shutil.copy2(src_path, dst_path)

In [84]:
category_paths.keys()

dict_keys(['animals', 'objects', 'plants'])

In [88]:
len(category_paths["objects"])

3197