In [None]:
import os
import shutil
import pandas as pd

Organize the images into different subfolders based on their labels

In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm

def organize_images_by_label(
    csv_path,
    image_folder,
    output_folder,
    id_column="file_name",
    label_column="label",
    image_extension=".png",
) -> None:
    """
    Copy images into one subfolder per label, as listed in a CSV file.

    For every CSV row the file ``<image_folder>/<id><image_extension>`` is
    copied (with metadata, via ``shutil.copy2``) to
    ``<output_folder>/<label>/<id><image_extension>``. Source images are
    left in place. Rows whose image cannot be found are reported on stdout
    and skipped; a label folder is only created once there is an image to
    put in it, so no empty label folders are left behind.

    Args:
        csv_path: Path to the CSV file containing image IDs and labels
        image_folder: Path to the folder with all image files
        output_folder: Where to create subfolders and copy the images
        id_column: Column in the CSV with image IDs (without extension)
        label_column: Column with label names
        image_extension: Extension of image files (e.g. '.png', '.jpg')

    Raises:
        KeyError: If ``id_column`` or ``label_column`` is not a column of
            the CSV file.
    """

    # Read the ID column as text: letting pandas infer a numeric dtype would
    # turn an ID such as "007" into 7 (or 7.0), which no longer matches the
    # image file name on disk.
    df = pd.read_csv(csv_path, dtype={id_column: str})

    # Fail early, before touching the file system, if a column is absent.
    absent = [col for col in (id_column, label_column) if col not in df.columns]
    if absent:
        raise KeyError(f"Column(s) not found in {csv_path}: {absent}")

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Label folders already created during this run (avoids repeated makedirs)
    ready_folders = set()

    # Only the two relevant columns are needed, so walk them in lockstep
    # instead of materialising a Series per row with iterrows().
    pairs = zip(df[id_column], df[label_column])
    for image_id, label in tqdm(pairs, total=len(df)):
        # An empty ID cell is read as NaN and cannot name a file.
        if pd.isna(image_id):
            print(f"[⚠️] Missing image ID in row with label: {label}")
            continue

        file_name = str(image_id) + image_extension
        src_path = os.path.join(image_folder, file_name)

        # isfile (rather than exists) so a directory with a matching name
        # is reported as missing instead of crashing the copy.
        if not os.path.isfile(src_path):
            print(f"[⚠️] Missing image: {src_path}")
            continue

        label_folder = os.path.join(output_folder, str(label))
        if label_folder not in ready_folders:
            os.makedirs(label_folder, exist_ok=True)
            ready_folders.add(label_folder)

        shutil.copy2(src_path, os.path.join(label_folder, file_name))

    print(f"\n✅ Done organizing images by label into: {output_folder}")

# Script entry point: sort the N1 dataset images into per-label folders.
if __name__ == "__main__":
    # Label CSV, flat image directory, and destination root, in that order.
    csv_path, image_folder, output_folder = (
        "C:/Users/Christian/Desktop/N1_data/clean_data.csv",
        "C:/Users/Christian/Desktop/N1_data/image_data",
        "C:/Users/Christian/Desktop/N1_data/sorted_images",
    )

    # Column names and image extension are left at the function's defaults.
    organize_images_by_label(
        csv_path=csv_path,
        image_folder=image_folder,
        output_folder=output_folder,
    )