----

In [1]:
import os
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

def process_class(class_id, class_name, dataset_folder, set_):
    """Collect relative image paths and labels for a single class folder.

    Args:
        class_id: Identifier of the class; stored in the labels as
            ``str(class_id)``.
        class_name: Name of the class sub-folder inside ``dataset_folder``.
        dataset_folder: Directory of the split that contains the class folders.
        set_: Split name, used as the first component of every relative path.

    Returns:
        A ``(image_paths, labels)`` tuple of equal-length lists. Each path has
        the form ``set_/class_name/file_name`` and the paths are sorted by file
        name. Both lists are empty when the class folder does not exist.
    """
    class_folder = os.path.join(dataset_folder, class_name)
    if not os.path.isdir(class_folder):
        return [], []

    # Directory listings come back in arbitrary order; sorting makes the
    # output reproducible. Only regular files are kept so that nested
    # directories are not recorded as images.
    with os.scandir(class_folder) as entries:
        file_names = sorted(entry.name for entry in entries if entry.is_file())

    image_paths = [os.path.join(set_, class_name, name) for name in file_names]
    labels = [str(class_id)] * len(image_paths)
    return image_paths, labels

def main(root_dir=None):
    """Write one CSV of image paths and labels for each dataset split.

    Reads ``classes.names`` from ``root_dir`` (one class name per line; the
    zero-based line index is the class id), then for each of the ``train``,
    ``test`` and ``val`` splits gathers the files of every class folder and
    saves ``<root_dir>/<split>/<split>.csv`` with the columns
    ``image_relative_path`` and ``label``.

    Args:
        root_dir: Dataset root directory. When ``None``, the original
            hard-coded dataset location is used.
    """
    # Local import so this block does not depend on the file's import header.
    from functools import partial

    if root_dir is None:
        root_dir = '/home/vinicius-cin/Datasets/DF1_16c_classification_filtered_v1'

    # Read class names and ids
    with open(os.path.join(root_dir, 'classes.names'), mode="r") as f:
        class_names = f.read().splitlines()

    # Skip blank lines but keep the line index as the id. An empty class name
    # would make the class folder resolve to the split folder itself, so every
    # class directory would be listed as an image.
    classes = {
        class_id: class_name
        for class_id, class_name in enumerate(class_names)
        if class_name.strip()
    }

    with ThreadPoolExecutor(max_workers=16) as executor:  # Adjust max_workers as needed
        for set_ in ['train', 'test', 'val']:
            dataset_folder = os.path.join(root_dir, set_)
            worker = partial(process_class, dataset_folder=dataset_folder, set_=set_)
            results = list(tqdm(executor.map(worker, classes.keys(), classes.values()),
                                total=len(classes)))

            # Flatten the per-class results, preserving class order
            image_paths = []
            labels = []
            for class_paths, class_labels in results:
                image_paths.extend(class_paths)
                labels.extend(class_labels)

            # Create a DataFrame from the lists
            df = pd.DataFrame({"image_relative_path": image_paths, "label": labels})

            # Save the DataFrame to a CSV file
            save_path = os.path.join(root_dir, set_, f"{set_}.csv")
            df.to_csv(save_path, index=False)
            print(f'CSV saved to {save_path}')

# Generate the CSV files only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

100%|███████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 85.10it/s]


CSV saved to /home/vinicius-cin/Datasets/DF1_16c_classification_filtered_v1/train/train.csv


100%|█████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 1690.02it/s]


CSV saved to /home/vinicius-cin/Datasets/DF1_16c_classification_filtered_v1/test/test.csv


100%|█████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 1611.45it/s]

CSV saved to /home/vinicius-cin/Datasets/DF1_16c_classification_filtered_v1/val/val.csv



