# Various functions for formatting the data, and other relevant data formatting methods

Extract the images from the dataset's subfolders and copy them into a single new folder

Used to run CSEC image correction.

In [5]:
import os
import shutil

# Flatten a folder of folders: every image found anywhere below `input_folder`
# is copied directly into `output_folder`.
input_folder = "/home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC"  # Replace with the path to your folder of folders
output_folder = "/home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC_extracted/"  # Replace with the path to your new folder


# File extensions that count as images.
valid_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.JPG']

# File names are lower-cased before matching, so the suffixes are lower-cased
# (and de-duplicated) too; str.endswith accepts a tuple of candidates.
image_suffixes = tuple(sorted({ext.lower() for ext in valid_extensions}))

# Make sure the output folder exists (no error if it is already there)
os.makedirs(output_folder, exist_ok=True)

# Walk through the input folder and process each subfolder
for root, dirs, files in os.walk(input_folder):
    for file in files:
        if not file.lower().endswith(image_suffixes):
            continue

        file_path = os.path.join(root, file)
        dest_path = os.path.join(output_folder, file)

        # Avoid overwriting an existing file: append _1, _2, ... to the stem
        # until the name is free. Note that a renamed copy no longer shares
        # its file name with the source image.
        name, ext = os.path.splitext(file)
        counter = 1
        while os.path.exists(dest_path):
            dest_path = os.path.join(output_folder, f"{name}_{counter}{ext}")
            counter += 1

        # Copy the file to the new directory
        shutil.copy(file_path, dest_path)
        print(f"Copied: {file_path} -> {dest_path}")



Copied: /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC/subject_18/PXL_20220922_182645489.jpg -> /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC_extracted/PXL_20220922_182645489.jpg
Copied: /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC/subject_18/PXL_20220922_183318443.jpg -> /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC_extracted/PXL_20220922_183318443.jpg
Copied: /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC/subject_18/PXL_20220922_183322816.jpg -> /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC_extracted/PXL_20220922_183322816.jpg
Copied: /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC/subject_18/PXL_20220922_182710449.jpg -> /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC_extracted/PXL_20220922_182710449.jpg
Copied: /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC/subject_18/PXL_20220922_182722342.jpg -> /home/dasec-notebook/Thesis/Datasets/mst-e_data/FIQA_CSEC_extracted/PXL_20220922_1827

Recreate the folder structure

In [9]:
import os
import shutil

# Paths to the directories
# extracted_folder = "/home/dasec-notebook/Thesis/Datasets/CHROMA-FIT-Dataset/old/CSEC_extracted/"  # Directory with extracted files
# original_folder = "/home/dasec-notebook/Thesis/Datasets/CHROMA-FIT-Dataset/DATA_CROPPED_PORTRAIT2/"  # Original folder with folder structure
# output_folder = "/home/dasec-notebook/Thesis/Datasets/CHROMA-FIT-Dataset/CSEC/"  # New folder to add extracted files back

extracted_folder = "/home/dasec-notebook/Thesis/Datasets/mst-e_data/old/CSEC_extracted/"  # Directory with extracted files
original_folder = "/home/dasec-notebook/Thesis/Datasets/mst-e_data/mst-e_data_portrait/"  # Original folder with folder structure
output_folder = "/home/dasec-notebook/Thesis/Datasets/mst-e_data/CSEC/"  # New folder to add extracted files back


# File extensions that count as images.
valid_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.JPG']

# File names are lower-cased before matching, so the suffixes are lower-cased
# (and de-duplicated) too; str.endswith accepts a tuple of candidates.
image_suffixes = tuple(sorted({ext.lower() for ext in valid_extensions}))

os.makedirs(output_folder, exist_ok=True)

# One pass over the original tree does two jobs:
#   1. replicate every directory below `original_folder` inside `output_folder`;
#   2. remember, for each file name, the relative directory it lives in.
# Indexing here avoids re-walking the whole tree once per extracted file.
original_location = {}
for root, dirs, files in os.walk(original_folder):
    for dir_name in dirs:
        original_dir_path = os.path.join(root, dir_name)
        relative_path = os.path.relpath(original_dir_path, original_folder)
        os.makedirs(os.path.join(output_folder, relative_path), exist_ok=True)

    containing_dir = os.path.relpath(root, original_folder)
    for original_name in files:
        # setdefault keeps the first directory (in walk order) when the same
        # file name occurs in several places.
        original_location.setdefault(original_name, containing_dir)

# Add extracted images to their corresponding folders
for file_name in os.listdir(extracted_folder):
    if not file_name.lower().endswith(image_suffixes):
        continue

    relative_path = original_location.get(file_name)
    if relative_path is None:
        print(f"Warning: {file_name} not found in original folder structure. Skipping.")
        continue

    # Copy the file to the folder that mirrors its location in the original tree
    dest_path = os.path.join(output_folder, relative_path, file_name)
    shutil.copy(os.path.join(extracted_folder, file_name), dest_path)


Compare 2 directories

In [7]:
import os

def get_files_with_relative_paths(root_folder):
    """
    Collect every file found below ``root_folder``.

    Returns a set of path strings, each expressed relative to ``root_folder``.
    Directories themselves are not included; only the files inside them.
    """
    return {
        os.path.relpath(os.path.join(current_dir, filename), root_folder)
        for current_dir, _subdirs, filenames in os.walk(root_folder)
        for filename in filenames
    }

def compare_directories(root1, root2):
    """
    Report which files exist under only one of two directory trees.

    Files are matched by their path relative to the respective root; file
    contents are not inspected. The result is printed, nothing is returned.
    """
    files1 = get_files_with_relative_paths(root1)
    files2 = get_files_with_relative_paths(root2)

    # Pair each root with the files that are missing from the other tree.
    exclusive_files = (
        (root1, files1 - files2),
        (root2, files2 - files1),
    )

    if not any(extras for _, extras in exclusive_files):
        print("The directories contain the same files.")
        return

    for root, extras in exclusive_files:
        if not extras:
            continue
        print(f"Files only in {root}:")
        for file in sorted(extras):
            print(f"  {file}")

# Example usage: list the files that are present in only one of the two trees.
root_folder_1 = "/home/dasec-notebook/Thesis/Datasets/CHROMA-FIT-Dataset/DATA_CROPPED_PORTRAIT2"  # First root folder to compare
root_folder_2 = "/home/dasec-notebook/Thesis/Datasets/CHROMA-FIT-Dataset/CSEC"  # Second root folder to compare

compare_directories(root1=root_folder_1, root2=root_folder_2)


The directories contain the same files.
