In [47]:
import os
import pandas as pd
import shutil

def clean_filename(filename):
    # Remove leading and trailing whitespaces and other non-visible characters
    return filename.strip()

def rename_images_with_csv(directory, csv_file, output_folder):
    # Read the CSV file into a Pandas DataFrame
    df = pd.read_csv(csv_file)

    
    # Convert the 'id' column to strings before creating the rename dictionary
    df['id'] = df['id'].astype(str)

    # Create a dictionary mapping old filenames (cleaned) to new filenames
    rename_dict = dict(zip(df['id'], df['filename']))

    # Iterate through the files in the input directory
    for filename in os.listdir(directory):
        # Get the full path for the source file
        src = os.path.join(directory, filename)

        # Clean the filename (remove leading and trailing whitespaces and non-visible characters)
        filename_without_extension, file_extension = os.path.splitext(clean_filename(filename))


        print(rename_dict)
        if filename_without_extension in rename_dict:
            print("test")
            # Convert the integer value to a string before concatenating
            new_filename = str(rename_dict[filename_without_extension]) + file_extension

            # Get the full path for the destination file in the output folder
            dst = os.path.join(output_folder, new_filename)

            # Create any missing directories in the output folder
            os.makedirs(os.path.dirname(dst), exist_ok=True)

            # Copy the file to the output folder with the new filename
            shutil.copy(src, dst)

def rename_images(directory):
    # List of valid image file extensions (you can add more if needed)
    valid_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    for filename in os.listdir(directory):
        if filename.lower().endswith(valid_extensions):
            # Split the filename at "-a" to get characters before it
            parts = filename.split("-annotation")
            
            # Check if there's a "-a" in the filename and at least one character before it
            if len(parts) > 1 and parts[0]:
                # Get the file extension from the original filename
                file_extension = os.path.splitext(filename)[-1]
                
                # Use the characters before "-a" as the new name
                new_name = parts[0]
                
                # Create the new filename by combining the new name and the file extension
                new_filename = f"{new_name}{file_extension}"
                
                # Get the full paths for the source and destination
                src = os.path.join(directory, filename)
                dst = os.path.join(directory, new_filename)
                
                # Rename the file
                os.rename(src, dst)

def remove_task_prefix(directory):
    # List of valid image file extensions (you can add more if needed)
    valid_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    for filename in os.listdir(directory):
        if filename.lower().endswith(valid_extensions) and filename.startswith('task-'):
            new_filename = filename.replace('task-', '', 1)  # Remove 'task-' prefix once
            src = os.path.join(directory, filename)
            dst = os.path.join(directory, new_filename)
            os.rename(src, dst)

if __name__ == "__main__":
    directory = "dataset/dataset_masks_sept/masks2"  # Change this to the full path of your image directory
    output_folder = "dataset/dataset_masks_sept/masks2-copy"
    csv_file = "dataset/dataset_masks_sept/masks-september-25-fix-id.csv"  # Change this to the full path of your CSV file
    rename_images_with_csv(directory, csv_file, output_folder)
    # rename_images(directory)
    # remove_task_prefix(directory)


{'246': 3, '247': 6, '248': 17, '249': 26, '250': 39, '251': 44, '252': 48, '253': 51, '254': 55, '255': 59, '256': 63, '257': 69, '258': 74, '259': 78, '260': 84, '261': 87, '262': 91, '263': 95, '264': 100, '265': 104, '266': 108, '267': 111, '268': 116, '269': 121, '270': 126, '271': 131, '272': 159, '273': 163, '274': 166, '275': 170, '276': 183, '277': 188, '278': 194, '279': 199, '280': 143, '281': 152, '282': 268, '283': 272, '284': 276, '285': 280, '286': 284, '287': 298, '288': 202, '289': 205, '290': 215, '291': 264, '292': 367, '293': 408, '294': 414, '295': 451, '296': 458, '297': 302, '298': 310, '299': 321, '300': 328, '301': 332, '302': 580, '303': 586, '304': 589, '305': 592, '306': 600, '307': 604, '308': 622, '309': 626, '310': 630, '311': 636, '312': 648, '313': 652, '314': 656, '315': 690, '316': 462, '317': 475, '318': 498, '319': 513, '320': 517, '321': 531, '322': 546, '323': 552, '324': 555, '325': 565, '326': 568, '327': 856, '328': 859, '329': 880, '330': 902,