#### Sorting images into folders named after their resolution

In [None]:
import os
import shutil
from PIL import Image

# Root directory holding the images to sort; the "<width>x<height>" folders are created inside it
source_dir = 'mars_images'

# Lower-case file extensions (with leading dot) that are treated as images; add more if needed
valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff'}

def get_unique_filename(dest_dir, filename):
    """
    Return a name based on `filename` that is not yet present in `dest_dir`.

    The original name is returned when it is free. Otherwise "_1", "_2", ...
    is inserted before the extension until a free name is found, so an
    existing file in the target directory is never overwritten.
    """
    stem, suffix = os.path.splitext(filename)
    candidate = filename
    attempt = 0
    while True:
        if not os.path.exists(os.path.join(dest_dir, candidate)):
            return candidate
        # Name is taken: try the next numbered variant
        attempt += 1
        candidate = f"{stem}_{attempt}{suffix}"

# Sort every image that sits directly in source_dir into a "<width>x<height>" sub-folder
if os.path.exists(source_dir):
    for item in os.listdir(source_dir):
        item_path = os.path.join(source_dir, item)
        # Sub-folders (including resolution folders made earlier) are left untouched
        if not os.path.isfile(item_path):
            continue

        ext = os.path.splitext(item)[1].lower()
        if ext not in valid_extensions:
            print(f"Skipping '{item}': Not a supported image file.")
            continue

        try:
            # The with-block closes the image file again before it is moved
            with Image.open(item_path) as img:
                width, height = img.size

            resolution_folder = f"{width}x{height}"
            target_dir = os.path.join(source_dir, resolution_folder)

            # Reuse the folder when it is already there; its content is kept
            os.makedirs(target_dir, exist_ok=True)

            # Pick a name that does not collide with a file already in the folder
            unique_filename = get_unique_filename(target_dir, item)
            target_path = os.path.join(target_dir, unique_filename)

            shutil.move(item_path, target_path)
            print(f"Moved '{item}' to '{target_dir}'")
        except Exception as e:
            # Keep going with the remaining files when one cannot be read or moved
            print(f"Error processing file '{item}': {e}")
else:
    print(f"Directory '{source_dir}' does not exist.")


#### Remove duplicate images

In [None]:
import os
import hashlib

def compute_file_hash(filepath, chunk_size=8192):
    """
    Return the hexadecimal MD5 digest of the file at `filepath`.

    The file is read in binary mode, `chunk_size` bytes at a time, so it is
    never loaded into memory as a whole.
    """
    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                # An empty read marks the end of the file
                break
            digest.update(block)
    return digest.hexdigest()

# Define the base directory where subfolders of images exist.
base_dir = 'mars_images'

if not os.path.isdir(base_dir):
    # Report a missing directory instead of letting os.listdir raise.
    print(f"Directory '{base_dir}' does not exist.")
else:
    # Duplicates are detected per subfolder; files in different subfolders are never compared.
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        print(f"\nProcessing folder: {folder_path}")
        # Maps content hash -> path of the first file seen with that content (the copy that is kept).
        hashes = {}
        # os.listdir returns entries in arbitrary order; sorting makes the kept copy
        # deterministic (e.g. "name.jpg" sorts before "name_1.jpg", so it survives).
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            if not os.path.isfile(file_path):
                continue

            try:
                file_hash = compute_file_hash(file_path)
            except Exception as e:
                print(f"Error computing hash for {file_path}: {e}")
                continue

            if file_hash not in hashes:
                hashes[file_hash] = file_path
                continue

            # Same hash as an earlier file in this folder: treat it as a duplicate.
            print(f"Duplicate found: '{file_path}' is identical to '{hashes[file_hash]}'. Deleting '{file_path}'.")
            try:
                os.remove(file_path)
            except OSError as e:
                # A file that cannot be deleted must not abort the rest of the run.
                print(f"Error deleting {file_path}: {e}")

    print("\nDuplicate removal complete.")


#### Move grey and colored images from their sub-directories back into the resolution folder

In [2]:
import os
import shutil

# Root directory whose sub-folders are the resolution folders (e.g. "640x480")
source_dir = 'mars_images'

# Lower-case file extensions (with leading dot) that are treated as images; add more if needed
valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff'}

def get_unique_filename(dest_dir, filename):
    """
    Pick a filename for `dest_dir` that does not clash with an existing entry.

    `filename` itself is returned when nothing with that name exists in
    `dest_dir`; otherwise the first free name of the form
    "<base>_<n><ext>" (n = 1, 2, ...) is returned, so files moved into the
    folder never overwrite each other.
    """
    def taken(name):
        return os.path.exists(os.path.join(dest_dir, name))

    if not taken(filename):
        return filename

    root, extension = os.path.splitext(filename)
    n = 1
    while taken(f"{root}_{n}{extension}"):
        n += 1
    return f"{root}_{n}{extension}"

# Undo the grey/colored split: move the images back up into their resolution folder
if os.path.exists(source_dir):
    for res_folder in os.listdir(source_dir):
        res_folder_path = os.path.join(source_dir, res_folder)
        # Only directories with an 'x' in the name are treated as resolution folders
        if not os.path.isdir(res_folder_path) or 'x' not in res_folder:
            continue

        for subfolder in ('grey', 'colored'):
            subfolder_path = os.path.join(res_folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            for item in os.listdir(subfolder_path):
                item_path = os.path.join(subfolder_path, item)
                if not os.path.isfile(item_path):
                    continue

                ext = os.path.splitext(item)[1].lower()
                if ext not in valid_extensions:
                    # Unsupported files stay where they are, which keeps the sub-folder non-empty
                    continue

                try:
                    # Avoid clobbering a file of the same name already in the resolution folder
                    unique_filename = get_unique_filename(res_folder_path, item)
                    dest_path = os.path.join(res_folder_path, unique_filename)

                    shutil.move(item_path, dest_path)
                    print(f"Moved '{item}' from '{subfolder}' to '{res_folder}'")
                except Exception as e:
                    print(f"Error moving file '{item}' from '{subfolder}': {e}")

            # Drop the sub-folder only once nothing is left inside it
            if not os.listdir(subfolder_path):
                os.rmdir(subfolder_path)
                print(f"Removed empty folder '{subfolder_path}'")
else:
    print(f"Directory '{source_dir}' does not exist.")


Removed empty folder 'mars_images/1200x240/grey'
Moved 'NLG_0367_0699529954_409ECM_N0110108NCAM00529_00_2I4J02_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0367_0699529958_693ECM_N0110108NCAM00529_00_2I4J02_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0367_0699529962_691ECM_N0110108NCAM00529_00_2I4J02_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0367_0699529966_895ECM_N0110108NCAM00529_00_2I4J02_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0367_0699529971_659ECM_N0110108NCAM00529_00_2I4J02_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0367_0699529975_774ECM_N0110108NCAM00529_00_2I4J02_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0368_0699607874_100ECM_N0110108NCAM00520_00_2I4J01_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0368_0699607878_675ECM_N0110108NCAM00520_00_2I4J01_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0368_0699607882_675ECM_N0110108NCAM00520_00_2I4J01_1200.jpg' from 'colored' to '1200x240'
Moved 'NLG_0368_0699607886_729ECM_N0110108