In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import shutil

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000):
    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    processed_images = 0

    # Iterate through files in batches
    for start_index in range(0, min(max_images, len(all_files)), batch_size):
        end_index = min(start_index + batch_size, len(all_files))
        image_files = all_files[start_index:end_index]

        # Iterate through each file in the batch
        for image_file in image_files:
            # Construct the corresponding sketch file name
            sketch_file = image_file.replace('.', '_sketch.')

            # Check if the sketch file exists
            if os.path.exists(os.path.join(sketch_folder, sketch_file)):
                # Copy the image and sketch to the output folder
                image_path = os.path.join(image_folder, image_file)
                sketch_path = os.path.join(sketch_folder, sketch_file)

                # Create a folder for each sample
                sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
                os.makedirs(sample_folder, exist_ok=True)

                # Copy the image and sketch to the sample folder
                shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
                shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

                processed_images += 1

                # Print progress information
                print(f"Processed image {processed_images}/{min(max_images, len(all_files))}: {image_file}")

                # Break the loop if the maximum number of images is reached
                if processed_images == max_images:
                    break

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/hd7_train"

    # Set the maximum number of images to process
    max_images_to_process = 65000

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=max_images_to_process)


Processed image 1/65000: 0.png
Processed image 2/65000: 1.png
Processed image 3/65000: 2.png
Processed image 4/65000: 3.png
Processed image 5/65000: 4.png
Processed image 6/65000: 5.png
Processed image 7/65000: 6.png
Processed image 8/65000: 7.png
Processed image 9/65000: 8.png
Processed image 10/65000: 9.png
Processed image 11/65000: 10.png
Processed image 12/65000: 11.png
Processed image 13/65000: 12.png
Processed image 14/65000: 13.png
Processed image 15/65000: 14.png
Processed image 16/65000: 15.png
Processed image 17/65000: 16.png
Processed image 18/65000: 17.png
Processed image 19/65000: 18.png
Processed image 20/65000: 19.png
Processed image 21/65000: 20.png
Processed image 22/65000: 21.png
Processed image 23/65000: 22.png
Processed image 24/65000: 23.png
Processed image 25/65000: 24.png
Processed image 26/65000: 25.png
Processed image 27/65000: 26.png
Processed image 28/65000: 27.png
Processed image 29/65000: 28.png
Processed image 30/65000: 30.png
Processed image 31/65000: 31.

KeyboardInterrupt: 

In [None]:
import os
import shutil

def save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images):
    with open(checkpoint_file, 'w') as file:
        file.write(f"{image_folder}\n")
        file.write(f"{sketch_folder}\n")
        file.write(f"{output_folder}\n")
        file.write(f"{processed_images}\n")

def load_checkpoint(checkpoint_file):
    try:
        with open(checkpoint_file, 'r') as file:
            lines = file.readlines()
            if len(lines) == 4:
                image_folder = lines[0].strip()
                sketch_folder = lines[1].strip()
                output_folder = lines[2].strip()
                processed_images = int(lines[3])
                return image_folder, sketch_folder, output_folder, processed_images
            else:
                print("Checkpoint file is not formatted correctly. Creating a new checkpoint.")
                return None
    except FileNotFoundError:
        return None

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000, checkpoint_file="checkpoint.txt"):
    # Load the checkpoint if it exists
    checkpoint = load_checkpoint(checkpoint_file)

    if checkpoint:
        image_folder, sketch_folder, output_folder, processed_images = checkpoint
        print(f"Resuming from checkpoint. Processed {processed_images} images.")
    else:
        processed_images = 0

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    # Iterate through files in batches
    for start_index in range(0, min(max_images, len(all_files)), batch_size):
        end_index = min(start_index + batch_size, len(all_files))
        image_files = all_files[start_index:end_index]

        # Iterate through each file in the batch
        for image_file in image_files:
            # Construct the corresponding sketch file name
            sketch_file = image_file.replace('.', '_sketch.')

            # Check if the sketch file exists
            if os.path.exists(os.path.join(sketch_folder, sketch_file)):
                # Copy the image and sketch to the output folder
                image_path = os.path.join(image_folder, image_file)
                sketch_path = os.path.join(sketch_folder, sketch_file)

                # Create a folder for each sample
                sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
                os.makedirs(sample_folder, exist_ok=True)

                # Copy the image and sketch to the sample folder
                shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
                shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

                processed_images += 1

                # Print progress information
                print(f"Processed image {processed_images}/{min(max_images, len(all_files))}: {image_file}")

                # Save the checkpoint after processing each image
                save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images)

                # Break the loop if the maximum number of images is reached
                if processed_images == max_images:
                    break

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/hd7_train"

    # Set the maximum number of images to process
    max_images_to_process = 65000

    # Set the checkpoint file path (customize this according to your preference)
    checkpoint_file_path = "/content/drive/MyDrive/checkpointtrain.txt"

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=max_images_to_process, checkpoint_file=checkpoint_file_path)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed image 30782/65000: 5203.png
Processed image 30783/65000: 5204.png
Processed image 30784/65000: 5205.png
Processed image 30785/65000: 5206.png
Processed image 30786/65000: 5207.png
Processed image 30787/65000: 5208.png
Processed image 30788/65000: 5209.png
Processed image 30789/65000: 5210.png
Processed image 30790/65000: 5211.png
Processed image 30791/65000: 5212.png
Processed image 30792/65000: 5213.png
Processed image 30793/65000: 5214.png
Processed image 30794/65000: 5215.png
Processed image 30795/65000: 5216.png
Processed image 30796/65000: 5217.png
Processed image 30797/65000: 5218.png
Processed image 30798/65000: 5219.png
Processed image 30799/65000: 5220.png
Processed image 30800/65000: 5221.png
Processed image 30801/65000: 5222.png
Processed image 30802/65000: 5223.png
Processed image 30803/65000: 5224.png
Processed image 30804/65000: 5225.png
Processed image 30805/65000: 5226.png
Processed image 30806/6

OSError: [Errno 107] Transport endpoint is not connected

In [None]:
import os
import shutil

def save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images, processed_filenames):
    with open(checkpoint_file, 'w') as file:
        file.write(f"{image_folder}\n")
        file.write(f"{sketch_folder}\n")
        file.write(f"{output_folder}\n")
        file.write(f"{processed_images}\n")
        for filename in processed_filenames:
            file.write(f"{filename}\n")

def load_checkpoint(checkpoint_file):
    try:
        with open(checkpoint_file, 'r') as file:
            lines = file.readlines()
            if len(lines) >= 4:
                image_folder = lines[0].strip()
                sketch_folder = lines[1].strip()
                output_folder = lines[2].strip()
                processed_images = int(lines[3])
                processed_filenames = [filename.strip() for filename in lines[4:]]
                return image_folder, sketch_folder, output_folder, processed_images, processed_filenames
            else:
                print("Checkpoint file is not formatted correctly. Creating a new checkpoint.")
                return None
    except FileNotFoundError:
        return None

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000, checkpoint_file="checkpoint.txt"):
    # Load the checkpoint if it exists
    checkpoint = load_checkpoint(checkpoint_file)

    if checkpoint:
        image_folder, sketch_folder, output_folder, processed_images, processed_filenames = checkpoint
        print(f"Resuming from checkpoint. Processed {processed_images} images.")
    else:
        processed_images = 0
        processed_filenames = []

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    # Iterate through files in batches
    for start_index in range(0, min(max_images, len(all_files)), batch_size):
        end_index = min(start_index + batch_size, len(all_files))
        image_files = all_files[start_index:end_index]

        # Iterate through each file in the batch
        for image_file in image_files:
            # Check if the image has already been processed
            if image_file in processed_filenames:
                print(f"Skipping image {image_file} as it has already been processed.")
                continue

            # Construct the corresponding sketch file name
            sketch_file = image_file.replace('.', '_sketch.')

            # Check if the sketch file exists
            if os.path.exists(os.path.join(sketch_folder, sketch_file)):
                # Copy the image and sketch to the output folder
                image_path = os.path.join(image_folder, image_file)
                sketch_path = os.path.join(sketch_folder, sketch_file)

                # Create a folder for each sample
                sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
                os.makedirs(sample_folder, exist_ok=True)

                # Copy the image and sketch to the sample folder
                shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
                shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

                processed_images += 1
                processed_filenames.append(image_file)

                # Print progress information
                print(f"Processed image {processed_images}/{min(max_images, len(all_files))}: {image_file}")

                # Save the checkpoint after processing each image
                save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images, processed_filenames)

                # Break the loop if the maximum number of images is reached
                if processed_images == max_images:
                    break

            else:
                print(f"Skipping image {image_file} as the corresponding sketch file does not exist.")

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/hd7_train"

    # Set the maximum number of images to process
    max_images_to_process = 65000

    # Set the checkpoint file path (customize this according to your preference)
    checkpoint_file_path = "/content/drive/MyDrive/checkpointtrain.txt"

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=max_images_to_process, checkpoint_file=checkpoint_file_path)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed image 26444/65000: 867.png
Processed image 26445/65000: 868.png
Processed image 26446/65000: 869.png
Processed image 26447/65000: 870.png
Processed image 26448/65000: 871.png
Processed image 26449/65000: 872.png
Processed image 26450/65000: 873.png
Processed image 26451/65000: 874.png
Processed image 26452/65000: 875.png
Processed image 26453/65000: 876.png
Processed image 26454/65000: 877.png
Processed image 26455/65000: 878.png
Processed image 26456/65000: 879.png
Processed image 26457/65000: 880.png
Processed image 26458/65000: 881.png
Processed image 26459/65000: 882.png
Processed image 26460/65000: 883.png
Processed image 26461/65000: 884.png
Processed image 26462/65000: 885.png
Processed image 26463/65000: 886.png
Processed image 26464/65000: 887.png
Processed image 26465/65000: 888.png
Processed image 26466/65000: 889.png
Processed image 26467/65000: 890.png
Processed image 26468/65000: 891.png
Processed 

KeyboardInterrupt: 

In [None]:
#      TRAINNNNNN
import os
import shutil

def save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images, processed_filenames):
    with open(checkpoint_file, 'w') as file:
        file.write(f"{image_folder}\n")
        file.write(f"{sketch_folder}\n")
        file.write(f"{output_folder}\n")
        file.write(f"{processed_images}\n")
        for filename in processed_filenames:
            file.write(f"{filename}\n")

def load_checkpoint(checkpoint_file):
    try:
        with open(checkpoint_file, 'r') as file:
            lines = file.readlines()
            if len(lines) >= 4:
                image_folder = lines[0].strip()
                sketch_folder = lines[1].strip()
                output_folder = lines[2].strip()
                processed_images = int(lines[3])
                processed_filenames = [filename.strip() for filename in lines[4:]]
                return image_folder, sketch_folder, output_folder, processed_images, processed_filenames
            else:
                print("Checkpoint file is not formatted correctly. Creating a new checkpoint.")
                return None
    except FileNotFoundError:
        return None

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000, checkpoint_file="checkpoint.txt"):
    # Load the checkpoint if it exists
    checkpoint = load_checkpoint(checkpoint_file)

    if checkpoint:
        image_folder, sketch_folder, output_folder, processed_images, processed_filenames = checkpoint
        print(f"Resuming from checkpoint. Processed {processed_images} images.")
    else:
        processed_images = 0
        processed_filenames = []

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    # Find the index to resume from
    resume_index = all_files.index(f'{processed_images}.png') if f'{processed_images}.png' in all_files else 0

    # Iterate through files in batches starting from the resume index
    for start_index in range(resume_index, min(max_images, len(all_files)), batch_size):
        end_index = min(start_index + batch_size, len(all_files))
        image_files = all_files[start_index:end_index]

        # Iterate through each file in the batch
        for image_file in image_files:
            # Check if the image has already been processed
            if image_file in processed_filenames:
                print(f"Skipping image {image_file} as it has already been processed.")
                continue

            # Construct the corresponding sketch file name
            sketch_file = image_file.replace('.', '_sketch.')

            # Check if the sketch file exists
            if os.path.exists(os.path.join(sketch_folder, sketch_file)):
                # Copy the image and sketch to the output folder
                image_path = os.path.join(image_folder, image_file)
                sketch_path = os.path.join(sketch_folder, sketch_file)

                # Create a folder for each sample
                sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
                os.makedirs(sample_folder, exist_ok=True)

                # Copy the image and sketch to the sample folder
                shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
                shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

                processed_filenames.append(image_file)

                # Print progress information
                print(f"Processed image {processed_images + 1}/{min(max_images, len(all_files))}: {image_file}")

                # Save the checkpoint after processing each image
                save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images + 1, processed_filenames)

                processed_images += 1

                # Break the loop if the maximum number of images is reached
                if processed_images == max_images:
                    break

            else:
                print(f"Skipping image {image_file} as the corresponding sketch file does not exist.")

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/hd7_train"

    # Set the maximum number of images to process
    max_images_to_process = 65000

    # Set the checkpoint file path (customize this according to your preference)
    checkpoint_file_path = "/content/drive/MyDrive/checkpointtrain.txt"

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=max_images_to_process, checkpoint_file=checkpoint_file_path)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed image 60002/65000: 60003.png
Processed image 60003/65000: 60004.png
Processed image 60004/65000: 60005.png
Processed image 60005/65000: 60006.png
Processed image 60006/65000: 60007.png
Processed image 60007/65000: 60008.png
Processed image 60008/65000: 60009.png
Processed image 60009/65000: 60010.png
Processed image 60010/65000: 60011.png
Processed image 60011/65000: 60012.png
Processed image 60012/65000: 60013.png
Processed image 60013/65000: 60014.png
Processed image 60014/65000: 60015.png
Processed image 60015/65000: 60016.png
Processed image 60016/65000: 60017.png
Processed image 60017/65000: 60018.png
Processed image 60018/65000: 60019.png
Processed image 60019/65000: 60020.png
Processed image 60020/65000: 60021.png
Processed image 60021/65000: 60022.png
Processed image 60022/65000: 60023.png
Processed image 60023/65000: 60024.png
Processed image 60024/65000: 60025.png
Processed image 60025/65000: 60026.png

In [None]:
#      valid
import os
import shutil

def save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images, processed_filenames):
    with open(checkpoint_file, 'w') as file:
        file.write(f"{image_folder}\n")
        file.write(f"{sketch_folder}\n")
        file.write(f"{output_folder}\n")
        file.write(f"{processed_images}\n")
        for filename in processed_filenames:
            file.write(f"{filename}\n")

def load_checkpoint(checkpoint_file):
    try:
        with open(checkpoint_file, 'r') as file:
            lines = file.readlines()
            if len(lines) >= 4:
                image_folder = lines[0].strip()
                sketch_folder = lines[1].strip()
                output_folder = lines[2].strip()
                processed_images = int(lines[3])
                processed_filenames = [filename.strip() for filename in lines[4:]]
                return image_folder, sketch_folder, output_folder, processed_images, processed_filenames
            else:
                print("Checkpoint file is not formatted correctly. Creating a new checkpoint.")
                return None
    except FileNotFoundError:
        return None

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000, checkpoint_file="checkpoint.txt"):
    # Load the checkpoint if it exists
    checkpoint = load_checkpoint(checkpoint_file)

    if checkpoint:
        image_folder, sketch_folder, output_folder, processed_images, processed_filenames = checkpoint
        print(f"Resuming from checkpoint. Processed {processed_images} images.")
    else:
        processed_images = 0
        processed_filenames = []

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    # Find the index to resume from
    resume_index = all_files.index(f'{processed_images}.png') if f'{processed_images}.png' in all_files else 0

    # Iterate through files in batches starting from the resume index
    for start_index in range(resume_index, min(max_images, len(all_files)), batch_size):
        end_index = min(start_index + batch_size, len(all_files))
        image_files = all_files[start_index:end_index]

        # Iterate through each file in the batch
        for image_file in image_files:
            # Check if the image has already been processed
            if image_file in processed_filenames:
                print(f"Skipping image {image_file} as it has already been processed.")
                continue

            # Construct the corresponding sketch file name
            sketch_file = image_file.replace('.', '_sketch.')

            # Check if the sketch file exists
            if os.path.exists(os.path.join(sketch_folder, sketch_file)):
                # Copy the image and sketch to the output folder
                image_path = os.path.join(image_folder, image_file)
                sketch_path = os.path.join(sketch_folder, sketch_file)

                # Create a folder for each sample
                sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
                os.makedirs(sample_folder, exist_ok=True)

                # Copy the image and sketch to the sample folder
                shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
                shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

                processed_filenames.append(image_file)

                # Print progress information
                print(f"Processed image {processed_images + 1}/{min(max_images, len(all_files))}: {image_file}")

                # Save the checkpoint after processing each image
                save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images + 1, processed_filenames)

                processed_images += 1

                # Break the loop if the maximum number of images is reached
                if processed_images == max_images:
                    break

            else:
                print(f"Skipping image {image_file} as the corresponding sketch file does not exist.")

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/validation"

    # Set the maximum number of images to process
    max_images_to_process = 87385

    # Set the checkpoint file path (customize this according to your preference)
    checkpoint_file_path = "/content/drive/MyDrive/checkpointtrain.txt"

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=max_images_to_process, checkpoint_file=checkpoint_file_path)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed image 75231/87385: 82616.png
Processed image 75232/87385: 82617.png
Processed image 75233/87385: 82618.png
Processed image 75234/87385: 82619.png
Processed image 75235/87385: 82620.png
Processed image 75236/87385: 82621.png
Processed image 75237/87385: 82622.png
Processed image 75238/87385: 82623.png
Processed image 75239/87385: 82624.png
Processed image 75240/87385: 82625.png
Processed image 75241/87385: 82626.png
Processed image 75242/87385: 82627.png
Processed image 75243/87385: 82628.png
Processed image 75244/87385: 82629.png
Processed image 75245/87385: 82630.png
Processed image 75246/87385: 82631.png
Processed image 75247/87385: 82632.png
Processed image 75248/87385: 82633.png
Processed image 75249/87385: 82634.png
Processed image 75250/87385: 82635.png
Processed image 75251/87385: 82636.png
Processed image 75252/87385: 82637.png
Processed image 75253/87385: 82638.png
Processed image 75254/87385: 82639.png

In [None]:
import os
from PIL import Image

def count_images_in_folder(folder_path, allowed_extensions=('.png', '.jpg', '.jpeg')):
    total_images = 0

    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.lower().endswith(allowed_extensions):
                total_images += 1

    return total_images

if __name__ == "__main__":
    # Replace this path with the path to your main folder
    main_folder_path = "/content/drive/MyDrive/validation"

    total_images_count = count_images_in_folder(main_folder_path)

    print(f"Total number of images in '{main_folder_path}' and its subfolders: {total_images_count}")


Total number of images in '/content/drive/MyDrive/validation' and its subfolders: 30456


In [None]:
# 65000 to 80000
import os
import shutil

def save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images, processed_filenames):
    with open(checkpoint_file, 'w') as file:
        file.write(f"{image_folder}\n")
        file.write(f"{sketch_folder}\n")
        file.write(f"{output_folder}\n")
        file.write(f"{processed_images}\n")
        for filename in processed_filenames:
            file.write(f"{filename}\n")

def load_checkpoint(checkpoint_file):
    try:
        with open(checkpoint_file, 'r') as file:
            lines = file.readlines()
            if len(lines) >= 4:
                image_folder = lines[0].strip()
                sketch_folder = lines[1].strip()
                output_folder = lines[2].strip()
                processed_images = int(lines[3])
                processed_filenames = [filename.strip() for filename in lines[4:]]
                return image_folder, sketch_folder, output_folder, processed_images, processed_filenames
            else:
                print("Checkpoint file is not formatted correctly. Creating a new checkpoint.")
                return None
    except FileNotFoundError:
        return None

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000, checkpoint_file="checkpoint.txt"):
    # Load the checkpoint if it exists
    checkpoint = load_checkpoint(checkpoint_file)

    if checkpoint:
        image_folder, sketch_folder, output_folder, processed_images, processed_filenames = checkpoint
        print(f"Resuming from checkpoint. Processed {processed_images} images.")
    else:
        processed_images = 0
        processed_filenames = []

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    # Find the index to resume from
    resume_index = all_files.index(f'{processed_images}.png') if f'{processed_images}.png' in all_files else 0

    # Set the range to process images from 65000 to 80000
    start_index = max(resume_index, 65000)
    end_index = min(start_index + batch_size, min(max_images, 80000))

    # Iterate through files in the specified range
    for image_file in all_files[start_index:end_index]:
        # Check if the image has already been processed
        if image_file in processed_filenames:
            print(f"Skipping image {image_file} as it has already been processed.")
            continue

        # Construct the corresponding sketch file name
        sketch_file = image_file.replace('.', '_sketch.')

        # Check if the sketch file exists
        if os.path.exists(os.path.join(sketch_folder, sketch_file)):
            # Copy the image and sketch to the output folder
            image_path = os.path.join(image_folder, image_file)
            sketch_path = os.path.join(sketch_folder, sketch_file)

            # Create a folder for each sample
            sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
            os.makedirs(sample_folder, exist_ok=True)

            # Copy the image and sketch to the sample folder
            shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
            shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

            processed_filenames.append(image_file)

            # Print progress information
            print(f"Processed image {processed_images + 1}/{min(max_images, len(all_files))}: {image_file}")

            # Save the checkpoint after processing each image
            save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images + 1, processed_filenames)

            processed_images += 1

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/hd7_valid"

    # Set the checkpoint file path (customize this according to your preference)
    checkpoint_file_path = "/content/drive/MyDrive/checkpointtrain.txt"

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=80000, checkpoint_file=checkpoint_file_path)


Resuming from checkpoint. Processed 66173 images.
Skipping image 66622.png as it has already been processed.
Skipping image 66623.png as it has already been processed.
Skipping image 66624.png as it has already been processed.
Skipping image 66625.png as it has already been processed.
Skipping image 66626.png as it has already been processed.
Skipping image 66627.png as it has already been processed.
Skipping image 66628.png as it has already been processed.
Skipping image 66629.png as it has already been processed.
Skipping image 66630.png as it has already been processed.
Skipping image 66631.png as it has already been processed.
Skipping image 66632.png as it has already been processed.
Skipping image 66633.png as it has already been processed.
Skipping image 66634.png as it has already been processed.
Skipping image 66635.png as it has already been processed.
Skipping image 66636.png as it has already been processed.
Skipping image 66637.png as it has already been processed.
Skippi

66895


25577.png

In [None]:
#      TEST
# 80000 TO 95000
import os
import shutil

def save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images, processed_filenames):
    with open(checkpoint_file, 'w') as file:
        file.write(f"{image_folder}\n")
        file.write(f"{sketch_folder}\n")
        file.write(f"{output_folder}\n")
        file.write(f"{processed_images}\n")
        for filename in processed_filenames:
            file.write(f"{filename}\n")

def load_checkpoint(checkpoint_file):
    try:
        with open(checkpoint_file, 'r') as file:
            lines = file.readlines()
            if len(lines) >= 4:
                image_folder = lines[0].strip()
                sketch_folder = lines[1].strip()
                output_folder = lines[2].strip()
                processed_images = int(lines[3])
                processed_filenames = [filename.strip() for filename in lines[4:]]
                return image_folder, sketch_folder, output_folder, processed_images, processed_filenames
            else:
                print("Checkpoint file is not formatted correctly. Creating a new checkpoint.")
                return None
    except FileNotFoundError:
        return None

def organize_dataset(image_folder, sketch_folder, output_folder, batch_size=1000, max_images=50000, checkpoint_file="checkpoint.txt"):
    # Load the checkpoint if it exists
    checkpoint = load_checkpoint(checkpoint_file)

    if checkpoint:
        image_folder, sketch_folder, output_folder, processed_images, processed_filenames = checkpoint
        print(f"Resuming from checkpoint. Processed {processed_images} images.")
    else:
        processed_images = 0
        processed_filenames = []

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # List all files in the image folder
    all_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Sort the files to ensure they are in numerical order
    all_files.sort(key=lambda x: int(os.path.splitext(x)[0]))

    # Find the index to resume from
    resume_index = all_files.index(f'{processed_images}.png') if f'{processed_images}.png' in all_files else 0

    # Set the range to process images from 80000 to 95000
    start_index = max(resume_index, 80000)
    end_index = min(start_index + batch_size, 95000)

    # Iterate through files in the specified range
    for image_file in all_files[start_index:end_index]:
        # Check if the image has already been processed
        if image_file in processed_filenames:
            print(f"Skipping image {image_file} as it has already been processed.")
            continue

        # Construct the corresponding sketch file name
        sketch_file = image_file.replace('.', '_sketch.')

        # Check if the sketch file exists
        if os.path.exists(os.path.join(sketch_folder, sketch_file)):
            # Copy the image and sketch to the output folder
            image_path = os.path.join(image_folder, image_file)
            sketch_path = os.path.join(sketch_folder, sketch_file)

            # Create a folder for each sample
            sample_folder = os.path.join(output_folder, os.path.splitext(image_file)[0])
            os.makedirs(sample_folder, exist_ok=True)

            # Copy the image and sketch to the sample folder
            shutil.copyfile(image_path, os.path.join(sample_folder, 'image.png'))
            shutil.copyfile(sketch_path, os.path.join(sample_folder, 'sketch.png'))

            processed_filenames.append(image_file)

            # Print progress information
            print(f"Processed image {processed_images + 1}/{min(max_images, len(all_files))}: {image_file}")

            # Save the checkpoint after processing each image
            save_checkpoint(checkpoint_file, image_folder, sketch_folder, output_folder, processed_images + 1, processed_filenames)

            processed_images += 1

    print("Processing complete.")

if __name__ == "__main__":
    # Replace these paths with the paths to your image and sketch folders
    image_folder_path = "/content/drive/MyDrive/hd7images"
    sketch_folder_path = "/content/drive/MyDrive/hd7_sketches"

    # Replace this path with the desired output folder for the organized dataset
    output_folder_path = "/content/drive/MyDrive/hd7_train"

    # Set the checkpoint file path (customize this according to your preference)
    checkpoint_file_path = "/content/drive/MyDrive/checkpointtrain.txt"

    organize_dataset(image_folder_path, sketch_folder_path, output_folder_path, max_images=95000, checkpoint_file=checkpoint_file_path)


In [None]:
import os
import shutil

def copy_sketches(source_folder, destination_folder, num_images=7000):
    # Ensure the destination folder exists
    os.makedirs(destination_folder, exist_ok=True)

    # List all files in the source folder
    all_files = [file for file in os.listdir(source_folder) if file.lower().endswith(('_sketch.png'))]

    # Convert filenames to integers for numerical sorting
    all_files = sorted(all_files, key=lambda x: int(os.path.splitext(x)[0].split('_')[0]))

    # Ensure that the number of images to copy is not more than available
    num_images = min(num_images, len(all_files))

    # Copy the specified number of sketches
    for i, sketch_file in enumerate(all_files[:num_images]):
        source_path = os.path.join(source_folder, sketch_file)

        # Rename the copied sketch to a sequential number
        destination_path = os.path.join(destination_folder, f"{i}_sketch.png")

        shutil.copyfile(source_path, destination_path)
        print(f"Sketch copied: {i}_sketch.png")

if __name__ == "__main__":
    # Replace these paths with your source and destination folders
    source_folder_path = "/content/drive/MyDrive/hd7_sketches"
    destination_folder_path = "/content/drive/MyDrive/train5000/sketches"

    # Specify the number of sketches to copy
    num_sketches_to_copy = 7000

    copy_sketches(source_folder_path, destination_folder_path, num_images=num_sketches_to_copy)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Sketch copied: 2000_sketch.png
Sketch copied: 2001_sketch.png
Sketch copied: 2002_sketch.png
Sketch copied: 2003_sketch.png
Sketch copied: 2004_sketch.png
Sketch copied: 2005_sketch.png
Sketch copied: 2006_sketch.png
Sketch copied: 2007_sketch.png
Sketch copied: 2008_sketch.png
Sketch copied: 2009_sketch.png
Sketch copied: 2010_sketch.png
Sketch copied: 2011_sketch.png
Sketch copied: 2012_sketch.png
Sketch copied: 2013_sketch.png
Sketch copied: 2014_sketch.png
Sketch copied: 2015_sketch.png
Sketch copied: 2016_sketch.png
Sketch copied: 2017_sketch.png
Sketch copied: 2018_sketch.png
Sketch copied: 2019_sketch.png
Sketch copied: 2020_sketch.png
Sketch copied: 2021_sketch.png
Sketch copied: 2022_sketch.png
Sketch copied: 2023_sketch.png
Sketch copied: 2024_sketch.png
Sketch copied: 2025_sketch.png
Sketch copied: 2026_sketch.png
Sketch copied: 2027_sketch.png
Sketch copied: 2028_sketch.png
Sketch copied: 2029_sketch.png
Sketc

In [None]:
import os
import shutil

def copy_images(source_folder, destination_folder, start_index, end_index):
    # Ensure the destination folder exists, create it if not
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    # List all files in the source folder
    files = os.listdir(source_folder)

    # Counter for copied images
    copied_count = 0

    for filename in files:
        # Check if the file is an image file and within the specified range
        if filename.endswith(('.png', '.jpg', '.jpeg')) and start_index <= int(filename.split('.')[0]) <= end_index:
            source_path = os.path.join(source_folder, filename)
            destination_path = os.path.join(destination_folder, filename)

            # Copy the image to the destination folder
            shutil.copyfile(source_path, destination_path)

            # Print the name of the file being copied
            print(f"Copied: {filename}")

            # Increment the counter
            copied_count += 1

    # Print a message when the process is done
    print(f"Process complete. {copied_count} images copied from {source_folder} to {destination_folder}.")

# Replace 'source_folder' and 'destination_folder' with your actual paths
source_folder = '/content/drive/MyDrive/hd7_sketches'
destination_folder = '/content/drive/MyDrive/validate5000/sketches'
start_index = 7000
end_index = 10000

copy_images(source_folder, destination_folder, start_index, end_index)


ValueError: invalid literal for int() with base 10: '74233_sketch'

In [None]:
import os
import shutil

def move_images(source_folder, destination_folder, start_index, end_index):
    # Ensure the destination folder exists, create it if not
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    # List all files in the source folder
    files = os.listdir(source_folder)

    # Counter for moved images
    moved_count = 0

    for filename in files:
        # Check if the file is an image file and within the specified range
        if filename.endswith(('.png', '.jpg', '.jpeg')) and start_index <= int(filename.split('.')[0]) <= end_index:
            source_path = os.path.join(source_folder, filename)
            destination_path = os.path.join(destination_folder, filename)

            # Move the image to the destination folder
            shutil.move(source_path, destination_path)

            # Print the name of the file being moved
            print(f"Moved: {filename}")

            # Increment the counter
            moved_count += 1

    # Print a message when the process is done
    print(f"Process complete. {moved_count} images moved from {source_folder} to {destination_folder}.")

# Replace 'source_folder' and 'destination_folder' with your actual paths
source_folder = '/content/drive/MyDrive/train5000/sketches'
destination_folder = '/content/drive/MyDrive/validate5000/sketches'
start_index = 5999
end_index = 7000

move_images(source_folder, destination_folder, start_index, end_index)


ValueError: invalid literal for int() with base 10: '6000_sketch'

In [None]:
import os
import shutil

def move_images(source_folder, destination_folder, start_index, end_index):
    # Ensure the destination folder exists, create it if not
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    # List all files in the source folder
    files = os.listdir(source_folder)

    # Counter for moved images
    moved_count = 0

    for filename in files:
        # Check if the file is an image file and within the specified range
        file_number = int(filename.split('_')[0])
        if filename.endswith(('.png', '.jpg', '.jpeg')) and start_index <= file_number <= end_index:
            source_path = os.path.join(source_folder, filename)
            destination_path = os.path.join(destination_folder, filename)

            # Move the image to the destination folder
            shutil.move(source_path, destination_path)

            # Print the name of the file being moved
            print(f"Moved: {filename}")

            # Increment the counter
            moved_count += 1

    # Print a message when the process is done
    print(f"Process complete. {moved_count} images moved from {source_folder} to {destination_folder}.")

# Replace 'source_folder' and 'destination_folder' with your actual paths
source_folder = '/content/drive/MyDrive/train5000/sketches'
destination_folder = '/content/drive/MyDrive/validate5000/sketches'
start_index = 5999
end_index = 7000

move_images(source_folder, destination_folder, start_index, end_index)


Moved: 6000_sketch.png
Moved: 6001_sketch.png
Moved: 6002_sketch.png
Moved: 6003_sketch.png
Moved: 6004_sketch.png
Moved: 6005_sketch.png
Moved: 6006_sketch.png
Moved: 6007_sketch.png
Moved: 6008_sketch.png
Moved: 6009_sketch.png
Moved: 6010_sketch.png
Moved: 6011_sketch.png
Moved: 6012_sketch.png
Moved: 6013_sketch.png
Moved: 6014_sketch.png
Moved: 6015_sketch.png
Moved: 6016_sketch.png
Moved: 6017_sketch.png
Moved: 6018_sketch.png
Moved: 6019_sketch.png
Moved: 6020_sketch.png
Moved: 6021_sketch.png
Moved: 6022_sketch.png
Moved: 6023_sketch.png
Moved: 6024_sketch.png
Moved: 6025_sketch.png
Moved: 6026_sketch.png
Moved: 6027_sketch.png
Moved: 6028_sketch.png
Moved: 6029_sketch.png
Moved: 6030_sketch.png
Moved: 6031_sketch.png
Moved: 6032_sketch.png
Moved: 6033_sketch.png
Moved: 6034_sketch.png
Moved: 6035_sketch.png
Moved: 6036_sketch.png
Moved: 6037_sketch.png
Moved: 6038_sketch.png
Moved: 6039_sketch.png
Moved: 6040_sketch.png
Moved: 6041_sketch.png
Moved: 6042_sketch.png
Moved: 6043

In [None]:
import shutil
import os

# Define the source and destination directories
source_dir = '/content/drive/MyDrive/hd7images'
destination_dir = '/content/drive/MyDrive/train5000/images'

# Ensure the destination directory exists
os.makedirs(destination_dir, exist_ok=True)

for file in os.listdir(destination_dir):
    file_path = os.path.join(destination_dir, file)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print(f'Failed to delete {file_path}. Reason: {e}')

# Loop through the file range and copy each file
for i in range(1, 5001):
    filename = f"{i}.png"
    source_path = os.path.join(source_dir, filename)
    destination_path = os.path.join(destination_dir, filename)

    try:
        # Copy the file
        shutil.copy(source_path, destination_path)
        print(f"copied image {i}.png")
    except FileNotFoundError:
        print(f"File not found: {filename}, skipping...")
        continue

print("Files copied successfully.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
copied image 2.png
copied image 3.png
copied image 4.png
copied image 5.png
copied image 6.png
copied image 7.png
copied image 8.png
copied image 9.png
copied image 10.png
copied image 11.png
copied image 12.png
copied image 13.png
copied image 14.png
copied image 15.png
copied image 16.png
copied image 17.png
copied image 18.png
copied image 19.png
copied image 20.png
copied image 21.png
copied image 22.png
copied image 23.png
copied image 24.png
copied image 25.png
copied image 26.png
copied image 27.png
copied image 28.png
copied image 29.png
copied image 30.png
copied image 31.png
copied image 32.png
copied image 33.png
copied image 34.png
copied image 35.png
copied image 36.png
copied image 37.png
copied image 38.png
copied image 39.png
copied image 40.png
copied image 41.png
copied image 42.png
copied image 43.png
copied image 44.png
copied image 45.png
copied image 46.png
copied image 47.png
copied image 48.png
cop

In [None]:
import shutil
import os

# Define the source and destination directories
source_dir = '/content/drive/MyDrive/hd7_sketches'
destination_dir = '/content/drive/MyDrive/train5000/sketches'

# Ensure the destination directory exists
os.makedirs(destination_dir, exist_ok=True)

for file in os.listdir(destination_dir):
    file_path = os.path.join(destination_dir, file)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print(f'Failed to delete {file_path}. Reason: {e}')

# Loop through the file range and copy each file
for i in range(1, 5001):
    filename = f"{i}_sketch.png"
    source_path = os.path.join(source_dir, filename)
    destination_path = os.path.join(destination_dir, filename)

    try:
        # Copy the file
        shutil.copy(source_path, destination_path)
        print(f"copied image {i}_sketch.png")
    except FileNotFoundError:
        print(f"File not found: {filename}, skipping...")
        continue

print("Files copied successfully.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
copied image 2_sketch.png
copied image 3_sketch.png
copied image 4_sketch.png
copied image 5_sketch.png
copied image 6_sketch.png
copied image 7_sketch.png
copied image 8_sketch.png
copied image 9_sketch.png
copied image 10_sketch.png
copied image 11_sketch.png
copied image 12_sketch.png
copied image 13_sketch.png
copied image 14_sketch.png
copied image 15_sketch.png
copied image 16_sketch.png
copied image 17_sketch.png
copied image 18_sketch.png
copied image 19_sketch.png
copied image 20_sketch.png
copied image 21_sketch.png
copied image 22_sketch.png
copied image 23_sketch.png
copied image 24_sketch.png
copied image 25_sketch.png
copied image 26_sketch.png
copied image 27_sketch.png
copied image 28_sketch.png
File not found: 29_sketch.png, skipping...
copied image 30_sketch.png
copied image 31_sketch.png
copied image 32_sketch.png
copied image 33_sketch.png
copied image 34_sketch.png
copied image 35_sketch.png
copied im