### Follow the steps in the README.md file to set up

In [None]:
import os
import time
from PIL import Image

In [None]:
def process_images(input_folder, output_folder, target_size=(1024, 1024)):
    """Center-crop every image in a folder to a square and resize it.

    Each regular file in ``input_folder`` is opened with Pillow, cropped to a
    centered square whose side equals the image's shorter dimension, resized
    to ``target_size`` and saved to ``output_folder`` under the same file
    name. Entries that cannot be opened or decoded as images are skipped
    with a message, and so are images that cannot be written; neither kind
    of failure stops the batch.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory that receives the processed images. It is
            created if it does not exist.
        target_size: ``(width, height)`` tuple handed to ``Image.resize``.

    Returns:
        The number of images that were successfully processed and saved.

    Raises:
        OSError: If ``output_folder`` cannot be created or ``input_folder``
            cannot be listed (for example because it does not exist).
    """
    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    processed_count = 0

    # sorted() gives a reproducible processing (and log) order; os.listdir
    # returns entries in arbitrary order.
    for filename in sorted(os.listdir(input_folder)):
        input_path = os.path.join(input_folder, filename)

        # Sub-directories and other non-file entries can never be images.
        if not os.path.isfile(input_path):
            print(f"Skipping non-image file: {filename}")
            continue

        try:
            with Image.open(input_path) as img:
                # Crop to a centered square based on the shortest side.
                # Integer arithmetic keeps the box exactly side x side;
                # float midpoints such as 500.5 / 3501.5 would be rounded
                # independently by crop() and could yield a box that is one
                # pixel off from square.
                # Example: w,h = 4000,3000 -> box (500, 0, 3500, 3000)
                side = min(img.size)
                left = (img.width - side) // 2
                top = (img.height - side) // 2
                cropped_img = img.crop((left, top, left + side, top + side))

                # Resize the cropped image to the target size
                resized_img = cropped_img.resize(target_size)
        except OSError:
            # Pillow's UnidentifiedImageError is an OSError subclass, so this
            # covers unreadable files as well as files that are not images.
            print(f"Skipping non-image file: {filename}")
            continue

        # Saving is handled separately so that a write problem is not
        # misreported as "non-image". Pillow picks the output format from the
        # file extension and raises ValueError when it cannot map the
        # extension to a format.
        output_path = os.path.join(output_folder, filename)
        try:
            resized_img.save(output_path)
        except (OSError, ValueError) as err:
            print(f"Could not save {filename}: {err}")
            continue

        processed_count += 1

    print("Image processing complete.")
    return processed_count


### Run the `process_images` function

In [None]:
# Set your input and output folders
input_folder = 'raw_images'
output_folder = 'images'

# Record start time. perf_counter() is a monotonic, high-resolution clock
# intended for measuring intervals; unlike time.time() it is not affected by
# system clock adjustments made while the batch is running.
start_time = time.perf_counter()

# Call the process_images function
# Add your target size if it is different from the default, e.g. (256, 256)
processed_count = process_images(input_folder, output_folder)

# Record end time
end_time = time.perf_counter()

# Calculate and print the elapsed time
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time} seconds")
print(f"Number of processed images: {processed_count}")
