In [1]:
# Install Pillow with the %pip magic so the package lands in the environment of
# the running kernel; a bare `! pip` runs whichever pip is first on PATH, which
# may belong to a different interpreter.
%pip install pillow
# Snapshot the kernel environment so the notebook's dependencies can be recreated.
%pip freeze > requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from pathlib import Path
from typing import List, Tuple

from PIL import Image

In [3]:
# Directory containing the original, uncropped images. The path is relative,
# so it is resolved against the notebook's current working directory.
raw_data_path = Path("data/raw")

In [4]:
def crop_images(image_paths: List[Path],
                output_dir: Path,
                crop_size: Tuple[int, int] = (640, 640),
                overlap: int = 320) -> None:
    """Cut every image into fixed-size, overlapping tiles and save them as JPEG files.

    A window of ``crop_size`` slides over each image starting at the top-left
    corner, row by row, advancing ``crop_size - overlap`` pixels per step on
    each axis. Only windows that lie fully inside the image are kept, so a
    strip along the right/bottom edge that does not fit another full step is
    not covered, and an image smaller than ``crop_size`` produces no tiles.

    Tiles are written to ``output_dir / "<stem>_crop_<n>.jpg"`` where ``n``
    counts from 1 for each image. Input files that share the same stem
    therefore overwrite each other's tiles.

    Args:
        image_paths (List[Path]): Paths of the images to tile.
        output_dir (Path): Directory where the tiles are saved; created
            (including parents) if it does not exist.
        crop_size (Tuple[int, int], optional): Tile size as (width, height).
            Defaults to (640, 640).
        overlap (int, optional): Number of pixels shared by neighbouring tiles,
            applied to both axes. Must be smaller than both tile dimensions.
            Defaults to 320.

    Raises:
        ValueError: If a tile dimension is not positive, or if ``overlap`` is
            not smaller than both tile dimensions (the window would not advance).
    """
    crop_width, crop_height = crop_size
    if crop_width <= 0 or crop_height <= 0:
        raise ValueError(f"crop_size must contain positive values, got {crop_size}")

    step_x = crop_width - overlap
    step_y = crop_height - overlap
    # A zero step makes range() fail with an unrelated message and a negative
    # step silently yields no tiles, so reject both up front, before any
    # directory is created or file is opened.
    if step_x <= 0 or step_y <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than both crop dimensions {crop_size}"
        )

    output_dir.mkdir(parents=True, exist_ok=True)

    # Image modes the JPEG writer accepts as-is; anything else (e.g. RGBA, P, LA)
    # would make save() raise, so such images are converted to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    for img_path in image_paths:
        with Image.open(img_path) as img:
            # Convert once per image rather than once per tile.
            source = img if img.mode in jpeg_modes else img.convert("RGB")
            img_width, img_height = source.size
            crop_count = 0
            # The "+ 1" keeps the last window whose far edge touches the image border.
            for top in range(0, img_height - crop_height + 1, step_y):
                for left in range(0, img_width - crop_width + 1, step_x):
                    box = (left, top, left + crop_width, top + crop_height)
                    crop_count += 1
                    save_path = output_dir / f"{img_path.stem}_crop_{crop_count}.jpg"
                    source.crop(box).save(save_path)
                    print(f"Cropped and saved image: {save_path}")

            print(f"Total crops for {img_path.name}: {crop_count}")

In [5]:
# Collect the raw images in sorted order: Path.glob yields matches in an
# arbitrary, filesystem-dependent order, so sorting keeps the processing order
# and the log output reproducible between runs.
# NOTE(review): the pattern only names the upper-case ".JPG" suffix; on POSIX
# platforms lower-case ".jpg" files are presumably skipped — confirm this is intended.
image_paths = sorted(raw_data_path.glob('*.JPG'))
output_directory = Path("data/cropped_images")
crop_images(image_paths, output_directory)

Cropped and saved image: data/cropped_images/31_crop_1.jpg
Cropped and saved image: data/cropped_images/31_crop_2.jpg
Cropped and saved image: data/cropped_images/31_crop_3.jpg
Cropped and saved image: data/cropped_images/31_crop_4.jpg
Cropped and saved image: data/cropped_images/31_crop_5.jpg
Cropped and saved image: data/cropped_images/31_crop_6.jpg
Cropped and saved image: data/cropped_images/31_crop_7.jpg
Cropped and saved image: data/cropped_images/31_crop_8.jpg
Cropped and saved image: data/cropped_images/31_crop_9.jpg
Cropped and saved image: data/cropped_images/31_crop_10.jpg
Cropped and saved image: data/cropped_images/31_crop_11.jpg
Cropped and saved image: data/cropped_images/31_crop_12.jpg
Cropped and saved image: data/cropped_images/31_crop_13.jpg
Cropped and saved image: data/cropped_images/31_crop_14.jpg
Cropped and saved image: data/cropped_images/31_crop_15.jpg
Cropped and saved image: data/cropped_images/31_crop_16.jpg
Cropped and saved image: data/cropped_images/31_c