In [9]:
import os
import matplotlib.pyplot as plt
from PIL import Image

### Crop and save

In [10]:
# Source dataset layout: the ground-truth masks and the images they belong to
# are stored in two sibling folders.
gt_folder = "massachusetts/groundtruth"
image_folder = "massachusetts/images"

In [11]:
# Build the full path of every entry in each folder. Sorting gives both lists a
# deterministic order, which the pairwise zip() further down relies on.
image_files = sorted(os.path.join(image_folder, name) for name in os.listdir(image_folder))
gt_files = sorted(os.path.join(gt_folder, name) for name in os.listdir(gt_folder))

In [12]:
# sanity check: both folders should contain the same number of files
print(f"Number of images: {len(image_files)}")
print(f"Number of groundtruths: {len(gt_files)}")

Number of images: 1108
Number of groundtruths: 1108


In [13]:
# Edge length (in pixels) of the square tiles; the output folders are named
# after it so differently sized crops do not overwrite each other.
CROP_SIZE = 384
_output_root = f"massachusetts_{CROP_SIZE}"
OUTPUT_DIR_IMAGES = f"{_output_root}/images/"
OUTPUT_DIR_GT = f"{_output_root}/groundtruth/"

In [15]:
# Restrict processing to the first K image / ground-truth pairs.
# Slicing with [:K] is idempotent, so re-running this cell is harmless.
K = 5
image_files, gt_files = image_files[:K], gt_files[:K]

In [16]:
# create the output directories; exist_ok=True makes this a no-op when they are
# already present and avoids the check-then-create race of os.path.exists()
for output_dir in (OUTPUT_DIR_IMAGES, OUTPUT_DIR_GT):
    os.makedirs(output_dir, exist_ok=True)

In [17]:
# Cut every image / ground-truth pair into non-overlapping CROP_SIZE x CROP_SIZE
# tiles and save them as '<ID>_<i>_<j>.png', where i is the tile's column index
# and j its row index. Pixels to the right of / below the last full tile are
# discarded because the tile counts use floor division.
if len(image_files) != len(gt_files):
    # zip() would otherwise silently drop the unpaired tail
    raise ValueError(
        f"Got {len(image_files)} images but {len(gt_files)} groundtruths!"
    )

for image_path, gt_path in zip(image_files, gt_files):
    # get image and gt ID: os.path.basename copes with any path separator and
    # os.path.splitext strips only the final extension, so dots inside a file
    # name are preserved
    image_ID = os.path.splitext(os.path.basename(image_path))[0]
    gt_ID = os.path.splitext(os.path.basename(gt_path))[0]
    # check if the gt corresponds to the image
    if image_ID != gt_ID:
        raise ValueError(
            f"Groundtruth does not correspond to image! ({gt_path} vs {image_path})"
        )
    ID = image_ID

    # open both files in a context manager so their handles are released as
    # soon as all tiles of this pair have been written
    with Image.open(image_path) as image, Image.open(gt_path) as ground_truth:
        # get the size of the images
        width, height = image.size
        if ground_truth.size != image.size:
            # the same crop boxes are applied to both, so the sizes must agree
            raise ValueError(
                f"Size mismatch for {ID}: image {image.size} vs groundtruth {ground_truth.size}"
            )

        # calculate the number of full chunks in both dimensions
        num_chunks_x = width // CROP_SIZE
        num_chunks_y = height // CROP_SIZE

        # loop through each chunk and save it
        for i in range(num_chunks_x):
            for j in range(num_chunks_y):
                left = i * CROP_SIZE
                upper = j * CROP_SIZE
                box = (left, upper, left + CROP_SIZE, upper + CROP_SIZE)

                # crop image and ground truth with the identical box
                image_chunk = image.crop(box)
                gt_chunk = ground_truth.crop(box)

                # save the chunks under the same file name in both folders
                chunk_name = f'{ID}_{i}_{j}.png'
                image_chunk.save(os.path.join(OUTPUT_DIR_IMAGES, chunk_name))
                gt_chunk.save(os.path.join(OUTPUT_DIR_GT, chunk_name))

### Check

In [18]:
# Point the folder variables at the cropped tiles. CROP_SIZE is set again here
# so this section can be run on its own; it must match the size used for cropping.
CROP_SIZE = 384
cropped_root = f"massachusetts_{CROP_SIZE}"
image_folder = f"{cropped_root}/images"
gt_folder = f"{cropped_root}/groundtruth"

In [19]:
# List every cropped tile and its ground truth as a full path, then sort each
# list in place so both are in the same deterministic order.
image_files = [os.path.join(image_folder, name) for name in os.listdir(image_folder)]
image_files.sort()
gt_files = [os.path.join(gt_folder, name) for name in os.listdir(gt_folder)]
gt_files.sort()

In [20]:
# both folders should hold the same number of tiles
for label, paths in (("images", image_files), ("groundtruths", gt_files)):
    print(f"Number of {label}:", len(paths))

Number of images: 45
Number of groundtruths: 45


In [21]:
# check pairs: every tile must have a ground truth with the same file name
if len(image_files) != len(gt_files):
    # zip() below stops at the shorter list, so report unpaired files here
    print("There is something wrong!",
          f"{len(image_files)} images vs {len(gt_files)} groundtruths")
for image_path, gt_path in zip(image_files, gt_files):
    # os.path.basename works regardless of the separator os.path.join inserted
    image_name = os.path.basename(image_path)
    gt_name = os.path.basename(gt_path)
    if image_name != gt_name:
        # name the offending pair so it can be located
        print("There is something wrong!", image_path, gt_path)

### Visual check

In [None]:
# Show every tile next to its ground truth for a manual visual check.
for image_path, gt_path in zip(image_files, gt_files):
    # os.path.basename works regardless of the separator os.path.join inserted
    image_name = os.path.basename(image_path)
    print(image_name)
    # context managers release the file handles once the pixels have been drawn
    with Image.open(image_path) as image, Image.open(gt_path) as gt:
        fig, ax = plt.subplots(1, 2, figsize=(8, 4))
        ax[0].imshow(image)
        ax[0].set_title("Image")
        ax[1].imshow(gt)
        ax[1].set_title("Ground Truth")
    plt.show()
    # close explicitly so figures do not accumulate in memory across iterations
    plt.close(fig)