In [None]:
import os
from PIL import Image
import numpy as np

# where the source satellite images and their matching masks are read from
path_sat = "../datasets/mass/sat/"
path_msk = "../datasets/mass/map/"

# where the cropped satellite images and masks are written to
out_path_sat = "../datasets/mass/crop/sat/"
out_path_msk = "../datasets/mass/crop/map/"

# whitespace cutoff: an image is discarded once 5% or more of it is white
threshold = 0.05

In [None]:
def get_whitespace(im):
    """Return the fraction of pixels in `im` that are fully white.

    A pixel counts as white only when every one of its channels equals 255,
    so saturated colours such as pure red (255, 0, 0) are not mistaken for
    whitespace.

    Parameters
    ----------
    im : PIL.Image.Image or array-like
        Anything `np.asarray` can convert. Expected to be laid out as
        (height, width) or (height, width, channels) with 8-bit channels,
        i.e. 255 is the maximum value.

    Returns
    -------
    float
        Share of white pixels in [0, 1]; 0.0 when the image has no pixels.
    """
    arr = np.asarray(im)

    # per-value mask first, then require *all* channels of a pixel to be 255
    white = arr == 255
    if white.ndim > 2:
        white = white.all(axis=-1)

    # avoid a 0/0 division on an empty image
    if white.size == 0:
        return 0.0

    return float(white.mean())

In [None]:
# side length, in pixels, of the square crop taken from each corner
crop_size = 1024

# One callable per image corner. Each takes a PIL image and crops the
# crop_size x crop_size box anchored at that corner; boxes are given as
# (left, upper, right, lower). Horizontal offsets use im.width and vertical
# offsets use im.height so the boxes are also correct for non-square images.
crops = [
    lambda im: im.crop((0, 0, crop_size, crop_size)),  # top left
    lambda im: im.crop((im.width - crop_size, 0, im.width, crop_size)),  # top right
    lambda im: im.crop((0, im.height - crop_size, crop_size, im.height)),  # bottom left
    lambda im: im.crop((im.width - crop_size, im.height - crop_size, im.width, im.height)),  # bottom right
]

def _list_files(directory):
    """Return the set of file names located directly inside `directory`.

    Subdirectories are not descended into. Raises FileNotFoundError when
    `directory` does not exist.
    """
    with os.scandir(directory) as entries:
        return {entry.name for entry in entries if entry.is_file()}


sat_names = _list_files(path_sat)
msk_names = _list_files(path_msk)

# only use the images that have both a satellite image and a mask
names = sat_names & msk_names

# make sure the output directories exist before anything is saved
os.makedirs(out_path_sat, exist_ok=True)
os.makedirs(out_path_msk, exist_ok=True)

# sorted() keeps the processing and printing order stable between runs
for name in sorted(names):
    # the context managers close both files even if a crop or save fails
    with Image.open(os.path.join(path_sat, name)) as im_sat, \
            Image.open(os.path.join(path_msk, name)) as im_msk:

        # get the crop with the least amount of whitespace; the strict `<`
        # keeps the earliest crop when several tie
        min_whitespace = float('inf')
        min_index = None
        for i, crop in enumerate(crops):
            whitespace_crop = get_whitespace(crop(im_sat))
            if whitespace_crop < min_whitespace:
                min_whitespace = whitespace_crop
                min_index = i

        # print it out for analysis: file name, chosen crop index, whitespace
        print(name, min_index, min_whitespace)

        if min_whitespace < threshold:
            # save image if we're within the threshold of whitespace; the same
            # crop is applied to the mask so the pair stays aligned.
            # NOTE: the data is written as PNG under the original file name,
            # whatever extension that name carries.
            min_crop = crops[min_index]
            min_crop(im_sat).save(os.path.join(out_path_sat, name), "PNG")
            min_crop(im_msk).save(os.path.join(out_path_msk, name), "PNG")
