In [1]:
import itertools
import os
import PIL
import numpy as np
import matplotlib.pyplot as plt
from PIL.ImageOps import flip, mirror
from PIL import Image
from tqdm import tqdm


In [29]:
valid_file_types = ['jpg']
# folder_to_scan = '/data/datasets/earth_images/proc/' # done!
folder_to_scan = '/data/datasets/earth_images/proc_sea/' # done!
# folder_to_scan = '/data/datasets/mars_images/scrapped/'

# Full path of every entry in folder_to_scan whose extension (text after the
# last dot, compared case-insensitively) is listed in valid_file_types.
# - os.path.join avoids the doubled '/' when folder_to_scan ends with a slash.
# - os.path.splitext does not mistake an extension-less file named 'jpg'
#   for a JPEG.
# - sorted() makes the processing order reproducible; os.listdir returns
#   entries in arbitrary order.
full_path_valid_files = [
    os.path.join(folder_to_scan, f)
    for f in sorted(os.listdir(folder_to_scan))
    if os.path.splitext(f)[1][1:].lower() in valid_file_types
]
print(len(full_path_valid_files))


118


In [35]:
def generate_1d_limits(wind, limit, thresh, offset=0, endset=0):
    """
    Compute start/end coordinates of consecutive windows along one axis.

    Windows of length `wind` are laid side by side, the first one starting at
    `offset`, and every window must end at or before `limit - endset`. If the
    first window that does not fit overshoots that end by less than a fraction
    `thresh` of `wind`, one extra window is added flush against the end (it
    overlaps the previous window).

    wind: window length; must be positive
    limit: length of the axis
    thresh: 0 - 1: largest overshoot, as a fraction of wind, for which the
            extra end-aligned window is still kept
    offset: margin skipped at the start of the axis
    endset: margin skipped at the end of the axis

    Returns a tuple (x_left, x_right) of two equally long lists holding the
    start and end coordinate of each window. Both lists are empty when not
    even one full window fits between `offset` and `limit - endset`.

    Raises ValueError when wind is not positive.
    """
    if wind <= 0:
        # A non-positive window would never reach the limit (endless loop)
        # or divide by zero in the overshoot ratio below.
        raise ValueError('wind must be positive')

    x_left = []
    x_right = []
    if endset:
        limit = limit - endset

    # At least one full window must fit inside [offset, limit]. Checking only
    # `limit >= wind` would let the end-aligned window below start before
    # `offset`, i.e. inside the margin the caller asked to skip.
    if limit - offset < wind:
        return (x_left, x_right)

    i = 0
    x_r = wind + offset
    while x_r <= limit:
        x_left.append(i * wind + offset)
        x_right.append(x_r)
        i += 1
        x_r = (i + 1) * wind + offset

    # x_r is now the end of the first window that does not fit; keep an
    # end-aligned window when the overshoot is small enough.
    if (x_r - limit) / wind < thresh:
        x_left.append(limit - wind)
        x_right.append(limit)
    return (x_left, x_right)


def generate_cropping_boxes_from_limits(x_left, x_rigth, x_bottom, x_top):
    """
    Combine per-axis window limits into 2D boxes.

    x_left / x_rigth are paired element-wise into horizontal intervals and
    x_bottom / x_top into vertical intervals. Every horizontal interval is
    combined with every vertical interval (horizontal index varies slowest).

    Returns a list of (x1, y1, x2, y2) tuples.
    """
    horizontal = zip(x_left, x_rigth)
    vertical = zip(x_bottom, x_top)
    return [
        (x1, y1, x2, y2)
        for (x1, x2), (y1, y2) in itertools.product(horizontal, vertical)
    ]


def generate_cropping_boxes(image, cropping_window, tresh, h_offset=0, v_offset=0, h_endset=0, v_endset=0):
    """
    Tile an image with square boxes of side cropping_window.

    image: object exposing a (width, height) `size` attribute
    tresh: forwarded to generate_1d_limits as its `thresh` argument
    h_offset / h_endset: margins skipped at the start / end of the width
    v_offset / v_endset: margins skipped at the start / end of the height

    Returns the list of (x1, y1, x2, y2) boxes built by
    generate_cropping_boxes_from_limits.
    """
    width, height = image.size
    horizontal_limits = generate_1d_limits(cropping_window, width, tresh, h_offset, h_endset)
    vertical_limits = generate_1d_limits(cropping_window, height, tresh, v_offset, v_endset)
    return generate_cropping_boxes_from_limits(*horizontal_limits, *vertical_limits)


def image_square_resize(im_input, new_size):
    """
    Return a new image resized to new_size x new_size with Lanczos resampling.

    The aspect ratio is not preserved: both sides are forced to new_size.
    im_input is left untouched, since resize() returns a new image.
    """
    # Image.ANTIALIAS was an alias of Image.LANCZOS that Pillow 10 removed;
    # Image.LANCZOS selects the same filter and exists in old and new releases.
    return im_input.resize((new_size, new_size), Image.LANCZOS)


def image_rotator(im_input, angle):
    """
    Rotate an image by 90, 180 or 270 degrees through Image.transpose.

    Returns the transposed image.
    Raises ValueError('angle not supported') for any other angle.
    """
    supported = (
        (90, Image.ROTATE_90),
        (180, Image.ROTATE_180),
        (270, Image.ROTATE_270),
    )
    for degrees, method in supported:
        if angle == degrees:
            return im_input.transpose(method)
    raise ValueError('angle not supported')
        
        
def image_augmentator(im_input, return_orig = True):
    """
    Build a list of augmented variants of an image.

    The returned list holds, in order:
      - a copy of im_input (only when return_orig is true),
      - im_input transposed with ROTATE_90,
      - im_input transposed with FLIP_LEFT_RIGHT,
      - im_input transposed with FLIP_TOP_BOTTOM.

    im_input itself is not modified.
    """
    # transpose() returns a new image, so the source does not need to be
    # copied before each call. The ROTATE_180 and ROTATE_270 variants are
    # currently disabled.
    operations = (
        Image.ROTATE_90,
        # Image.ROTATE_180,
        # Image.ROTATE_270,
        Image.FLIP_LEFT_RIGHT,
        Image.FLIP_TOP_BOTTOM,
    )
    im_aug = [im_input.copy()] if return_orig else []
    im_aug.extend(im_input.transpose(op) for op in operations)
    return im_aug

def proc_folder_name(fol, suffix='cropped'):
    """
    Derive the output folder name for a source folder.

    A single trailing '/' on fol (if any) is removed, then '_<suffix>/' is
    appended, e.g. '/a/b/' -> '/a/b_cropped/'.
    """
    base = fol[:-1] if fol.endswith('/') else fol
    return f'{base}_{suffix}/'

In [36]:
# class SizeCounter:
#     def __init__(self):
#         self.counter = {}
    
#     def add(self, size):
#         try:
#             self.counter[size]
#             self.counter[size] +=1
#         except KeyError:
#             self.counter[size] = 1
        

In [37]:
# sizes = SizeCounter()
# for file in full_path_valid_files:
#     im = Image.open(file)
#     sizes.add(size=im.size)
    
# # sizes.counter

### Cropping (with optional augmentation)

In [38]:
# --- Run configuration ---
# cropping_window = 512
cropping_window = 256    # side of each square crop, in pixels
augmentate = True        # also save the variants built by image_augmentator
padding_tresh = 0.25     # passed to generate_cropping_boxes as `tresh`
resize = False           # when True, each crop is resized to image_output_size
image_output_size = 256

# The output folder sits next to the scanned one; its name records the run
# settings.
suffix = f'cropped_{cropping_window}' + ('_augmentated' if augmentate else '')
saving_folder_name = proc_folder_name(folder_to_scan, suffix=suffix)
print(f' saving files in {saving_folder_name}')
os.makedirs(saving_folder_name, exist_ok=True)

# Margins excluded from cropping at the image borders; forwarded as keyword
# arguments to generate_cropping_boxes.

# to avoid black borders in Landsat 8 images
# params = dict(h_offset=50, h_endset=50, v_offset=50, v_endset=50)

# mars
params = dict(h_offset=0, h_endset=0, v_offset=60, v_endset=60)

 saving files in /data/datasets/earth_images/proc_sea_cropped_256_augmentated/


In [39]:
# Crop every source image into boxes and write each crop (plus its augmented
# variants when `augmentate` is set) into saving_folder_name.
counter = 0  # number of crop boxes attempted, whether or not saving succeeded

for file in tqdm(full_path_valid_files):
    base_name, file_ext = os.path.splitext(os.path.basename(file))
    # The context manager closes the source file once all its crops are written.
    with Image.open(file) as im:
        croping_boxes = generate_cropping_boxes(im, cropping_window, padding_tresh, **params)
        for i, b in enumerate(croping_boxes):
            counter += 1
            try:
                imc = im.crop(b)  # b = (left, upper, right, lower)
                if resize:
                    imc = image_square_resize(imc, image_output_size)
                if augmentate:
                    # One output file per variant: <name>_<box>_<variant><ext>
                    for ii, imcc in enumerate(image_augmentator(imc)):
                        f_name = os.path.join(saving_folder_name, f'{base_name}_{i}_{ii}{file_ext}')
                        imcc.save(f_name)
                else:
                    f_name = os.path.join(saving_folder_name, f'{base_name}_{i}{file_ext}')
                    imc.save(f_name)
            except Exception as exc:
                # Best effort: report the failing crop and continue with the
                # next one. Catching Exception (not a bare except) keeps
                # Ctrl-C / kernel interrupts working.
                print(i, file, 'error', repr(exc))
print(len(os.listdir(saving_folder_name)))

100%|██████████| 118/118 [08:23<00:00,  4.26s/it]

282652



