In [1]:
import os
import shutil
from PIL import Image
import random
import math
import tensorflow as tf
import pathlib
from tqdm import tqdm

2023-07-26 23:34:02.276436: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Folder that receives the tiles cut from AerialImageDataset/train/gt/.
FROM_IMAGE_PATH = "preprocessedDataset/from/"
# Folder that receives the tiles cut from AerialImageDataset/train/images/.
TO_IMAGE_PATH = "preprocessedDataset/to/" 
# Output folders for the merged (side-by-side) JPEG pairs produced by create_dataset.
TRAIN_PATH = "dataset/train/"
TEST_PATH = "dataset/test/"
# Edge length, in pixels, of the square tiles; also the size merge_image resizes its first image to.
SPLIT_SIZE_IMAGE = 256  
# Fraction of the tile pairs that create_dataset writes to TRAIN_PATH; the rest go to TEST_PATH.
DATASET_TRAIN_PERCENT = 0.8  
# Gate for the directory-creation, tiling and merging cells; they are skipped while this is False.
PREPARE_DATASET = False
# Not referenced by the preprocessing cells shown here; presumably used by later
# input-pipeline/training cells — TODO confirm.
BATCH_SIZE = 1


In [3]:
# Fetch the dataset only when no "AerialImageDataset" directory exists in the working directory.
if not os.path.isdir("AerialImageDataset"):
    # NOTE(review): `-k` disables TLS certificate verification and the downloaded
    # script is piped straight into bash, so its content is executed unverified.
    # Presumably the script downloads and unpacks into AerialImageDataset/ — confirm.
    !curl -k https://files.inria.fr/aerialimagelabeling/getAerial.sh | bash
    # Delete the test split right after the download; the tiling cells shown in
    # this notebook only read from AerialImageDataset/train/.
    shutil.rmtree("AerialImageDataset/test")

In [4]:

def split_image(original_image_path, output_folder, split_size):
    """Cut an image into square tiles and save every tile into ``output_folder``.

    The image is walked row by row (top to bottom, left to right inside a
    row). A right or bottom margin that cannot fill a whole tile is dropped.
    Tiles are saved as ``<name>_<index><extension>``, where ``<name>`` and
    ``<extension>`` are taken from the source file name and ``<index>``
    counts the tiles from 0 in walk order.

    Args:
        original_image_path: Path of the image to split.
        output_folder: Directory the tiles are written to. It is not created
            here, so it must already exist.
        split_size: Edge length of each square tile, in pixels.
    """
    name, extension = os.path.splitext(os.path.basename(original_image_path))

    # The context manager closes the source file once all tiles are written,
    # instead of leaving one open handle per processed image.
    with Image.open(original_image_path) as original_image:
        width, height = original_image.size

        # Shrink the walked area to a whole number of tiles in each direction.
        width -= width % split_size
        height -= height % split_size

        image_number = 0
        for y in range(0, height, split_size):
            for x in range(0, width, split_size):
                box = (x, y, x + split_size, y + split_size)
                tile_path = os.path.join(
                    output_folder, f"{name}_{image_number}{extension}"
                )
                original_image.crop(box).save(tile_path)
                image_number += 1

In [5]:
def merge_image(img1_path, img2_path, new_image_path):
    """Place two images side by side on one canvas and save it as a JPEG.

    The first image is resized to ``SPLIT_SIZE_IMAGE`` x ``SPLIT_SIZE_IMAGE``
    and pasted on the left half. The second image is pasted as-is, starting
    at the right edge of the first one; it is not rescaled. The canvas is an
    RGB image twice as wide as the resized first image, pre-filled with
    (250, 250, 250).

    Args:
        img1_path: Path of the image placed on the left.
        img2_path: Path of the image placed on the right.
        new_image_path: Destination path of the merged JPEG.
    """
    # Both inputs are closed when the block exits; callers such as
    # create_dataset delete these files right after this function returns.
    with Image.open(img1_path) as image1, Image.open(img2_path) as image2:
        left_half = image1.resize((SPLIT_SIZE_IMAGE, SPLIT_SIZE_IMAGE))
        tile_width, tile_height = left_half.size

        new_image = Image.new('RGB', (2 * tile_width, tile_height), (250, 250, 250))
        new_image.paste(left_half, (0, 0))
        new_image.paste(image2, (tile_width, 0))

    new_image.save(new_image_path, "JPEG")

In [6]:

def create_dataset(train_percent, seed=None):
    """Randomly split the tile pairs into train/test sets of merged JPEGs.

    Every non-hidden ``*.tif`` file in ``FROM_IMAGE_PATH`` is paired with the
    file of the same name in ``TO_IMAGE_PATH``. Each pair is merged with
    ``merge_image`` into ``<stem>.jpeg`` under ``TRAIN_PATH`` or ``TEST_PATH``.
    This is destructive: both source tiles are deleted once their merged
    image has been written.

    Args:
        train_percent: Fraction of the pairs sent to ``TRAIN_PATH``. The
            count is ``ceil(n * train_percent)``, clamped to ``[0, n]``; the
            remaining pairs go to ``TEST_PATH``.
        seed: Optional seed for a reproducible split. With the default
            ``None`` the module-level ``random`` state is used.
    """
    # Sorted so that a given seed yields the same split regardless of the
    # order in which the OS lists the directory.
    files = sorted(
        name
        for name in os.listdir(FROM_IMAGE_PATH)
        if not name.startswith(".") and name.endswith(".tif")
    )

    rng = random if seed is None else random.Random(seed)
    # One shuffle plus a slice replaces repeated choice()/remove(), which was
    # quadratic in the number of tiles.
    rng.shuffle(files)

    train_count = min(len(files), max(0, math.ceil(len(files) * train_percent)))
    splits = (
        (TRAIN_PATH, files[:train_count]),
        (TEST_PATH, files[train_count:]),
    )

    for destination, names in splits:
        for name in tqdm(names):
            stem = os.path.splitext(name)[0]
            merge_image(
                FROM_IMAGE_PATH + name,
                TO_IMAGE_PATH + name,
                destination + stem + ".jpeg",
            )
            # Remove the source tiles only after the merged image was saved.
            os.remove(FROM_IMAGE_PATH + name)
            os.remove(TO_IMAGE_PATH + name)

In [7]:
if PREPARE_DATASET:
    # exist_ok=True keeps this cell re-runnable: without it a second run (or
    # a partially prepared dataset) fails with FileExistsError.
    for directory in (FROM_IMAGE_PATH, TO_IMAGE_PATH, TRAIN_PATH, TEST_PATH):
        os.makedirs(directory, exist_ok=True)

In [8]:
if PREPARE_DATASET:
    # Tile every file found in the gt/ folder into FROM_IMAGE_PATH.
    gt_path = "AerialImageDataset/train/gt/"
    for img in tqdm(os.listdir(gt_path)):
        split_image(
            os.path.join(gt_path, img),
            output_folder=FROM_IMAGE_PATH,
            split_size=SPLIT_SIZE_IMAGE,
        )

100%|██████████| 180/180 [04:30<00:00,  1.51s/it]


In [9]:
if PREPARE_DATASET:
    # Tile every file found in the images/ folder into TO_IMAGE_PATH.
    # create_dataset later pairs these tiles with the gt tiles by file name.
    images_path = "AerialImageDataset/train/images/"
    for img in tqdm(os.listdir(images_path)):
        split_image(images_path + img, TO_IMAGE_PATH, SPLIT_SIZE_IMAGE)

100%|██████████| 180/180 [06:58<00:00,  2.33s/it]


In [10]:
if PREPARE_DATASET:
    # Merge the tile pairs and distribute them between TRAIN_PATH and TEST_PATH.
    create_dataset(train_percent=DATASET_TRAIN_PERCENT)

100%|██████████| 51984/51984 [18:13<00:00, 47.55it/s]
100%|██████████| 12996/12996 [04:14<00:00, 51.10it/s]
