In [1]:
%matplotlib inline
from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path

In [2]:
from models import *

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration: image geometry, directory layout and path templates.
# ---------------------------------------------------------------------------

# Extent (in pixels) over which the patch-cutting cell iterates its x/y offsets.
ORIGINAL_SIZE = 650

# Directory layout (string form).
BASE_TRAIN_DIR = "data/train"
WORKING_DIR = "data/working"
IMAGE_DIR = "data/working/images/full_rgb/"

# Directory layout (Path form). SOURCE_DIR is read by the patch-cutting cell,
# TARGET_DIR is where the cut patches are written; LIST / MASK are the two
# sub-folder names used under TARGET_DIR.
SOURCE_DIR = Path("data/working/images/full_rgb/")
TARGET_DIR = Path("data/working/images/overlap_rgb/")
LIST = Path('list')
MASK = Path('mask')


def _under_image_dir(filename):
    """Return `IMAGE_DIR + "/" + filename`.

    IMAGE_DIR already ends with "/", so every resulting template contains a
    double slash ("full_rgb//..."). That is kept on purpose so the values stay
    exactly what they have always been.
    """
    return IMAGE_DIR + "/" + filename


# Input files. The templates are filled in later with str.format(prefix=...),
# str.format(datapath=..., image_id=...).
FMT_TRAIN_SUMMARY_PATH = str(
    Path(BASE_TRAIN_DIR)
    / "{prefix:s}_Train"
    / "summaryData"
    / "{prefix:s}_Train_Building_Solutions.csv"
)

_DATAPATH_TEMPLATE = Path("{datapath:s}")
_RGB_IMAGE_TEMPLATE = str(
    _DATAPATH_TEMPLATE / "RGB-PanSharpen" / "RGB-PanSharpen_{image_id:s}.tif")
_MSPEC_IMAGE_TEMPLATE = str(
    _DATAPATH_TEMPLATE / "MUL-PanSharpen" / "MUL-PanSharpen_{image_id:s}.tif")

# Train and test share the same template for each image type.
FMT_TRAIN_RGB_IMAGE_PATH = _RGB_IMAGE_TEMPLATE
FMT_TEST_RGB_IMAGE_PATH = _RGB_IMAGE_TEMPLATE
FMT_TRAIN_MSPEC_IMAGE_PATH = _MSPEC_IMAGE_TEMPLATE
FMT_TEST_MSPEC_IMAGE_PATH = _MSPEC_IMAGE_TEMPLATE

# Preprocessing result
FMT_RGB_BANDCUT_TH_PATH = _under_image_dir("rgb_bandcut{}.csv")
FMT_MUL_BANDCUT_TH_PATH = _under_image_dir("mul_bandcut{}.csv")

# Image list, image container and mask container -- validation/train part
FMT_VALTRAIN_IMAGELIST_PATH = _under_image_dir("{prefix:s}_valtrain_ImageId.csv")
FMT_VALTRAIN_MASK_STORE = _under_image_dir("valtrain_{}_mask.h5")
FMT_VALTRAIN_IM_STORE = _under_image_dir("valtrain_{}_im.h5")
FMT_VALTRAIN_MUL_STORE = _under_image_dir("valtrain_{}_mul.h5")

# Image list, image container and mask container -- validation/test part
FMT_VALTEST_IMAGELIST_PATH = _under_image_dir("{prefix:s}_valtest_ImageId.csv")
FMT_VALTEST_MASK_STORE = _under_image_dir("valtest_{}_mask.h5")
FMT_VALTEST_IM_STORE = _under_image_dir("valtest_{}_im.h5")
FMT_VALTEST_MUL_STORE = _under_image_dir("valtest_{}_mul.h5")

# Mean-image stores
FMT_IMMEAN = _under_image_dir("{}_immean.h5")
FMT_MULMEAN = _under_image_dir("{}_mulmean.h5")

# Image list and image containers -- test part
FMT_TEST_IMAGELIST_PATH = _under_image_dir("{prefix:s}_test_ImageId.csv")
FMT_TEST_IM_STORE = _under_image_dir("test_{}_im.h5")
FMT_TEST_MUL_STORE = _under_image_dir("test_{}_mul.h5")


In [26]:
# Side length, in pixels, of the square image patches sliced out by the
# patch-cutting cell (lst[x:x + sz, y:y + sz]).
sz = 192

# Batch size. Recorded GPU memory usage at batch size = 30:
# 9751MiB / 11441MiB.
bs = 30

## Pre-cut images to 192x192 with stride 48

In [21]:
# Create the output folders for the pre-cut patches.
# Each sub-folder is created unconditionally with exist_ok=True: guarding on
# TARGET_DIR.exists() would skip creating 'list' / 'mask' whenever TARGET_DIR
# itself is already present (e.g. after an interrupted earlier run), and the
# patch-cutting cell would then fail when it tries to save into them.
for subdir in ('list', 'mask'):
    (TARGET_DIR / Path(subdir)).mkdir(parents=True, exist_ok=True)

Run the patch-cutting loop below in parallel (e.g. one worker per source image) if it is too slow.

In [None]:
stride = 48


def _patch_offsets(full_size, patch_size, step):
    """Return the sorted, de-duplicated patch start offsets along one axis.

    Offsets advance by `step` from 0. Any offset whose patch would run past
    `full_size` is clamped to `full_size - patch_size`, so the last patch ends
    exactly on the final pixel. (Clamping to `full_size - patch_size - 1`, as
    this cell used to do, leaves the last row/column uncovered.)

    Several raw offsets can clamp to the same value; they are collapsed so each
    edge patch is cut and saved once instead of being rewritten repeatedly.

    Raises:
        ValueError: if the patch is larger than the image extent.
    """
    last_start = full_size - patch_size
    if last_start < 0:
        raise ValueError(
            'patch size {} exceeds image size {}'.format(patch_size, full_size))
    return sorted({min(start, last_start) for start in range(0, full_size, step)})


lists = sorted((SOURCE_DIR / Path('list')).glob('*'))
masks = sorted((SOURCE_DIR / Path('mask')).glob('*'))
# Images and masks are paired purely by sorted position, so a count mismatch
# would silently pair an image with the wrong mask.
if len(lists) != len(masks):
    raise ValueError(
        'found {} images but {} masks under {}'.format(len(lists), len(masks), SOURCE_DIR))

offsets = _patch_offsets(ORIGINAL_SIZE, sz, stride)
# The mask patch is the central stride x stride window of each sz x sz image patch.
msk_left = (sz - stride) // 2

for img, msk_path in zip(lists, masks):
    lst = plt.imread(str(img))
    msk = plt.imread(str(msk_path))

    for x in offsets:
        for y in offsets:
            lst_patch = lst[x:x + sz, y:y + sz]
            msk_patch = msk[x + msk_left:x + msk_left + stride,
                            y + msk_left:y + msk_left + stride]

            # Both outputs share one file name derived from the source image.
            # img.stem drops the extension whatever its length.
            patch_name = '{}_{}_{}.png'.format(img.stem, x, y)
            # NOTE(review): if the mask is read as a 2-D array, plt.imsave maps it
            # through the default colormap -- confirm the saved masks are what
            # the downstream loader expects.
            plt.imsave(str(TARGET_DIR / Path('list') / Path(patch_name)), lst_patch)
            plt.imsave(str(TARGET_DIR / Path('mask') / Path(patch_name)), msk_patch)
            

Save the patches to a RAM disk if disk I/O becomes the bottleneck.

In [7]:
class MatchedFilesDataset(FilesDataset):
    """A FilesDataset whose targets are image files matched one-to-one with the inputs.

    `fnames` and `y` must have the same length; `y[i]` is the target file
    name that belongs to `fnames[i]`.
    """

    def __init__(self, fnames, y, transform, path):
        """Store the target file names, check they match `fnames` in count,
        then hand `fnames`, `transform` and `path` to the base class."""
        self.y = y
        assert len(fnames) == len(y)
        super().__init__(fnames, transform, path)

    def get_y(self, i):
        """Open and return the i-th target image, resolved against `self.path`."""
        target_file = os.path.join(self.path, self.y[i])
        return open_image(target_file)

    def get_c(self):
        """Always return 0.

        NOTE(review): presumably the class count expected by the base dataset
        API -- confirm against fastai.
        """
        return 0

In [None]:
# Show what is directly under TARGET_DIR. Path.glob() needs a pattern argument
# (calling it with none raises TypeError) and returns a lazy generator, so the
# matches are materialised and sorted for display.
sorted(TARGET_DIR.glob('*'))

In [8]:
# Collect the patch and mask file names as sorted string arrays.
# - Path.glob() requires a pattern; '*' matches every entry in the folder.
# - The generator is materialised before np.array(): wrapping a generator
#   directly gives a 0-d object array that can be neither sorted nor indexed.
# - Both folders are written with identical file names by the patch-cutting
#   cell, so sorting each list keeps x_names[i] and y_names[i] aligned.
x_names = np.array(sorted(str(p) for p in (TARGET_DIR / LIST).glob('*')))
y_names = np.array(sorted(str(p) for p in (TARGET_DIR / MASK).glob('*')))
assert len(x_names) == len(y_names), 'image / mask patch counts differ'

In [11]:
# Sanity check: display the first (image, mask) file-name pair.
first_pair = next(zip(x_names, y_names))
first_pair

('/data/working/images/v5/256/AOI_3_Paris_img10.jpg',
 '/data/working/images/v5/256/AOI_3_Paris_img10.png')

In [9]:
# 80:20 split: hold out a random 20% of the patches for validation.
# The validation size is derived from the data instead of a hard-coded 230,
# which was not 20% of the patch set and raised IndexError for smaller sets.
VAL_FRACTION = 0.2
SPLIT_SEED = 42  # fixed seed so the split is reproducible after a kernel restart
n_val = int(len(x_names) * VAL_FRACTION)
val_idxs = np.random.RandomState(SPLIT_SEED).permutation(len(x_names))[:n_val]
# NOTE(review): patches cut from the same source image overlap (stride < sz),
# so a patch-level random split can put overlapping pixels in both sets --
# consider splitting by source image instead.
((val_x,trn_x),(val_y,trn_y)) = split_by_idx(val_idxs, x_names, y_names)