In [50]:
import os
import sys
import time
import gc

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path+"/scripts")
    
from destruction_utilities import *

In [51]:
DEBUG = False
CITY = 'aleppo'
DATA_DIR = "../../data"
PRE_IMAGE_INDEX=[0]
WINDOW = True
WINDOW_SIZE = (40,20)
DATASET = 'all'
BALANCE=True
TILE_SIZE = (128,128)

In [52]:
if WINDOW:
    window = center_window(f'{DATA_DIR}/{CITY}/others/{CITY}_samples.tif', (WINDOW_SIZE[0]*1, WINDOW_SIZE[1]*1))
    samples = read_raster(f'{DATA_DIR}/{CITY}/others/{CITY}_samples.tif', window=window)
else:
    samples = read_raster(f'{DATA_DIR}/{CITY}/others/{CITY}_samples.tif')
images  = search_data(pattern(city=CITY, type='image'), directory=DATA_DIR)
labels  = search_data(pattern(city=CITY, type='label'), directory=DATA_DIR)

In [53]:
if DATASET == 'train' or DATASET=='all':
    delete_zarr_if_exists(CITY, 'labels_conv_train', path=DATA_DIR)
    delete_zarr_if_exists(CITY, 'images_conv_train', path=DATA_DIR)
if DATASET == 'validate' or DATASET=='all':
    delete_zarr_if_exists(CITY, 'labels_conv_valid', path=DATA_DIR)
    delete_zarr_if_exists(CITY, 'images_conv_valid', path=DATA_DIR)
if DATASET == 'test' or DATASET=='all':
    delete_zarr_if_exists(CITY, 'labels_conv_test', path=DATA_DIR)
    delete_zarr_if_exists(CITY, 'images_conv_test', path=DATA_DIR)

In [54]:
image_dates = sorted([el.split("image_")[1].split('.tif')[0] for el in images])
label_dates = sorted([el.split("label_")[1].split('.tif')[0] for el in labels])

for label in label_dates:
    if label.replace("-", "_") not in image_dates:
        latest_available_image = sorted([im for im in image_dates if time.strptime(im, "%Y_%m_%d")  < time.strptime(label, "%Y-%m-%d")])
        latest_available_image = latest_available_image[-1]
        if DEBUG:
            print(label, latest_available_image)
        images.append(images[0].split("image_")[0]+"image_"+latest_available_image+".tif")
images = sorted(images)

In [55]:
if DEBUG:
    print("label", "image")
    for i, im in enumerate(images):
        print(labels[i].split("label_")[1], images[i].split("image_")[1])

In [56]:
for i in range(len(images)):
    if WINDOW:
        window = center_window(labels[i], (WINDOW_SIZE[0]*1, WINDOW_SIZE[1]*1))
        label = np.array(read_raster(labels[i], window=window))
    else:
        label = np.array(read_raster(labels[i]))

    h,w,b = label.shape
    label = label.reshape(1, h, w, b)
    label = tile_sequences(label, tile_size=(1,1))
    exclude = np.where(label.flatten() == -1)
    label = np.delete(label, exclude, 0)
    label[label!=3.0] = 0.0
    label[label==3.0] = 1.0
    _, train, validate, test = sample_split(label, np.delete(samples.flatten(), exclude))
    
    if DEBUG:
        print(f"\n{label_dates[i]}")
        print("Train distribution:", np.unique(train.flatten(), return_counts=True))
        print("Validate distribution:", np.unique(validate.flatten(), return_counts=True))
        print("Test distribution:", np.unique(test.flatten(), return_counts=True))
        print("Uncertains:", len(exclude[0]))
        
    if DATASET == 'train' or DATASET=='all':
            train_shuffle = np.arange(len(train))
            np.random.shuffle(train_shuffle)
            save_zarr(train[train_shuffle].reshape(np.take(train.shape, [0,2,3,4])), CITY, 'labels_conv_train', path=DATA_DIR)

    if DATASET == 'validate' or DATASET=='all':
        validate_shuffle = np.arange(len(validate))
        np.random.shuffle(validate_shuffle)
        save_zarr(validate[validate_shuffle].reshape(np.take(validate.shape, [0,2,3,4])), CITY, 'labels_conv_valid', path=DATA_DIR)

    if DATASET == 'test' or DATASET=='all':
        test_shuffle = np.arange(len(test))
        np.random.shuffle(test_shuffle)
        save_zarr(test[test_shuffle].reshape(np.take(test.shape, [0,2,3,4])), CITY, 'labels_conv_test', path=DATA_DIR)
        
    del _, train, validate, test, label
    
    if WINDOW:
        window = center_window(images[i], (WINDOW_SIZE[0]*TILE_SIZE[0], WINDOW_SIZE[1]*TILE_SIZE[1]))
        image = np.array(read_raster(images[i], window=window))
    else:
        image = np.array(read_raster(images[i]))
        
    h,w,b = image.shape
    image = image.reshape(1,h,w,b)
    image = tile_sequences(image,  tile_size=TILE_SIZE)
    image = np.delete(image, exclude, 0)
    dum_ = image # comment in prod
    
    _, train, validate, test = sample_split(image, np.delete(samples.flatten(), exclude))
    if DEBUG:
        print("New Image Shape:", image.shape)
        
    if DATASET == 'train' or DATASET=='all':
        save_zarr(train[train_shuffle].reshape(np.take(train.shape, [0,2,3,4])), CITY, 'images_conv_train', path=DATA_DIR)
    if DATASET == 'validate' or DATASET=='all':
        save_zarr(validate[validate_shuffle].reshape(np.take(validate.shape, [0,2,3,4])), CITY,'images_conv_valid', path=DATA_DIR)
    if DATASET == 'test' or DATASET=='all':
        save_zarr(test[test_shuffle].reshape(np.take(test.shape, [0,2,3,4])), CITY,'images_conv_test', path=DATA_DIR) 
    del _, train, validate, test, image, exclude
    print(f'------ {label_dates[i]}')

    gc.collect(generation=2)
del samples, images, labels

if DATASET == 'train' or DATASET=='all':
    #%% 
    # Generate a balanced (upsampled) dataset and shuffle it..
    delete_zarr_if_exists(CITY, 'labels_conv_train_balanced', path=DATA_DIR)
    delete_zarr_if_exists(CITY, 'images_conv_train_balanced', path=DATA_DIR)
    delete_zarr_if_exists(CITY, 'labels_conv_train_balanced_shuffled', path=DATA_DIR)
    delete_zarr_if_exists(CITY, 'images_conv_train_balanced_shuffled', path=DATA_DIR)
    if BALANCE:
        print('\n--- Generate a balanced (upsampled) dataset..')
        balance(CITY, path=DATA_DIR)
    print('--- Shuffle dataset..')
    shuffle(CITY, TILE_SIZE, (20,75), path=DATA_DIR)

print(f"--- Data prep complete for {CITY}")

------ 2011-01-01
------ 2013-05-26
------ 2013-09-23
------ 2014-05-23
------ 2014-07-14
------ 2015-04-26
------ 2015-05-01
------ 2016-03-29
------ 2016-09-18

--- Generate a balanced (upsampled) dataset..
--- Shuffle dataset..
------ Reordering array in batches of 20. Total 144 sets..
--------- Finished 50 sets
--------- Finished 100 sets
------ Shuffling array in batches of 75. Total 38 sets..
--------- Finished 15 sets
--------- Finished 30 sets
--- Data prep complete for aleppo


In [57]:
dum_.shape

(800, 1, 128, 128, 3)

<zarr.core.Array (2892, 128, 128, 3) float64>

In [70]:
def downsample(image, factor):
    print(image.shape)
    p, n, h, w, d = image.shape
    new_array = np.zeros((p, n, h//factor, w//factor, d))
    for i in range(p):
        new_array[i, 0, :, :, :] = np.array([[2,2],[2,2]]).reshape(2,2,1)
    return new_array

In [71]:
downsample(dum_)

(800, 1, 128, 128, 3)


array([[[[[2.],
          [2.]],

         [[2.],
          [2.]]]],



       [[[[2.],
          [2.]],

         [[2.],
          [2.]]]],



       [[[[2.],
          [2.]],

         [[2.],
          [2.]]]],



       ...,



       [[[[2.],
          [2.]],

         [[2.],
          [2.]]]],



       [[[[2.],
          [2.]],

         [[2.],
          [2.]]]],



       [[[[2.],
          [2.]],

         [[2.],
          [2.]]]]])

In [27]:
testTest(t)

array([[[[<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         ...,
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>]],

        [[<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         ...,
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>]],

        [[<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         [<class 'float'>, <class 'float'>, <class 'float'>],
         ...,
         [<class 'float'