# Prepare SpaceNet 7 Data for Model Training

We assume that the initial steps of the README have been executed and that this notebook is running in a Docker container. See the `src` directory for the functions used in the algorithm.

In [1]:
# Dataset location.
# Set the SN7_ROOT_DIR environment variable to point at your copy of the data;
# when it is unset, the default path below is used (edit as needed).
import os

root_dir = os.environ.get('SN7_ROOT_DIR', '/Midgard/Data/hfang/sn7/')

In [2]:
import multiprocessing
import pandas as pd
import numpy as np
import skimage
import gdal
import sys
import os

import matplotlib as mpl
import matplotlib.cm as cmx
import matplotlib.pyplot as plt
import matplotlib.colors as colors
plt.rcParams.update({'font.size': 16})
mpl.rcParams['figure.dpi'] = 300

import solaris as sol
from solaris.raster.image import create_multiband_geotiff
from solaris.utils.core import _check_gdf_load

# Make the sibling ../src directory importable so that the project's
# data-preparation helpers can be imported from this notebook.
module_path = os.path.abspath('../src/')
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)
from sn7_baseline_prep_funcs import map_wrapper, make_geojsons_and_masks

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Create testing masks (for evaluation).
# The per-file work is distributed over a pool of worker processes to increase speed.
# We'll only make a 1-channel mask for now, but Solaris supports a multi-channel mask as well, see
#     https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb

split = 'test_public'
# Root directory that receives the generated masks (edit as needed).
out_root = '/Midgard/home/hfang/sn7_test_public_gt/'
# Upper bound on the number of worker processes (name kept for compatibility).
n_threads = 10
# When True, a second output path under masks_fbc/ is also passed to the mask function.
make_fbc = False

# Every sub-directory of the split is treated as one AOI.
split_dir = os.path.join(root_dir, split)
aois = sorted([f for f in os.listdir(split_dir)
               if os.path.isdir(os.path.join(split_dir, f))])

# One entry per mask still to be generated:
#   [function, name_root, image_path, json_path, mask_path, fbc_mask_path or None]
# NOTE(review): map_wrapper presumably calls the first element with the remaining
# elements as arguments -- confirm against src/sn7_baseline_prep_funcs.py.
input_args = []
for i, aoi in enumerate(aois):
    print(i, "aoi:", aoi)
    im_dir = os.path.join(split_dir, aoi, 'images_masked/')
    json_dir = os.path.join(split_dir, aoi, 'labels_match/')
    out_dir_mask = os.path.join(out_root, split, aoi, 'masks/')
    out_dir_mask_fbc = os.path.join(out_root, split, aoi, 'masks_fbc/')
    os.makedirs(out_dir_mask, exist_ok=True)
    if make_fbc:
        os.makedirs(out_dir_mask_fbc, exist_ok=True)

    # The exists() check is kept from the original; it skips directory entries
    # whose target cannot be resolved.
    json_files = sorted([f for f in os.listdir(json_dir)
                         if f.endswith('Buildings.geojson')
                         and os.path.exists(os.path.join(json_dir, f))])
    for f in json_files:
        name_root = f.split('.')[0]
        json_path = os.path.join(json_dir, f)
        # Derive the image file name from the label file name only. Rewriting
        # substrings on the full path (as was done before) would corrupt it if
        # any directory component happened to contain 'labels' or '_Buildings'.
        image_name = name_root.replace('_Buildings', '') + '.tif'
        image_path = os.path.join(im_dir, image_name)
        output_path_mask = os.path.join(out_dir_mask, name_root + '.tif')
        if make_fbc:
            output_path_mask_fbc = os.path.join(out_dir_mask_fbc, name_root + '.tif')
        else:
            output_path_mask_fbc = None

        # Skip masks that already exist so the cell can be re-run cheaply.
        if not os.path.exists(output_path_mask):
            input_args.append([make_geojsons_and_masks,
                               name_root, image_path, json_path,
                               output_path_mask, output_path_mask_fbc])

# execute
print("len input_args", len(input_args))
print("Execute...\n")
if input_args:
    # Never start more workers than there are tasks; skip the pool entirely
    # when everything has already been generated.
    n_workers = min(n_threads, len(input_args))
    with multiprocessing.Pool(n_workers) as pool:
        pool.map(map_wrapper, input_args)


0 aoi: L15-0358E-1220N_1433_3310_13
1 aoi: L15-0586E-1127N_2345_3680_13
2 aoi: L15-0614E-0946N_2459_4406_13
3 aoi: L15-1014E-1375N_4056_2688_13
4 aoi: L15-1200E-0847N_4802_4803_13
5 aoi: L15-1296E-1198N_5184_3399_13
6 aoi: L15-1389E-1284N_5557_3054_13
7 aoi: L15-1538E-1163N_6154_3539_13
8 aoi: L15-1615E-1206N_6460_3366_13
9 aoi: L15-1691E-1211N_6764_3347_13
len input_args 237
Execute...

  name_root:  name_root:  name_root:  name_root:  name_root:  name_root:global_monthly_2018_01_mosaic_L15-0586E-1127N_2345_3680_13_Buildings  name_root:  name_root:   global_monthly_2019_07_mosaic_L15-0586E-1127N_2345_3680_13_Buildings
       name_root:  name_root:global_monthly_2019_08_mosaic_L15-0358E-1220N_1433_3310_13_Buildingsglobal_monthly_2018_08_mosaic_L15-0614E-0946N_2459_4406_13_Buildings  global_monthly_2019_01_mosaic_L15-0586E-1127N_2345_3680_13_Buildingsglobal_monthly_2020_01_mosaic_L15-0586E-1127N_2345_3680_13_Buildings

global_monthly_2018_08_mosaic_L15-0358E-1220N_1433_3310_13_Buildings

--------
We are now ready to proceed with training and testing; see `sn7_baseline.ipynb`.