In [3]:
import rasterio as rio
import numpy as np

In [4]:
import os
import re
import shutil
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import skimage as ski
from tqdm import tqdm

In [127]:
# Read in the Atlanta summary CSVs.
# Builds tiff_dir_dict, mapping each "<collect dir>/Pan-Sharpen" path to the
# DataFrame loaded from that collect's summary CSV.
atlanta_dir = "/data/spacenet/AOI_6_Atlanta/"
atlanta_csv_dir = "/data/spacenet/AOI_6_Atlanta/summaryData"

# Only *.csv entries are kept so that anything else living in the folder is
# not handed to pd.read_csv; sorting makes the processing order repeatable
# (os.listdir returns entries in arbitrary order).
atl_csvs = sorted(
    name for name in os.listdir(atlanta_csv_dir)
    if name.lower().endswith('.csv')
)

tiff_dir_dict = {}
for csv_name in atl_csvs:
    # The collect directory name is whatever precedes "_Train" in the CSV filename.
    tiff_dir = os.path.join(atlanta_dir, csv_name.split('_Train')[0])
    summary_df = pd.read_csv(os.path.join(atlanta_csv_dir, csv_name))
    tiff_dir_dict[os.path.join(tiff_dir, 'Pan-Sharpen')] = summary_df


###DEBUGGING#
#tiff_dir_dict2 = {}
#tiff_dir_dict2['/data/spacenet/AOI_6_Atlanta/Atlanta_nadir10_catid_1030010003993E00/Pan-Sharpen'] = tiff_dir_dict['/data/spacenet/AOI_6_Atlanta/Atlanta_nadir10_catid_1030010003993E00/Pan-Sharpen']
#tiff_dir_dict = tiff_dir_dict2

In [128]:
# Root output folders for the training set: one for the copied tiles,
# one for the generated masks. Both are created if they do not exist yet.
atl_3band = os.path.join(atlanta_dir, 'train', '3band')
atl_masks = os.path.join(atlanta_dir, 'train', 'masks')
for out_dir in (atl_3band, atl_masks):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

In [129]:
# The angle label is the text between "nadir" and the next underscore in
# each tile-directory path (one entry per key of tiff_dir_dict).
nadir_angle_list = [
    tiff_dir.split("nadir")[1].split('_')[0] for tiff_dir in tiff_dir_dict
]

# Every angle gets its own sub-folder under both train/3band and train/masks.
for nadir_angle in nadir_angle_list:
    nadir_3band = os.path.join(atlanta_dir, 'train', '3band', nadir_angle)
    nadir_masks = os.path.join(atlanta_dir, 'train', 'masks', nadir_angle)
    for angle_dir in (nadir_3band, nadir_masks):
        Path(angle_dir).mkdir(parents=True, exist_ok=True)

In [130]:
# For every collect in tiff_dir_dict:
#   1. copy each referenced tiff into train/3band/<nadir angle>/ and record its size,
#   2. gather the pixel-space WKT polygons belonging to each image id,
#   3. rasterise those polygons into a uint8 mask saved under train/masks/<nadir angle>/.

# One WKT coordinate token: optional sign, digits, optional fraction, optional
# exponent. Sign and exponent are included so that a value such as "-0.5" or
# "1e-05" stays a single number instead of losing its sign or being split.
numbers = re.compile(r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?')

for key in tiff_dir_dict:
    summary_csv_df = tiff_dir_dict[key]
    nadir_angle = key.split("nadir")[1].split('_')[0]

    # Remove entries with "PAN" and "MS" in front: only ids starting with 'A'
    # are kept. Filtering (rather than np.delete on collected indices) also
    # behaves when there is nothing to remove.
    image_ids_arr = np.array(
        [image_id for image_id in summary_csv_df['ImageId'].unique()
         if str(image_id).startswith('A')],
        dtype=object,
    )

    ###DEBUGGING
    #image_ids_arr = image_ids_arr[0:500]

    # Basenames are computed once instead of on every (image, file) comparison.
    file_paths = glob(os.path.join(key, '*'))
    named_paths = [(os.path.basename(file_path), file_path) for file_path in file_paths]

    print('Finding dimensions of each tiff...')
    image_lookup_dict = {}
    for image_id in tqdm(image_ids_arr):
        # (0, 0) marks "no tiff found"; the mask pass below checks for it.
        image_lookup_dict[image_id] = (0, 0)

        for filename, file_path in named_paths:
            if image_id not in filename:
                continue
            # Context manager closes the dataset even if reading its size fails.
            with rio.open(file_path) as raster:
                image_lookup_dict[image_id] = (raster.height, raster.width)
            shutil.copy(file_path, os.path.join(atl_3band, nadir_angle, filename))
            break  # first matching file wins

    print('Gathering polygons for each tiff...')
    image_polygons_dict = {}
    for image_id in tqdm(image_ids_arr):
        id_rows = summary_csv_df['ImageId'] == image_id
        image_polygons_dict[image_id] = summary_csv_df.loc[id_rows, 'PolygonWKT_Pix'].tolist()

    print('Building masks for each tiff...')
    for image_id in tqdm(image_ids_arr):
        # Look the size up by the current id (not by a row left over from the
        # previous loop), so each mask gets its own image's dimensions.
        image_dim = image_lookup_dict[image_id]
        if image_dim == (0, 0):
            # No matching tiff file: skip this image only and keep going.
            continue

        masked_arr = np.zeros(image_dim, dtype=np.uint8)
        for poly in image_polygons_dict[image_id]:
            # Missing values and "EMPTY" geometries have no vertices to draw.
            if not isinstance(poly, str) or 'EMPTY' in poly:
                continue

            # Text between the outer "((" and "))", one vertex per comma-separated chunk.
            ring_text = poly.split("((")[1].split("))")[0]
            polygon_arr = np.array(
                [numbers.findall(vertex) for vertex in ring_text.split(',')]
            ).astype(float)

            # First number of each vertex is used as the column, second as the row.
            cols = polygon_arr[:, 0]
            rows = polygon_arr[:, 1]

            rr, cc = ski.draw.polygon(rows, cols, image_dim)
            masked_arr[rr, cc] = 1

        np.save(os.path.join(atl_masks, nadir_angle, image_id + '_mask.npy'), masked_arr)


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:13<00:00, 77.60it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.83it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.70it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:07<00:00, 135.93it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.92it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.91it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:25<00:00, 39.81it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.13it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.73it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:25<00:00, 39.93it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 172.92it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.44it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:30<00:00, 33.82it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.28it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 36.17it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.20it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.26it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.46it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [00:58<00:00, 17.30it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.21it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 36.08it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:45<00:00,  9.65it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 172.16it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.47it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:25<00:00, 11.93it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.24it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:29<00:00, 35.09it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:27<00:00, 11.62it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 170.73it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.91it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:55<00:00,  8.81it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.36it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.77it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:24<00:00, 12.04it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 174.59it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.53it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:25<00:00, 11.88it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 170.67it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 36.14it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:35<00:00, 10.68it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.68it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.90it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:38<00:00, 10.36it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.12it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 36.11it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:39<00:00, 10.19it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 172.15it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 36.03it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:22<00:00, 12.29it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.80it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 36.28it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:15<00:00, 13.49it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 175.69it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.97it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:46<00:00,  9.58it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 170.18it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.79it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:45<00:00,  9.64it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.38it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:27<00:00, 36.59it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:22<00:00, 12.30it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.10it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:29<00:00, 35.01it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:46<00:00,  9.53it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 170.85it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.83it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:44<00:00,  9.72it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 172.52it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.27it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:42<00:00,  9.90it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.96it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:28<00:00, 35.23it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:38<00:00, 10.30it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 173.88it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:29<00:00, 34.89it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:02<00:00, 16.28it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.62it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:30<00:00, 33.55it/s]


Finding dimensions of each tiff...


100%|██████████| 1019/1019 [01:20<00:00, 12.66it/s]


Gathering polygons for each tiff...


100%|██████████| 1019/1019 [00:05<00:00, 171.43it/s]


Building masks for each tiff...


100%|██████████| 1019/1019 [00:29<00:00, 35.07it/s]
