In [1]:
import numpy as np
import geopandas as gpd
import geojson
import os
import sys
import pandas as pd
import glob
import shutil
import datetime
import rasterio
import fiona
import skimage
import shapely
import cv2
import multiprocessing


from osgeo import gdal
from osgeo import ogr
from osgeo import osr

import argparse
from tqdm import tqdm

from osgeo import ogr
from utils import convert_poly_coords, create_mask, convert, map_wrapper
from tile_utils import slice_im_plus_boxes

In [2]:
def count_unique_index(df, by):
    """Return one row per group of ``by`` with the group's row count.

    Args:
        df: pandas DataFrame to summarise.
        by: column name (or list of names) passed straight to ``groupby``.

    Returns:
        DataFrame holding the grouping column(s) plus a ``count`` column.
    """
    group_sizes = df.groupby(by).size()
    # size() yields an unnamed Series, so reset_index() labels its values 0.
    counts = group_sizes.reset_index()
    return counts.rename(columns={0: 'count'})

In [3]:
dir_str = 'data/train/RarePlanes_train_geojson_aircraft_tiled/geojson_aircraft_tiled/'
directory = os.fsencode(dir_str)

# Walk the tiled-annotation folder: echo every entry's base name and parse
# each GeoJSON file found there (a quick sanity check that the files load).
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    # Base name is everything before the FIRST dot ('a.png.aux.xml' -> 'a').
    file_base = filename.split('.')[0]
    print(file_base)

    # NOTE(review): label_path and img_path are not read anywhere in this
    # cell, and img_path ends in a bare '.' with no extension — confirm
    # whether these are leftovers or whether an extension is missing.
    label_path = dir_str + file_base + '.geojson'
    img_path = dir_str + file_base + '.'

    # Anything that is not a GeoJSON annotation file is ignored.
    if not filename.endswith(".geojson"):
        continue

    geojson_path = dir_str + filename
    with open(geojson_path) as f:
        # Only the most recently parsed file stays bound to `gjson`.
        gjson = geojson.load(f)

In [4]:
# ----------------------------------------------------------------------
# Run configuration
# ----------------------------------------------------------------------
verbose = True
mask_burnValue = 255      # pixel value handed to create_mask() as burnValue
sliceHeight = 416
sliceWidth = 416
cls_id = 0                # single class id written to every label row
valid_freq_iter = 6       # 6 corresponds to 1/6 of the data for validation
n_threads = 8             # worker count for the multiprocessing pool

# Input: tiled PS-RGB training data.
data_dir = 'data/'
data_dir_train = os.path.join(data_dir, 'train/RarePlanes_train_PS-RGB_tiled/PS-RGB_tiled/')

# Output: root folder for everything this notebook writes; create it now.
out_dir_root = 'wdata'
for d in [out_dir_root]:
    os.makedirs(d, exist_ok=True)

# Sorted directory listing plus the accumulators that the per-file loop
# (which builds masks, pngs and labels jobs) fills in.
subdirs = sorted(os.listdir(data_dir_train))
shape_list, input_args = [], []


In [5]:
def prep_one(label_path, img_path, 
             subdir, suff, 
             out_dir_image, out_dir_label, out_dir_mask,
             out_path_image, out_path_label, out_path_mask, 
             sliceHeight=416, sliceWidth=416, mask_burnValue=255, 
             cls_id=0,
             verbose=True):
    """Prepare one image (and its annotations) for training.

    Depending on ``suff`` the image is either tiled together with its boxes
    and mask (``'_tile'``), copied unchanged (``'_yuge'`` or a near-square
    aspect ratio), or zero-padded on the right/bottom before being saved.
    In the non-tiled cases a YOLO-style label file is written as well.

    Args:
        label_path: path of the vector annotation file opened with fiona;
            a falsy value means "no labels" (empty boxes/classes).
        img_path: path of the source image.
        subdir: base name of the sample; used to build the tile output name.
        suff: ``'_tile'``, ``'_yuge'`` or anything else (treated as "plain").
        out_dir_image, out_dir_label, out_dir_mask: output folders, only
            used by the tiling branch.
        out_path_image, out_path_label, out_path_mask: output file paths for
            the non-tiled branches; a falsy ``out_path_mask`` /
            ``out_path_label`` disables mask / label output.
        sliceHeight, sliceWidth: tile size forwarded to slice_im_plus_boxes.
        mask_burnValue: forwarded to create_mask as ``burnValue``.
        cls_id: class id assigned to every box.
        verbose: print each label row as it is written.

    Raises:
        ValueError: if any converted label coordinate lies outside [0, 1].
    """

    ##################
    # Image
    ##################

    im_tmp = skimage.io.imread(img_path)
    h, w = im_tmp.shape[:2]
    aspect_ratio = 1.0 * h / w

    ##################
    # Labels
    ##################

    if label_path:
        with fiona.open(label_path, "r") as annotation_collection:
            annotations = [feature["geometry"] for feature in annotation_collection]
        # get pixel coords of bounding boxes
        boxes = []
        for a in annotations:
            # Only the first coordinate ring of each geometry is used.
            geom = shapely.geometry.Polygon(a['coordinates'][0])
            pixel_geom = convert_poly_coords(geom, raster_src=img_path,
                                             affine_obj=None, inverse=True,
                                             precision=2)
            # object.bounds returns a (minx, miny, maxx, maxy) tuple.
            minx, miny, maxx, maxy = pixel_geom.bounds
            boxes.append([minx, miny, maxx, maxy])

        # set classes
        classes = len(boxes) * [cls_id]
    else:
        classes, boxes = [], []

    ##################
    # Process data
    ##################

    # create masks (skipped when the mask file is already on disk)
    if out_path_mask and (not os.path.exists(out_path_mask)):
        create_mask(img_path, label_path, out_path_mask,
                    burnValue=mask_burnValue)

    # tile data, if needed
    if suff == '_tile':
        # tile image, labels, and mask (the tiler also creates the labels);
        # for training, skip highly overlapped edge tiles
        out_name = subdir + '_PS-RGB'
        slice_im_plus_boxes(
            img_path, out_name, out_dir_image,
            boxes=boxes, yolo_classes=classes, out_dir_labels=out_dir_label,
            mask_path=out_path_mask, out_dir_masks=out_dir_mask,
            sliceHeight=sliceHeight, sliceWidth=sliceWidth,
            overlap=0.1, slice_sep='|',
            skip_highly_overlapped_tiles=True,
            out_ext='.png', verbose=False)
        return

    # no tiling: first process the image, then make labels

    if suff == '_yuge' or (0.9 < aspect_ratio < 1.1):
        # huge-object images and near-square images are copied as they are
        shutil.copyfile(img_path, out_path_image)
        hfinal, wfinal = h, w
    else:
        # pad on the right or bottom only, which leaves the pixel
        # coordinates of the labels untouched
        pad = int(abs(h - w))
        right_pad = pad if h / w > 1.1 else 0
        bottom_pad = pad if h / w < 0.9 else 0

        padded_image = cv2.copyMakeBorder(
            im_tmp, 0, bottom_pad, 0, right_pad,
            cv2.BORDER_CONSTANT, value=0)
        skimage.io.imsave(out_path_image, padded_image)
        hfinal, wfinal = padded_image.shape[:2]

        if out_path_mask:
            # apply the same border to the mask, in place
            # NOTE(review): if the mask file already existed before this call
            # (create_mask skipped above), it is padded again here — confirm
            # that reruns always start from a clean output directory.
            mask = skimage.io.imread(out_path_mask)
            padded_mask = cv2.copyMakeBorder(
                mask, 0, bottom_pad, 0, right_pad,
                cv2.BORDER_CONSTANT, value=0)
            skimage.io.imsave(out_path_mask, padded_mask)

    # make yolo labels
    if out_path_label:
        # The context manager closes the file even when the bounds check
        # below raises.
        with open(out_path_label, "w") as txt_outfile:
            for class_tmp, box_tmp in zip(classes, boxes):
                minx, miny, maxx, maxy = box_tmp
                # presumably returns normalised (x, y, w, h) — see utils.convert
                bb = convert((wfinal, hfinal), [minx, maxx, miny, maxy])
                if (np.min(bb) < 0) or (np.max(bb) > 1):
                    print("  yolo coords:", bb)
                    raise ValueError("  coords outside bounds, breaking!")
                outstring = str(class_tmp) + " " + " ".join([str(a) for a in bb]) + '\n'
                if verbose:
                    print("  outstring:", outstring.strip())
                txt_outfile.write(outstring)

In [6]:
# Build one prep_one() job per annotated tile found in `subdirs`.
#
# The accumulators are (re)initialised here so that re-running this cell
# does not append every job a second time.
list_of_files = []
shape_list = []
input_args = []

label_ext = '.geojson'

# Created for parity with the original layout; nothing below writes into it.
out_dir_tiff = os.path.join(out_dir_root, 'train')
os.makedirs(out_dir_tiff, exist_ok=True)

# Size thresholds used to classify an image below.
max_dx = 3

# Counts tiles that were actually queued.  The train/valid split must use
# this counter rather than the directory index `i`: the listing also holds
# the .png and .png.aux.xml companions of every label file, so `i` advances
# several steps per tile and `i % valid_freq_iter` would not give a
# 1-in-valid_freq_iter split.
n_prepared = 0

for i, subdir in enumerate(subdirs):
    print("\n")
    print(i, "/", len(subdirs), subdir)

    ##################
    # Data Locs
    ##################

    # Each tile is driven by its label file; images, sidecar files and any
    # other directory entries are skipped explicitly instead of relying on
    # sort order.
    if not subdir.endswith(label_ext):
        continue

    file_base = subdir[:-len(label_ext)]
    file_ext = label_ext[1:]
    subdir = file_base
    list_of_files.append(file_base)

    print(file_base)
    print(file_ext)

    label_path = os.path.join(data_dir_train, file_base + '.geojson')
    img_path = os.path.join(data_dir_train, file_base + '.png')

    print(label_path)
    print(img_path)

    if not os.path.exists(img_path):
        print("  image not found, skipping:", img_path)
        continue

    ##################
    # Image
    ##################

    im_tmp = skimage.io.imread(img_path)
    h, w = im_tmp.shape[:2]
    shape_list.append([subdir, h, w])
    aspect_ratio = 1.0 * h / w
    dx = np.abs(h - w)

    ##################
    # Labels
    ##################

    with fiona.open(label_path, "r") as annotation_collection:
        annotations = [feature["geometry"] for feature in annotation_collection]
    if verbose:
        print("  h, w:", h, w)
        print("  aspect_ratio:", aspect_ratio)
        print("  n annotations:", len(annotations))

    ##################
    # Set output paths
    ##################

    # put every `valid_freq_iter`-th queued tile in valid
    if (n_prepared % valid_freq_iter) == 0:
        pop = 'valid'
    else:
        pop = 'train'

    # check if it's a huge square image with a single annotation (these all
    # have a large circle centered in the middle, so no tiling is needed)
    is_big_square = ((h >= 600) and (w >= 600) and (dx <= max_dx)) \
        or ((h >= 1000) and (w >= 1000) and (0.97 < aspect_ratio < 1.03))
    if is_big_square and (len(annotations) == 1):
        suff = '_yuge'
    # look for large images with multiple annotations
    elif (h >= 600) and (w >= 600) and (len(annotations) > 1):
        suff = '_tile'
    else:
        suff = ''

    # set output folders
    out_dir_image = os.path.join(out_dir_root, pop, 'images' + suff)
    out_dir_label = os.path.join(out_dir_root, pop, 'labels' + suff)
    out_dir_mask = os.path.join(out_dir_root, pop, 'masks' + suff)
    for d in [out_dir_image, out_dir_label, out_dir_mask]:
        os.makedirs(d, exist_ok=True)

    # output files
    out_path_image = os.path.join(out_dir_image, subdir + '.png')
    out_path_label = os.path.join(out_dir_label, subdir + '.txt')
    out_path_mask = os.path.join(out_dir_mask, subdir + '.png')

    # First element is the callable; the rest are its positional arguments
    # (this is the list layout handed to map_wrapper by the dispatch cell).
    input_args.append([prep_one,
            label_path, img_path,
            subdir, suff,
            out_dir_image, out_dir_label, out_dir_mask,
            out_path_image, out_path_label, out_path_mask,
            sliceHeight, sliceWidth, mask_burnValue,
            cls_id,
            verbose])
    n_prepared += 1



0 / 17445 100_1040010029990A00_tile_319.geojson
100_1040010029990A00_tile_319
geojson
data/train/RarePlanes_train_PS-RGB_tiled/PS-RGB_tiled/100_1040010029990A00_tile_319.geojson
data/train/RarePlanes_train_PS-RGB_tiled/PS-RGB_tiled/100_1040010029990A00_tile_319.png
  h, w: 512 512
  aspect_ratio: 1.0
  n annotations: 2


1 / 17445 100_1040010029990A00_tile_319.png


2 / 17445 100_1040010029990A00_tile_319.png.aux.xml


3 / 17445 100_1040010029990A00_tile_333.geojson
100_1040010029990A00_tile_333
geojson
data/train/RarePlanes_train_PS-RGB_tiled/PS-RGB_tiled/100_1040010029990A00_tile_333.geojson
data/train/RarePlanes_train_PS-RGB_tiled/PS-RGB_tiled/100_1040010029990A00_tile_333.png
  h, w: 512 512
  aspect_ratio: 1.0
  n annotations: 2


4 / 17445 100_1040010029990A00_tile_333.png


5 / 17445 100_1040010029990A00_tile_333.png.aux.xml


6 / 17445 100_1040010029990A00_tile_347.geojson
100_1040010029990A00_tile_347
geojson
data/train/RarePlanes_train_PS-RGB_tiled/PS-RGB_tiled/100_10400100

In [1]:
# Fan the queued jobs out over a worker pool.
# This cell reads `input_args`, `n_threads` and `map_wrapper`, so the cells
# that define them must have been run earlier in the same kernel session.
n_jobs = len(input_args)
print("len input_args", n_jobs)
print("Execute...\n")
with multiprocessing.Pool(n_threads) as pool:
    pool.map(map_wrapper, input_args)


NameError: name 'input_args' is not defined