In [1]:
via_annotations_file = "D:/FCAT/annotations/tiles2_VIA_annotations_800_313.csv"
tiling_scheme_file = "D:/FCAT/tiles2/tiling_scheme.json"
output_dir = "D:/FCAT/annotations/"
rasterfile = "D:/FCAT/FCAT2APPK.tif"


from pprint import pprint
import numpy as np

In [2]:
# this function imports necessary metadata from the tiling scheme file generated during the earlier tiling script
def import_tiling_scheme(tiling_scheme_file):
    import json
    from affine import Affine
    
    with open(tiling_scheme_file) as f:
        tiling_scheme = json.load(f)
    gt = tiling_scheme["transform"]
    geotransform = (gt[2], gt[0], gt[1], gt[5], gt[3], gt[4])
    geotransform = Affine.from_gdal(*geotransform)
    tiling_scheme["transform"] = geotransform
    return tiling_scheme

tiling_scheme = import_tiling_scheme(tiling_scheme_file)

In [3]:
### This section converts a VIA file into generic tile format containing: box, class, tile filename
### If you are using a different annotation format (coco, yolo, etc.) you will probably need your own
### parsing function to convert that into the format that subsequent functions accept
### (see "new_row" line below for desired output format)

def VIA_to_generic_tiles(via_annotations_file):
    import csv, json
    via_annotations_list = []

    # read each line, parse it, convert it, put it all back together
    # then drop it in the appropriate subset
    with open(via_annotations_file, "r") as f:
        reader = csv.reader(f, delimiter=",")
        for line in reader: 
            # skip empty line
            if not line[5]:
                continue
            if '{}' in line[5]:
                continue

            if 'filename' in line[0]:
                continue

            filename = line[0]

            # pulling from column named "region_shape_attributes"
            box_entry = json.loads(line[5])
            top_left_x, top_left_y, width, height = box_entry["x"], box_entry["y"], box_entry["width"], box_entry["height"]
            if width == 0 or height == 0:
                continue
                # skip tiny/empty boxes

            # convert from "top left and width/height" to "x and y values at each corner of the box"
            if top_left_x < 0: top_left_x = 1
            if top_left_y < 0: top_left_y = 1
            x1, x2, y1, y2 = top_left_x, top_left_x + width, top_left_y, top_left_y + height 

            # pulling from column named "region_attributes" to get class names
            class_name = next(iter(json.loads(line[6]).values()))

            # skip unknown class, in this case. Might be useful in other applications though, e.g. total count
            if class_name == "Unknown":
                continue

            # create the annotation row
            new_row = {'box': [(x1,y1), (x2,y1), (x2,y2), (x1,y2)], 'class': class_name, 'tile_ID': filename}

            # append the row to the our list
            via_annotations_list.append(new_row)

    return via_annotations_list

generic_tiles =  VIA_to_generic_tiles(via_annotations_file)
print(generic_tiles[0:5])

[{'box': [(682, 337), (800, 337), (800, 598), (682, 598)], 'class': 'Bottlebrush unk.', 'tile_ID': 'FCAT2APPK---69.png'}, {'box': [(195, 337), (457, 337), (457, 598), (195, 598)], 'class': 'Bottlebrush unk.', 'tile_ID': 'FCAT2APPK---70.png'}, {'box': [(324, 427), (587, 427), (587, 688), (324, 688)], 'class': 'Bottlebrush unk.', 'tile_ID': 'FCAT2APPK---70.png'}, {'box': [(446, 454), (708, 454), (708, 715), (446, 715)], 'class': 'Bottlebrush unk.', 'tile_ID': 'FCAT2APPK---70.png'}, {'box': [(720, 548), (800, 548), (800, 800), (720, 800)], 'class': 'Bottlebrush unk.', 'tile_ID': 'FCAT2APPK---70.png'}]


In [4]:
### This section converts tile annotations to orthomosaic annotations
### then reduces the bounding box format to a more efficient x1y1, x2y2 format
### which is the format we use for the non-max suppression functions

import copy
def tile_annotations_to_ortho(generic_tiles):
    ortho_tiles = copy.deepcopy(generic_tiles)
    for k, i in enumerate(ortho_tiles):
        bounding_box = np.array(i['box'])
        # update the new coordinates format from local tile coordinates to orthomosaic coordinates
        bounding_box = bounding_box + [tiling_scheme['tile_pointers']["image_locations"][i['tile_ID']]]
        ortho_tiles[k]['box'] = bounding_box.tolist()
    return(ortho_tiles)
ortho_annotations = tile_annotations_to_ortho(generic_tiles)

def pairs_to_xyxy(annotations):
    for k, i in enumerate(annotations):
        x_coordinates, y_coordinates = zip(*i['box'])
        x1 = min(x_coordinates)
        y1 = min(y_coordinates)
        x2 = max(x_coordinates)
        y2 = max(y_coordinates)
        # convert our bounding box from coordinates format back to x1/y1/x2/y2 format
        annotations[k]['box'] = [min(x_coordinates), min(y_coordinates), max(x_coordinates), max(y_coordinates)]
    return annotations
ortho_annotations = pairs_to_xyxy(ortho_annotations)
pprint(ortho_annotations[0:5])

### at this point we've converted our tile annotations to orthomosaic annotations, but there are likely redundancies
### especially if there is overlap between tiles. We'll need to use non-maximum supppression or a comparable algorithm
### to eliminate most/all of these redundancies

[{'box': [4578, 824, 4696, 1085],
  'class': 'Bottlebrush unk.',
  'tile_ID': 'FCAT2APPK---69.png'},
 {'box': [4578, 824, 4840, 1085],
  'class': 'Bottlebrush unk.',
  'tile_ID': 'FCAT2APPK---70.png'},
 {'box': [4707, 914, 4970, 1175],
  'class': 'Bottlebrush unk.',
  'tile_ID': 'FCAT2APPK---70.png'},
 {'box': [4829, 941, 5091, 1202],
  'class': 'Bottlebrush unk.',
  'tile_ID': 'FCAT2APPK---70.png'},
 {'box': [5103, 1035, 5183, 1287],
  'class': 'Bottlebrush unk.',
  'tile_ID': 'FCAT2APPK---70.png'}]


In [5]:
print(f'Counts before NMS')
for cl in list(set([i['class'] for i in ortho_annotations])):    
    temp = [i for i in ortho_annotations if i['class'] == cl]
    print(f'{cl}: {len(temp)}/{len(ortho_annotations)}')

Counts before NMS
Fan unk.: 324/10208
Oenocarpus bataua: 80/10208
Astrocaryum standleyanum: 8/10208
Attalea colenda: 12/10208
Palm unk.: 180/10208
Socratea exorrhiza: 76/10208
Iriartea deltoidea: 576/10208
Bottlebrush unk.: 8952/10208


In [6]:
### These functions implement "non-max suppression" to remove redundant boxes
### which are introduced by (scenario # 1) overlap between tiles and/or
### (scenario #2) computer vision proposals, natively.
### They eliminate duplicates based on either IOU (intersection over union) or IOC (intersection over candidate)
### IOU does better for similar boxes over the same object (scenario #2)
### whereas IOC does better for clipped portions of a box overlapping the complete box (scenario #1)

# sourced from Malisiewicz et al.
# https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/

def non_max_suppression_iou(boxes, overlapThresh, probs=None):

    ### this verion performs an intersect-over-union, comparing overlap to the combined areas
    ### good for eliminating similar overlapping boxes using a generous overlap threshold
    
    import numpy as np

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes are integers, convert them to floats -- this
    # is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]

    # compute the area of the bounding boxes and grab the indexes to sort
    # (in the case that no probabilities are provided, simply sort on the
    # areas)
    
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = area

    # if probabilities are provided, sort on them instead
    if probs is not None:
        idxs = probs

    # sort the indexes
    idxs = np.argsort(idxs)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the index value
        # to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) coordinates for the start of the bounding
        # box and the smallest (x, y) coordinates for the end of the bounding
        # box for the shared zone
        
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        
        
        # compute the width and height of the intersection box
        #w, h = np.maximum(0, xx2 - xx1 + 1), np.maximum(0, yy2 - yy1 + 1)
        w, h = np.maximum(0, xx2 - xx1 + 1), np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap between shared area and candidates
        overlap_ratio = (w * h) / ((area[idxs[:last]] + area[i]) - (w * h))
        tf = np.array(overlap_ratio) > overlapThresh
        
        # delete all indexes from the index list that have overlap greater
        # than the provided overlap threshold

        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(np.array(overlap_ratio) > overlapThresh)[0])))
        

    # return the index of the bounding boxes that were picked
    #print(f'{len(pick)} items remaining')
    return pick

def non_max_suppression_ioc(boxes, overlapThresh, probs=None):

    ### this version performs an intersect-over-candidate, comparing overlap to the candidate box's area
    ### good for eliminating similar clipped parts of the same box using a strict overlap threshold
    
    import numpy as np

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes are integers, convert them to floats -- this
    # is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]

    # compute the area of the bounding boxes and grab the indexes to sort
    # (in the case that no probabilities are provided, simply sort on the
    # areas)
    
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = area

    # if probabilities are provided, sort on them instead
    if probs is not None:
        idxs = probs

    # sort the indexes
    idxs = np.argsort(idxs)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the index value
        # to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) coordinates for the start of the bounding
        # box and the smallest (x, y) coordinates for the end of the bounding
        # box for the shared zone
        
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        
        
        # compute the width and height of the intersection box
        #w, h = np.maximum(0, xx2 - xx1 + 1), np.maximum(0, yy2 - yy1 + 1)
        w, h = np.maximum(0, xx2 - xx1 + 1), np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap between shared area and candidates
        overlap_ratio = (w * h) / area[idxs[:last]]
        tf = np.array(overlap_ratio) > overlapThresh
        
        # delete all indexes from the index list that have overlap greater
        # than the provided overlap threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(np.array(overlap_ratio) > overlapThresh)[0])))
        

    # return the index of the bounding boxes that were picked
    #print(f'{len(pick)} items remaining')
    return pick

In [7]:

def nms_iou_on_ortho_annotations(ortho_annotations):
    nms_detection_list=[]
    
    # this loop makes sure we only eliminate redundancies of the same class
    for cl in list(set([i['class'] for i in ortho_annotations])):
        temp = [i for i in ortho_annotations if i['class'] == cl]
        bboxes = np.array([i['box'] for i in temp])
        pick = non_max_suppression_iou(bboxes, 0.4)
        for i in pick:
            nms_detection_list.append(temp[i])
    return nms_detection_list

ortho_annotations_nms = nms_iou_on_ortho_annotations(ortho_annotations)
print(f'{len(ortho_annotations_nms)} remaining from {len(ortho_annotations)} before NMS')

def nms_ioc_on_ortho_annotations(ortho_annotations):
    nms_detection_list=[]
    
    # this loop makes sure we only eliminate redundancies of the same class
    for cl in list(set([i['class'] for i in ortho_annotations])):    
        temp = [i for i in ortho_annotations if i['class'] == cl]
        bboxes = np.array([i['box'] for i in temp])
        pick = non_max_suppression_ioc(bboxes, 0.9)
        for i in pick:
            nms_detection_list.append(temp[i])
    return nms_detection_list

ortho_annotations_nms = nms_ioc_on_ortho_annotations(ortho_annotations)

def xyxy_to_pairs(annotations2):
    annotations = copy.deepcopy(annotations2)
    for k, i in enumerate(annotations):
        bounding_box = [[i['box'][0], i['box'][1]], [i['box'][2], i['box'][1]], [i['box'][2], i['box'][3]], [i['box'][0], i['box'][3]]]
        annotations[k]['box'] = bounding_box
    return annotations

ortho_annotations_nms = xyxy_to_pairs(ortho_annotations_nms)
print(f'{len(ortho_annotations_nms)} remaining from {len(ortho_annotations)} before NMS')
pprint(ortho_annotations_nms[0:5])

4193 remaining from 10208 before NMS
2552 remaining from 10208 before NMS
[{'box': [[6726, 2779], [6989, 2779], [6989, 3040], [6726, 3040]],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---318.png'},
 {'box': [[7331, 14416], [7594, 14416], [7594, 14677], [7331, 14677]],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---1784.png'},
 {'box': [[21939, 12619], [22202, 12619], [22202, 12880], [21939, 12880]],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---1569.png'},
 {'box': [[22055, 13037], [22318, 13037], [22318, 13298], [22055, 13298]],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---1631.png'},
 {'box': [[23259, 12948], [23522, 12948], [23522, 13209], [23259, 13209]],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---1633.png'}]


In [8]:
print(f'Counts after NMS')
for cl in list(set([i['class'] for i in ortho_annotations])):    
    temp = [i for i in ortho_annotations if i['class'] == cl]
    print(f'{cl}: {len(temp)}/{len(ortho_annotations_nms)}')

Counts after NMS
Fan unk.: 324/2552
Oenocarpus bataua: 80/2552
Astrocaryum standleyanum: 8/2552
Attalea colenda: 12/2552
Palm unk.: 180/2552
Socratea exorrhiza: 76/2552
Iriartea deltoidea: 576/2552
Bottlebrush unk.: 8952/2552


In [9]:
# This section converts from orthomosaic annotations to global annotations
def global_transform(box, geotransform):   
    for k, point in enumerate(box):
        point = geotransform * point
        box[k] = point
    return box

def ortho_annotations_to_global(ortho_annotations2, geotransform):
    ortho_annotations = copy.deepcopy(ortho_annotations2)
    for k, i in enumerate(ortho_annotations):
        ortho_annotations_nms[k]['box'] = global_transform(i['box'], geotransform)
    return ortho_annotations
    
global_annotations = ortho_annotations_to_global(ortho_annotations_nms, tiling_scheme["transform"])

pprint(global_annotations[0:5])

[{'box': [(-79.67087372308067, 0.3740214239912682),
          (-79.67076577131166, 0.3740214239912682),
          (-79.67076577131166, 0.3739135733102682),
          (-79.67087372308067, 0.3739135733102682)],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---318.png'},
 {'box': [(-79.67062539296568, 0.36921277121426815),
          (-79.67051744119667, 0.36921277121426815),
          (-79.67051744119667, 0.36910492053326815),
          (-79.67062539296568, 0.36910492053326815)],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---1784.png'},
 {'box': [(-79.66462934946166, 0.3699553293512682),
          (-79.66452139769267, 0.3699553293512682),
          (-79.66452139769267, 0.3698474786702682),
          (-79.66462934946166, 0.3698474786702682)],
  'class': 'Fan unk.',
  'tile_ID': 'FCAT2APPK---1569.png'},
 {'box': [(-79.66458173575367, 0.36978260297326815),
          (-79.66447378398468, 0.36978260297326815),
          (-79.66447378398468, 0.36967475229226815),
          (-79.6645817357536

In [10]:
### During first runs I encountered an error where Fiona didn't recognize the CRS
### some error tracing suggested that this was because my spatial packages were not
### all installed in my conda environment using conda (some used pip)
### and when I tried to rectify this by reinstalling in Conda the result was that
### fiona couldn't access GDAL at all anymore
### the solution I found was to create an environment with python 3.6, gdal 3.0.2
### as described here: https://github.com/OSGeo/gdal/issues/6569
### your experience may vary, if you are not using a conda environment or a windows system

def shapefile_write_out(global_annotations, tiling_scheme, output_dir):
        
    import os, fiona
    from collections import OrderedDict

    output_path = "{o}/annotations.shp".format(o=output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    schema = {
        'geometry': 'Polygon',
        'properties': OrderedDict([
            ('TileID', 'str'),
            ('Class', 'str')
        ])
    }
    
    crs = tiling_scheme['spatial_reference']
    
    with fiona.open(output_path,
                   'w',
                    driver='ESRI Shapefile',
                    crs=crs,
                    schema=schema) as c:
                   
                    for num, i in enumerate(global_annotations):
                        record = {
                            'geometry': {'coordinates': [np.array(i['box']).astype(float)], 'type': 'Polygon'},
                            'id': num,
                            'properties': OrderedDict([('TileID', i['tile_ID']),
                                                       ('Class', i['class']),
                                                       ]),
                            'type': 'Feature'}
                        #print(record)
                        c.write(record)
    return output_path

shapefile_output = shapefile_write_out(global_annotations, tiling_scheme, output_dir)
print(f"annotations saved at {shapefile_output}")

annotations saved at D:/FCAT/annotations//annotations.shp
