## Auditing and Exporting Detections

#### Before running this script, make sure that your Google Drive folder contains the orthomosaic GeoTiff (`step 0`) and no other GeoTIFF files, and the `data.json` file you created (`step 1`) and the `new_detections.json` file you created (`step 4`). If you want to add your new CNN detections to the manually annotated detections you created in VIA, also add the JSON file you exported using VIA in `step 2`. You will need to input that file name directly, since it is not standardized in our workflow.

<a href="https://colab.research.google.com/github/gl7176/GreySealCNN/blob/master/5_export_detections.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
#####  <center> Be sure to update this hyperlink above if you clone and want to point to a different GitHub </center>

In [1]:
!pip install -U -q PyDrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# choose a local (colab) directory to store the data.
local_download_path = os.path.expanduser('data')
try:
  os.makedirs(local_download_path)
except: pass

# 2. Auto-iterate using the query syntax
#    https://developers.google.com/drive/v2/web/search-parameters

# set variable to the destination google drive folder you want to pull from
drive_folder = 'https://drive.google.com/drive/folders/1INuRNVKvKMy8L_Nb6lmoVbyvScWK0-0D'

# this bit points the code to that google drive folder
pointer = str("'" + drive_folder.split("/")[-1] + "'" + " in parents")

file_list = drive.ListFile(
    {'q': pointer}).GetList()

#    this bit pulls every file in the directory specified above
orthomosaic_file = {}
count = 0
for f in file_list:
  count += 1
  if count % 10 == 0:
    print(count)
  # 3. Create & download by id.
  fname = os.path.join(local_download_path, f['title'])
  if fname.endswith(".tif") or fname.ednswith(".json"):
    f_ = drive.CreateFile({'id': f['id']})
    f_.GetContentFile(fname)
    os.stat(fname)
    # if the file is a *.tif and larger than 100 mb we label it the orthomosaic
    if fname.endswith(".tif") and os.stat(fname).st_size > 10^8 :
      # if there are multiple orthomosaic files detected we spit an error
      if len(orthomosaic_file) != 0:
        raise Exception("more than one orthomosaic file identified based on size and type")
      orthomosaic_file = fname
      print("orthomosaic identified as " + orthomosaic_file)
  

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230


In [6]:
import os
import argparse
import numpy as np
import json
import csv
!pip install rasterio
import rasterio

!shapely.geometry import mapping, Polygon
!pip install fiona
import fiona # only required for exporting to shapefiles

/bin/bash: shapely.geometry: command not found
Collecting fiona
[?25l  Downloading https://files.pythonhosted.org/packages/36/8b/e8b2c11bed5373c8e98edb85ce891b09aa1f4210fd451d0fb3696b7695a2/Fiona-1.8.17-cp36-cp36m-manylinux1_x86_64.whl (14.8MB)
[K     |████████████████████████████████| 14.8MB 322kB/s 
[?25hCollecting munch
  Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Installing collected packages: munch, fiona
Successfully installed fiona-1.8.17 munch-2.5.0


In [37]:
# ingest the image
infile = "data/2015_02_02_hay_island_flight03_s110rgb_jpeg_mosaic_group1.tif"

img_dir = infile.split(".")[0]
prj_name = img_dir.split("/")[-1]
data_dir = img_dir.split("/")[0]

In [38]:
# open the output from seal_detection

with open('data/new_detections.json') as f:
    detected_labels = json.load(f)

In [40]:
# open the previous training data

with open(data_dir + '/via_SealCNN_TrainingData.json') as f:
    existing_labels = json.load(f)

In [41]:
len(detected_labels)

8

### Exporting to Shapefile

In [47]:
image_annotations = []
for key, value in detected_labels.items():
    print(key)
    annotation = [[key][0].split("/")[-1]]
    detections = []
    for item in value:
        box = item['box']
        detections.append(box)
        print(item)
    annotation.append(detections)
    image_annotations.append(annotation)

data/2015_02_02_hay_island_flight03_s110rgb_jpeg_mosaic_group1---211.png
{'box': [156, 901, 206, 985], 'label': 0, 'score': 0.8815857768058777}
{'box': [737, 767, 796, 824], 'label': 0, 'score': 0.8385722637176514}
{'box': [364, 428, 404, 470], 'label': 1, 'score': 0.7205950021743774}
{'box': [345, 554, 417, 611], 'label': 0, 'score': 0.6761151552200317}
{'box': [397, 806, 441, 840], 'label': 1, 'score': 0.6630175709724426}
{'box': [387, 678, 427, 720], 'label': 1, 'score': 0.6524453163146973}
{'box': [131, 265, 182, 356], 'label': 0, 'score': 0.6438854336738586}
{'box': [233, 325, 291, 380], 'label': 0, 'score': 0.6438494920730591}
{'box': [258, 458, 296, 505], 'label': 1, 'score': 0.6359509229660034}
{'box': [223, 302, 269, 336], 'label': 1, 'score': 0.6229948997497559}
{'box': [374, 403, 421, 434], 'label': 1, 'score': 0.6110461354255676}
{'box': [367, 406, 418, 465], 'label': 0, 'score': 0.6066299080848694}
{'box': [779, 485, 853, 541], 'label': 0, 'score': 0.5931316614151001}
{'bo

In [48]:
scores = []
for key, value in detected_labels.items():
    for item in value:
        score = item['score']
        scores.append(score)

In [54]:
with open('data/data.json') as f:
  img_data = json.load(f)

image_bbox = []
for annotation in image_annotations:
    for detection in annotation[1]:
        try:
            local_bounding_box = np.array([[detection[0], detection[1]], [detection[2], detection[1]], [detection[2], detection[3]], [detection[0], detection[3]]]).astype(int)
            image_located_bb = local_bounding_box + [img_data["image_locations"][annotation[0]]]
            image_bbox.append(image_located_bb)
            
        except ValueError: # if the image doesn't have a detection
            pass

In [55]:
bbox = []
for annotation in image_bbox:
    x1 = annotation[0][0]
    y1 = annotation[0][1]
    x2 = annotation[1][0]
    y2 = annotation[2][1]
    bounding_box = [x1,y1,x2,y2]
    bbox.append(bounding_box)

In [58]:
# Malisiewicz et al.
# import the necessary packages
import numpy as np

def non_max_suppression(boxes, probs=None, overlapThresh=0.6):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes are integers, convert them to floats -- this
    # is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and grab the indexes to sort
    # (in the case that no probabilities are provided, simply sort on the
    # bottom-left y-coordinate)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = y2

    # if probabilities are provided, sort on them instead
    if probs is not None:
        idxs = probs

    # sort the indexes
    idxs = np.argsort(idxs)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the index value
        # to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) coordinates for the start of the bounding
        # box and the smallest (x, y) coordinates for the end of the bounding
        # box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]

        # delete all indexes from the index list that have overlap greater
        # than the provided overlap threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked
    return boxes[pick].astype("int")

In [64]:
bboxes = np.array(bbox)
pick = non_max_suppression(bboxes, scores, 0.6)
pick_list = pick.tolist()

In [66]:
# ingest back in the coordinates of detections within an image referenced by their filename

with open('data/data.json') as f:
    img_data = json.load(f)

# open the satellite image
dataset = rasterio.open(infile)

geolocated_annotations_before_nms = []

for annotation in image_annotations:
    for detection in annotation[1]:
        try:            
            local_bounding_box = np.array([[detection[0], detection[1]], [detection[2], detection[1]], [detection[2], detection[3]], [detection[0], detection[3]]]).astype(int)
            image_located_bb = local_bounding_box + [img_data["image_locations"][annotation[0]]]
            
            geolocated_bb = []
            for point in image_located_bb:
                geolocated_bb.append(dataset.transform * point)
            geolocated_annotations_before_nms.append(geolocated_bb)
        except ValueError: # if the image doesn't have a detection
            pass

geolocated_annotations_after_nms = []
        
for box in pick_list:
    image_located_bb = np.array([[box[0], box[1]], [box[2], box[1]], [box[2], box[3]], [box[0], box[3]]]).astype(int)
            
    geolocated_bb = []
    for point in image_located_bb:
        geolocated_bb.append(dataset.transform * point)
    geolocated_annotations_after_nms.append(geolocated_bb)

In [1]:
print("before NMS: " + str(len(geolocated_annotations_before_nms)))
print("after NMS: " + str(len(geolocated_annotations_after_nms)))

NameError: name 'geolocated_annotations_before_nms' is not defined

In [69]:
len(geolocated_annotations_after_nms)

98

In [46]:
# write out the detections as a shapefile

from collections import OrderedDict
import fiona
from fiona.crs import from_epsg

# Define your schema as a polygon geom with a couple of fields
schema = {
    'geometry': 'Polygon',
    'properties': OrderedDict([
        ('ImageName', 'str'),
        ('Detection', 'str')
  ])
}

with fiona.open(
    'shapefiles_albatross/final/grandjason_seblob.shp',
    'w',
    driver='ESRI Shapefile',
    crs=dataset.crs,
    schema=schema) as c:
    
    for num, polygon in enumerate(geolocated_annotations_after_nms):
        record = {
            'geometry': {'coordinates': [polygon], 'type': 'Polygon'},
            'id': num,
            'properties': OrderedDict([('ImageName', infile),
                                       ('Detection', 'Albatross')
                                       ]),
            'type': 'Feature'}
        c.write(record)

### RetinaNet to Existing VIA

In [None]:
# add the new detections to the old via_region_data.json file

#"11_fiX1mEhK","[""2015_02_02_hay_island_flight03_s110rgb_jpeg_mosaic_group1---27.png""]",0,"[]","[2,509.275,929.174,89.376,48.904]","{}"

for filepath, detections in detected_labels.items():
    fn = filepath.split("/")[-1]
    # TODO is deep copy correct?
    for filename_size, metadata in existing_labels.items():
        if fn == metadata["filename"]:
            for detection in detections:
                # 'box' : [x1, y1, x2, y2]
                x1 = detection["box"][0]
                y1 = detection["box"][1]
                x2 = detection["box"][2]
                y2 = detection["box"][3]
                #print(x1,x2,y1,y2)
                metadata["regions"].append({'shape_attributes': {'name': 'rect', 'x': x1, 'y': y1, 'width': x2-x1, 'height': y2-y1}, 'region_attributes': {}})
    

In [None]:
# write out new VIA file with additional detections

with open(img_dir + '/via_region_data_detections.json', 'w') as fp:
    json.dump(existing_labels, fp)