In [1]:
import pycocowriter.cocomerge
import csv
import json
import tempfile
from pynoddgcs.connect import GCS

In [2]:
# Bucket used for both the download of the source file and the upload of the result.
SOURCE_BUCKET = "nmfs_odp_hq"
# Object path of the annotation file that is downloaded and parsed as JSON.
SOURCE_FILE = "nodd_tools/datasets/gfisher/annotations.json"
# Object path the rewritten annotation file is uploaded to.
DESTINATION_FILE = "nodd_tools/datasets/gfisher/annotations_worms.json"

In [3]:
# Storage client shared by the download and upload cells below.
# NOTE(review): how GCS() obtains credentials is not visible here — confirm.
client = GCS()

In [4]:
# Build the lookup used to rename categories: for every data row of the CSV,
# the first column becomes the key and the second column the value.
# newline='' is what the csv module asks for, so that quoted fields with
# embedded newlines are parsed correctly.
with open('worms_map.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    header = next(reader)  # the first row is consumed as the header, not as a mapping
    # csv.reader yields an empty list for a blank line; skip those so a
    # stray or trailing blank line cannot raise IndexError on row[0].
    name_map = {row[0]: row[1] for row in reader if row}

In [5]:
# Fetch the source annotations into a throwaway file and parse them as JSON.
with tempfile.NamedTemporaryFile() as tmp:
    # The download call is given the temp file's path as its target ...
    client.download(SOURCE_BUCKET, SOURCE_FILE, tmp.name)
    # ... and the still-open handle is then read from its start and decoded.
    raw_coco = json.loads(tmp.read())

In [6]:
108965130

108965130

## Updating "crowd" type categories to have the "iscrowd" attribute

"SCHOOL" (and possibly other categories) is a "crowd" type category, so every annotation assigned to it should be flagged with `iscrowd = 1`.

In [7]:
# Names of the categories that are treated as "crowd" type.
crowd_categories = {"SCHOOL"}
# Resolve those names to the category ids used in this file.
crowd_category_ids = {
    cat["id"] for cat in raw_coco["categories"] if cat["name"] in crowd_categories
}
# An annotation refers to its category through "category_id"; its own "id" is
# the annotation's identifier. Comparing "id" against category ids would flag
# unrelated annotations (or none), so the category reference is tested here.
for ann in raw_coco["annotations"]:
    if ann["category_id"] in crowd_category_ids:
        ann["iscrowd"] = 1
# Show the resolved ids as the cell output.
crowd_category_ids

{114}

In [8]:
# Rewrite the category set of raw_coco in three passes. The return values are
# discarded, so the helpers appear to modify raw_coco in place.
# NOTE(review): the helpers live in pycocowriter.cocomerge and are not shown
# here; the step descriptions below are inferred from their names — confirm.
# 1. Rename categories using the name_map lookup loaded from worms_map.csv.
pycocowriter.cocomerge.coco_remap_categories(raw_coco, name_map)
# 2. Presumably merges categories that share a name after the rename.
pycocowriter.cocomerge.coco_collapse_categories(raw_coco)
# 3. Presumably renumbers the category ids.
pycocowriter.cocomerge.coco_reindex_categories(raw_coco)
# Display the resulting category list for inspection.
raw_coco['categories']

[{'name': 'Mycteroperca microlepis', 'id': 1},
 {'name': 'Serranus phoebe', 'id': 2},
 {'name': 'Seriola fasciata', 'id': 3},
 {'name': 'Rhomboplites aurorubens', 'id': 4},
 {'name': 'Lutjanus synagris', 'id': 5},
 {'name': 'Callionymidae', 'id': 6},
 {'name': 'Epinephelus morio', 'id': 7},
 {'name': 'Halichoeres', 'id': 8},
 {'name': 'Pterois', 'id': 9},
 {'name': 'Bodianus pulchellus', 'id': 10},
 {'name': 'Haemulon aurolineatum', 'id': 11},
 {'name': 'Lutjanus griseus', 'id': 12},
 {'name': 'Pomacentridae', 'id': 13},
 {'name': 'Seriola', 'id': 14},
 {'name': 'Pristigenys alta', 'id': 15},
 {'name': 'Caranx bartholomaei', 'id': 16},
 {'name': 'Balistes capriscus', 'id': 17},
 {'name': 'Lutjanus campechanus', 'id': 18},
 {'name': 'Chaetodon ocellatus', 'id': 19},
 {'name': 'Holacanthus bermudensis', 'id': 20},
 {'name': 'Mycteroperca phenax', 'id': 21},
 {'name': 'Cephalopholis cruentata', 'id': 22},
 {'name': 'Lutjanus', 'id': 23},
 {'name': 'Scomberomorus maculatus', 'id': 24},
 {'

## Upload the rectified file to GCS

In [9]:
# Serialise the updated annotations to a temporary text file and send it to
# DESTINATION_FILE in SOURCE_BUCKET.
with tempfile.NamedTemporaryFile(mode='w') as tmp:
    json.dump(raw_coco, tmp)
    # The upload is handed the file's path rather than this handle, so any
    # buffered output has to be flushed to disk first.
    tmp.flush()
    client.upload(SOURCE_BUCKET, tmp.name, DESTINATION_FILE)

File /tmp/tmp7ong7bb1 uploaded to nodd_tools/datasets/gfisher/annotations_worms.json.
