In [1]:
import csv
import json
import os
import tempfile

from pynoddgcs.connect import GCS

In [2]:
# Bucket passed to both the download and the upload calls in this notebook.
SOURCE_BUCKET = "nmfs_odp_hq"
# Object path of the COCO annotation JSON that is downloaded and converted.
SOURCE_FILE = "nodd_tools/datasets/gfisher/annotations_worms.json"
# Object path the generated FathomNet-style CSV is uploaded to.
DESTINATION_FILE = "nodd_tools/datasets/gfisher/gfisher_fathomnet.csv"

In [3]:
# Storage client shared by the download and upload cells.
# NOTE(review): presumably picks up credentials from the environment — confirm.
client = GCS()

In [4]:
def generate_fathomnet_dataset_from_coco_object_detection(coco: dict):
    """Lazily convert a COCO object-detection dict into FathomNet-style rows.

    The first item yielded is the header row (a list); every following item
    is a tuple describing one annotation, in the order the annotations appear
    in ``coco['annotations']``.

    Args:
        coco: Parsed COCO document. The code reads ``coco['images']`` (each
            entry needs ``id`` and ``coco_url``), ``coco['categories']``
            (``id`` and ``name``) and ``coco['annotations']``
            (``category_id``, ``image_id``, a four-element ``bbox`` and an
            optional ``iscrowd``).

    Yields:
        ``["concept", "image", "x", "y", "width", "height", "groupof"]``
        first, then one ``(concept, image, x, y, width, height, groupof)``
        tuple per annotation. ``groupof`` is the string ``'true'`` when
        ``iscrowd`` is present and non-zero after ``int()``, else ``'false'``.

    Raises:
        KeyError: If a required key is missing or an annotation refers to an
            unknown image or category id.
    """
    yield ["concept", "image", "x", "y", "width", "height", "groupof"]

    # Lookup tables are built only after the header has been handed out, so a
    # malformed document still produces the header before failing.
    images_by_id = {}
    for image_record in coco['images']:
        images_by_id[image_record['id']] = image_record

    categories_by_id = {}
    for category_record in coco['categories']:
        categories_by_id[category_record['id']] = category_record

    for ann in coco['annotations']:
        category_name = categories_by_id[ann['category_id']]['name']
        image_url = images_by_id[ann['image_id']]['coco_url']
        left, top, box_width, box_height = ann['bbox']

        if 'iscrowd' in ann and int(ann['iscrowd']):
            crowd_flag = 'true'
        else:
            crowd_flag = 'false'

        yield (category_name, image_url, left, top, box_width, box_height, crowd_flag)

In [5]:
# Fetch the COCO annotations into a throwaway directory and parse them.
# The download target is a plain path that this process does not hold open:
# a NamedTemporaryFile cannot be reopened by name on Windows while it is still
# open, so handing its name to the downloader is not portable.
with tempfile.TemporaryDirectory() as scratch_dir:
    local_annotations = os.path.join(scratch_dir, os.path.basename(SOURCE_FILE))
    client.download(SOURCE_BUCKET, SOURCE_FILE, local_annotations)
    # Binary mode lets json.load detect the text encoding itself.
    with open(local_annotations, 'rb') as f:
        raw_coco = json.load(f)

In [6]:
# Stream the converted rows into a local CSV file.
# newline='' is required by the csv module for files it writes: without it,
# platforms that translate '\n' on write (e.g. Windows) end every row with
# '\r\r\n', which shows up as blank lines between records.
with open('gfisher_fathomnet.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(generate_fathomnet_dataset_from_coco_object_detection(raw_coco))

In [7]:
import pandas as pd

In [8]:
# Load the CSV written above back into a DataFrame as a sanity check;
# the bare `df` on the last line renders it as the cell output.
df = pd.read_csv('gfisher_fathomnet.csv')
df

Unnamed: 0,concept,image,x,y,width,height,groupof
0,Mycteroperca microlepis,https://storage.googleapis.com/nmfs_odp_hq/nod...,1762,536,136,72,False
1,Mycteroperca microlepis,https://storage.googleapis.com/nmfs_odp_hq/nod...,1770,546,114,54,False
2,Mycteroperca microlepis,https://storage.googleapis.com/nmfs_odp_hq/nod...,1773,546,93,51,False
3,Mycteroperca microlepis,https://storage.googleapis.com/nmfs_odp_hq/nod...,1776,540,74,56,False
4,Mycteroperca microlepis,https://storage.googleapis.com/nmfs_odp_hq/nod...,1771,536,63,54,False
...,...,...,...,...,...,...,...
640065,Lutjanus campechanus,https://storage.googleapis.com/nmfs_odp_hq/nod...,33,6,645,288,False
640066,Lutjanus campechanus,https://storage.googleapis.com/nmfs_odp_hq/nod...,36,3,444,249,False
640067,Lutjanus campechanus,https://storage.googleapis.com/nmfs_odp_hq/nod...,36,6,381,216,False
640068,Lutjanus campechanus,https://storage.googleapis.com/nmfs_odp_hq/nod...,34,-4,204,179,False


In [13]:
# Print the first rows of the raw CSV (header included) to inspect exactly
# what was written, without pandas' type inference in the way.
# newline='' lets the csv reader handle line endings itself, as the csv
# module documentation requires for files passed to csv.reader.
with open('gfisher_fathomnet.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    for i in range(10):
        row = next(reader, None)
        if row is None:  # file has fewer than ten rows; nothing left to show
            break
        print(row)

['concept', 'image', 'x', 'y', 'width', 'height', 'groupof']
['Mycteroperca microlepis', 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.000000.jpg', '1762', '536', '136', '72', 'false']
['Mycteroperca microlepis', 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.200000.jpg', '1770', '546', '114', '54', 'false']
['Mycteroperca microlepis', 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.400000.jpg', '1773', '546', '93', '51', 'false']
['Mycteroperca microlepis', 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.600000.jpg', '1776', '540', '74', '56', 'false']
['Mycteroperca microlepis', 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.800000.jpg', '1771', '536', '63', '5

In [9]:
# Show the first ten image URLs in full (the DataFrame display truncates them).
df['image'].iloc[:10].tolist()

['https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.000000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.200000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.400000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.600000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.00.800000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.01.000000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.01.400000.jpg',
 'https://storage.googleapis.com/nmfs_odp_hq/nodd_tools/datasets/gfisher/2021_NCD-047b/2021_NCD-047b.mp4.00.00.

In [10]:
# Publish the local CSV to DESTINATION_FILE. Note the target bucket is
# SOURCE_BUCKET, i.e. the same bucket the annotations were downloaded from.
client.upload(SOURCE_BUCKET, 'gfisher_fathomnet.csv', DESTINATION_FILE)

File gfisher_fathomnet.csv uploaded to nodd_tools/datasets/gfisher/gfisher_fathomnet.csv.
