In [None]:
import numpy as np
import pandas as pd
import yaml
import json
import glob
import os
import warnings
import pymap3d
from tqdm import tqdm

In [None]:
# CSV of labelled images; it is loaded with pd.read_csv further down.
label_csv = (
    "/home/auv/Downloads/to_jackson/finetuning/m5169_4classes_18dives/"
    "sampled_images_tasmania_2008_18dives_dan_pred.csv"
)
# Folder that is globbed for one sub-directory per dive.
existingdivedir = (
    "/home/auv/data/jhs/datasets/2008/Tasmania200810/"
    "tasmania-2008-datasets/dives"
)
# Destination folder for the generated per-dive JSON files (relative to the
# current working directory).
output_dir = os.path.join("cocos", "4_class")


In [None]:
# Read the label CSV named by `label_csv` into a DataFrame with the default
# integer index.
df = pd.read_csv(label_csv)


In [None]:
# Each match of "<existingdivedir>/*/" ends in a path separator, so dirname()
# drops that separator and basename() leaves the bare dive folder name.
dive_list = [
    os.path.basename(os.path.dirname(dive_path))
    for dive_path in glob.glob(existingdivedir + "/*/")
]
# The second-to-last "_"-separated token of a folder name is parsed as the
# dive number (int() raises ValueError if that token is not numeric).
divenum_to_divestr = {
    int(folder_name.split('_')[-2]): folder_name
    for folder_name in dive_list
}
# Reverse lookup: dive folder name -> dive number.
divestr_to_divenum = {
    folder_name: dive_number
    for dive_number, folder_name in divenum_to_divestr.items()
}


def tkrelpath_to_dive(relpath):
    """Translate a label's relative path into the matching dive folder name.

    The text between the first 'tasmania_' and the following '/' is parsed as
    an integer dive number and looked up in ``divenum_to_divestr``.

    Args:
        relpath: Relative path string containing 'tasmania_<number>/'.

    Returns:
        The dive folder name stored for that dive number.

    Raises:
        IndexError: if 'tasmania_' does not occur in ``relpath``.
        ValueError: if the text after 'tasmania_' is not an integer.
        KeyError: if ``divenum_to_divestr`` has no entry for the number.
    """
    after_prefix = relpath.split('tasmania_')[1]
    number_text = after_prefix.partition('/')[0]
    return divenum_to_divestr[int(number_text)]

In [None]:
# Group the label rows by dive: one list of row Series per known dive folder,
# turned into one DataFrame per dive at the end.
dive_to_series = {d: [] for d in dive_list}
missing_rows = 0
for index, row in df.iterrows():
    try:
        d = tkrelpath_to_dive(row['relative_path'])
    except KeyError:
        # tkrelpath_to_dive raises KeyError when the dive number parsed from
        # the path has no folder on disk. Skip the row instead of aborting.
        missing_rows += 1
        continue
    # Every name returned by tkrelpath_to_dive comes from dive_list, so the
    # key is always present here.
    dive_to_series[d].append(row)
if missing_rows:
    # Warn once with a count rather than once per skipped row.
    warnings.warn("Dive not available for %d label rows; they were skipped" % missing_rows)
# A dive with no labels yields an empty DataFrame.
dive_to_df = {k: pd.DataFrame(v) for k, v in dive_to_series.items()}

In [None]:
def divedf_to_coco(divedf, divename, output_coco):
    """Write the labelled images of one dive to a COCO-style JSON file.

    Every row of ``divedf`` produces one entry in "images" and one entry in
    "annotations"; both use the row's index label as their id. One category is
    emitted per distinct 'pred' value, in order of first appearance, with the
    stringified value as its name.

    Args:
        divedf: DataFrame with the columns 'relative_path', 'pred',
            'latitude [deg]', 'longitude [deg]', 'bathymetry [m]',
            'roll [deg]', 'pitch [deg]', 'heading [deg]' and 'altitude [m]'.
            An empty frame produces a file with three empty lists.
        divename: Dive name; used in each image's "path" and as the
            progress-bar label.
        output_coco: Path of the JSON file to write (overwritten if present).

    Returns:
        None. The dataset is written to ``output_coco``.
    """
    images = []
    anns = []
    # dict keys keep insertion order, so this acts as an ordered set of the
    # category ids seen so far.
    seen_cids = {}
    # [latitude, longitude] of the first row; every position below is
    # expressed relative to it.
    datum = None

    # Label the bar with the divename argument so the function does not depend
    # on any notebook-level variable.
    for index, row in tqdm(divedf.iterrows(), total=len(divedf), desc=divename):
        lat = row['latitude [deg]']
        lon = row['longitude [deg]']
        if datum is None:
            datum = [lat, lon]
        # The row's index label doubles as image id and annotation id.
        iid = index
        aid = index
        cid = row['pred']
        fn = os.path.basename(row['relative_path'])
        # Both heights are passed as 0, so only lat/lon drive the offset.
        north, east, down = pymap3d.geodetic2ned(lat=lat, lon=lon, h=0., lat0=datum[0], lon0=datum[1], h0=0.)
        images.append({
            "file_name": fn,
            "geo_location": [lat, lon, row['bathymetry [m]']],
            # NOTE(review): fixed image size for every row; confirm it matches
            # the actual images.
            "height": 1024,
            "width": 1360,
            "id": iid,
            "path": os.path.join('dummy', divename, fn),
            "pose": {
                # Degrees converted to radians.
                "orientation": [
                    row['roll [deg]'] * np.pi /180.0,
                    row['pitch [deg]'] * np.pi /180.0,
                    row['heading [deg]'] * np.pi /180.0],
                "altitude": row['altitude [m]'],
                "position": [north, east, down]
            }
        })
        anns.append({
            "annotation_type": "point",
            # NOTE(review): "area" and "bbox" are constants that look like
            # placeholders; confirm nothing downstream relies on them.
            "area": 10,
            "bbox": [1,2,3,4],
            "category_id": cid,
            "id": aid,
            "iscrowd": False,
            "image_id": iid,
            "occluded": False
        })
        seen_cids.setdefault(cid, None)

    cats = [
        {
            "id": cid,
            "name": str(cid),
            "supercategory": ""
        }
        for cid in seen_cids
    ]

    dataset = {
        "images": images,
        "annotations": anns,
        "categories": cats
    }
    # The context manager guarantees the file is flushed and closed.
    with open(output_coco, 'w') as coco_file:
        json.dump(dataset, coco_file, indent=4)


In [None]:
# Make sure the destination folder exists before any file is written.
os.makedirs(output_dir, exist_ok=True)

# One JSON file per dive, named "<dive>.json" inside output_dir.
for dive in dive_to_df:
    ddf = dive_to_df[dive]
    coco_path = os.path.join(output_dir, dive + ".json")
    divedf_to_coco(ddf, dive, coco_path)

In [None]:
# Scratch cell: show the relative path stored in the row labelled 0.
ss = df.loc[0, 'relative_path']
print(ss)
# Bare expression so the notebook displays `row`, i.e. whatever the most
# recently executed module-level row loop left behind.
row

In [None]:
# Scratch cell: collect the leading rows of the label table. The loop stops
# right after the 11th row has been appended (fewer if df is shorter).
tmp = []
count = 0  # stays 0 when df has no rows
for count, (index, row) in enumerate(df.iterrows(), start=1):
    tmp.append(row)
    if count > 10:
        break

In [None]:
# Build a small DataFrame from the row Series collected in `tmp`.
tmp_df = pd.DataFrame(tmp)