In [1]:
import numpy as np
import csv
import json
import os
from collections import defaultdict

In [2]:
def load_label_hierarchy(fn_tree):
    '''
    Load and parse the label tree.

    Args:
        fn_tree: path of the JSON file holding the label hierarchy.

    Returns:
        The parsed JSON content (whatever object the file encodes).
    '''
    with open(fn_tree, 'r') as fh:
        # The parsed data must be handed back to the caller; previously it was
        # bound to a local and silently discarded.
        return json.load(fh)

Load class hierarchy and name data.

In [5]:
# Parse the class hierarchy: a nested structure whose nodes carry a
# 'LabelName' and optional 'Subcategory' / 'Part' child lists.
with open('./data/annotations/bbox_labels_600_hierarchy.json', 'r') as fh:
    json_data = fh.read()
class_hierarchy = json.loads(json_data)

# Read the (class id, description) pairs with the csv module so that quoted
# descriptions containing commas are parsed as a single field; blank lines
# are skipped instead of raising an IndexError further down.
with open('./data/annotations/class-descriptions-boxable.csv', 'r', newline='') as fh:
    desc_data = [row for row in csv.reader(fh) if row]

# class id -> readable name, and the inverse mapping.
cls_desc = {c[0].strip(): c[1].strip() for c in desc_data}
inv_cls_desc = {c[1].strip(): c[0].strip() for c in desc_data}

# Register the root node by hand (presumably it is not listed in the
# description file -- verify against the data).
cls_desc['/m/0bl9f'] = 'Entity'
inv_cls_desc['Entity'] = '/m/0bl9f'

In [6]:
def convert_entry(entry, cls_desc):
    '''
    Recursively rewrite a hierarchy node so that it uses readable names.

    Args:
        entry: dict with a 'LabelName' key and optional 'Subcategory' / 'Part'
            lists of child nodes of the same shape.
        cls_desc: mapping from the label stored in `entry` to its readable name.

    Returns:
        A new dict mirroring `entry`, with every 'LabelName' replaced by its
        `cls_desc` value. The input node is left untouched.
    '''
    # Resolve the name first so an unknown label fails before any recursion.
    readable_name = cls_desc[entry['LabelName']]
    converted = {
        relation: [convert_entry(child, cls_desc) for child in entry[relation]]
        for relation in ('Subcategory', 'Part')
        if relation in entry
    }
    converted['LabelName'] = readable_name
    return converted

In [7]:
# Hierarchy with every class id replaced by its readable name.
converted_hierarchy = convert_entry(entry=class_hierarchy, cls_desc=cls_desc)

Assign an integer index to each class (keeping 'Entity' at index 0), and build a hierarchy-based positive/negative binary label map.

In [8]:
def list_names(entry, name_list=None):
    '''
    Collect the 'LabelName' of a node and of all its descendants, depth-first.

    Args:
        entry: dict with a 'LabelName' key and optional 'Subcategory' / 'Part'
            lists of child nodes of the same shape.
        name_list: list to append the names to. When omitted, a fresh list is
            created for this call.

    Returns:
        The list the names were appended to (the same object as `name_list`
        when one was supplied). Names may appear more than once if a class is
        reachable through several paths.
    '''
    # A `[]` default would be shared between calls and keep growing, so the
    # list is created per call instead.
    if name_list is None:
        name_list = []
    name_list.append(entry['LabelName'])
    for childname in ['Subcategory', 'Part']:
        if childname in entry:
            for e in entry[childname]:
                list_names(e, name_list)
    return name_list

In [10]:
nlist = []
list_names(converted_hierarchy, nlist)

# keep 'entity' in the index 0
# The remaining names are sorted so that the name <-> index mapping is the
# same on every run; iterating a bare set of strings yields an order that
# changes between interpreter sessions, which would make label files written
# in different sessions disagree on the class indices.
name_list = nlist[0:1] + sorted(set(nlist[1:]) - set(nlist[0:1]))

# Build both lookup tables in a single pass instead of calling
# name_list.index() once per name.
name2idx = {name: idx for idx, name in enumerate(name_list)}
idx2name = {idx: name for idx, name in enumerate(name_list)}

# index -> readable class name, one "idx, name" pair per line
fn_idx2name = 'data/annotations/imagelabel_idx2cname.txt'
with open(fn_idx2name, 'w') as fh:
    for k, v in idx2name.items():
        one_line = '{}, {}\n'.format(k, v)
        fh.write(one_line)

# index -> original class id, one "idx, class_id" pair per line
fn_idx2cid = 'data/annotations/imagelabel_idx2cid.txt'
with open(fn_idx2cid, 'w') as fh:
    for k, v in idx2name.items():
        one_line = '{}, {}\n'.format(k, inv_cls_desc[v])
        fh.write(one_line)

Build the class tree list.
Each entry in the tree contains its class ID, its ancestors (stored under `parent`), and its direct children.

In [11]:
def build_tree(entry, class_tree, inv_cls_desc, pname):
    '''
    Fill `class_tree` with ancestor / child / class-id records for a subtree.

    Args:
        entry: dict with a 'LabelName' key and optional 'Subcategory' / 'Part'
            lists of child nodes of the same shape.
        class_tree: dict that already holds, for every class name in the
            subtree, a dict with list values under 'class_id', 'parent' and
            'children'. It is updated in place.
        inv_cls_desc: mapping from class name to class id.
        pname: list of ancestor names of `entry` (the root 'Entity' is never
            included).

    Returns:
        None. Values are appended on every visit, so a class reached through
        several paths accumulates duplicates.
    '''
    cname = entry['LabelName']
    # Compare by value: `is not` tests object identity, which only matches a
    # string literal when the interpreter happens to have interned both.
    parent = pname + [cname] if cname != 'Entity' else pname
    children = []

    for subname in ['Subcategory', 'Part']:
        if subname in entry:
            for e in entry[subname]:
                children.append(e['LabelName'])
                build_tree(e, class_tree, inv_cls_desc, parent)

    class_tree[cname]['parent'] += pname
    class_tree[cname]['children'] += children
    class_tree[cname]['class_id'].append(inv_cls_desc[cname])

In [12]:
# Start every known class with empty records, then let build_tree fill them.
class_tree = dict(
    (cname, {'class_id': [], 'parent': [], 'children': []})
    for cname in name_list
)
build_tree(converted_hierarchy, class_tree, inv_cls_desc, [])
# remove duplications
class_tree = dict(
    (cname, dict((field, list(set(values))) for field, values in record.items()))
    for cname, record in class_tree.items()
)

Load positive and negative labels for all images.
Options: 'train' or 'validation' (the value is substituted into the annotation file name and the image directory path).

In [21]:
# Dataset split to process; it is formatted into the annotation CSV file name,
# the image directory path and the names of the output files below.
db_type = 'validation'

In [22]:
fn_label = './data/annotations/{}-annotations-human-imagelabels-boxable.csv'.format(db_type)
pos_labels = defaultdict(list)
neg_labels = defaultdict(list)

with open(fn_label, 'r', newline='') as fh:
    reader = csv.reader(fh)
    next(reader, None)  # the first row is the header
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; unpacking one would raise.
            continue
        # Four columns per row: image id, an unused field, class id and a flag
        # that is 1 for a positive label.
        iid, _, cid, is_pos = row
        cname = cls_desc[cid]
        if int(is_pos) == 1:
            pos_labels[iid].append(cname)
        else:
            neg_labels[iid].append(cname)

# Freeze into plain dicts so later lookups cannot silently create empty entries.
pos_labels = dict(pos_labels)
neg_labels = dict(neg_labels)
# Sorted so the image list comes out in the same order on every run; a bare
# set of strings iterates in an order that varies between sessions.
iid_all = sorted(set(pos_labels) | set(neg_labels))

Remove images not in the image directory.

In [23]:
# Keep only the image ids that have a matching '<id>.jpg' under the split's
# image directory.
path_img = './data/images/{}'.format(db_type)
iid_all = [
    image_id
    for image_id in iid_all
    if os.path.exists(os.path.join(path_img, image_id + '.jpg'))
]

Update positive and negative labels w.r.t the class tree.
Rules are:
1. For a positive label, add all of its ancestors (the `parent` list in the class tree) as positive.
2. For a negative label, add its direct children (the `children` list in the class tree) as negative.

In [24]:
def update_labels(labels, class_tree, is_pos):
    '''
    Expand a list of labels with related classes from the class tree.

    Args:
        labels: class names attached to one image.
        class_tree: mapping from class name to a dict holding 'parent' and
            'children' lists.
        is_pos: when true, the 'parent' entries of each label are added;
            otherwise the 'children' entries are added.

    Returns:
        A new list with the original labels plus the added ones, without
        duplicates and in no particular order.
    '''
    relation = 'parent' if is_pos else 'children'
    expanded = list(labels)
    for label in labels:
        expanded.extend(class_tree[label][relation])
    return list(set(expanded))

In [25]:
# Positive labels per image, expanded with the 'parent' entries of each label.
new_pos_labels = dict(
    (image_id, update_labels(image_labels, class_tree, is_pos=True))
    for image_id, image_labels in pos_labels.items()
)

In [26]:
# Negative labels per image, expanded with the 'children' entries of each label.
new_neg_labels = dict(
    (image_id, update_labels(image_labels, class_tree, is_pos=False))
    for image_id, image_labels in neg_labels.items()
)

Save labels into the two files, one for positive and one for negative

In [27]:
# One image id per line, in the order of iid_all.
fn_imagelist = 'data/annotations/{}_imagelist.txt'.format(db_type)
with open(fn_imagelist, 'w') as fh:
    fh.writelines(image_id + '\n' for image_id in iid_all)

In [28]:
# Each line: image id followed by the comma-separated indices of its positive labels.
fn_positive_labels = 'data/annotations/{}_positive_imagelabel.txt'.format(db_type)
with open(fn_positive_labels, 'w') as fh:
    for image_id, label_names in new_pos_labels.items():
        # -1 to name2idx to remove 'entity'
        shifted = (str(name2idx[name] - 1) for name in label_names)
        fh.write('{}, '.format(image_id) + ', '.join(shifted) + '\n')

In [29]:
# Each line: image id followed by the comma-separated indices of its negative labels.
fn_negative_labels = 'data/annotations/{}_negative_imagelabel.txt'.format(db_type)
with open(fn_negative_labels, 'w') as fh:
    for image_id, label_names in new_neg_labels.items():
        # -1 to name2idx to remove 'entity'
        shifted = (str(name2idx[name] - 1) for name in label_names)
        fh.write('{}, '.format(image_id) + ', '.join(shifted) + '\n')