In [1]:
import ast
from collections import defaultdict

from nltk.corpus import wordnet as wn

In [2]:
# Load the table that maps an ImageNet class index to its WordNet synset record
# (each record has 'id', 'label' and 'uri' entries, as the printed sample shows).
# The file is expected to contain a plain Python literal, so it is parsed with
# ast.literal_eval, which only accepts literals and cannot execute code the way
# eval() on file contents can.
with open('../data/concept_sets/imagenet_label_to_wordnet_synset.txt', 'r') as f:
    class_to_id = ast.literal_eval(f.read())
print(class_to_id[0])

def get_all_hypernyms(synset):
    """Return every ancestor of ``synset`` reachable through ``hypernyms()``.

    Each direct hypernym is expanded recursively first, and the hypernym itself
    is appended after its own ancestors. An ancestor reachable along several
    paths therefore appears once per path. A synset without hypernyms yields an
    empty list.
    """
    ancestors = []
    # `or ()` keeps the original behaviour of treating any falsy result as "no parents".
    for parent in synset.hypernyms() or ():
        ancestors += get_all_hypernyms(parent)
        ancestors.append(parent)
    return ancestors

# Collect, for every hypernym ("superclass") name, the set of class indices
# that have that hypernym somewhere above them in WordNet.
superclass_id_sets = defaultdict(set)

# Iterate over however many classes the table holds instead of a hard-coded 1000.
for i in range(len(class_to_id)):
    # Ids look like '01440764-n': a numeric WordNet offset and a part-of-speech letter.
    wn_id = class_to_id[i]['id']
    wn_number, wn_letter = wn_id.split('-')
    synset = wn.synset_from_pos_and_offset(wn_letter, int(wn_number))
    for s_synset in get_all_hypernyms(synset):
        # Only the part of the synset name before the first '.' is kept, so
        # distinct synsets sharing that leading part are merged under one key.
        name = s_synset.name().split('.')[0].replace('_', ' ')
        superclass_id_sets[name].add(i)

# Freeze into a plain dict of sorted lists (deterministic and JSON-serialisable).
superclass_to_ids = {name: sorted(ids) for name, ids in superclass_id_sets.items()}
print(len(superclass_to_ids))

{'id': '01440764-n', 'label': 'tench, Tinca tinca', 'uri': 'http://wordnet-rdf.princeton.edu/wn30/01440764-n'}
834


## Delete superclasses that contain only a single class (i.e. are identical to a final class)

In [3]:
# A superclass with exactly one member carries no more information than that
# member itself, so collect those names first and then remove them.
to_delete = [name for name, ids in superclass_to_ids.items() if len(ids) == 1]

for name in to_delete:
    superclass_to_ids.pop(name)

print(len(superclass_to_ids))

464


## Delete duplicate superclasses

In [4]:
# Report every ordered pair of differently named superclasses whose member
# lists are equal (each pair is therefore printed once in each direction).
for first, first_ids in superclass_to_ids.items():
    for second, second_ids in superclass_to_ids.items():
        if first != second and first_ids == second_ids:
            print("Identical: {}: {}".format(first, second))

Identical: living thing: organism
Identical: organism: living thing
Identical: chordate: vertebrate
Identical: vertebrate: chordate
Identical: aquatic vertebrate: fish
Identical: fish: aquatic vertebrate
Identical: bony fish: teleost fish
Identical: teleost fish: bony fish
Identical: cypriniform fish: cyprinid
Identical: cyprinid: cypriniform fish
Identical: cartilaginous fish: elasmobranch
Identical: elasmobranch: cartilaginous fish
Identical: passerine: oscine
Identical: oscine: passerine
Identical: anapsid: chelonian
Identical: anapsid: turtle
Identical: chelonian: anapsid
Identical: chelonian: turtle
Identical: turtle: anapsid
Identical: turtle: chelonian
Identical: saurian: lizard
Identical: lizard: saurian
Identical: constrictor: boa
Identical: boa: constrictor
Identical: pit viper: rattlesnake
Identical: rattlesnake: pit viper
Identical: gallinaceous bird: game bird
Identical: game bird: gallinaceous bird
Identical: waterfowl: anseriform bird
Identical: anseriform bird: waterfow

In [5]:
# Delete duplicate superclasses (those with the same member list as another
# superclass), subjectively keeping the more understandable name / the one
# more descriptive of the leaf classes.
to_delete = ["living thing", "chordate", "aquatic vertebrate", "teleost fish", "cyprinid", "elasmobranch", "passerine", "anapsid",
             "chelonian", "saurian", "constrictor", "pit viper", "gallinaceous bird", "anseriform bird", "monotreme", "metatherian",
            "cetacean", "racer", "scarabaeid beetle", "lepidopterous insect", "lagomorph", "odd-toed ungulate", "ruminant", "anthropoid ape",
            "pachyderm", "proboscidean", "outerwear", "outbuilding", "mercantile establishment", "writing implement",
            "piece", "neckwear", "shield", "kitchen utensil", "cooking utensil", "cutting implement", "edge tool", "personal computer",
            "durables", "cosmetic", "agent", "wagon", "electrical device", "transducer", "scientific instrument", "article", "house",
            "photographic equipment", "tank", "ingredient", "flavorer", "baked goods", "snack food", "plant part", "plant organ",
             "reproductive structure", "fluid", "liquid", "drug of abuse", "punch", "vascular plant", "spermatophyte", "angiosperm", "ware"]

for key in to_delete:
    try:
        del superclass_to_ids[key]
    except KeyError:
        # Report names that are not present instead of aborting the cell.
        print("failed to delete:{}".format(key))
# print() returns None, so the former print(print(...)) emitted a stray "None" line.
print(len(superclass_to_ids))

# Sanity check: list any pairs of superclasses that still share identical members.
for key in superclass_to_ids:
    for key2 in superclass_to_ids:
        if key == key2:
            continue
        elif superclass_to_ids[key] == superclass_to_ids[key2]:
            print("Identical: {}, {}".format(key, key2))

400
None


In [6]:
import json

# Persist the final superclass -> class-index mapping as JSON.
output_path = '../data/concept_sets/superclass_to_ids.json'
with open(output_path, 'w') as out_file:
    out_file.write(json.dumps(superclass_to_ids))