In [1]:
import networkx as nx
import requests
import urllib
import gzip

In [2]:
# Directory holding the per-word data files read by later cells.
# The trailing slash is required: paths are built by plain string
# concatenation (file_dir + word + ...).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
file_dir = '/Users/annaysun/codenames/babelnet_v4/'

In [136]:
# Empty directed graph; the loading cell adds (source, target) edges to it.
G = nx.DiGraph()

In [137]:
word = 'bear'
# Each line of the gzip file is one tab-separated record with six fields.
# Text mode ('rt') decodes UTF-8 while reading, so no per-line decode is needed.
with gzip.open(file_dir + word + '.gz', 'rt', encoding='utf-8') as f:
    for line in f:
        source, target, language, short_name, relation_group, is_automatic = line.strip().split('\t')
        # Keep only HYPERNYM records whose is_automatic field is the string 'False'.
        if relation_group == 'HYPERNYM' and is_automatic == 'False':
            G.add_edge(source, target)

In [30]:
# NOTE(review): scratch cell. It relies on `line` left over from a loop in
# another cell; if `line` is still the bytes object read from the gzip file,
# splitting on a str separator raises TypeError. Looks like leftover
# debugging; confirm whether it can be deleted.
source, target, language, short_name, relation_group, is_automatic = line.split('\t')

In [138]:
# Report the size of G: node count on the first line, edge count on the second.
print(G.number_of_nodes(), G.number_of_edges(), sep='\n')

28
28


In [139]:
# Map every node reachable from any of the word's synsets (within 10 hops
# along G's edges) to the smallest hop count found from any synset.
nn_w_dists = {}
synsets_path = file_dir + word + '_synsets'
with open(synsets_path, 'r') as f:
    for line in f:
        # One synset ID per line.
        synset = line.strip()
        lengths = nx.single_source_shortest_path_length(G, source=synset, cutoff=10)
        for neighbor, length in lengths.items():
            if neighbor in nn_w_dists:
                # Already reached from an earlier synset: report it, keep the shorter distance.
                print(neighbor, 'length:', length, 'prev length:', nn_w_dists[neighbor])
                nn_w_dists[neighbor] = min(length, nn_w_dists[neighbor])
            else:
                nn_w_dists[neighbor] = length

In [140]:
# Distances collected for word = 'bear' (node ID -> hop count from the nearest synset).
nn_w_dists

{'bn:00083228v': 0,
 'bn:00089240v': 1,
 'bn:00009342n': 0,
 'bn:00016143n': 1,
 'bn:00076248n': 1,
 'bn:00031965n': 2,
 'bn:00009677n': 2,
 'bn:00010540n': 2,
 'bn:17528560n': 2,
 'bn:00053079n': 3,
 'bn:00019393n': 3,
 'bn:00004257n': 3,
 'bn:00031027n': 3,
 'bn:00021547n': 3,
 'bn:00041942n': 3,
 'bn:02928599n': 3,
 'bn:00009343n': 0,
 'bn:00047367n': 1,
 'bn:00015665n': 2,
 'bn:00012439n': 2,
 'bn:00060821n': 2,
 'bn:00064608n': 2,
 'bn:00046516n': 3,
 'bn:00014136n': 3,
 'bn:17880853n': 3,
 'bn:00014138n': 3,
 'bn:00012059n': 3,
 'bn:00071261n': 3}

In [135]:
# Same mapping for 'bison'.
# NOTE(review): the execution count is lower than the cells above, so this
# output presumably comes from an earlier run with word = 'bison'; re-running
# the cell shows whatever nn_w_dists currently holds.
nn_w_dists

{'bn:00003357n': 0,
 'bn:00010720n': 0,
 'bn:00076248n': 1,
 'bn:00012459n': 1,
 'bn:00010540n': 2,
 'bn:17528560n': 2,
 'bn:00068536n': 2,
 'bn:00041942n': 3,
 'bn:02928599n': 3,
 'bn:00006179n': 3,
 'bn:00053079n': 3}

In [79]:
# In-degree of one node: the number of edges in G that point at it.
# NOTE(review): presumably a spot check from a run on a different word; confirm it is still needed.
G.in_degree('bn:00011917n')

0

In [77]:
# Show every (source, target) edge currently in G.
list(G.edges)

[('bn:00011917n', 'bn:00005793n'),
 ('bn:00011917n', 'bn:00032351n'),
 ('bn:00011917n', 'bn:01643573n'),
 ('bn:00005793n', 'bn:00046978n'),
 ('bn:00032351n', 'bn:00026716n'),
 ('bn:00032351n', 'bn:00313677n'),
 ('bn:01643573n', 'bn:00005704n'),
 ('bn:01643573n', 'bn:05012142n'),
 ('bn:00046978n', 'bn:00005956n'),
 ('bn:00026716n', 'bn:00046978n'),
 ('bn:00026716n', 'bn:00077585n'),
 ('bn:00026716n', 'bn:00313677n'),
 ('bn:00026716n', 'bn:15154456n'),
 ('bn:00313677n', 'bn:00077585n'),
 ('bn:00313677n', 'bn:14439409n'),
 ('bn:00005704n', 'bn:00046961n'),
 ('bn:00005704n', 'bn:00054416n'),
 ('bn:00005704n', 'bn:00077585n'),
 ('bn:05012142n', 'bn:15154456n'),
 ('bn:00083756v', 'bn:00082838v'),
 ('bn:00082838v', 'bn:00085711v'),
 ('bn:03315803n', 'bn:00034471n'),
 ('bn:03315803n', 'bn:14956284n'),
 ('bn:03315803n', 'bn:03315803n'),
 ('bn:00034471n', 'bn:00064584n'),
 ('bn:00034471n', 'bn:00071316n'),
 ('bn:00034471n', 'bn:00081581n'),
 ('bn:00034471n', 'bn:01094292n'),
 ('bn:00034471n', 'b

In [33]:
# Scratch check: a set holding the single element 1.
s = {1}

In [None]:
# Sample records kept as a string for reference.
# The columns appear to match the six fields unpacked in the loading cell:
# source, target, language, short_name, relation_group, is_automatic.
"""
bn:00009694n    bn:00058844n    EN      is-a    HYPERNYM        False
bn:00009694n    bn:00102477a    EN      gloss-related   OTHER   False
bn:00009694n    bn:00058844n    EN      gloss-related   OTHER   False
bn:00009694n    bn:00102477a    EN      gloss-related   OTHER   False
bn:00009694n    bn:00112223a    EN      gloss-related   OTHER   False
bn:00009694n    bn:00644696n    MUL     subclass_of     HYPERNYM        False
bn:00009694n    bn:03244583n    EN      has-kind        HYPONYM True
bn:00009694n    bn:03746929n    EN      has-kind        HYPONYM True
bn:00009694n    bn:03881611n    DA      is-a    HYPERNYM        True
bn:00009694n    bn:00003687n    EN      related OTHER   False
"""

In [166]:
def get_labels_from_synset(synset):
    """Return the English labels the BabelNet SPARQL endpoint lists for a synset.

    Args:
        synset: Synset ID, with or without the ``bn:`` prefix
            (e.g. ``'bn:00026716n'``).

    Returns:
        A list of label strings in the order the endpoint returned them.
        Duplicates are kept. The list is empty when the endpoint returns no
        bindings or none of the bindings carries a ``label``.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://babelnet.org/sparql/"

    # Remove the literal prefix. str.lstrip('bn:') would instead strip any
    # leading run of the characters 'b', 'n' and ':'.
    prefix = 'bn:'
    synset_id = synset[len(prefix):] if synset.startswith(prefix) else synset

    query = """
    SELECT ?label WHERE {{
        <http://babelnet.org/rdf/s{synset}> a skos:Concept .
        OPTIONAL {{
            <http://babelnet.org/rdf/s{synset}> lemon:isReferenceOf ?sense .
            ?entry lemon:sense ?sense .
            ?entry lemon:language "EN" .
            ?entry rdfs:label ?label
        }}
    }}
    """.format(synset=synset_id)

    # If the endpoint ever requires an API key, add it to params from an
    # environment variable rather than hard-coding it in the notebook.
    params = {
        "query": query,
        "format": "application/sparql-results+json",
    }

    # Let requests URL-encode the parameters instead of assembling the query
    # string by hand; the timeout keeps a stalled endpoint from hanging the kernel.
    res = requests.get(url, params=params, timeout=30)
    res.raise_for_status()

    # Parse the body once. Because the label pattern is OPTIONAL, a binding
    # may come back without a 'label' key, and the list may be empty.
    bindings = res.json()['results']['bindings']
    return [
        binding['label']['value']
        for binding in bindings
        if 'label' in binding
    ]

In [182]:
# Fetch the English labels for one example synset and display them.
labels = get_labels_from_synset('bn:00026716n')
labels

['device',
 'device_(machine)',
 'device',
 'device_(machine)',
 'Industrial_Hydraulics',
 'Capital_equipment',
 'machine',
 'machine_(politics)',
 'Complex_machine',
 'Machinery_and_mechanisms',
 'IndustrialHydraulis',
 'Machines_(video_game)',
 'Machines',
 "Machines_(Or_'Back_to_Humans')",
 "Machines_(or_'Back_to_Humans')",
 'Machines_(or_Back_to_Humans)',
 'Machines_(Biffy_Clyro_Song)',
 'Machines_(Biffy_Clyro_song)',
 'Machines_(song)',
 'Machines_(Doctor_Who_audio)',
 'Machines_(EP)',
 'Machines_(Red_Flag_song)',
 'Machines_(single)',
 'Machineries']

In [183]:
# Keep only labels without an underscore (single-token labels), deduplicated.
print({label for label in labels if '_' not in label})

{'machine', 'Machineries', 'device', 'IndustrialHydraulis', 'Machines'}


In [88]:
# Deduplicate a pasted list of labels; a set literal holds the same elements.
{
    'Astronomical_Objects',
    'celestial_body',
    'Celestial_object',
    'Cosmological_object',
    'Celestial_bodes',
    'Celestial_objects',
    'Astronomical_bodies',
    'Celestial_bodies',
    'Celestial_bodies_(Cthulhu_mythos)',
    'Astronomical_objects',
    'heavenly_body',
    'Astronomical_body',
    'Astronomical_object',
}

{'Astronomical_Objects',
 'Astronomical_bodies',
 'Astronomical_body',
 'Astronomical_object',
 'Astronomical_objects',
 'Celestial_bodes',
 'Celestial_bodies',
 'Celestial_bodies_(Cthulhu_mythos)',
 'Celestial_object',
 'Celestial_objects',
 'Cosmological_object',
 'celestial_body',
 'heavenly_body'}