In [1]:
import json
import os
import urllib

import requests
from tqdm import tqdm

In [2]:
# Output directory for the crawl results. Output paths are built by plain
# string concatenation (file_dir + word), so the trailing slash is required.
file_dir = '/Users/annaysun/codenames/babelnet_v4/'

In [3]:
def write_lemma_to_synsets(synsets, filename):
    """Write each synset ID on its own line to ``filename``.

    The file is opened in write mode, so any previous content is replaced.
    Callers in this notebook derive ``filename`` from the lemma being crawled.

    Args:
        synsets: iterable of synset ID strings.
        filename: path of the file to (over)write.
    """
    with open(filename, 'w') as out:
        out.writelines(synset + '\n' for synset in synsets)

In [4]:
def get_synsets_from_lemma(word, limit):
    """Look up BabelNet synset IDs for an English lemma via the SPARQL endpoint.

    Args:
        word: lemma to match as an exact ``rdfs:label "<word>"@en`` literal.
        limit: value substituted into the query's ``LIMIT`` clause.

    Returns:
        A list of ``'bn:<id>'`` strings, one per result binding. Each ID is
        the last path segment of the synset URI with its leading ``s``
        characters stripped.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
    """
    url = "https://babelnet.org/sparql/"

    # Escape backslashes and double quotes so the lemma cannot terminate the
    # SPARQL string literal early.
    literal = word.replace("\\", "\\\\").replace('"', '\\"')
    query = """
    SELECT DISTINCT ?synset WHERE {{
        ?entries a lemon:LexicalEntry .
        ?entries lemon:sense ?sense .
        ?sense lemon:reference ?synset .
        ?entries rdfs:label "{word}"@en
    }} LIMIT {limit}
    """.format(limit=limit, word=literal)

    params = {
        "query": query,
        "format": "application/sparql-results+json",
        # NOTE(review): prefer supplying the key through BABELNET_API_KEY; the
        # literal fallback only keeps existing runs working and should be
        # removed from the notebook.
        "key": os.environ.get(
            "BABELNET_API_KEY", "e3b6a00a-c035-4430-8d71-661cdf3d5837"
        ),
    }

    # Let requests percent-encode the query string; hand-joining "k=v" pairs
    # left characters such as '&', '#', '+' and '%' in the lemma unescaped.
    res = requests.get(url, params=params)
    # Surface HTTP failures here instead of as a JSON/KeyError further down.
    res.raise_for_status()

    return [
        'bn:' + r['synset']['value'].split('/')[-1].lstrip('s')
        for r in res.json()['results']['bindings']
    ]

In [5]:
def get_nonautomatic_hypernyms(results):
    """Return the edges that are non-automatic hypernym relations.

    An edge is kept when its ``pointer['isAutomatic']`` is exactly ``False``
    and its ``pointer['relationGroup']`` equals ``"HYPERNYM"``. Input order is
    preserved.

    Args:
        results: iterable of edge dicts, each carrying a ``'pointer'`` dict.

    Returns:
        A new list holding the matching edge dicts.
    """
    kept = []
    for edge in results:
        pointer = edge['pointer']
        # Identity check on purpose: only a real boolean False qualifies.
        if pointer['isAutomatic'] is not False:
            continue
        if pointer['relationGroup'] == "HYPERNYM":
            kept.append(edge)
    return kept

In [6]:
def get_outgoing_edges_json(synset_id):
    """Fetch the outgoing edges of a synset from the BabelNet v5 HTTP API.

    Args:
        synset_id: synset identifier sent as the ``id`` query parameter
            (the notebook passes values such as ``'bn:00070919n'``).

    Returns:
        The decoded JSON body of the response. Callers in this notebook
        iterate it as a list of edge dicts.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = 'https://babelnet.io/v5/getOutgoingEdges'
    params = {
        'id': synset_id,
        # NOTE(review): prefer supplying the key through BABELNET_API_KEY; the
        # literal fallback only keeps existing runs working and should be
        # removed from the notebook.
        'key': os.environ.get(
            'BABELNET_API_KEY', 'e3b6a00a-c035-4430-8d71-661cdf3d5837'
        ),
    }
    headers = {'Accept-Encoding': 'gzip'}
    res = requests.get(url=url, params=params, headers=headers)
    # Fail here on an HTTP error rather than handing an error payload to
    # code that expects a list of edges.
    res.raise_for_status()
    return res.json()

In [7]:
def append_to_file(source_id, results, filename):
    """Append one tab-separated line per edge to ``filename``.

    Columns, in order: source synset ID, target synset ID, language,
    pointer short name, pointer relation group, and the ``isAutomatic`` flag
    rendered with ``str()``.

    Args:
        source_id: ID of the synset the edges start from.
        results: iterable of edge dicts with ``'target'``, ``'language'`` and
            ``'pointer'`` keys.
        filename: path of the file to append to (created if missing).
    """
    with open(filename, 'a') as out:
        for edge in results:
            target, language = edge['target'], edge['language']
            pointer = edge['pointer']
            fields = (
                source_id,
                target,
                language,
                pointer['shortName'],
                pointer['relationGroup'],
                str(pointer['isAutomatic']),
            )
            out.write('\t'.join(fields) + '\n')

In [8]:
# Smoke test: request up to 3 synsets for the lowercase lemma 'boot'.
tmp_synsets_lowered = get_synsets_from_lemma('boot', 3)

In [9]:
# Display the synset IDs returned for 'boot'.
tmp_synsets_lowered

['bn:00007381n', 'bn:00083789v', 'bn:00012166n']

In [10]:
# Same lookup with the capitalized lemma 'Boot', limited to 1 result, then
# display what came back.
tmp_synsets_capitalized = get_synsets_from_lemma('Boot', 1)
tmp_synsets_capitalized

[]

In [8]:
# Load the codeword list: one entry per line, whitespace-trimmed and
# lowercased, in file order.
all_codewords = []
with open('/Users/annaysun/codenames/data/codewords.txt', 'r') as f:
    all_codewords.extend(line.strip().lower() for line in f)
# Preview the first ten entries.
print(all_codewords[:10])

['africa', 'agent', 'air', 'alien', 'alps', 'amazon', 'ambulance', 'america', 'angel', 'antarctica']


In [33]:
# Codewords to skip when selecting the next batch: the selection loop ignores
# every word found in this set.
words_completed = {
    'buffalo', 'bear', 'bison', 'jupiter', 'moon',
    'phoenix', 'beijing', 'cap', 'boot',
    'india', 'germany',
    'pipe', 'racket', 'bug', 'play', 'table',
    'cloak', 'diamond', 'witch', 'swing', 'circle',
    'unicorn', 'cliff', 'death', 'litter', 'car',
    'crown', 'australia', 'roulette', 'kid', 'gas',
    'ray', 'mammoth', 'ivory', 'key', 'piano',
    'lab', 'school', 'lead', 'laser', 'pan',
    'stock', 'box', 'game', 'whip', 'tube',
    'vacuum', 'king', 'lemon', 'conductor',
    'moscow', 'hand', 'change', 'scientist', 'worm',
    'row', 'penguin', 'stick', 'scale', 'figure',
    'cricket', 'ball', 'nut', 'horseshoe', 'amazon',
    'thumb', 'spider', 'lion', 'stream', 'bomb',
    'shark', 'africa', 'agent', 'air', 'alien',
    'alps', 'ambulance', 'america', 'angel', 'antarctica',
    'apple', 'arm', 'atlantis', 'aztec', 'back',
    'band', 'bank', 'bar', 'bark', 'bat',
    'battery', 'beach', 'beat', 'bed', 'bell',
    'belt', 'berlin', 'bermuda', 'berry', 'bill',
    'block', 'board', 'bolt', 'bond', 'boom',
    'bottle', 'bow', 'bridge', 'brush', 'buck',
    'bugle', 'button', 'calf', 'canada', 'capital',
    'card', 'carrot', 'casino', 'cast', 'cat',
    'cell', 'centaur', 'center', 'chair', 'charge',
    'check', 'chest', 'chick', 'china', 'chocolate',
    'church',
}

In [30]:
# Pick the next batch: the first 15 single-token codewords (no spaces) that
# are not already listed in words_completed, in codeword-file order.
words = []
for word in all_codewords:
    if word not in words_completed and len(word.split(' ')) == 1:
        words.append(word)
        if len(words) == 15:
            break
print(' '.join(words))

carrot casino cast cat cell centaur center chair charge check chest chick china chocolate church


In [31]:
# Print the selected batch again, this time in Python list form.
print(words)

['carrot', 'casino', 'cast', 'cat', 'cell', 'centaur', 'center', 'chair', 'charge', 'check', 'chest', 'chick', 'china', 'chocolate', 'church']


In [13]:
# synsets_queried = set()

In [32]:
for word in tqdm(words):
    # Seed synsets: at most 1 for the capitalized lemma and at most 3 for the
    # lowercase lemma, capitalized results first.
    synsets_capitalized = get_synsets_from_lemma(word.capitalize(), 1)
    synsets_lowered = get_synsets_from_lemma(word.lower(), 3)
    seed_synsets = synsets_capitalized + synsets_lowered

    # Edges for this word are appended to <file_dir><word>; the seed synset
    # list goes to <file_dir><word>_synsets.
    edges_path = file_dir + word
    write_lemma_to_synsets(seed_synsets, edges_path + '_synsets')

    # Record the outgoing edges of each seed synset, of its non-automatic
    # hypernyms, and of their non-automatic hypernyms in turn.
    for synset_0 in seed_synsets:
        results_0 = get_outgoing_edges_json(synset_0)
        append_to_file(synset_0, results_0, edges_path)
        hypernyms_0 = get_nonautomatic_hypernyms(results_0)

        for synset_1 in hypernyms_0:
            target_1 = synset_1['target']
            results_1 = get_outgoing_edges_json(target_1)
            append_to_file(target_1, results_1, edges_path)
            hypernyms_1 = get_nonautomatic_hypernyms(results_1)

            for synset_2 in hypernyms_1:
                target_2 = synset_2['target']
                results_2 = get_outgoing_edges_json(target_2)
                append_to_file(target_2, results_2, edges_path)
                # Computed but not followed any further inside this loop.
                hypernyms_2 = get_nonautomatic_hypernyms(results_2)

100%|██████████| 15/15 [08:16<00:00, 33.13s/it]


In [50]:
# Request up to 3 synsets for the lowercase lemma 'shark'. This rebinds
# synsets_lowered, which the crawl loop also assigns.
synsets_lowered = get_synsets_from_lemma('shark', 3)

In [51]:
# Display the synset IDs returned for 'shark'.
synsets_lowered

['bn:00070919n', 'bn:00070920n', 'bn:00070921n']

In [43]:
# Inspect hypernyms_0. It is only assigned inside the crawl loop, so this
# shows whatever the most recent seed synset left behind.
hypernyms_0

[{'language': 'EN',
  'pointer': {'fSymbol': '@',
   'name': 'Hypernym',
   'shortName': 'is-a',
   'relationGroup': 'HYPERNYM',
   'isAutomatic': False},
  'target': 'bn:00006539n',
  'weight': 0.0,
  'normalizedWeight': 0.0},
 {'language': 'MUL',
  'pointer': {'fSymbol': 'wd21',
   'name': 'subclass_of',
   'shortName': 'subclass_of',
   'relationGroup': 'HYPERNYM',
   'isAutomatic': False},
  'target': 'bn:00059480n',
  'weight': 0.0,
  'normalizedWeight': 0.0}]

In [44]:
# Inspect hypernyms_1. It is only assigned inside the crawl loop's second
# level, so this shows whatever the most recent iteration left behind.
hypernyms_1

[{'language': 'EN',
  'pointer': {'fSymbol': '@',
   'name': 'Hypernym',
   'shortName': 'is-a',
   'relationGroup': 'HYPERNYM',
   'isAutomatic': False},
  'target': 'bn:00006539n',
  'weight': 0.0,
  'normalizedWeight': 0.0},
 {'language': 'MUL',
  'pointer': {'fSymbol': 'wd21',
   'name': 'subclass_of',
   'shortName': 'subclass_of',
   'relationGroup': 'HYPERNYM',
   'isAutomatic': False},
  'target': 'bn:00059480n',
  'weight': 0.0,
  'normalizedWeight': 0.0}]