In [2]:
import nltk
from nltk.corpus import wordnet as wn
# Fetch the corpora used by the WordNet lookups in this notebook. The captured
# output shows both packages reported as already up-to-date, so re-running is cheap.
nltk.download('wordnet')
# 'omw' is the package that the lang='nld' lookups below presumably rely on
# (Open Multilingual Wordnet) -- NOTE(review): confirm the package name for the
# installed NLTK version. As the last expression of the cell, this call's
# return value is what the notebook displays.
nltk.download('omw')

[nltk_data] Downloading package wordnet to /home/ivo/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw to /home/ivo/nltk_data...
[nltk_data]   Package omw is already up-to-date!


True

In [3]:
def getSynsets(word, pos, lang):
    """Look up the WordNet synsets for ``word``.

    Thin wrapper around ``wn.synsets`` that forwards ``pos`` and ``lang``
    unchanged as keyword arguments.

    word: the lemma to look up.
    pos:  part-of-speech filter, passed straight through (e.g. ``wn.NOUN``).
    lang: language code of ``word``, passed straight through (e.g. ``'nld'``).

    Returns whatever ``wn.synsets`` returns for these arguments.
    """
    lookup_options = dict(pos=pos, lang=lang)
    return wn.synsets(word, **lookup_options)

def getHypernyms(synset, lang):
    """Return the lemma names of the direct hypernyms of ``synset``.

    synset: object exposing ``hypernyms()``; each returned hypernym must
            expose ``lemma_names(lang)``.
    lang:   language code handed to ``lemma_names``.

    Returns a flat list: the lemma names of the first hypernym, then those of
    the second, and so on. Only one level up is inspected; the list is empty
    when there are no hypernyms.
    """
    return [
        lemma
        for broader in synset.hypernyms()
        for lemma in broader.lemma_names(lang)
    ]

def getHyponyms(synset, lang):
    """Return the lemma names of the direct hyponyms of ``synset``.

    synset: object exposing ``hyponyms()``; each returned hyponym must
            expose ``lemma_names(lang)``.
    lang:   language code handed to ``lemma_names``.

    Returns a flat list: the lemma names of the first hyponym, then those of
    the second, and so on. Only one level down is inspected; the list is empty
    when there are no hyponyms.
    """
    return [
        lemma
        for narrower in synset.hyponyms()
        for lemma in narrower.lemma_names(lang)
    ]

def getHyponymsRecursive(synset, lang):
    """Return the lemma names of every synset below ``synset``.

    Walks the hyponym relation depth-first: for each direct hyponym its lemma
    names are emitted first, immediately followed by everything beneath it,
    before moving on to the next direct hyponym. The lemmas of ``synset``
    itself are not included.

    synset: object exposing ``hyponyms()``; every reachable hyponym must
            expose ``hyponyms()`` and ``lemma_names(lang)``.
    lang:   language code handed to ``lemma_names``.

    Returns a flat list of lemma names (empty when there are no hyponyms).
    The list is not de-duplicated: a synset reachable along several paths
    contributes its lemmas once per path, and different synsets that share a
    lemma each contribute it. There is no cycle protection; the hyponym
    relation is assumed to be acyclic.
    """
    collected = []

    def _descend(node):
        # Append into one shared list instead of building a new list with `+`
        # at every level, which re-copied all accumulated names each time.
        for narrower in node.hyponyms():
            collected.extend(narrower.lemma_names(lang))
            _descend(narrower)

    _descend(synset)
    return collected

def showSynset(synset, lang):
    """Print a short report about ``synset`` to standard output.

    After a separator line, one labelled line is printed for each of: the
    synset itself, its definition, its lemma names in ``lang``, the lemmas of
    its direct hypernyms, the lemmas of its direct hyponyms, and the lemmas of
    all hyponyms found recursively.

    synset: the synset to describe.
    lang:   language code used for every lemma lookup.

    Returns None.
    """
    # Each value is wrapped in a callable so it is computed only right before
    # its line is printed, keeping evaluation and output strictly interleaved.
    report_lines = (
        ('synset    :', lambda: synset),
        ('definition:', lambda: synset.definition()),
        ('synonyms  :', lambda: synset.lemma_names(lang)),
        ('hypernyms :', lambda: getHypernyms(synset, lang)),
        ('hyponyms  :', lambda: getHyponyms(synset, lang)),
        ('hyponyms+ :', lambda: getHyponymsRecursive(synset, lang)),
    )
    print('-----------')
    for label, compute_value in report_lines:
        print(label, compute_value())

In [10]:
# Look up every noun synset that has the search term as a Dutch ('nld') lemma
# and print a report for each of them.
searchTerm = "nederland"
synsets = getSynsets(searchTerm, pos=wn.NOUN, lang='nld')
for s in synsets:
    showSynset(s, lang='nld')


-----------
synset    : Synset('netherlands.n.01')
definition: a constitutional monarchy in western Europe on the North Sea; half the country lies below sea level
synonyms  : ['Koninkrijk_der_Nederlanden', 'Holland', 'Nederland', 'kikkerland']
hypernyms : []
hyponyms  : []
hyponyms+ : []


In [5]:
# Each topic is a two-element list:
#   [0] WordNet synset names (resolved with wn.synset in getVocabulary)
#   [1] extra words that getVocabulary puts at the front of the vocabulary as-is
topic0 = [
    ['alcohol.n.01', 'glass.n.02', 'cup.n.01', 'drink.v.01', 'toast.v.02'],
    ['jenever'],
]
topic1 = [
    ['club.n.02', 'associate.n.01', 'brotherhood.n.03', 'friendship.n.01', 'connect.v.03', 'league.n.02'],
    ['vriendenkring', 'vriendenrij'],
]
topic2 = [
    ['print.v.03', 'printer.n.01', 'bookbinder.n.01', 'press.n.03', 'printing_concern.n.01'],
    ['drukkunst', 'boekdrukkunst'],
]
topic3 = [
    ['sculpture.n.01', 'stone.n.02'],
    ['metaal', 'metalen', 'ijzer', 'ijzeren'],
]
topic4 = [
    ['freedom.n.01', 'flambeau.n.01', 'shine.v.02'],
    ['licht', 'verlichting', 'wetenschap', 'duisternis', 'lichtstraal', 'wijsheid'],
]
topic5 = [
    ['invention.n.03', 'invent.v.01', 'discovery.n.03'],
    ['uitvinden', 'haarlemmer'],
]
topic6 = [
    ['fatherland.n.01'],
    ['nederland', 'holland', 'oranje', 'willem'],
]
topic7 = [
    ['subscriber.n.03', 'boss.n.02'],
    ['eerelid', 'eereleden', 'beschermheer'],
]

# All topics, in index order (topics[i] is topic<i>).
topics = [
    topic0, topic1, topic2, topic3,
    topic4, topic5, topic6, topic7,
]

In [6]:
def getVocabulary(topic, lang='nld'):
    """Build the word list for one topic.

    topic: two-element sequence ``[synset_names, extra_words]``.
           ``synset_names`` are names accepted by ``wn.synset``
           (e.g. ``'alcohol.n.01'``); ``extra_words`` are words placed at the
           front of the vocabulary unchanged.
    lang:  language code used for all lemma lookups; defaults to ``'nld'``,
           the value that used to be hard-coded.

    Returns a new list: the extra words, then for each synset name in order
    the synset's own lemma names followed by the lemma names of all of its
    hyponyms (via ``getHyponymsRecursive``). Duplicates are kept.

    Raises whatever ``wn.synset`` raises for an unknown synset name.
    """
    synset_names, extra_words = topic[0], topic[1]

    # Start from a copy: with no synset names the topic's own list used to be
    # returned, so a caller editing the result would silently edit the topic.
    vocabulary = list(extra_words)

    for name in synset_names:
        synset = wn.synset(name)
        vocabulary.extend(synset.lemma_names(lang))
        vocabulary.extend(getHyponymsRecursive(synset, lang))

    return vocabulary

In [7]:
# One vocabulary list per topic, in the same order as `topics`.
topicList = [getVocabulary(topic) for topic in topics]

topicList

[['jenever',
  'alcohol',
  'drank',
  'spraakwater',
  'aperitief',
  'biertje',
  'bier',
  'brouwsel',
  'gerstenat',
  'donker_bier',
  'stout',
  'stout',
  'vatbier',
  'pils',
  'pilsener',
  'bokbier',
  'pils',
  'pilsener',
  'mede',
  'appelwijn',
  'cider',
  'eigenstook',
  'koemis',
  'likeur',
  'absint',
  'amaretto',
  'benedictijner',
  'chartreuse',
  'Drambuie',
  'drank',
  'arak',
  'aqua_vitae',
  'brandewijn',
  'brandy',
  'appeldrank',
  'appelbrandewijn',
  'Calvados',
  'cognac',
  'grappa',
  'vuur_water',
  'wie_verre_reizen_doet,_kan_veel_verhalen',
  'gin',
  'ouzo',
  'rum',
  'grog',
  'toddy',
  'Schnaps',
  'tequila',
  'vodka',
  'wodka',
  'whisky',
  'Drambuie',
  'cocktail',
  'bloody_mary',
  'Martini',
  'martini',
  'gin-tonic',
  'grog',
  'toddy',
  'punch',
  'Advocaat',
  'advokaat',
  'bocht',
  '-halve',
  'druivenat',
  'druivennat',
  'wijn',
  'roos',
  'Bourgondië',
  'dessertwijn',
  'Marsala',
  'porto',
  'sherry',
  'xeres',
  'x

In [8]:
# TODO: remove diacritics, capital letters and punctuation