# WordNet Expansion

Download the necessary corpora:

In [1]:
import nltk

nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to /home/av11/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/av11/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

Use WordNet corpus to get the related terms to a given word:

In [2]:
from nltk.corpus import wordnet as wn

If the different relations are wanted (separated):

In [3]:
def get_related_categories(word):
    synonyms = []
    hyponyms = []
    hypernyms = []

    for ss in wn.synsets(word):
        for lemma_name in ss.lemma_names():
            synonyms.append(lemma_name)

        for hypernym in ss.hypernyms():
            for hypernym_lemma_name in hypernym.lemma_names():
                hypernyms.append(hypernym_lemma_name)

        for hyponym in ss.hyponyms():
            for hyponym_lemma_name in hyponym.lemma_names():
                hyponyms.append(hyponym_lemma_name)


    return set(synonyms), set(hyponyms), set(hypernyms)

If only all related words are wanted without much care for the exact relation:

In [4]:
def get_related_words(word):
    synonyms, hyponyms, hypernyms = get_related_categories(word)

    return synonyms | hyponyms | hypernyms

Create string for results:

In [5]:
def stringify_results(word):
    string = ''

    categories = get_related_categories(word)
    for i in range(len(categories)):
        prefix = ''
        if i == 0:
            prefix = 'Synonyms: '
        if i == 1:
            prefix = 'Hyponyms: '
        if i == 2:
            prefix = 'Hypernyms: '

        string += prefix + str(categories[i]) + '\n\n'

    string += 'Total: ' + str(get_related_words(word))
    return string

Some examples:

In [6]:
print(stringify_results('express_emotion'))

Synonyms: {'express_emotion', 'express_feelings'}

Hyponyms: {'express_mirth', 'erupt', 'cry', 'keen', 'express_joy', 'burst', 'break', 'laugh', 'weep', 'lament'}

Hypernyms: set()

Total: {'express_mirth', 'erupt', 'cry', 'keen', 'express_emotion', 'express_joy', 'burst', 'break', 'laugh', 'weep', 'lament', 'express_feelings'}


In [7]:
print(stringify_results('for'))

Synonyms: set()

Hyponyms: set()

Hypernyms: set()

Total: set()


In [8]:
print(stringify_results('all'))

Synonyms: {'all', 'altogether', 'totally', 'completely', 'entirely', 'wholly', 'whole'}

Hyponyms: set()

Hypernyms: set()

Total: {'all', 'altogether', 'totally', 'completely', 'entirely', 'whole', 'wholly'}


In [9]:
print(stringify_results('small'))

Synonyms: {'pocket-size', 'small', 'minuscule', 'belittled', 'little', 'diminished', 'minor', 'modest', 'small-scale', 'low', 'lowly', 'pocket-sized', 'humble'}

Hyponyms: set()

Hypernyms: {'body_part', 'size'}

Total: {'pocket-size', 'small', 'minuscule', 'belittled', 'little', 'diminished', 'minor', 'modest', 'small-scale', 'low', 'size', 'body_part', 'lowly', 'pocket-sized', 'humble'}


In [10]:
print(stringify_results('examples'))

Synonyms: {'object_lesson', 'model', 'good_example', 'exercise', 'illustration', 'instance', 'lesson', 'exemplar', 'representative', 'deterrent_example', 'example', 'case'}

Hyponyms: {'pacesetter', 'apology', 'quintessence', 'template', 'time', 'templet', 'exception', 'lodestar', 'sample', 'holotype', 'case_in_point', 'excuse', 'prefiguration', 'beaut', 'microcosm', 'beauty', 'prodigy', 'clip', 'humiliation', 'piece', 'guide', 'precedent', 'archetype', 'type_specimen', 'specimen', 'image', 'loadstar', 'pilot', 'epitome', 'pacemaker', 'pattern', 'original', 'mortification', 'paradigm', 'bit', 'prototype'}


