In [1]:
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from collections import OrderedDict
from collections import namedtuple
from textwrap import wrap
# One record per synset: base form, POS label, sense number, and a mapping of
# information kind -> value.
WordNetInfo = namedtuple('WordNetInfo', ['base', 'pos', 'number', 'info'])

# Shared lemmatizer instance used by WNLemmatized.
wnl = WordNetLemmatizer()

# WordNet POS tags mapped to the short labels stored in WordNetInfo.pos,
# kept in a fixed iteration order.
CATS = OrderedDict([
    ('n', 'N'),
    ('v', 'V'),
    ('a', 'Adj'),
    ('s', 'Sat'),
    ('r', 'Adv'),
])

# Kinds of information read directly from a synset.
MAIN = ('definition', 'examples')

# Kinds of related-word information.
NYMS = (
    'synonyms',
    'antonyms',
    'hypernyms',
    'hyponyms',
    'part_meronyms',
    'part_holonyms',
    'substance_meronyms',
    'substance_holonyms',
    'entailments',
)

# One indentation level; built by repetition to avoid a literal four-space
# string in case a tabifier alters it.
TAB = 4 * ' '

In [2]:
def WNLemmatized(word, language='eng'):
    '''Lemmatize word once per part of speech listed in CATS.

    For English, return a list of (lemma, pos) pairs in CATS order; the
    lemmatizer hands back word itself when it has nothing better for a POS.
    For any other language no lemmatization is attempted and the result is
    the one-element list [word] (a bare string, not a pair).
    '''
    if language == 'eng':
        pairs = []
        for category in CATS:
            stem = wnl.lemmatize(word, category)
            # The satellite-adjective entry is dropped when the plain adjective
            # entry already carries the unchanged word.
            # NOTE(review): the check uses the original word, not stem -- confirm
            # that this is the intended comparison.
            redundant_satellite = category == 's' and (word, 'a') in pairs
            if not redundant_satellite:
                pairs.append((stem, category))
        return pairs
    return [word]

def WNNormalized(word):
    '''Return word in the form used for WordNet lookups: lower-cased, with every apostrophe removed'''
    lowered = word.lower()
    # Splitting on the apostrophe and re-joining drops each occurrence.
    return ''.join(lowered.split("'"))

def appendOnce(lst, elt):
    '''Append elt to lst in place unless lst already contains it; returns None'''
    if elt in lst:
        return
    lst.append(elt)

In [3]:
def WNInfo(word, kinds, language='eng'):
    '''Return a list of WordNetInfo tuples, one per synset found for word.

    word      -- the form to look up; it is passed through WNNormalized first
    kinds     -- iterable of information kinds to collect, e.g. MAIN or NYMS.
                 Each kind is handled by name, so any subset or mixture works:
                   'definition', 'examples' -- taken straight from the synset
                   'synonyms'               -- names of the synset's own lemmas
                   'antonyms'               -- names of those lemmas' antonyms
                   anything else            -- the synset method of that name is
                                               called and the lemma names of the
                                               synsets it returns are collected
    language  -- language code used for the synset lookup and for lemma names

    Each WordNetInfo holds the synset's base name, its POS label from CATS,
    its sense number, and an OrderedDict mapping each requested kind (in the
    order given) to its value.  Name lists keep first-seen order and contain
    no duplicates.
    '''
    def names_of(lemmas):
        '''Names of lemmas in first-seen order, without duplicates'''
        return list(OrderedDict.fromkeys(lemma.name() for lemma in lemmas))

    def lookup(synset, kind):
        '''Return the value of one kind of information for synset'''
        if kind in ('definition', 'examples'):
            return getattr(synset, kind)()
        if kind == 'synonyms':
            return names_of(synset.lemmas(lang=language))
        if kind == 'antonyms':
            # Antonymy is a lemma-level relation, so go through each lemma.
            return names_of(antonym
                            for lemma in synset.lemmas(lang=language)
                            for antonym in lemma.antonyms())
        # Synset-level relations (hypernyms, part_meronyms, ...).
        return names_of(lemma
                        for related in getattr(synset, kind)()
                        for lemma in related.lemmas(lang=language))

    results = []
    for synset in wn.synsets(WNNormalized(word), lang=language):
        # Split from the right: the base part of a synset name may itself
        # contain dots (abbreviations), only the last two fields are fixed.
        synset_base, synset_pos, synset_number = synset.name().rsplit('.', 2)
        info = OrderedDict((kind, lookup(synset, kind)) for kind in kinds)
        results.append(WordNetInfo(synset_base, CATS[synset_pos], synset_number, info))
    return results
            

In [5]:
def WNDisplay(wnis, indent=0, maxWidth=70):
    '''Format one WordNetInfo, or a list of them, as wrapped text lines.

    wnis     -- a single WordNetInfo or a list of them
    indent   -- number of TAB levels for the heading line of each entry
    maxWidth -- maximum line width passed to textwrap.wrap

    Each entry yields a '|base, pos, number|' heading, then for every
    non-empty kind a title line one level deeper and the content two levels
    deeper.  Definitions and examples use a hanging indent; other kinds are
    joined with spaces.  Returns the list of lines.
    '''
    def wrapped(ind, s, hanging=False):
        '''Wrap s at ind TAB levels, optionally with a two-space hanging indent'''
        first = TAB * ind
        if hanging:
            rest = first + '  '
        else:
            rest = first
        return wrap(s, width=maxWidth, initial_indent=first, subsequent_indent=rest)

    # A WordNetInfo is itself a tuple, so only a real list counts as "many".
    entries = wnis if isinstance(wnis, list) else [wnis]
    output = []
    for entry in entries:
        output += wrapped(indent, f'|{entry.base}, {entry.pos}, {entry.number}|')
        for kind, value in entry.info.items():
            if not value:
                # Nothing to show for this kind.
                continue
            output += wrapped(indent + 1, f'{kind.title().replace("_", " ")}:')
            if kind == 'definition':
                output += wrapped(indent + 2, value, hanging=True)
            elif kind == 'examples':
                for example in value:
                    output += wrapped(indent + 2, example, hanging=True)
            else:
                output += wrapped(indent + 2, ' '.join(value), hanging=False)
    return output


In [87]:
def isPrefix(abb):
    '''Return the names from the built-in list that start with abb, in listed order'''
    candidates = ('Albania', 'Algeria', 'Azerbaijan', 'France', 'Frisia', 'Friuli', 'Germany', 'Ghana', 'Ghent')
    return [candidate for candidate in candidates if candidate.startswith(abb)]

In [95]:
isPrefix('Fris')

['Frisia']

In [96]:
wn.lemmas('house')

[Lemma('house.n.01.house'),
 Lemma('firm.n.01.house'),
 Lemma('house.n.03.house'),
 Lemma('house.n.04.house'),
 Lemma('house.n.05.house'),
 Lemma('house.n.06.house'),
 Lemma('house.n.07.house'),
 Lemma('sign_of_the_zodiac.n.01.house'),
 Lemma('house.n.09.house'),
 Lemma('family.n.01.house'),
 Lemma('theater.n.01.house'),
 Lemma('house.n.12.house'),
 Lemma('house.v.01.house'),
 Lemma('house.v.02.house')]

In [97]:
wn.synsets('house')

[Synset('house.n.01'),
 Synset('firm.n.01'),
 Synset('house.n.03'),
 Synset('house.n.04'),
 Synset('house.n.05'),
 Synset('house.n.06'),
 Synset('house.n.07'),
 Synset('sign_of_the_zodiac.n.01'),
 Synset('house.n.09'),
 Synset('family.n.01'),
 Synset('theater.n.01'),
 Synset('house.n.12'),
 Synset('house.v.01'),
 Synset('house.v.02')]

In [98]:
wn.lemmas('houses')

[]

In [99]:
wn.synsets('houses')

[Synset('house.n.01'),
 Synset('firm.n.01'),
 Synset('house.n.03'),
 Synset('house.n.04'),
 Synset('house.n.05'),
 Synset('house.n.06'),
 Synset('house.n.07'),
 Synset('sign_of_the_zodiac.n.01'),
 Synset('house.n.09'),
 Synset('family.n.01'),
 Synset('theater.n.01'),
 Synset('house.n.12'),
 Synset('house.v.01'),
 Synset('house.v.02')]

In [100]:
wn.lemmas('good')

[Lemma('good.n.01.good'),
 Lemma('good.n.02.good'),
 Lemma('good.n.03.good'),
 Lemma('commodity.n.01.good'),
 Lemma('good.a.01.good'),
 Lemma('full.s.06.good'),
 Lemma('good.a.03.good'),
 Lemma('estimable.s.02.good'),
 Lemma('beneficial.s.01.good'),
 Lemma('good.s.06.good'),
 Lemma('good.s.07.good'),
 Lemma('adept.s.01.good'),
 Lemma('good.s.09.good'),
 Lemma('dear.s.02.good'),
 Lemma('dependable.s.04.good'),
 Lemma('good.s.12.good'),
 Lemma('good.s.13.good'),
 Lemma('effective.s.04.good'),
 Lemma('good.s.15.good'),
 Lemma('good.s.16.good'),
 Lemma('good.s.17.good'),
 Lemma('good.s.18.good'),
 Lemma('good.s.19.good'),
 Lemma('good.s.20.good'),
 Lemma('good.s.21.good'),
 Lemma('well.r.01.good'),
 Lemma('thoroughly.r.02.good')]

In [6]:
wn.synsets('good')

[Synset('good.n.01'),
 Synset('good.n.02'),
 Synset('good.n.03'),
 Synset('commodity.n.01'),
 Synset('good.a.01'),
 Synset('full.s.06'),
 Synset('good.a.03'),
 Synset('estimable.s.02'),
 Synset('beneficial.s.01'),
 Synset('good.s.06'),
 Synset('good.s.07'),
 Synset('adept.s.01'),
 Synset('good.s.09'),
 Synset('dear.s.02'),
 Synset('dependable.s.04'),
 Synset('good.s.12'),
 Synset('good.s.13'),
 Synset('effective.s.04'),
 Synset('good.s.15'),
 Synset('good.s.16'),
 Synset('good.s.17'),
 Synset('good.s.18'),
 Synset('good.s.19'),
 Synset('good.s.20'),
 Synset('good.s.21'),
 Synset('well.r.01'),
 Synset('thoroughly.r.02')]

In [36]:
#from nltk.corpus import framenet as fn
# Probe the lemma-level relations of one sense of 'vocal'.
# Index 5 is the last entry in the wn.synsets('vocal') listing shown in this
# notebook, i.e. vocal.s.04.
synset = wn.synsets('vocal')[5]
lemma = synset.lemmas()[0]   # first lemma of that synset
# Alternative starting point that was tried: look a lemma up directly by name.
#lemma = wn.lemma('vocal.a.01.vocal')
# All three calls printed an empty list when this cell was run.
print(lemma.frame_strings())
print(lemma.pertainyms())
print(lemma.derivationally_related_forms())

[]
[]
[]


In [34]:
wn.synsets('vocal')

[Synset('vocal_music.n.01'),
 Synset('song.n.01'),
 Synset('vocal.a.01'),
 Synset('vocal.a.02'),
 Synset('outspoken.s.01'),
 Synset('vocal.s.04')]

In [112]:
good_synset = wn.synsets('good')[3]

In [113]:
good_synset

Synset('commodity.n.01')

In [114]:
good_synset.lemmas()

[Lemma('commodity.n.01.commodity'),
 Lemma('commodity.n.01.trade_good'),
 Lemma('commodity.n.01.good')]

In [116]:
good_lemmas = wn.lemmas('good')

In [132]:
good_lemmas

[Lemma('good.n.01.good'),
 Lemma('good.n.02.good'),
 Lemma('good.n.03.good'),
 Lemma('commodity.n.01.good'),
 Lemma('good.a.01.good'),
 Lemma('full.s.06.good'),
 Lemma('good.a.03.good'),
 Lemma('estimable.s.02.good'),
 Lemma('beneficial.s.01.good'),
 Lemma('good.s.06.good'),
 Lemma('good.s.07.good'),
 Lemma('adept.s.01.good'),
 Lemma('good.s.09.good'),
 Lemma('dear.s.02.good'),
 Lemma('dependable.s.04.good'),
 Lemma('good.s.12.good'),
 Lemma('good.s.13.good'),
 Lemma('effective.s.04.good'),
 Lemma('good.s.15.good'),
 Lemma('good.s.16.good'),
 Lemma('good.s.17.good'),
 Lemma('good.s.18.good'),
 Lemma('good.s.19.good'),
 Lemma('good.s.20.good'),
 Lemma('good.s.21.good'),
 Lemma('well.r.01.good'),
 Lemma('thoroughly.r.02.good')]

In [133]:
for lemma in good_lemmas:
    print(lemma, '-->', lemma.synset())

Lemma('good.n.01.good') --> Synset('good.n.01')
Lemma('good.n.02.good') --> Synset('good.n.02')
Lemma('good.n.03.good') --> Synset('good.n.03')
Lemma('commodity.n.01.good') --> Synset('commodity.n.01')
Lemma('good.a.01.good') --> Synset('good.a.01')
Lemma('full.s.06.good') --> Synset('full.s.06')
Lemma('good.a.03.good') --> Synset('good.a.03')
Lemma('estimable.s.02.good') --> Synset('estimable.s.02')
Lemma('beneficial.s.01.good') --> Synset('beneficial.s.01')
Lemma('good.s.06.good') --> Synset('good.s.06')
Lemma('good.s.07.good') --> Synset('good.s.07')
Lemma('adept.s.01.good') --> Synset('adept.s.01')
Lemma('good.s.09.good') --> Synset('good.s.09')
Lemma('dear.s.02.good') --> Synset('dear.s.02')
Lemma('dependable.s.04.good') --> Synset('dependable.s.04')
Lemma('good.s.12.good') --> Synset('good.s.12')
Lemma('good.s.13.good') --> Synset('good.s.13')
Lemma('effective.s.04.good') --> Synset('effective.s.04')
Lemma('good.s.15.good') --> Synset('good.s.15')
Lemma('good.s.16.good') --> Synset('good.s.16')

In [121]:
bon_lemmas = wn.lemmas('bon', lang='fra')

In [122]:
bon_lemmas

[Lemma('well.r.01.bon'),
 Lemma('okay.r.01.bon'),
 Lemma('very_well.r.02.bon'),
 Lemma('thoroughly.r.02.bon'),
 Lemma('beneficial.s.01.bon'),
 Lemma('full.s.06.bon'),
 Lemma('proper.s.04.bon'),
 Lemma('propitious.a.01.bon'),
 Lemma('right.r.04.bon'),
 Lemma('dear.s.02.bon'),
 Lemma('correct.a.01.bon'),
 Lemma('smooth.s.07.bon'),
 Lemma('good.s.21.bon'),
 Lemma('effective.s.04.bon'),
 Lemma('good.s.13.bon'),
 Lemma('good.s.20.bon'),
 Lemma('benevolent.s.03.bon'),
 Lemma('good.s.19.bon'),
 Lemma('good.a.01.bon'),
 Lemma('good.a.03.bon'),
 Lemma('good.s.18.bon'),
 Lemma('good.s.17.bon'),
 Lemma('good.s.16.bon'),
 Lemma('good.s.06.bon'),
 Lemma('good.s.12.bon'),
 Lemma('palatable.a.01.bon'),
 Lemma('good.s.15.bon'),
 Lemma('nice.s.03.bon'),
 Lemma('estimable.s.02.bon'),
 Lemma('decent.s.01.bon'),
 Lemma('right.a.01.bon'),
 Lemma('right.a.04.bon'),
 Lemma('good.s.07.bon'),
 Lemma('all_right.s.01.bon'),
 Lemma('right.s.08.bon'),
 Lemma('fine.a.05.bon'),
 Lemma('smooth.a.01.bon'),
 Lemma('dep

In [156]:
goeds = wn.lemmas('goed', 'n', lang='nld')

In [157]:
goeds

[Lemma('chattel.n.01.goed'),
 Lemma('property.n.04.goed'),
 Lemma('estate.n.02.goed'),
 Lemma('good.n.02.goed'),
 Lemma('good.n.01.goed'),
 Lemma('clothing.n.01.goed'),
 Lemma('possession.n.02.goed'),
 Lemma('drygoods.n.01.goed')]

In [178]:
# Print the names of the part-meronym synsets of every synset that has
# 'house' as one of its lemmas.
lang = 'eng'
house = wn.lemmas('house', lang=lang)
for lemma in house:
    synset = lemma.synset()   # the synset this lemma belongs to
    # Synsets without part meronyms contribute no output lines.
    for nym in synset.part_meronyms():
        print(nym.name())

library.n.01
loft.n.02
porch.n.01
study.n.05
claque.n.01
box_office.n.02
circle.n.07
dressing_room.n.01
greenroom.n.01
orchestra.n.02
orchestra_pit.n.01
parquet.n.02
parquet_circle.n.01
stage.n.03
standing_room.n.01
theater_stage.n.01
tiered_seat.n.01


In [137]:
goed.synset().name()

'property.n.04'

In [153]:
goed.antonyms()

[]

In [154]:
goed

Lemma('property.n.04.goed')

In [197]:
maison = wn.lemmas('maison', lang='fra')

In [201]:
maison[3].synset().definition(lang='fra')

TypeError: definition() got an unexpected keyword argument 'lang'

In [211]:
len(wn.synsets('see', lang='eng'))

25

In [218]:
WNInfo('housed', MAIN)

[WordNetInfo(base='house', pos='V', number='01', info=OrderedDict([('definition', 'contain or cover'), ('examples', ['This box houses the gears'])])),
 WordNetInfo(base='house', pos='V', number='02', info=OrderedDict([('definition', 'provide housing for'), ('examples', ['The immigrants were housed in a new development outside the town'])]))]

In [3]:
wn.all_lemmas()

AttributeError: 'WordNetCorpusReader' object has no attribute 'all_lemmas'

In [4]:
# NOTE(review): this assignment rebinds the builtin name `all` in the notebook namespace.
all = wn.all_synsets()

In [6]:
len(list(all))

117659

In [38]:
from collections import namedtuple
OPTIONS = namedtuple('OPTIONS', 'ICASE IDIAC LANGUAGE LEXICON EXE')

In [39]:
OPT = OPTIONS('icase', 'idiac', 'language', 'lexicon', 'exec')

In [40]:
OPT.ICASE

'icase'

In [10]:
OPTIONS.icase = 'foo'

In [14]:
options = ('icase', 'idiac', 'language', 'lexicon', 'exec')

In [12]:
options

OPTIONS(icase='icase', idiac='idiac', language='language', lexicon='lexicon', exec='exec')

In [15]:
options.icase

AttributeError: 'tuple' object has no attribute 'icase'

In [20]:
from enum import Enum

In [23]:
class option(Enum):
    '''Option names; each member's value is the lower-case string form of its name.'''
    ICASE = 'icase'
    IDIAC = 'idiac'
    LANGUAGE = 'language'
    LEXICON = 'lexicon'
    EXEC = 'exec'

In [24]:
option.IDIAC

<option.IDIAC: 'idiac'>

In [28]:
(option.IDIAC.value)

'idiac'

In [55]:
PROGID = namedtuple('PROGID', 'NAME VERSION COPYRIGHT '  'THANKS_PLAIN THANKS_RICH')


In [56]:
ID = PROGID(NAME = 'foo', VERSION=1, COPYRIGHT='free', THANKS_PLAIN='Ta!', THANKS_RICH='Thank you very much')

In [38]:
dog = wn.synsets('dog')[0].lemmas()[0]

In [39]:
dog

Lemma('dog.n.01.dog')

In [41]:
dog.derivationally_related_forms()

[]