In [1]:
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from collections import OrderedDict
from collections import namedtuple
from textwrap import wrap
# One record per WordNet sense: head word, POS label, sense number, collected info
WordNetInfo = namedtuple('WordNetInfo', 'base, pos, number, info')
wnl = WordNetLemmatizer()
# WordNet POS codes mapped to the labels used for display, in lookup order
CATS = OrderedDict([
    ('n', 'N'),
    ('v', 'V'),
    ('a', 'Adj'),
    ('s', 'Sat'),
    ('r', 'Adv'),
])
# Kinds of information that can be requested from WNInfo
MAIN = ('definition', 'examples')
NYMS = (
    'synonyms', 'antonyms', 'hypernyms', 'hyponyms',
    'part_meronyms', 'part_holonyms', 'substance_meronyms', 'substance_holonyms',
    'entailments',
)
TAB = ' ' * 4   # avoid a literal four-space string in case a tabifier alters it

In [17]:
def WNLemmatized(word):
    '''Use a WordNet lemmatizer to get all stems (lemmas) for word, based on POS.

    Return a list of (lemma, pos) pairs, visiting the POS codes in CATS order.
    Whatever the lemmatizer returns for a POS is recorded as-is (per the NLTK
    documentation it hands the word back unchanged when it finds no lemma).
    The satellite-adjective entry ('s') is left out when it would only repeat
    the lemma already recorded for plain adjectives ('a').'''
    lexemes = []
    for pos in CATS.keys():
        lemma = wnl.lemmatize(word, pos)
        # Compare the lemma, not the raw word: what was appended for 'a' is
        # (lemma, 'a'), so testing (word, 'a') could only match when the word
        # was already its own lemma, and inflected forms kept a duplicate.
        if pos == 's' and (lemma, 'a') in lexemes:
            continue
        lexemes.append((lemma, pos))
    return lexemes

def WNNormalized(word):
    '''Lower-case word and drop every apostrophe, giving the WordNet-normalized form'''
    lowered = word.lower()
    # Splitting on the apostrophe and re-joining removes each occurrence
    return ''.join(lowered.split("'"))

def appendOnce(lst, elt):
    '''Append elt to lst in place, unless lst already contains it'''
    if elt in lst:
        return
    lst.append(elt)

In [74]:
def WNInfo(word, kinds, language='eng'):
    '''Return a list of WordNetInfo tuples of kinds of information for word.

    word     -- the form to look up.  For English it is normalized and then
                lemmatized for every POS in CATS; for any other language it
                is looked up exactly as given, with no POS filter.
    kinds    -- sequence of kind names.  If it contains 'definition', every
                kind is called as a method on the synset (as with MAIN) and
                synsets whose head word differs from the lemma are skipped.
                Otherwise it is treated as a relation list laid out like
                NYMS: synonyms and antonyms are always collected, and each
                name in kinds[2:] is called as a synset method.
    language -- language code handed to WordNet; defaults to 'eng' so the
                two-argument call form works.

    Each result holds the synset's own head word, its POS label (via CATS),
    its sense number, and an OrderedDict mapping kind to what was found.'''
    if language == 'eng':
        lexemes = WNLemmatized(WNNormalized(word))
    else:
        lexemes = [word]
    results = []
    for lexeme in lexemes:
        if language == 'eng':
            base, pos = lexeme
            senses = wn.synsets(base, pos, lang=language)
        else:
            base = lexeme
            senses = wn.synsets(base, lang=language)
        for sense in senses:
            # Synset names look like 'house.n.01'.  Split from the right so a
            # head word that itself contains '.' cannot break the unpacking.
            sense_base, sense_pos, sense_number = sense.name().rsplit('.', 2)
            info = OrderedDict()
            # Definition + examples
            if 'definition' in kinds:
                if sense_base != base:   # only true lemmas, not synonyms
                    continue
                for kind in kinds:
                    info[kind] = getattr(sense, kind)()
            # Related words
            else:
                # Synonyms and antonyms need special-casing: they come from
                # the synset's lemmas rather than from a synset method
                synonyms = []
                antonyms = []
                # The synset's head word counts as a synonym when it is not
                # the form that was looked up
                if sense_base != base:
                    synonyms.append(sense_base)
                for lemma in sense.lemmas(lang=language):
                    name = lemma.name()
                    if name != base and name != sense_base:
                        appendOnce(synonyms, name)
                    for antonym in lemma.antonyms():
                        appendOnce(antonyms, antonym.name())
                info['synonyms'] = synonyms
                info['antonyms'] = antonyms
                # All the others: gather the lemma names of each related synset
                for kind in kinds[2:]:
                    thing = []
                    for item in getattr(sense, kind)():
                        for lemma in item.lemmas(lang=language):
                            # Use the lemma name whole; splitting it on '.'
                            # would cut short any name that contains a period
                            appendOnce(thing, lemma.name())
                    info[kind] = thing
            results.append(WordNetInfo(sense_base, CATS[sense_pos], sense_number, info))
    return results
            

In [75]:
# Print the related-word report for French 'bon', one formatted line at a time
for line in WNDisplay(WNInfo('bon', NYMS, language='fra')):
    print(line)

well (Adv, 01)
    Synonyms:
        well alors bien puits
okay (Adv, 01)
    Synonyms:
        okay alors bien comme_ça est_bon proprement
very_well (Adv, 02)
    Synonyms:
        very_well bien OK
thoroughly (Adv, 02)
    Synonyms:
        thoroughly absolument complètement entièrement exhaustivement
beneficial (Sat, 01)
    Synonyms:
        beneficial
full (Sat, 06)
    Synonyms:
        full entier meilleur plein
proper (Sat, 04)
    Synonyms:
        proper adéquat approprié apte convenable correct exact juste
        propre
propitious (Adj, 01)
    Synonyms:
        propitious propice
right (Adv, 04)
    Synonyms:
        right droit
dear (Sat, 02)
    Synonyms:
        dear cher coûteux
correct (Adj, 01)
    Synonyms:
        correct corriger
    Antonyms:
        incorrect
smooth (Sat, 07)
    Synonyms:
        smooth lisse
good (Sat, 21)
    Synonyms:
        good meilleur
effective (Sat, 04)
    Synonyms:
        effective efficace
good (Sat, 13)
    Synonyms:
        good 

In [32]:
# Print the definition of every synset found for Polish 'dom'
for synset in wn.synsets('dom', lang='pol'):
    print(synset.definition())

a dwelling that serves as living quarters for one or more families
aristocratic family line
where you live at a particular time


In [85]:
# Look up every synset for 'good' (no POS filter) and show the whole list
synsets = wn.synsets('good')
print(synsets)
# Take one synset and list the attributes and methods of its first lemma
synset = synsets[3]
dir(synset.lemmas()[0])

[Synset('good.n.01'), Synset('good.n.02'), Synset('good.n.03'), Synset('commodity.n.01'), Synset('good.a.01'), Synset('full.s.06'), Synset('good.a.03'), Synset('estimable.s.02'), Synset('beneficial.s.01'), Synset('good.s.06'), Synset('good.s.07'), Synset('adept.s.01'), Synset('good.s.09'), Synset('dear.s.02'), Synset('dependable.s.04'), Synset('good.s.12'), Synset('good.s.13'), Synset('effective.s.04'), Synset('good.s.15'), Synset('good.s.16'), Synset('good.s.17'), Synset('good.s.18'), Synset('good.s.19'), Synset('good.s.20'), Synset('good.s.21'), Synset('well.r.01'), Synset('thoroughly.r.02')]


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_frame_ids',
 '_frame_strings',
 '_hypernyms',
 '_instance_hypernyms',
 '_key',
 '_lang',
 '_lex_id',
 '_lexname_index',
 '_name',
 '_related',
 '_synset',
 '_syntactic_marker',
 '_wordnet_corpus_reader',
 'also_sees',
 'antonyms',
 'attributes',
 'causes',
 'count',
 'derivationally_related_forms',
 'entailments',
 'frame_ids',
 'frame_strings',
 'hypernyms',
 'hyponyms',
 'in_region_domains',
 'in_topic_domains',
 'in_usage_domains',
 'instance_hypernyms',
 'instance_hyponyms',
 'key',
 'lang',
 'member_holonyms',
 'member_meronyms',
 'name',
 'part_holonyms',
 'part_meronyms',
 'pertainyms',
 'region_dom

In [65]:
def WNDisplay(wnis, indent=0, maxWidth=70):
    '''Return a list of text lines presenting the entries in wnis, wrapped and indented.

    wnis may be a single WordNetInfo-style record or a list of them.  Each
    entry produces a heading line at the given indent level, then one labelled
    section per non-empty kind, with the content one level deeper.  Lines are
    wrapped at maxWidth columns.'''
    def wrapped(ind, s, hanging=False):
        '''Wrap s at ind levels of TAB; continuation lines get two extra spaces if hanging'''
        first = TAB * ind
        rest = first + '  ' if hanging else first
        return wrap(s, width=maxWidth, initial_indent=first, subsequent_indent=rest)

    # Accept a lone record as well as a list of records
    entries = wnis if isinstance(wnis, list) else [wnis]
    lines = []
    for wni in entries:
        lines += wrapped(indent, f'{wni.base} ({wni.pos}, {wni.number})')
        for kind, info in wni.info.items():
            if not info:   # nothing recorded for this kind
                continue
            lines += wrapped(indent + 1, f'{kind.title().replace("_", " ")}:')
            if kind == 'definition':
                # A single string
                lines += wrapped(indent + 2, info, hanging=True)
            elif kind == 'examples':
                # Each example is wrapped as its own paragraph
                for example in info:
                    lines += wrapped(indent + 2, example, hanging=True)
            else:
                # A list of words, shown space-separated
                lines += wrapped(indent + 2, ' '.join(info), hanging=False)
    return lines


In [9]:
# Print the related-word report for English 'good'; the language is passed
# explicitly, in the same way as the French example
for line in WNDisplay(WNInfo('good', NYMS, language='eng')):
    print(line)

good (N, 01)
    Hypernyms:
        advantage
    Hyponyms:
        common_good
good (N, 02)
    Synonyms:
        goodness
    Antonyms:
        evil evilness
    Hypernyms:
        morality
    Hyponyms:
        beneficence benignity kindness saintliness summum_bonum virtue
good (N, 03)
    Synonyms:
        goodness
    Antonyms:
        bad badness
    Hypernyms:
        quality
    Hyponyms:
        benefit better desirability optimum wisdom worthiness
commodity (N, 01)
    Synonyms:
        commodity trade_good
    Hypernyms:
        artifact
    Hyponyms:
        basic consumer_goods drygoods entrant export fancy_goods
        fungible future import merchandise middling salvage shopping
        sporting_goods worldly_possession
good (Adj, 01)
    Antonyms:
        bad
full (Sat, 06)
    Synonyms:
        full
good (Adj, 03)
    Antonyms:
        evil
estimable (Sat, 02)
    Synonyms:
        estimable honorable respectable
beneficial (Sat, 01)
    Synonyms:
        beneficial
go

In [1]:
# Sample values for the string-formatting experiments
apple = 'apple'
pear = 'pear'
banana = 'banana'
water = 'water'

In [2]:
# str.format fills each named {field} from the keyword argument of the same name
'We have {apple}s, {pear}s and {banana}s but no {water}'.format(apple=apple, pear=pear, banana=banana, water=water) 

'We have apples, pears and bananas but no water'

In [3]:
# The built-in format() is not str.format: it rejects keyword arguments.
# Catch the TypeError and print it, so the cell still shows the error
# without stopping a Restart & Run All.
try:
    format('We have {apple}s, {pear}s and {banana}s but no {water}', apple=apple, pear=pear, banana=banana, water=water)
except TypeError as err:
    print(f'TypeError: {err}')

TypeError: format() takes no keyword arguments

In [7]:
# Built-in format(value, format_spec): ',d' renders the integer with comma grouping
format(1986, ',d')

'1,986'

In [8]:
'{}, {}'.format('Jane', 'Bob')

'Jane, Bob'

In [4]:
# Read the word/pronunciation list into memory, one string per line.
# The with-block closes the file as soon as it has been read.  The explicit
# encoding stops the IPA pronunciations from depending on the platform default
# (presumably the file is UTF-8 -- confirm).
with open('/home/jason/words/words_prons', encoding='utf-8') as prons_file:
    pd = prons_file.readlines()

In [5]:
len(pd)

102305

In [6]:
pd[1000:1200]

['Argo\tˈɑrɡo\n',
 "Argo's\tˈɑrɡoz\n",
 'Argonaut\tˈɑrɡənˌɔt\n',
 "Argonaut's\tˈɑrɡənˌɔts\n",
 'Argonne\tˈɑrɡˌɑn, ˈɑrɡˌon\n',
 "Argonne's\n",
 'Argos\n',
 "Argos's\n",
 'Argus\tˈɑrɡəs\n',
 "Argus's\n",
 'Ariadne\tˌɛriˈædni\n',
 "Ariadne's\n",
 'Arianism\tˈæriənˌɪzəm\n',
 "Arianism's\n",
 'Ariel\tˈɛriəl\n',
 "Ariel's\n",
 'Aries\tˈɛriz\n',
 "Aries's\n",
 'Arieses\n',
 'Ariosto\n',
 "Ariosto's\n",
 'Aristarchus\n',
 "Aristarchus's\n",
 'Aristides\n',
 "Aristides's\n",
 'Aristophanes\tˌærəstˈɔfəniz\n',
 "Aristophanes's\n",
 'Aristotelian\tɝˌɪstətˈiliən\n',
 "Aristotelian's\n",
 'Aristotle\tˈɛrəstˌɑtəl\n',
 "Aristotle's\tˈɛrəstˌɑtəlz\n",
 'Arius\n',
 "Arius's\n",
 'Arizona\tˌɛrɪzˈonə\n',
 "Arizona's\tˌɛrɪzˈonəz\n",
 'Arizonan\tɝˈɪzonən, ɝɪzˈonən\n',
 "Arizonan's\n",
 'Arizonans\tɝˈɪzonənz, ɝɪzˈonənz\n',
 'Arizonian\n',
 "Arizonian's\n",
 'Arizonians\n',
 'Arjuna\n',
 "Arjuna's\n",
 'Arkansan\tˌɑrkˈænzən\n',
 "Arkansan's\n",
 'Arkansas\tˈɑrkənsˌɑ\n',
 "Arkansas's\tˈɑrkənsˌɑz\n",
 'Arkhangel

In [1]:
import configparser
# Parse the settings file; read() returns the list of files it managed to parse
config = configparser.ConfigParser()
config.read('/home/jason/.lexitronrc')

['/home/jason/.lexitronrc']

In [2]:
config.items()

ItemsView(<configparser.ConfigParser object at 0x7fe6a853d438>)

In [4]:
# List the (option, value) pairs of the DEFAULT section; values come back as strings
for item in config.items('DEFAULT'):
    print(item)

('lexicon', '/home/jason/words/english-words_prons.txt')
('icase', 'False')
('idiac', 'True')


In [5]:
config.get('DEFAULT', 'lexicon')

'/home/jason/words/english-words_prons.txt'

In [7]:
# 'idiac' is stored as the string 'True', which getint() cannot convert
# (getboolean() is the accessor for such values).  Catch the ValueError and
# print it, so the cell shows the failure without stopping a Run All.
try:
    config.getint('DEFAULT', 'idiac')
except ValueError as err:
    print(f'ValueError: {err}')

ValueError: invalid literal for int() with base 10: 'True'

In [8]:
    # Load the word list from NLTK's 'words' corpus
    from nltk.corpus import words
    wordlist = words.words()

In [9]:
len(wordlist)

236736

In [10]:
wordlist[3000:3100]

['advisably',
 'advisal',
 'advisatory',
 'advise',
 'advised',
 'advisedly',
 'advisedness',
 'advisee',
 'advisement',
 'adviser',
 'advisership',
 'advisive',
 'advisiveness',
 'advisor',
 'advisorily',
 'advisory',
 'advocacy',
 'advocate',
 'advocateship',
 'advocatess',
 'advocation',
 'advocator',
 'advocatory',
 'advocatress',
 'advocatrice',
 'advocatrix',
 'advolution',
 'advowee',
 'advowson',
 'ady',
 'adynamia',
 'adynamic',
 'adynamy',
 'adyta',
 'adyton',
 'adytum',
 'adz',
 'adze',
 'adzer',
 'adzooks',
 'ae',
 'Aeacides',
 'Aeacus',
 'Aeaean',
 'Aechmophorus',
 'aecial',
 'Aecidiaceae',
 'aecidial',
 'aecidioform',
 'Aecidiomycetes',
 'aecidiospore',
 'aecidiostage',
 'aecidium',
 'aeciospore',
 'aeciostage',
 'aecioteliospore',
 'aeciotelium',
 'aecium',
 'aedeagus',
 'Aedes',
 'aedicula',
 'aedile',
 'aedileship',
 'aedilian',
 'aedilic',
 'aedilitian',
 'aedility',
 'aedoeagus',
 'aefald',
 'aefaldness',
 'aefaldy',
 'aefauld',
 'aegagropila',
 'aegagropile',
 'aega

In [13]:
# Open the output file for writing; the handle f is used by the cell that follows
f = open('/home/jason/dev/src/python3/prondict/wordlists/nltk-corpus-words-words.txt', 'w')

In [14]:
# Write one word per line, then close the file so buffered output is flushed
# to disk and the handle is released even if a write fails part-way.
try:
    for word in wordlist:
        print(word, file=f)
finally:
    f.close()

In [1]:
# NOTE(review): lexicon is a project-local module not shown in this notebook;
# its behaviour is inferred only from the calls and outputs recorded below.
import lexicon
# Build a Lexicon from the combined word/pronunciation file
lex = lexicon.Lexicon('/home/jason/words/medium-combined_prons.txt')

In [2]:
lex.stats()

(299537, 35245, 38567)

In [5]:
# Rebuild the Lexicon from the same file with diacBlind=True -- presumably this
# makes lookups ignore diacritics (the 'fiancé' query below returns 'fiance');
# confirm against lexicon.Lexicon
lex = lexicon.Lexicon('/home/jason/words/medium-combined_prons.txt', diacBlind = True)

In [6]:
lex.contains('houses')

[]

In [7]:
lex.stats()

(299537, 35245, 38567)

In [11]:
lex.contains('housing')

['housing']

In [13]:
lex.contains('fiancé')

['fiance']

In [15]:
lex.definitions('fiancé')

[]

In [4]:
print(wn.subdir)

corpora


In [86]:
help(wn.langs)

Help on method langs in module nltk.corpus.reader.wordnet:

langs() method of nltk.corpus.reader.wordnet.WordNetCorpusReader instance
    return a list of languages supported by Multilingual Wordnet



In [16]:
wn.synsets('dom', lang='pol')

[Synset('house.n.01'), Synset('house.n.06'), Synset('home.n.01')]

In [13]:
# Look up the literal form 'house' under every language code that wn.langs() reports
for lg in wn.langs():
    print('Language=', lg)
    print(wn.synsets('house', lang=lg))

Language= eng
[Synset('house.n.01'), Synset('firm.n.01'), Synset('house.n.03'), Synset('house.n.04'), Synset('house.n.05'), Synset('house.n.06'), Synset('house.n.07'), Synset('sign_of_the_zodiac.n.01'), Synset('house.n.09'), Synset('family.n.01'), Synset('theater.n.01'), Synset('house.n.12'), Synset('house.v.01'), Synset('house.v.02')]
Language= als
[]
Language= arb
[]
Language= bul
[]
Language= cat
[]
Language= cmn
[]
Language= dan
[]
Language= ell
[]
Language= eus
[]
Language= fas
[]
Language= fin
[]
Language= fra
[]
Language= glg
[]
Language= heb
[]
Language= hrv
[]
Language= ind
[]
Language= ita
[]
Language= jpn
[]
Language= nld
[Synset('house.n.07')]
Language= nno
[]
Language= nob
[]
Language= pol
[]
Language= por
[]
Language= qcn
[]
Language= slv
[]
Language= spa
[]
Language= swe
[]
Language= tha
[]
Language= zsm
[]


In [87]:
def isPrefix(abb):
    '''Collect, in their listed order, the known names that begin with abb'''
    candidates = ('Albania', 'Algeria', 'Azerbaijan', 'France', 'Frisia', 'Friuli', 'Germany', 'Ghana', 'Ghent')
    return [candidate for candidate in candidates if candidate.startswith(abb)]

In [95]:
isPrefix('Fris')

['Frisia']