In [52]:
import spacy

from spacy_wordnet.wordnet_annotator import WordnetAnnotator 

# Load a spaCy English model. (The original note here states that the
# supported models are "es" and "en".)
nlp = spacy.load('en_core_web_sm')
# Add the WordNet annotator to the pipeline, positioned after the tagger.
# NOTE(review): passing a component instance is the spaCy v2 form of add_pipe;
# spaCy v3 expects a registered component name -- confirm the installed version.
nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')
token = nlp('bank')[0]

# The `wordnet` extension links the spaCy token to the NLTK WordNet interface,
# giving access to its synsets and lemmas.
# These two results are not displayed: a cell only shows its last expression.
token._.wordnet.synsets()
token._.wordnet.lemmas()

# The extension also tags the token with WordNet domains; as the last
# expression in the cell, this list is what gets displayed.
token._.wordnet.wordnet_domains()

['geology',
 'skiing',
 'geography',
 'diplomacy',
 'book_keeping',
 'administration',
 'numismatics',
 'politics',
 'betting',
 'banking',
 'insurance',
 'social',
 'money',
 'finance',
 'post',
 'law',
 'commerce',
 'enterprise',
 'time_period',
 'industry',
 'economy',
 'tax',
 'philately',
 'exchange',
 'money',
 'finance',
 'betting',
 'card',
 'transport',
 'skiing',
 'town_planning',
 'money',
 'banking',
 'astronomy',
 'aviation',
 'basketball',
 'gas',
 'basketball',
 'transport',
 'aviation',
 'transport',
 'cycling',
 'banking',
 'economy',
 'book_keeping',
 'enterprise',
 'volleyball',
 'card',
 'betting',
 'banking',
 'badminton',
 'finance',
 'banking',
 'exchange',
 'banking',
 'economy']

In [50]:
# Imagine we want to enrich the following sentence with synonyms
sentence = nlp('colt')

# spaCy WordNet lets you find synonyms by domain of interest;
# here we restrict the lookup to the "animals" and "military" domains
_domains = ['animals', 'military']
_sentence = []

# For each token in the sentence
for token in sentence:
    # Keep only the synsets that fall within the desired domains
    synsets = token._.wordnet.wordnet_synsets_for_domain(_domains)
    if not synsets:
        # No synset in those domains: keep the original word unchanged
        _sentence.append(token.text)
        continue

    # Merge the lemma names of every matching synset, dropping duplicates.
    # Sorting makes the group order reproducible across kernel restarts
    # (plain set iteration order for strings varies between sessions).
    lemmas_for_synset = sorted(
        {lemma for synset in synsets for lemma in synset.lemma_names()}
    )
    # Append exactly ONE "(a|b|c)" group per token. The append used to sit
    # inside the per-synset loop, which emitted one growing group per synset
    # and so repeated the same token several times in the output.
    _sentence.append('({})'.format('|'.join(lemmas_for_synset)))

In [51]:
# Show the enriched sentence: the collected pieces separated by single spaces.
print(*_sentence)

colt


In [None]:
"coffee", "apple","crayfish"

In [40]:
from nltk.corpus import wordnet as wn

In [44]:
# Root synset for the animal hierarchy. WordNet sense numbers are 1-based, so
# the first noun sense is 'animal.n.01'. The previous 'animal.n.00' is not a
# real sense number; it appears to resolve only by accident (index -1) and is
# inconsistent with the 'sport.n.01' / 'person.n.01' lookups in this notebook.
animals = wn.synset('animal.n.01')

In [45]:
# Unique lemma names of every synset reachable from `animals` by repeatedly
# following hyponym links; the set comprehension removes duplicates.
list({
    lemma
    for hyponym in animals.closure(lambda synset: synset.hyponyms())
    for lemma in hyponym.lemma_names()
})

['gallinaceous_bird',
 'miniature_pinscher',
 'leatherfish',
 'megatherian',
 'kangaroo_bear',
 'Asiatic_flying_squirrel',
 'coral_snake',
 'blacksnake',
 'tigon',
 'hoactzin',
 'Coluber_constrictor_flaviventris',
 'Ectopistes_migratorius',
 'prairie_wolf',
 'blenny',
 'alligator_lizard',
 'throstle',
 'yellow_gurnard',
 'big_game',
 'white_admiral',
 'snakefly',
 'sea_squirt',
 'kitty-cat',
 'German_police_dog',
 'raptorial_bird',
 'sheep_ked',
 'horned_lizard',
 'Paralithodes_camtschatica',
 'babirusa',
 'sulfur_butterfly',
 'Diodon_holocanthus',
 'myna_bird',
 'Balanus_balanoides',
 'raccoon_dog',
 'barunduki',
 'Arizona_elegans',
 'shovelhead',
 'bottle-nosed_whale',
 'Calamus_penna',
 'greenfly',
 'pouched_rat',
 'Hylactophryne_augusti',
 'nightjar',
 'upland_plover',
 'Chlorura_chlorura',
 'piglet',
 'sausage_hound',
 'blastocyst',
 'Egretta_thula',
 'Varanus_niloticus',
 'Damaliscus_lunatus',
 'pinche',
 'nurse_shark',
 'Anthus_pratensis',
 'tortricid',
 'Martes_martes',
 'Holst

In [80]:
# Root synset for the sport hierarchy: sense 01 of the noun "sport".
# A later cell uses it as the starting point of a hyponym closure.
sport = wn.synset('sport.n.01')

In [81]:
# Unique lemma names of every synset reachable from `sport` by repeatedly
# following hyponym links; the set comprehension removes duplicates.
list({
    lemma
    for hyponym in sport.closure(lambda synset: synset.hyponyms())
    for lemma in hyponym.lemma_names()
})

['rowing',
 'netball',
 'showjumping',
 'broad_jump',
 'sparring',
 'softball',
 'piaffe',
 'baseball_game',
 'spar',
 'sumo',
 'three-day_event',
 'stadium_jumping',
 'skateboarding',
 'belly_flop',
 'hockey_game',
 'rugby_football',
 'perfect_game',
 'tauromachy',
 'match_play',
 'water-skiing',
 'greyhound_racing',
 'one-hitter',
 'stroke_play',
 'basketball',
 'squash',
 'surf_casting',
 'roller_skating',
 'fight',
 'rugger',
 'field_game',
 'volleyball',
 'angling',
 'boat_racing',
 'pushball',
 'hunting',
 'day_game',
 'professional_golf',
 'fisticuffs',
 'snorkeling',
 'diving',
 'tumbling',
 'cross-country_skiing',
 'flip',
 'ducking',
 'riding',
 'pugilism',
 'floating',
 'gymnastics',
 'lacrosse',
 'belly_flopper',
 'stickball',
 'pigsticking',
 'foxhunt',
 'singles',
 'fly_casting',
 'equestrian_sport',
 'swimming',
 'American_football_game',
 'professional_football',
 'bait_casting',
 'hardball',
 'three-hitter',
 'swallow_dive',
 'in-fighting',
 'deer_hunt',
 'tobogganing'

In [82]:
# Root synset for the person hierarchy: sense 01 of the noun "person".
person = wn.synset('person.n.01')
# Unique lemma names of every synset reachable from `person` by repeatedly
# following hyponym links; the set comprehension removes duplicates.
list({
    lemma
    for hyponym in person.closure(lambda synset: synset.hyponyms())
    for lemma in hyponym.lemma_names()
})

['screw',
 'epileptic',
 'stoolpigeon',
 'mimicker',
 'tinner',
 'fashion_model',
 'flash_in_the_pan',
 'East-sider',
 'gossiper',
 'pluralist',
 'Aramean',
 'expert_witness',
 'negotiatrix',
 'jilt',
 'orientalist',
 'straw_man',
 'granter',
 'bondsman',
 'fry_cook',
 'behaviourist',
 'tapper',
 'sniveller',
 'survivalist',
 'accused',
 'caffre',
 'nun',
 'aphorist',
 'estimator',
 'lithomancer',
 'Cyprian',
 'rigger',
 'small_fry',
 'Levantine',
 'Grecian',
 'census_taker',
 'cheesemonger',
 'mill-girl',
 'liege_subject',
 'diseased_person',
 'lexicologist',
 'baby_farmer',
 'Dorian',
 'adult_female',
 'psychopath',
 'Ponca',
 'snuffler',
 'co-pilot',
 'recruiter',
 'foster-mother',
 'International_Grandmaster',
 'mole',
 'housekeeper',
 'right_hander',
 'Elamite',
 'landlady',
 'spree_killer',
 'speleologist',
 'lulu',
 'Kampuchean',
 'cigarette_smoker',
 'esquire',
 'paleographer',
 'mining_engineer',
 'stoolie',
 'urinator',
 'Utahan',
 'Methuselah',
 'dog_in_the_manger',
 'minion

In [None]:
# A thousand domains
# A thousand domains + synonyms (explore synonyms better)
# Lemmatization of words
# Filter out the least frequently occurring words