In [1]:
from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import nltk

wordnet_lemmatizer = WordNetLemmatizer()

stop_word_list = stopwords.words('english')

# input should be a string
def nlp_pipeline(text):
    
    # if you want you can split in sentences - i'm usually skipping this step
    # text = nltk.sent_tokenize(text) 
    
    #tokenize words for each sentence
    text = nltk.word_tokenize(text)
    
    # pos tagger
    text = nltk.pos_tag(text)

    # lemmatizer
    text = [wordnet_lemmatizer.lemmatize(token.lower(),"v") if pos[0] == "V" else wordnet_lemmatizer.lemmatize(token.lower()) for token,pos in text]
    
    # remove punctuation and numbers
    text = [token for token in text if token.isalpha()]
    
    # remove stopwords - be careful with this step    
    text = [token for token in text if token not in stop_word_list]

    return text

In [7]:
# let's consider two sentences where "cell" is mentioned

sent1 = "The terrorist cell was neutralized near the southern Russian city of Makhachkala, the capital of the Republic of Dagestan."

sent2 = "The molecule, which uses light energy to move protons across a somatic cell membrane, proved unsuitable for crystallography."

# you clean the sentences using our pipeline
clean_sent1 = nlp_pipeline(sent1)

clean_sent2 = nlp_pipeline(sent2)

print ("clean sent 1 (terrorist cell):", clean_sent1)
print ("clean sent 2 (biological cell):", clean_sent2)
print (" ")

# for each possible sense of "cell" you can, for instance, check the overlap between the definition and the sentence

clean sent 1 (terrorist cell): ['terrorist', 'cell', 'neutralize', 'near', 'southern', 'russian', 'city', 'makhachkala', 'capital', 'republic', 'dagestan']
clean sent 2 (biological cell): ['molecule', 'use', 'light', 'energy', 'move', 'proton', 'across', 'somatic', 'cell', 'membrane', 'prove', 'unsuitable', 'crystallography']
 


In [5]:
# the first thing I tried is looking for the overlap of words between the definition and the sentence

word = "cell"

senses = wn.synsets(word)

for sense in senses:
    # get definition of sense
    definition =  sense.definition()
    
    # you clean the definition with our pipeline
    clean_definition = nlp_pipeline(definition)
    
    # you check the intersection of the two sentences
    inters_1 = set(clean_sent1).intersection(clean_definition)
    inters_2 = set(clean_sent2).intersection(clean_definition)
    
    print (definition)
    print ("clean definition:", clean_definition)
    print ("intersection with terrorist cell sentence:", inters_1)
    print ("intersection with biological cell sentence:", inters_2)
    print (len(inters_1),len(inters_2))
    print (" ")

any small compartment
clean definition: ['small', 'compartment']
intersection with terrorist cell sentence: set()
intersection with biological cell sentence: set()
0 0
 
(biology) the basic structural and functional unit of all organisms; they may exist as independent units of life (as in monads) or may form colonies or tissues as in higher plants and animals
clean definition: ['biology', 'basic', 'structural', 'functional', 'unit', 'organism', 'may', 'exist', 'independent', 'unit', 'life', 'monad', 'may', 'form', 'colony', 'tissue', 'higher', 'plant', 'animal']
intersection with terrorist cell sentence: set()
intersection with biological cell sentence: set()
0 0
 
a device that delivers an electric current as the result of a chemical reaction
clean definition: ['device', 'deliver', 'electric', 'current', 'result', 'chemical', 'reaction']
intersection with terrorist cell sentence: set()
intersection with biological cell sentence: set()
0 0
 
a small unit serving as part of or as the nu

As you can see, that doesn't work very well because the are no words in common between the definitions and the sentences. So I tried a different thing, instead of using only the definition I also took into account hypernyms, hyponyms and synonyms to expand the vocabulary of each sense

This way -- at least -- you can capture a couple of words, but you can see how hard it is when you look simply for words in common. 

In [6]:
for sense in senses:
    # get definition of sense
    definition = sense.definition()    
    
    # take all hypernyms, hyponyms and synonyms - you need to do a bit of cleaning
    hypernyms = [hyper.lemmas()[0].name().replace("_"," ") for hyper in sense.hypernyms()] 
    
    hyponyms = [hypon.lemmas()[0].name().replace("_"," ") for hypon in sense.hyponyms()] 
        
    synonyms = [synon.replace("_"," ") for synon in sense.lemma_names()] 
    
    # you concatenate all of them - check out online how to use "join" to connect elements of a list
    sense_text = sense.definition() + " ".join(sense.examples())  + " ".join(hypernyms)+ " ".join(hyponyms)+ " ".join(synonyms)

    # now you have a very long definition, which uses all these pieces of information
    clean_definition = nlp_pipeline(sense_text)
    

    inters_1 = set(clean_sent1).intersection(clean_definition)
    inters_2 = set(clean_sent2).intersection(clean_definition)    
    
    print (definition)
    print ("clean definition:", clean_definition)
    print ("intersection with terrorist cell sentence:", inters_1)
    print ("intersection with biological cell sentence:", inters_2)
    print (len(inters_1),len(inters_2))
    print (" ")

any small compartment
clean definition: ['small', 'compartmentthe', 'cell', 'honeycombcompartmentcell']
intersection with terrorist cell sentence: {'cell'}
intersection with biological cell sentence: {'cell'}
1 1
 
(biology) the basic structural and functional unit of all organisms; they may exist as independent units of life (as in monads) or may form colonies or tissues as in higher plants and animals
clean definition: ['biology', 'basic', 'structural', 'functional', 'unit', 'organism', 'may', 'exist', 'independent', 'unit', 'life', 'monad', 'may', 'form', 'colony', 'tissue', 'higher', 'plant', 'animalsliving', 'thingakaryocyte', 'archespore', 'arthrospore', 'arthrospore', 'beta', 'cell', 'blastema', 'blastomere', 'daughter', 'cell', 'embryonic', 'cell', 'fiber', 'flagellate', 'cell', 'gametocyte', 'kupffer', 'cell', 'leydig', 'cell', 'mother', 'cell', 'parthenote', 'plant', 'cell', 'polar', 'body', 'recombinant', 'reproductive', 'cell', 'sertoli', 'cell', 'somatic', 'cell', 'zygotec