In [1]:
# importing required libraries

import nltk
from nltk.corpus import wordnet
from nltk.corpus import stopwords

In [63]:
def remove_stpwords(sent):
    """Tokenize ``sent`` and return its content words.

    A token is dropped when it is an English stop word or when it is two
    characters long or shorter.  The stop-word check is case-insensitive, so
    a capitalised stop word such as "The" is removed just like "the".
    Tokens that are kept retain their original casing and order.

    Requires the NLTK stop-word corpus and tokenizer data to be available
    locally.

    Parameters
    ----------
    sent : str
        Text to split with ``nltk.word_tokenize``.

    Returns
    -------
    list of str
        The surviving tokens, in sentence order.
    """
    stp_wrds = set(stopwords.words('english'))    # set gives O(1) membership tests.

    return [
        wrd
        for wrd in nltk.word_tokenize(sent)
        # lower-case only for the comparison; the token itself is returned unchanged.
        if len(wrd) > 2 and wrd.lower() not in stp_wrds
    ]

In [48]:
def get_sense_def(wrd_lst):
    """Collect the vocabulary used by every WordNet definition of the given words.

    Each word is lower-cased and looked up in WordNet; every definition of
    every synset found is passed through ``remove_stpwords`` and the
    resulting tokens are pooled together.

    Parameters
    ----------
    wrd_lst : iterable of str
        Words whose sense definitions should be gathered.

    Returns
    -------
    set of str
        Distinct filtered tokens across all definitions (empty when no word
        has a synset).
    """
    return {
        token
        for entry in wrd_lst
        for synset in wordnet.synsets(entry.lower())
        for token in remove_stpwords(synset.definition())
    }

In [73]:
def get_best_sense(wrd, company):
    """Pick the WordNet definition of ``wrd`` that best matches its neighbours.

    The neighbours' definitions are reduced to a vocabulary set via
    ``get_sense_def``.  Each sense of ``wrd`` is scored by how many of its own
    filtered definition tokens appear in that vocabulary, and the definition
    with the highest score is returned.

    Parameters
    ----------
    wrd : str
        Word to disambiguate (looked up lower-cased).
    company : iterable of str
        Neighbouring words that provide the context.

    Returns
    -------
    str
        Definition text of the winning sense.  On a tie the earliest sense
        returned by WordNet wins, so a word with no overlap at all still gets
        its first sense.  An empty string is returned when ``wrd`` has no
        synsets.
    """
    context_vocab = get_sense_def(company)

    def shared_tokens(synset):
        # number of distinct definition tokens also present in the context vocabulary.
        gloss_tokens = set(remove_stpwords(synset.definition()))
        return len(gloss_tokens.intersection(context_vocab))

    # max() keeps the first of several equally-scored items, i.e. the earliest sense.
    winner = max(wordnet.synsets(wrd.lower()), key=shared_tokens, default=None)

    if winner is None:
        return ''
    return winner.definition()

In [49]:
# LESK algo returns the best definition of the sense in which the words are supposed to be used.
# it returns a list which contains Tuples with word and its definition.
# E.g.
# [
#     (word1, best definition of word1), - Tuple 1
#     (word2, best definition of word2), - Tuple 2
#     ...
# ]

# List of parameters passed to the LESK algo fn.
# sent: the sentence whose content words will each be paired with their best-matching sense definition.

def lesk_algo_sent(sent):
    """Disambiguate every content word of ``sent`` with a simplified Lesk pass.

    The sentence is lower-cased and stripped of stop words.  Each remaining
    word is then scored against the words immediately before and after it
    (only one neighbour at either end of the sentence, none when it is the
    sole content word) and paired with the definition ``get_best_sense``
    selects.

    Parameters
    ----------
    sent : str
        Sentence to analyse.

    Returns
    -------
    list of (str, str)
        ``(word, best definition)`` tuples in sentence order; empty when no
        content word survives filtering.
    """
    split_sent = remove_stpwords(sent.lower())     # content words, in order.
    wrd_def = []                                   # best definition for each content word.

    for key, wrd in enumerate(split_sent):
        # Slices never raise at the boundaries, so the first word, the last
        # word and a single-word sentence all work without special cases.
        prev_next_wrd = split_sent[max(key - 1, 0):key] + split_sent[key + 1:key + 2]
        wrd_def.append(get_best_sense(wrd, prev_next_wrd))

    return list(zip(split_sent, wrd_def))

In [77]:
# Example sentence passed to lesk_algo_sent in the demo cell below.
sentence = "The frog is jumping around the bank of the river"
# NOTE(review): `words` is not referenced by any visible cell — confirm whether it is still needed.
words = ['bank', 'jumping', 'leaping', 'frog']

In [79]:
# Content words left after lower-casing and stop-word removal:
# ['frog', 'jumping', 'around', 'bank', 'river']
lesk_algo_sent(sentence)

[('frog',
  'any of various tailless stout-bodied amphibians with long hind limbs for leaping; semiaquatic and terrestrial species'),
 ('jumping',
  'the act of participating in an athletic competition in which you must jump'),
 ('around', 'in the area or vicinity'),
 ('bank', 'sloping land (especially the slope beside a body of water)'),
 ('river', 'a large natural stream of water (larger than a creek)')]