In [1]:
# importing required libraries

import nltk
from nltk.corpus import wordnet
from nltk.corpus import stopwords

In [8]:
def remove_stpwords(sent):
    """Tokenize ``sent`` and drop English stop words.

    Parameters
    ----------
    sent : str
        The sentence to tokenize and filter.

    Returns
    -------
    list of str
        The tokens of ``sent`` in their original order and casing, minus
        every token found in NLTK's English stop-word list.

    Notes
    -----
    Relies on the NLTK ``stopwords`` corpus and the tokenizer models used by
    ``nltk.word_tokenize`` being available locally.
    """
    stp_wrds = set(stopwords.words('english'))    # set -> fast membership tests.

    # Tokenize the argument itself (not any notebook-level variable) so the
    # function is self-contained and works on a fresh kernel.
    wrd_tkns = nltk.word_tokenize(sent)

    # The stop-word list is lower-case, so compare case-insensitively;
    # otherwise capitalised stop words such as "The" would slip through.
    return [wrd for wrd in wrd_tkns if wrd.lower() not in stp_wrds]

In [34]:
# The Lesk algorithm picks, for each word, the WordNet definition whose wording overlaps most with the
# surrounding sentence. It returns a list of tuples, each holding a word and its chosen definition.
# E.g.
# [
#     (word1, best definition of word1), - Tuple 1
#     (word2, best definition of word2), - Tuple 2
#     ...
# ]

# Parameters of the Lesk function:
# words: the list of words for which a sense definition needs to be found.
# sent: the sentence that provides the context used to match each word to a sense.

def lesk_algo(words, sent):
    """Choose a WordNet definition for each word via simplified Lesk overlap.

    Each WordNet sense of a word is scored by the number of distinct tokens
    its definition shares with the lower-cased, stop-word-filtered context
    sentence. The highest-scoring definition is kept; on a tie the sense
    listed first by WordNet wins.

    Parameters
    ----------
    words : iterable of str
        Words to disambiguate. Each is lower-cased before the WordNet lookup.
    sent : str
        Sentence supplying the context for the overlap score.

    Returns
    -------
    list of tuple
        ``(word, definition)`` pairs in input order. ``definition`` is ``''``
        when WordNet returns no synsets for the word.
    """
    # The context is the same for every word and sense, so build it once.
    # A set also makes each intersection cheap.
    context = set(remove_stpwords(sent.lower()))

    wrd_def = []                           # (word, best definition) pairs.

    for word in words:
        max_overlap = -1                   # below any real score: the first sense is always accepted.
        best_sense = ''                    # stays empty if the word has no synsets.

        for sense in wordnet.synsets(word.lower()):
            gloss = sense.definition()

            # Lower-case the gloss tokens so they are comparable with the
            # lower-cased context (e.g. proper nouns inside a definition).
            gloss_tkns = {tkn.lower() for tkn in nltk.word_tokenize(gloss)}
            overlap = len(gloss_tkns & context)

            if overlap > max_overlap:      # strict '>' keeps the earliest sense on ties.
                max_overlap = overlap
                best_sense = gloss

        # Pair up inside the loop so one-shot iterables of words work too.
        wrd_def.append((word, best_sense))

    return wrd_def


In [27]:
# Demo inputs: the context sentence and the words to disambiguate against it.
sentence = 'The frog is jumping around the bank of the river'
words = [
    'bank',
    'jumping',
    'leaping',
    'frog',
]

In [35]:
# Disambiguate the demo words against the demo sentence; the cell displays the result.
lesk_algo(words=words, sent=sentence)

[('bank', 'enclose with a bank'),
 ('jumping', 'the act of jumping; propelling yourself off the ground'),
 ('leaping', 'a light, self-propelled movement upwards or forwards'),
 ('frog',
  'any of various tailless stout-bodied amphibians with long hind limbs for leaping; semiaquatic and terrestrial species')]