# ILN3: WSD. El Algoritmo de Lesk

In [1]:
# Alejandro Marco Palomares y Daniela Márquez

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn

def wsd(sentence):
    
    #set of words in the sentence
    context = set(word_tokenize(sentence))
    
    #remove stop_words from sentence
    non_stop_words_context = []
    for word in context:
        if word.lower() not in stopwords.words('english'):
            non_stop_words_context.append(word.lower())
    
    for word in non_stop_words_context:
        
        #by default best sense is null
        best_sense = None
        #by default overlap is 0 
        max_overlap = 0
        #obtain senses for a word
        word_senses = wn.synsets(word)

        for sense in word_senses:
            
            signature = obtain_Signature(sense)
            
            overlap = do_Overlap(signature, context)
            
            if overlap > max_overlap:
                max_overlap = overlap
                best_sense = sense
        
        if(best_sense is not None):
            print("... Analizando " + word + " (" + str(len(word_senses)) + " sentidos) " +
                  "--> " + str(best_sense) + " con solape " + str(max_overlap))
            print(str(best_sense.definition()))
        
        print("                                                      ")
            
            
def obtain_Signature(sense):
    #signature -> considering definitions and examples of synsets 
    definition = set(word_tokenize(sense.definition().lower()))
    for example in sense.examples(): 
        definition = definition.union(set(word_tokenize(example.lower())))
    return definition

def do_Overlap(signature, context):
    #get length of overlaps:
    overlaps = signature.intersection(context)
    return len(overlaps)


In [2]:
wsd("We can be heroes just for one day")
     
#Solución para: "We can be heroes just for one day"
#... Analizando heroes (7 sentidos) --> hero.n.01 con solape 1
#a man distinguished by exceptional courage and nobility and strength
#... Analizando one (9 sentidos) --> one.s.05 con solape 3
#indefinite in time or position
#... Analizando day (10 sentidos) --> day.n.05 con solape 3
#the recurring hours when you are not sleeping (especially those when you are working)

... Analizando one (9 sentidos) --> Synset('one.s.05') con solape 2
indefinite in time or position
                                                      
... Analizando heroes (7 sentidos) --> Synset('hero.n.01') con solape 1
a man distinguished by exceptional courage and nobility and strength
                                                      
... Analizando day (10 sentidos) --> Synset('sidereal_day.n.01') con solape 3
the time for one complete rotation of the earth relative to a particular star, about 4 minutes shorter than a mean solar day
                                                      


In [3]:
wsd("I saw a man who is 98 years old and can still walk and tell jokes")

... Analizando old (9 sentidos) --> Synset('old.a.01') con solape 3
(used especially of persons) having lived for a relatively long time or attained a specific age
                                                      
... Analizando man (13 sentidos) --> Synset('man.n.01') con solape 4
an adult person who is male (as opposed to a woman)
                                                      
... Analizando still (18 sentidos) --> Synset('still.n.03') con solape 3
an apparatus used for the distillation of liquids; consists of a vessel in which a substance is vaporized by heat and a condenser where the vapor is condensed
                                                      
... Analizando tell (9 sentidos) --> Synset('assure.v.02') con solape 5
inform positively and with certainty and confidence
                                                      
... Analizando jokes (6 sentidos) --> Synset('joke.v.01') con solape 3
tell a joke; speak humorously
                                      