In [1]:
import nltk
nltk.download('wordnet')

%pip install pywsd
from pywsd.lesk import simple_lesk


[nltk_data] Downloading package wordnet to /Users/chloe/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Note: you may need to restart the kernel to use updated packages.


Warming up PyWSD (takes ~10 secs)... took 6.238192081451416 secs.


In [2]:
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk

def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
    context = set(context_sentence)
    if synsets is None:
        synsets = wn.synsets(ambiguous_word)

    if pos:
        synsets = [ss for ss in synsets if str(ss.pos()) == pos]

    if not synsets:
        return None

    _, sense = max(
    (len(context.intersection(ss.definition().split())), ss) for ss in synsets
    )
    return sense

In [3]:
ambiguous_word1 = 'raise'
ambiguous_word2 = 'raise'
context_sentence1 = "They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20."
context_sentence2 = "Sweeney was raised in northwestern Idaho in the state's Panhandle region along the Washington border, at a rural lakeside home that her family has inhabited for five generations."

print(lesk(context_sentence1, ambiguous_word1, pos=None, synsets=None))
print(lesk(context_sentence2, ambiguous_word2, pos=None, synsets=None))


Synset('raise.v.23')
Synset('raise.v.24')


In [4]:
for ss in wn.synsets('raise'):
    print(ss, ss.definition())

Synset('raise.n.01') the amount a salary is increased
Synset('ascent.n.01') an upward slope or grade (as in a road)
Synset('raise.n.03') increasing the size of a bet (as in poker)
Synset('lift.n.12') the act of raising something
Synset('raise.v.01') raise the level or amount of something
Synset('raise.v.02') raise from a lower to a higher position
Synset('raise.v.03') cause to be heard or known; express or utter
Synset('raise.v.04') collect funds for a specific purpose
Synset('grow.v.07') cultivate by growing, often involving improvements by means of agricultural techniques
Synset('rear.v.02') bring up
Synset('raise.v.07') summon into action or bring into existence, often as if by magic
Synset('lift.v.03') move upwards
Synset('raise.v.09') construct, build, or erect
Synset('arouse.v.01') call forth (emotions, feelings, and responses)
Synset('raise.v.11') create a disturbance, especially by making a great noise
Synset('lift.v.10') raise in rank or condition
Synset('enhance.v.01') increa

In [5]:
raise_sents = ["They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20",
"She was born and raised in northwestern Idaho in the state's Panhandle region along the Washington border."]

print("======== TESTING simple_lesk ===========\n")
from pywsd.lesk import simple_lesk
print("#TESTING simple_lesk() ...")
print("Context:", raise_sents[0])
answer = simple_lesk(raise_sents[0],'raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING simple_lesk() with POS ...")
print("Context:", raise_sents[1])
answer = simple_lesk(raise_sents[1],'raise','v')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("======== TESTING adapted_lesk ===========\n")
from pywsd.lesk import adapted_lesk
print("#TESTING adapted_lesk() ...")
print("Context:", raise_sents[0])
answer = adapted_lesk(raise_sents[0],'raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING adapted_lesk() with pos, stem, nbest and scores.")
print("Context:", raise_sents[0])
answer = adapted_lesk(raise_sents[0],'raise','v', True, \
                     nbest=True, keepscore=True)
print("Senses ranked by #overlaps:", answer)
best_sense = answer[0][1]
definition = best_sense.definition()
print("Definition:", definition)
print()

print("======== TESTING adapted_lesk ===========\n")
from pywsd.lesk import adapted_lesk
print("#TESTING adapted_lesk() ...")
print("Context:", raise_sents[1])
answer = adapted_lesk(raise_sents[1],'raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING adapted_lesk() with pos, stem, nbest and scores.")
print("Context:", raise_sents[1])
answer = adapted_lesk(raise_sents[1],'raise','v', True, \
                     nbest=True, keepscore=True)
print("Senses ranked by #overlaps:", answer)
best_sense = answer[0][1]
definition = best_sense.definition()
print("Definition:", definition)
print()


#TESTING simple_lesk() ...
Context: They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20
Sense: Synset('raise.v.01')
Definition: raise the level or amount of something

#TESTING simple_lesk() with POS ...
Context: She was born and raised in northwestern Idaho in the state's Panhandle region along the Washington border.
Sense: Synset('grow.v.07')
Definition: cultivate by growing, often involving improvements by means of agricultural techniques


#TESTING adapted_lesk() ...
Context: They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20
Sense: Synset('raise.v.01')
Definition: raise the level or amount of something

#TESTING adapted_lesk() with pos, stem, nbest and scores.
Context: They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20
Senses ranked by #overlaps: [(2, Sy

In [6]:
print("======== TESTING cosine_lesk ===========\n")
from pywsd.lesk import cosine_lesk

print("#TESTING cosine_lesk() ...")
print("Context:", raise_sents[0])
answer = cosine_lesk(raise_sents[0],'raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING cosine_lesk() with nbest results...")
print("Context:", raise_sents[0])
answer = cosine_lesk(raise_sents[0],'raise', nbest=True)
print("Senses ranked by #overlaps:", answer)
best_sense = answer[0][1]
definition = best_sense.definition()
print("Definition:", definition)
print()

print("======== TESTING baseline ===========\n")
from pywsd.baseline import random_sense, first_sense
from pywsd.baseline import max_lemma_count as most_frequent_sense

print("#TESTING random_sense() ...")
print("Context:", raise_sents[0])
answer = random_sense('raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING first_sense() ...")
print("Context:", raise_sents[0])
answer = first_sense('raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING most_frequent_sense() ...")
print("Context:", raise_sents[0])
answer = most_frequent_sense('raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("======== TESTING similarity ===========\n")
from pywsd.similarity import max_similarity

for sim_choice in ["path", "lch", "wup", "res", "jcn", "lin"]:
    print("Context:", raise_sents[1])
    print("Similarity:", sim_choice)
    answer = max_similarity(raise_sents[1], 'raise', sim_choice, pos="n")
    print("Sense:", answer)
    definition = answer.definition()
    print("Definition:", definition)
    print()

print("#TESTING cosine_lesk() ...")
print("Context:", raise_sents[1])
answer = cosine_lesk(raise_sents[1],'raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING cosine_lesk() with nbest results...")
print("Context:", raise_sents[1])
answer = cosine_lesk(raise_sents[1],'raise', nbest=True)
print("Senses ranked by #overlaps:", answer)
best_sense = answer[0][1]
definition = best_sense.definition()
print("Definition:", definition)
print()

print("======== TESTING baseline ===========\n")
from pywsd.baseline import random_sense, first_sense
from pywsd.baseline import max_lemma_count as most_frequent_sense

print("#TESTING random_sense() ...")
print("Context:", raise_sents[1])
answer = random_sense('raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING first_sense() ...")
print("Context:", raise_sents[1])
answer = first_sense('raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("#TESTING most_frequent_sense() ...")
print("Context:", raise_sents[1])
answer = most_frequent_sense('raise')
print("Sense:", answer)
definition = answer.definition()
print("Definition:", definition)
print()

print("======== TESTING similarity ===========\n")
from pywsd.similarity import max_similarity

for sim_choice in ["path", "lch", "wup", "res", "jcn", "lin"]:
    print("Context:", raise_sents[1])
    print("Similarity:", sim_choice)
    answer = max_similarity(raise_sents[1], 'raise', sim_choice, pos="n")
    print("Sense:", answer)
    definition = answer.definition()
    print("Definition:", definition)
    print()    


#TESTING cosine_lesk() ...
Context: They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20
Sense: Synset('raise.v.22')
Definition: establish radio communications with

#TESTING cosine_lesk() with nbest results...
Context: They agreed to raise an annual sum of £200 for the expenses of their commonwealth; they assigned their governor a salary of £20
Senses ranked by #overlaps: [(0.12666009927622474, Synset('raise.v.22')), (0.057670983608159435, Synset('arouse.v.01')), (0.03993615319154358, Synset('raise.v.03')), (0.0335012605086404, Synset('raise.n.01')), (0.024140227479263383, Synset('promote.v.02')), (0.0, Synset('resurrect.v.01')), (0.0, Synset('recruit.v.03')), (0.0, Synset('rear.v.02')), (0.0, Synset('raise.v.25')), (0.0, Synset('raise.v.24')), (0.0, Synset('raise.v.23')), (0.0, Synset('raise.v.21')), (0.0, Synset('raise.v.20')), (0.0, Synset('raise.v.19')), (0.0, Synset('raise.v.17')), (0.0, Synset('raise.v.1

In [None]:
# L'algorithme de Lesk compare la similarité des contextes en ne se basant que sur la définition des 2 mots à comparer
# alors que Cosine Lesk se base sur une mesure de similarité qui prend en compte la similarité cosinus entre le vecteur des contextes de mots. 
# Cosine Lesk peut être plus précis que Lesk, mais à condition que la représentation vectorielle utilisée soit de bonne qualité et qu'il y ait assez de données d'entraînement pour effectuer le test.
# Comme par exemple dans les tests effectués ci-dessus, nous pouvons voir que Simple Lesk n'a fourni que de brèves informations sur le mot "raise", c'est-à-dire une définition ainsi qu'un exemple de synset similaire. 
# D'autant plus qu'il a également fourni des synsets qui n'étaient pas forcément cohérents avec le sens du mot dans le contexte de la phrase.
# Alors que Cosine Lesk a fourni des informations plus précises sur le mot "raise", c'est-à-dire une définition ainsi qu'un exemple de synset similaire, et finalement des synsets plus cohérents.

In [None]:
# La désambiguisation lexicale est un processus visant à donner le sens attribué à un mot enn fonction du contexte dans lequel il est utilisé. 
# Word Net est une base de donnée qui organise les mots en synsets et donne des définitions ainsi que des exemples de relations sémantiques entre les synsets. 
# Cet outil est utile pour la classification sémantique, la recherche de défintions, de synonymes, d'hyponymes et d'hyperonymes. 
# Un thésaurus est aussi une base de données linguistiques, sauf qu'elle est souvent créée de manière plus subjective et peut contenir des informations quand au contexte, 
# aux connotations etc. 
# La différence avec Wordnet est que Wordnet est plus objectif que le Thesaurus, il est plus basé sur les données lexicales que le Thesaurus qui est plus basé sur les connotations.