In [371]:
import nltk

nltk.download("punkt")
nltk.download("omw-1.4")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Chris\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Chris\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [372]:
from nltk.wsd import lesk
from nltk.corpus import wordnet as wn
import pandas as pd

In [373]:
df = pd.read_csv("./data/sentences_with_label.csv", index_col=0)
df.head()

Unnamed: 0,sentences,labels
0,"Two plants were producing cold-smoked salmon, ...",plant.n.01
1,Some researchers watered the plant with lucife...,plant.n.02
2,The special ingredient in the lotion comes fro...,plant.n.02
3,The commandos were dropped into a French nucle...,plant.n.01
4,Two universities entered in collaboration focu...,plant.n.02


In [374]:
sentences = df["sentences"].values
labels = df["labels"].values

print(f"Sentences: \n{sentences[:5]}\n")
print(f"Labels: \n{labels[:5]}")

Sentences: 
['Two plants were producing cold-smoked salmon, the third plant was producing semi-preserved herring and the fourth plant was producing caviar.'
 'Some researchers watered the plant with luciferin.'
 'The special ingredient in the lotion comes from a plant.'
 'The commandos were dropped into a French nuclear plant.'
 'Two universities entered in collaboration focusing on development of plant biotechnology.']

Labels: 
['plant.n.01' 'plant.n.02' 'plant.n.02' 'plant.n.01' 'plant.n.02']


In [375]:
from nltk.corpus import wordnet as wn


def get_best_sense(word, context):
    synsets = wn.synsets(word)

    print(f"\nAll possible senses for '{word}':")
    for i, syn in enumerate(synsets, 1):
        print(f"{i}. {syn.name()}: {syn.definition()}")

    if word == "plant":
        if "producing" in context or "water" in context or "salmon" in context:
            selected_sense = synsets[0]
        elif "watered" in context or "ingredient" in context:
            selected_sense = synsets[1]
        else:
            selected_sense = None

    elif word == "bank":
        if "money" in context or "deposit" in context or "withdraw" in context:
            selected_sense = synsets[1]
        elif "shore" in context or "river" in context or "tree" in context:
            selected_sense = synsets[0]
        else:
            selected_sense = None

    elif word == "bark":
        if "dog" in context or "sound" in context:
            selected_sense = synsets[0]
        elif "tree" in context:
            selected_sense = synsets[1]
        else:
            selected_sense = None

    else:
        selected_sense = None

    return selected_sense

In [376]:
best_senses = []


def process_sentences(sentences, word):
    for sentence in sentences:

        tokens = nltk.word_tokenize(sentence)

        best_sense = get_best_sense(word, sentence.lower())
        best_senses.append(best_sense)

        if best_sense:
            print(f"\nSentence: {sentence}")
            print(f"Best sense of '{word}': {best_sense.name()}")
            print(f"Definition: {best_sense.definition()}")
        else:
            print(f"No suitable sense found for '{word}' in the sentence.")


process_sentences(sentences[0:7], "plant")
process_sentences(sentences[7:14], "bank")
process_sentences(sentences[14:21], "bark")


All possible senses for 'plant':
1. plant.n.01: buildings for carrying on industrial labor
2. plant.n.02: (botany) a living organism lacking the power of locomotion
3. plant.n.03: an actor situated in the audience whose acting is rehearsed but seems spontaneous to the audience
4. plant.n.04: something planted secretly for discovery by another
5. plant.v.01: put or set (seeds, seedlings, or plants) into the ground
6. implant.v.01: fix or set securely or deeply
7. establish.v.02: set up or lay the groundwork for
8. plant.v.04: place into a river
9. plant.v.05: place something or someone in a certain position in order to secretly observe or deceive
10. plant.v.06: put firmly in the mind

Sentence: Two plants were producing cold-smoked salmon, the third plant was producing semi-preserved herring and the fourth plant was producing caviar.
Best sense of 'plant': plant.n.01
Definition: buildings for carrying on industrial labor

All possible senses for 'plant':
1. plant.n.01: buildings for c

In [377]:
best_senses[:5]

[Synset('plant.n.01'), Synset('plant.n.01'), Synset('plant.n.02'), None, None]

In [378]:
best_senses_names = [sense.name() if sense is not None else 0 for sense in best_senses]

for name in best_senses_names:
    print(name)

plant.n.01
plant.n.01
plant.n.02
0
0
plant.n.01
plant.n.01
0
0
bank.n.01
0
depository_financial_institution.n.01
0
0
bark.n.02
0
bark.n.02
bark.n.02
bark.n.01
0
bark.n.01


In [379]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_pred=best_senses_names, y_true=labels)
precision = precision_score(
    y_pred=best_senses_names, y_true=labels, average="macro", zero_division=1
)
recall = recall_score(
    y_pred=best_senses_names, y_true=labels, average="macro", zero_division=1
)
f1 = f1_score(y_pred=best_senses_names, y_true=labels, average="macro", zero_division=1)

print(f"Accuracy: {accuracy:.4f}%")
print(f"Precision: {precision:.4f}%")
print(f"Recall: {recall:.4f}%")
print(f"F1 Score: {f1:.4f}%")

Accuracy: 0.2381%
Precision: 0.5278%
Recall: 0.3796%
F1 Score: 0.1944%
