In [1]:
import nltk
from nltk.corpus import framenet as fn
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
import re
from nltk.stem import WordNetLemmatizer
from pprint import pprint

Id estratti dai cognomi per i seguenti frame: 
- 'Chemical_potency', 'Fullness', 'Causation', 'Disgraceful_situation', 'Obviousness'
- 'Infrastructure', 'Product_line', 'Gusto', 'Military', 'Terrorism'

In [2]:
ids = [2724, 244, 5, 1612, 1360, 1481, 2524, 2569, 1514, 1750]

#### Metodi utili per fare pre-processing

In [3]:
def lemmatize_words(text):
    result = []
    lemmatizer = WordNetLemmatizer()
    for word in text:
        result.append(lemmatizer.lemmatize(word))
    return result

def remove_punctuation(s):
    return re.sub(r'[^\w\s]', '', s)

def remove_stop_words(row):
    stop_words = set(stopwords.words('english'))
    filtered_sentence = [w for w in row if not w.lower() in stop_words]
    return filtered_sentence

#### Estrapolazione del contesto dal synset
Obiettivo è creare una lista che contenga i termini rilevanti per creare un contesto del synset. I termini rilevanti sono all'interno di:
- definizione del synset;
- esempi del synset;
- lemmi;
- definizioni di iponimi e iperonimi.
- esempi di iponimi e iperonimi

In [4]:
def get_synset_ctx(synset):
    defs = remove_punctuation(synset.definition())
    exs = remove_punctuation(' '.join(synset.examples()))
    defs = remove_stop_words(lemmatize_words(defs.split()))
    exs = remove_stop_words(lemmatize_words(exs.split()))

    hypo = synset.hyponyms()
    hyper = synset.hypernyms()
    lemmas = synset.lemmas()
    
    lems = []
    for l in lemmas:
        lems = lems + l.name().split("_")

    for h in hypo:
        defs = defs + remove_stop_words(lemmatize_words(remove_punctuation(h.definition()).split()))
        exs = exs + remove_stop_words(lemmatize_words(remove_punctuation(' '.join(h.examples())).split()))
    
    for h in hyper:
        defs = defs + remove_stop_words(lemmatize_words(remove_punctuation(h.definition()).split()))
        exs = exs + remove_stop_words(lemmatize_words(remove_punctuation(' '.join(h.examples())).split()))

    return defs + exs + lems

#### Estrapolazione del contesto dal frame
Obiettivo è creare una lista che contenga i termini rilevanti per creare un contesto del frame. I termini rilevanti sono presi sia dalla definizione del frame sia dalla definizione dei FEs

In [5]:
def get_frame_ctx(id):
    ctx = []
    frame = fn.frame_by_id(id)
    ctx = ctx + frame.name.split(' ')
    defs = remove_punctuation(frame.definition)
    defs = remove_stop_words(defs.split(" "))
    ctx = ctx + lemmatize_words(defs)
    
    for fe in frame.FE:
        fe_defs = remove_punctuation(frame.FE[fe].definition)
        fe_defs = remove_stop_words(fe_defs.split(" "))
        ctx = ctx + lemmatize_words(fe_defs)
    return ctx

#### Approccio Bag of Words

In [6]:
def bag_of_words(frame_context, synset_context):
    return len([word for word in frame_context if word in synset_context]) + 1

#### Metodo per valutazione sistema

In [7]:
def get_evaluation(result_dict, annotation_dict):
    count_eval = 0
    len_total = 0
    for k in result_dict.keys():
        len_total += len(result_dict[k].keys())
        for el in result_dict[k].keys():
            if result_dict[k][el] == annotation_dict[k][el]:
                count_eval += 1

    return count_eval / len_total

#### Esecuzione

In [8]:
annotation_dict = {
    'Chemical_potency': {
        'Chemical_potency':'chemical.n.01',
        'Chemical_entity':'chemical.n.01',
        'Degree':'degree.n.02',
        'Time':'time.n.03',
        'Circumstances':'circumstance.n.01',
        'Place':'place.n.02',
        'strong.a':'strong.a.01',
        'potent.a':'potent.s.02',
        'stiff.a':'potent.a.03'
    },
    'Fullness': {
        'Fullness': 'fullness.n.03',
        'Container' : 'container.n.01',
        'Contents': 'content.n.01',
        'Degree': 'degree.n.01',
        'Time': 'time.n.03',
        'Frequency': 'frequency.n.01',
        'Duration': 'duration.n.01',
        'full.a': 'full.a.01',
        'empty.a': 'empty.a.01',
        'emptiness.n': 'emptiness.n.01',
        'fullness.n': 'fullness.n.03'
    },
    'Causation': {
        'Causation': 'causing.n.01',
        'Cause': 'cause.n.01',
        'Affected': 'affected.a.01',
        'Effect': 'consequence.n.01',
        'Place': 'place.n.02',
        'Time': 'time.n.03',
        'Actor': 'actor.n.02',
        'Circumstances': 'context.n.02',
        'Manner': 'manner.n.01',
        'Explanation': 'explanation.n.01',
        'Means': 'means.n.01',
        'Frequency': 'frequency.n.01',
        'Concessive': 'concessive.a.01',
        'cause.v':'cause.v.01',
        'cause.n':'cause.n.01',
        'make.v':'make.v.03',
        'lead (to).v':'lead.v.03',
        'reason.n':'cause.n.02',
        'send.v':'send.v.01',
        'bring about.v':'bring.v.03',
        'precipitate.v':'precipitate.v.01',
        'causative.a':'causative.a.1',
        'render.v':'render.v.01',
        'bring.v':'bring.v.02',
        'bring on.v':'bring.v.02',
        'induce.v':'induce.v.01',
        'wreak.v':'bring.v.03',
        'put.v':'put.v.02',
        'since.c': None,
        'because.c': None,
        'because of.prep': None,
        'raise.v':'raise.v.03',
        'result (in).v':'result.v.01'
    },
    'Disgraceful_situation': {
        'Disgraceful_situation': 'disgraceful.s.01',
        'State_of_affairs': 'state.n.02',
        'Protagonist': 'protagonist.n.02',
        'Degree': 'degree.n.01',
        'Explanation': 'explanation.n.01',
        'Judge': None,
        'disgraceful.a': 'disgraceful.s.01',
        'shameful.a': 'disgraceful.s.01'
    },
    'Obviousness': {
        'Obviousness': 'obviousness.n.01',
        'Phenomenon': 'phenomenon.n.01',
        'Attribute': 'property.n.04',
        'Degree': 'degree.n.01',
        'Time': 'time.n.03',
        'Circumstances': 'circumstance.n.01',
        'Perceiver': 'perceiver.n.01',
        'Evidence': 'evidence.n.02',
        'Group': 'group.n.01',
        'Location_of_protagonist': 'location.n.01',
        'Particular_iteration': 'particular.s.06',
        'Direction': 'direction.n.03',
        'obvious.a': 'obvious.a.01',
        'evident.a': 'apparent.s.01',
        'manifest.a': 'apparent.s.01',
        'visible.a': 'visible.a.01',
        'audible.a': 'audible.a.01',
        'unclear.a': 'unclear.a.02',
        'clear.a': 'clear.a.01',
        'clearly.adv': 'clearly.r.01',
        'obviously.adv': 'obviously.r.01',
        'clarity.n': 'clarity.n.01',
        'show.v': 'show.v.04',
        'show up.v': 'show.v.04',
        'stand out.v': None,
        'noticeable.a': 'noticeable.a.01'
    },
    'Infrastructure': {
        'Infrastructure':'infrastructure.n.02',
        'Activity':'activity.n.01',
        'Place':'topographic_point.n.01',
        'Possessor':'owner.n.02',
        'Resource':'resource.n.02',
        'User':'user.n.01',
        'Descriptor':'descriptor.n.02',
        'Infrastructure':'infrastructure.n.01',
        'infrastructure.n':'infrastructure.n.01',
        'base.n':'basis.n.02'
    },
    'Product_line': {
        'Product_line': 'merchandise.n.01',
        'Brand' : 'trade_name.n.01',
        'Collection': 'collection.n.01',
        'Products': 'merchandise.n.01',
        'Descriptor': 'descriptor.n.02',
        'Collection_name': 'collection.n.01',
        'Designer': 'couturier.n.01',
        'line.n': 'line.n.22',
        'collection.n': 'collection.n.01'
    },
    'Gusto': {
        'Gusto': 'gusto.n.01',
        'Person': 'person.n.01',
        'Degree': 'degree.n.01',
        'life.n': 'liveliness.n.02',
        'vim.n': 'energy.n.05',
        'spirit.n': 'spirit.n.03'
    },
    'Military': {
        'Military': 'military.n.01',
        'Force': 'force.n.04',
        'Possessor': 'owner.n.02',
        'Descriptor': None,
        'Members': 'member.n.04',
        'Domain': 'domain.n.02',
        'Goal': 'goal.n.01',
        'Period_of_existence': 'time_period.n.01',
        'military.n': 'military.n.01',
        'force.n': 'force.n.04',
        'navy.n': 'navy.n.01',
        'air force.n': None,
        'army.n': 'army.n.01',
        'naval.a': 'naval.a.01',
        'armed forces.n': None,
        'military.a': 'military.a.01',
        'military forces.n': 'military.n.01',
        'militia.n': 'militia.n.01',
        'national guard.n': None,
        'marines.n': 'marines.n.01',
        'coast guard.n': None
    },
    'Terrorism': {
        'Terrorism': 'terrorism.n.01',
        'Terrorist': 'terrorist.n.01',
        'Act': 'act.n.02',
        'Victim': 'victim.n.01',
        'Organization': 'organization.n.01',
        'Descriptor': 'descriptor.n.02',
        'Manner': 'manner.n.01',
        'Means': 'means.n.01',
        'Time': 'time.n.01',
        'Place': 'topographic_point.n.01',
        'Purpose': 'purpose.n.01',
        'Instrument': 'instrument.n.02',
        'terrorism.n': 'terrorism.n.01',
        'terrorist.n': 'terrorist.n.01',
        'ecoterrorism [environmentalism].n': 'ecoterrorism.n.01',
        'ecoterrorist [environmentalist].n': None,
        'bioterrorism.n': 'bioterrorism.n.01',
        'bioterrorist.n': None,
        'ecoterrorist.n': None,
        'ecoterrorism.n': 'ecoterrorism.n.01',
        'obviously.adv': 'obviously.r.01',
        'terror.n': 'terror.n.04'
    }
}

result_dict = {}
for id in ids:
    frame = fn.frame_by_id(id)
    result_dict[frame.name] = {}
    frame_context = get_frame_ctx(id)

    synsets = wn.synsets(frame.name.split('_')[0])
    res_max = 0
    best_syn = None
    
    for syn in synsets:
        synset_context = get_synset_ctx(syn)
        res = bag_of_words(frame_context, synset_context)
        if res > res_max:
            res_max = res
            best_syn = syn
    if best_syn is not None:
        result_dict[frame.name][frame.name] = best_syn.name()
    else:
        result_dict[frame.name][frame.name] = None
    
    for fe in frame.FE:
        res_max = 0
        best_syn = None
        for syn in wn.synsets(fe.split("_")[0]):
            synset_context = get_synset_ctx(syn)
            res = bag_of_words(frame_context, synset_context)
            if res > res_max:
                res_max = res
                best_syn = syn
        if best_syn is not None:
            result_dict[frame.name][fe] = best_syn.name()
        else:
            result_dict[frame.name][fe] = None
    i = 0
    for lu in frame.lexUnit.keys():
        res_max = 0
        best_syn = None
        if i<20:
            for syn in wn.synsets(lu.split(".")[0].split(" ")[0]):
                synset_context = get_synset_ctx(syn)
                res = bag_of_words(frame_context, synset_context)
                if res > res_max:
                    res_max = res
                    best_syn = syn
            i += 1
            if best_syn is not None:        
                result_dict[frame.name][lu] = best_syn.name()
            else:
                result_dict[frame.name][lu] = None

print(f"La valutazione del sistema è del: {round(get_evaluation(result_dict, annotation_dict)*100, 2)}%")


La valutazione del sistema è del: 49.67%
