In [1]:
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
import string

In [2]:
def pre_processing(sentence):
    return remove_stopwords(remove_punctuation(tokenize(sentence)))

In [3]:
def remove_stopwords(words_list):
    stopwords_list = get_stopwords()
    new_words_list = []
    for word in words_list:
        word_lower = word.lower()
        if word_lower not in stopwords_list:
            new_words_list.append(word_lower)
    return new_words_list

In [4]:
def remove_punctuation(words_list):
    new_words_list = []
    for word in words_list:
        temp = word
        if not temp.strip(string.punctuation) == "":
            new_word = word.lower()
            new_word = new_word.replace("'", "")
            new_words_list.append(new_word)
    return new_words_list

In [5]:
def tokenize(sentence):
    return word_tokenize(sentence)


In [6]:
def remove_stopwords(words_list):
    
    stopwords = open("stop_words_FULL.txt", "r")
    stopwords_list = []
    for word in stopwords:
        stopwords_list.append(word.replace('\n', ''))
    stopwords.close()
    
    return [value.lower() for value in words_list if value.lower() not in stopwords_list]

In [7]:
def all_hypernym_paths(word):
    
    def_lens = []
    
    for syn in wn.synsets(word):

        single_path = []

        hyp_path = syn.hypernym_paths()

        for i in range (0, len(hyp_path[0])):
            single_path.append(len((hyp_path[0][i].definition()).split()))

        def_lens.append(single_path)
        
    return def_lens

In [8]:
def calculate_distance_root(synset):
    return (min([len(path) for path in synset.hypernym_paths()]))

def distance_root(word):
    
    output = dict()
    
    for syn in wn.synsets(word):
        
        actual_syn_dis = calculate_distance_root(syn)
        output[syn] = {word :actual_syn_dis} 
                
        syn_definition_processed = pre_processing(syn.definition())
        for def_word in syn_definition_processed:
            for def_syn in wn.synsets(def_word):
                output[syn].update({def_word : calculate_distance_root(def_syn)})
                        
    return output



In [16]:
from nltk.translate.bleu_score import sentence_bleu

from bleu import multi_list_bleu

from rouge import Rouge

rouge = Rouge()

def definition_overlap(word):
    
    for syn in wn.synsets(word):
        
        bleu_count = 0
        f_count = 0
        
        actual_def_processed = syn.definition()
        
        print ("\n--------------------\nDefinition of", syn,  "=", actual_def_processed)
        print ()
        
        hyper_list = syn.hypernyms()
        
        for hy in hyper_list:
            hy_def = hy.definition()
            
            bleu_count += sentence_bleu([actual_def_processed], hy_def, weights=(1, 0, 0, 0))
            #print("BLEU score: ", sentence_bleu([actual_def_processed], hy_def, weights=(1, 0, 0, 0)))
            
            rouge_scores = rouge.get_scores(' '.join(hy_def), ' '.join(actual_def_processed))
            #print("Rogue scores: ", rouge_scores)
            f_count += rouge_scores[0]['rouge-1']['f']

        if (len(hyper_list) != 0):
            print ("Bleu score for hypernyms (1-gram):", bleu_count / len(hyper_list))
            print ("Rogue f1 for hypernyms (1-gram):", f_count / len(hyper_list))
        else:
            print("No hypernyms")

               
        print ()
        
        bleu_count = 0
        f_count = 0

        hypo_list = syn.hyponyms()
               
        for hy in hypo_list:
            hy_def = hy.definition()
            
            bleu_count += sentence_bleu([actual_def_processed], hy_def, weights=(1, 0, 0, 0))
            #print("BLEU score: ", sentence_bleu([actual_def_processed], hy_def, weights=(1, 0, 0, 0)))
            
            rouge_scores = rouge.get_scores(' '.join(hy_def), ' '.join(actual_def_processed))
            #print("Rogue scores: ", rouge_scores)
            f_count += rouge_scores[0]['rouge-1']['f']

        if (len(hypo_list) != 0):
            print ("Bleu score for hyponyms (1-gram):", bleu_count / len(hypo_list))
            print ("Rogue f1 for hyponyms (1-gram):", f_count / len(hypo_list))
        else:
            print("No hyponyms")



        

In [17]:
definition_overlap("courage")


--------------------
Definition of Synset('courage.n.01') = a quality of spirit that enables you to face danger or pain without showing fear

Bleu score for hypernyms (1-gram): 0.6990638273805326
Rogue f1 for hypernyms (1-gram): 0.7164179054488752

Bleu score for hyponyms (1-gram): 0.32899310492431155
Rogue f1 for hyponyms (1-gram): 0.5992050036619138


In [11]:
distance_root("paper")

{Synset('paper.n.01'): {'paper': 6,
  'material': 1,
  'cellulose': 9,
  'pulp': 6,
  'derived': 1,
  'wood': 11,
  'rags': 5,
  'grasses': 5},
 Synset('composition.n.08'): {'paper': 7,
  'essay': 3,
  'written': 1,
  'assignment': 9},
 Synset('newspaper.n.01'): {'paper': 10,
  'daily': 1,
  'weekly': 1,
  'publication': 9,
  'folded': 2,
  'sheets': 2,
  'news': 7,
  'articles': 5,
  'advertisements': 6},
 Synset('paper.n.04'): {'paper': 8,
  'medium': 1,
  'written': 1,
  'communication': 5},
 Synset('paper.n.05'): {'paper': 9,
  'scholarly': 1,
  'article': 5,
  'describing': 1,
  'observations': 8,
  'stating': 3,
  'hypotheses': 6},
 Synset('newspaper.n.02'): {'paper': 10,
  'business': 8,
  'firm': 1,
  'publishes': 3,
  'newspapers': 7},
 Synset('newspaper.n.03'): {'paper': 8,
  'physical': 1,
  'object': 2,
  'product': 6,
  'newspaper': 7,
  'publisher': 9},
 Synset('paper.v.01'): {'paper': 2, 'cover': 4},
 Synset('wallpaper.v.01'): {'paper': 2, 'cover': 4, 'wallpaper': 2}}