In [1]:
import json
import csv
import re
# data is taken from 'https://github.com/nltk/nltk_data/blob/gh-pages/packages/corpora/wordnet31.zip'
# the package 'wordnet31' was renamed to 'wordnet' due to parsing errors
# the script to get definition and examples was edited (see 'nltk/nltk/corpus/reader/wordnet.py') to retrieve examples
from nltk.corpus import wordnet as wn

In [None]:
# Load the English query terms; the query loop further down iterates this
# object as a mapping of lemma -> iterable of forms.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which differs on Windows).
with open('query_terms_cont_en.json', 'r', encoding='utf-8') as jf:
    query_terms_cont_en = json.load(jf)

In [None]:
# sanity check: number of top-level entries loaded from the JSON file
# (the original run reported 75 English query terms)
len(query_terms_cont_en)

In [None]:
def _make_result(query_term, synset, found_in):
    """Build one result record for `synset`.

    Args:
        query_term: the term that was searched for (stored unchanged).
        synset: a WordNet synset object as returned by `wn`.
        found_in: where the term matched: 'lemmata', 'definition' or 'examples'.

    Returns:
        A dict with the keys query_term, synset_id, lemmata, definition,
        examples and found_in.
    """
    return {
        'query_term': query_term,
        'synset_id': synset.name(),
        'lemmata': [l.name() for l in synset.lemmas()],
        'definition': synset.definition(),
        'examples': synset.examples(),
        'found_in': found_in,
    }


# The full synset list does not depend on the query term, so build it once
# instead of re-enumerating the whole corpus for every term.
all_synsets = list(wn.all_synsets())

# maps every query term (lemma or form) to the list of its result records
wn_results_en = {}

for lemma, forms in query_terms_cont_en.items():

    # the lemma itself followed by all of its forms
    list_of_query_terms = [lemma, *forms]

    for query_term in list_of_query_terms:

        results = []

        # Whole-word, case-insensitive pattern, compiled once per term.
        # re.escape keeps terms containing regex metacharacters
        # (e.g. '.', '(', '+') from being interpreted as pattern syntax.
        term_pattern = re.compile(rf'\b{re.escape(query_term)}\b', re.IGNORECASE)

        # searching in lemmata
        # getting synset_id, lemmata (synonyms), definition, examples
        # Lemma names can be capitalized, so both sides are lowercased; the
        # query term was previously compared as-is, which made capitalised
        # query terms unmatchable.
        # NOTE(review): WordNet lemma names use '_' for multi-word expressions,
        # so query terms containing spaces will not match here - confirm
        # whether the query terms need normalising.
        query_term_lower = query_term.lower()
        for synset in wn.synsets(query_term):
            # any() yields at most one record per synset, even if several
            # lemma names of the synset match
            if any(query_term_lower == le.name().lower() for le in synset.lemmas()):
                results.append(_make_result(query_term, synset, 'lemmata'))

        # searching in all definitions and all examples
        for synset in all_synsets:
            if term_pattern.search(synset.definition()):
                results.append(_make_result(query_term, synset, 'definition'))

            # one record per synset is enough: every matching example would
            # produce an identical record, so stop at the first match
            if any(term_pattern.search(example) for example in synset.examples()):
                results.append(_make_result(query_term, synset, 'examples'))

        wn_results_en[query_term] = results

In [None]:
# persist the collected WordNet hits for all query terms as one JSON document
with open('princeton_wordnet31_query_results.json', mode='w') as out_file:
    serialized_results = json.dumps(wn_results_en)
    out_file.write(serialized_results)