In [None]:
import json
import csv
import re
# the directory 'wordnet' actually contains wordnet31
# the script to get definition and examples was edited (see 'wordnet.py')
from nltk.corpus import wordnet as wn

In [None]:
# Load the English query terms. The cells below iterate this object with
# .items() as (lemma, forms) pairs.
# JSON text is UTF-8, so the encoding is given explicitly instead of relying
# on the platform default (which is not UTF-8 on every system).
with open('query_terms_cont_en.json', 'r', encoding='utf-8') as jf:
    query_terms_cont_en = json.load(jf)

In [None]:
# Sanity check: number of top-level entries in the loaded mapping
# (one entry per lemma; the forms listed under each lemma are not counted here).
# there are 75 English query terms
len(query_terms_cont_en)

In [None]:
def _wn_result(query_term, synset, found_in):
    """Build one result record for a synset matched by ``query_term``.

    ``found_in`` records where the match occurred: 'lemmata', 'definition'
    or 'examples'. Alongside the query term, the record stores the synset's
    name, the names of all its lemmas, its definition and its examples.
    """
    return {
        'query_term': query_term,
        'synset_id': synset.name(),
        'lemmata': [l.name() for l in synset.lemmas()],
        'definition': synset.definition(),
        'examples': synset.examples(),
        'found_in': found_in,
    }


# Enumerate every synset once. The full-text search below walks this same
# list for each query term, so rebuilding it inside the loop is wasted work.
all_synsets_en = list(wn.all_synsets())

# lemma -> list of result records found for the lemma and all of its forms
wn_results_en = {}

for lemma, forms in query_terms_cont_en.items():

    results = []

    # searching for the lemma itself and for each of its forms
    for query_term in [lemma, *forms]:

        # searching in lemmata
        # exact match between query term and lemma name; lemma names can be
        # capitalized, hence the lower(). One record is appended per matching
        # lemma of the synset.
        for synset in wn.synsets(query_term):
            for le in synset.lemmas():
                if query_term == le.name().lower():
                    results.append(_wn_result(query_term, synset, 'lemmata'))

        # Whole-word, case-insensitive pattern for the full-text search.
        # re.escape keeps characters such as '.', '(' or '+' in a query term
        # from being interpreted as regex syntax.
        term_pattern = re.compile(rf'\b{re.escape(query_term)}\b', re.IGNORECASE)

        for synset in all_synsets_en:

            # searching in all definitions
            if term_pattern.search(synset.definition()):
                results.append(_wn_result(query_term, synset, 'definition'))

            # searching in all examples (one record per matching example)
            for example in synset.examples():
                if term_pattern.search(example):
                    results.append(_wn_result(query_term, synset, 'examples'))

    wn_results_en[lemma] = results

In [None]:
# Persist the collected matches (lemma -> list of result records) as JSON.
with open('Princeton_WordNet/princeton_wordnet31_query_results.json', 'w') as results_file:
    serialized_results = json.dumps(wn_results_en)
    results_file.write(serialized_results)

#### Count by query term

In [None]:
# Tally the stored results once, keyed by (query_term, found_in), instead of
# re-scanning every result for every query term.
# The tally spans the results of ALL lemmas, so a query term that is listed
# under more than one lemma gets the same combined counts in each of its rows.
match_counts = {}
for results in wn_results_en.values():
    for result in results:
        tally_key = (result['query_term'], result['found_in'])
        match_counts[tally_key] = match_counts.get(tally_key, 0) + 1

# newline='' is required by the csv module; without it the writer's own line
# endings get translated again and blank rows appear on Windows.
with open('Princeton_WordNet/princeton_wordnet31_count_by_query_term.csv', 'w',
          newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    header = ['lemma','query_term','synsets','definitions','examples','total_per_query_term']
    writer.writerow(header)

    for key, forms in query_terms_cont_en.items():

        # one row for the lemma itself and one for each of its forms
        for query_term in [key, *forms]:

            lemmata_count = match_counts.get((query_term, 'lemmata'), 0)
            definition_count = match_counts.get((query_term, 'definition'), 0)
            examples_count = match_counts.get((query_term, 'examples'), 0)
            total_count = lemmata_count + definition_count + examples_count

            writer.writerow([key,query_term,lemmata_count,definition_count,examples_count,total_count])

#### Count by lemma

In [None]:
# One row per lemma: how many of its result records were found in lemmata,
# definitions and examples, plus the total number of records.
# newline='' is required by the csv module; without it the writer's own line
# endings get translated again and blank rows appear on Windows.
with open('Princeton_WordNet/princeton_wordnet31_count_by_lemma.csv', 'w',
          newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    header = ['lemma','synsets','definitions','examples','total_per_lemma']
    writer.writerow(header)

    for lemma, results in wn_results_en.items():
        found_in_values = [result['found_in'] for result in results]

        lemmata_count = found_in_values.count('lemmata')
        definition_count = found_in_values.count('definition')
        examples_count = found_in_values.count('examples')
        # the total is the number of records, independent of the three counts
        total_count = len(results)

        writer.writerow([lemma,lemmata_count,definition_count,examples_count,total_count])