In [1]:
from OpenDutchWordnet import Wn_grid_parser
import json
import csv
import re

In [2]:
# Parser object built from the resource referenced by `Wn_grid_parser.odwn`.
# Later cells use it to look up lexical entries by lemma, to iterate over all
# lexical entries, and to list the lexical entries belonging to a synset.
instance = Wn_grid_parser(Wn_grid_parser.odwn)

In [3]:
# Load the Dutch (NL) query terms from JSON.
# Later cells iterate this mapping as {lemma: [forms, ...]}.
# NOTE: the path is absolute and machine-specific; adjust it for your checkout.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding (which may mangle characters with diacritics).
with open('/Users/anesterov/reps/words-matter/labels_in_LOD/query_terms_cont_nl.json', 'r', encoding='utf-8') as jf:
    query_terms_cont_nl = json.load(jf)

In [4]:
# Number of lemmas in the Dutch query-term mapping (82 in the recorded run)
len(query_terms_cont_nl)

82

In [5]:
# Load the synset definitions (glosses) from JSON.
# Later cells iterate this mapping as {synset_id: [definition, ...]}.
# NOTE: the path is absolute and machine-specific; adjust it for your checkout.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('/Users/anesterov/reps/words-matter/labels_in_LOD/odwn/odnw_synset_glosses.json', 'r', encoding='utf-8') as jf:
    all_synset_definitions = json.load(jf)

In [6]:
def search_results_generator(query_term:str,le:'le.Le',all_synset_definitions:dict,found_in:str,example="",parser=None) -> dict:
    """
    Shape one search hit into a result dict.

    This function does not search for the query term itself; it only
    collects the data of a lexical entry that the caller already matched.

    Parameters
    ----------
    query_term : str
        The query term that produced the hit.
    le : le.Le
        The matched lexical entry. It must provide get_id(), get_lemma(),
        get_sense_id(), get_definition(), get_sense_example() and
        get_synset_id().
    all_synset_definitions : dict
        Mapping of synset id -> synset definitions. A synset id that is
        missing from the mapping yields an empty list.
    found_in : str
        Label describing where the query term was found. It is stored as-is.
    example : str, optional
        The matching example. Only stored (under 'found_in_example') when
        found_in == 'sense_examples'.
    parser : optional
        Object providing les_all_les_of_one_synset(synset_id), used to
        collect the synonyms of the synset. When omitted, the notebook-level
        `instance` is used.

    Returns
    -------
    dict
        Keys, in insertion order: 'query_term', 'le_id', 'le_written_form',
        'sense_id', 'sense_definition', 'sense_examples', 'synset_id',
        'synonyms', 'synset_definitions', 'found_in' and, for hits in sense
        examples only, 'found_in_example'.
    """

    result_dict = {
        'query_term': query_term,
        'le_id': le.get_id(),
        'le_written_form': le.get_lemma(),
        'sense_id': le.get_sense_id(),
        'sense_definition': le.get_definition(),
        'sense_examples': le.get_sense_example(),
    }

    # Look the synset id up once and reuse it.
    synset_id = le.get_synset_id()

    if synset_id is not None:
        if parser is None:
            # Resolved lazily so entries without a synset never need a parser.
            parser = instance
        result_dict['synset_id'] = synset_id
        result_dict['synonyms'] = [member.get_lemma() for member in parser.les_all_les_of_one_synset(synset_id)]
        result_dict['synset_definitions'] = all_synset_definitions.get(synset_id, [])
    else:
        # No synset attached: keep the same keys with empty values so every
        # result dict has a uniform shape.
        result_dict['synset_id'] = ""
        result_dict['synonyms'] = []
        result_dict['synset_definitions'] = []

    result_dict['found_in'] = found_in

    if found_in == 'sense_examples':
        result_dict['found_in_example'] = example

    return result_dict

In [7]:
### TODO: replace 'lemma' with the query term and move the results list;
### indentation of the resulting dict;
### generate a new results file!

In [8]:
# Search every query term (each lemma and all of its forms) in ODWN.
# The results are keyed by query term; each value is a list of result dicts.
# NOTE: all lexical entries and all synset definitions are traversed once
# per query term, so this cell is expensive to run.
odwn_results_nl = {}

for lemma, forms in query_terms_cont_nl.items():

    # the lemma itself is searched as well as each of its forms
    for query_term in [lemma, *forms]:

        results = []

        # Whole-word, case-insensitive pattern, compiled once per query term.
        # re.escape makes a term containing regex metacharacters match
        # literally instead of being interpreted as a pattern.
        term_pattern = re.compile(f'\\b{re.escape(query_term)}\\b', re.IGNORECASE)

        # searching in lemmas
        for le in instance.lemma_get_generator(query_term, ignore_case=True):
            results.append(search_results_generator(query_term, le, all_synset_definitions, "le"))

        # iterating over all lexical entries
        for le in instance.les_get_generator():

            # searching in sense definitions
            if term_pattern.search(le.get_definition()):
                results.append(search_results_generator(query_term, le, all_synset_definitions, "sense_definition"))

            # searching in sense examples (one result per matching example)
            for example in le.get_sense_example():
                if term_pattern.search(example):
                    results.append(search_results_generator(query_term, le, all_synset_definitions, "sense_examples", example))

        # searching in synset definitions (one result per matching definition)
        for synset_id, definitions in all_synset_definitions.items():
            for d in definitions:
                if term_pattern.search(d):
                    # results for synsets have a different shape (there is no
                    # lexical entry), so the helper function is not used here
                    result_dict = {}
                    result_dict['query_term'] = query_term
                    result_dict['synset_id'] = synset_id
                    result_dict['synonyms'] = [les.get_lemma() for les in instance.les_all_les_of_one_synset(synset_id)]
                    result_dict['synset_definitions'] = definitions
                    result_dict['found_in'] = "synset_definitions"
                    result_dict['found_in_synset_definition'] = d
                    results.append(result_dict)

        odwn_results_nl[query_term] = results

In [9]:
# Persist the query results (keyed by query term) as JSON in a new file
with open('odwn_query_results_new.json', 'w') as results_file:
    json.dump(odwn_results_nl, fp=results_file)

In [4]:
# Reading previously saved results back in.
# NOTE: this loads 'odwn_query_results.json', which is not the file name the
# saving cell writes ('odwn_query_results_new.json').
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('odwn_query_results.json', 'r', encoding='utf-8') as jf:
    odwn_results_nl = json.load(jf)

#### Count by query term

In [6]:
# Tally all stored results once, keyed by (query term, place it was found in),
# instead of rescanning every stored result for every query term.
hit_counts = {}
for stored_results in odwn_results_nl.values():
    for result in stored_results:
        tally_key = (result['query_term'], result['found_in'])
        hit_counts[tally_key] = hit_counts.get(tally_key, 0) + 1

# newline='' is required by the csv module so that it controls line endings
# itself (otherwise extra blank rows can appear on some platforms).
# The encoding is explicit because the terms may contain non-ASCII characters.
with open('ODWN/odwn_count_by_query_term.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    header = ['lemma','query_term','le','sense_definition','sense_examples','synset_definition','total_per_query_term']
    writer.writerow(header)

    for key, forms in query_terms_cont_nl.items():

        # one row per query term: the lemma itself followed by each of its forms
        for query_term in [key, *forms]:

            le_count = hit_counts.get((query_term, 'le'), 0)
            sense_definition_count = hit_counts.get((query_term, 'sense_definition'), 0)
            sense_examples_count = hit_counts.get((query_term, 'sense_examples'), 0)
            synset_definition_count = hit_counts.get((query_term, 'synset_definitions'), 0)

            # the total only covers the four categories reported in the row
            total_count = le_count + sense_definition_count + sense_examples_count + synset_definition_count

            writer.writerow([key,query_term,le_count,sense_definition_count,sense_examples_count,synset_definition_count,total_count])

#### Count by lemma

In [7]:
# Write one row per key of the loaded results with the number of hits per
# place the term was found in.
# NOTE(review): the keys of `odwn_results_nl` are written to the 'lemma'
# column; the search cell in this notebook keys its results by query term,
# so confirm that the loaded results file is keyed by lemma.
# newline='' is required by the csv module so that it controls line endings
# itself; the encoding is explicit because terms may contain non-ASCII
# characters.
with open('ODWN/odwn_count_by_lemma.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    header = ['lemma','le','sense_definition','sense_examples','synset_definition','total_per_lemma']
    writer.writerow(header)

    for lemma, results in odwn_results_nl.items():

        # number of results per 'found_in' label for this key
        found_in_counts = {}
        for result in results:
            found_in = result['found_in']
            found_in_counts[found_in] = found_in_counts.get(found_in, 0) + 1

        writer.writerow([
            lemma,
            found_in_counts.get('le', 0),
            found_in_counts.get('sense_definition', 0),
            found_in_counts.get('sense_examples', 0),
            found_in_counts.get('synset_definitions', 0),
            len(results),  # total = all stored results for this key
        ])