In [None]:
from OpenDutchWordnet import Wn_grid_parser
import json
import csv
import re

In [None]:
# Build the Open Dutch WordNet parser from the resource the package exposes as
# `Wn_grid_parser.odwn`. The later cells use this module-level `instance` for
# lemma lookups, for iterating over all lexical entries and for listing the
# members of a synset.
# NOTE(review): presumably `odwn` points at the bundled ODWN resource file — confirm.
instance = Wn_grid_parser(Wn_grid_parser.odwn)

In [None]:
# Load the Dutch (NL) query terms: a JSON object that maps each lemma to its word forms.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale (JSON interchange files are UTF-8).

with open('query_terms_cont_nl.json', 'r', encoding='utf-8') as jf:
    query_terms_cont_nl = json.load(jf)

In [None]:
# Number of lemmas among the Dutch query terms (one key per lemma);
# the original note records 82.
len(query_terms_cont_nl)

In [None]:
# Load the synset definitions from a JSON file: an object keyed by synset id,
# where each value is a list of definition strings.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale (JSON interchange files are UTF-8).

with open('ODWN/odwn_synset_glosses.json', 'r', encoding='utf-8') as jf:
    all_synset_definitions = json.load(jf)

In [None]:
def search_results_generator(query_term:str,le:'le.Le',all_synset_definitions:dict,found_in:str,example="") -> dict:

    """
    Shape one search hit into a result dict.

    This function does not search for the query term; it only collects the
    fields of a lexical entry that was already matched by the caller.

    Parameters
    ----------
    query_term : str
        The term that produced the hit; stored unchanged.
    le : le.Le
        The matched lexical entry, read through its ``get_*`` accessors.
    all_synset_definitions : dict
        Mapping of synset id to that synset's definitions.
    found_in : str
        Label of the place where the hit was found. The callers in this
        notebook pass "le", "sense_definition" or "sense_examples".
    example : str, optional
        The matching example. Only stored (under 'found_in_example') when
        ``found_in == 'sense_examples'``.

    Returns
    -------
    dict
        Keys, in order: 'query_term', 'le_id', 'le_written_form', 'sense_id',
        'sense_definition', 'sense_examples', 'synset_id', 'synonyms',
        'synset_definitions', 'found_in' and, for example hits only,
        'found_in_example'. An entry without a synset gets an empty string as
        'synset_id' and empty lists for 'synonyms' and 'synset_definitions'.

    Notes
    -----
    Synonyms are looked up through the module-level ``instance`` parser, so
    that name must exist before an entry with a synset is passed in.
    """

    result_dict = {
        'query_term': query_term,
        'le_id': le.get_id(),
        'le_written_form': le.get_lemma(),
        'sense_id': le.get_sense_id(),
        'sense_definition': le.get_definition(),
        'sense_examples': le.get_sense_example(),
    }

    # Ask for the synset id once and test it by identity against None.
    synset_id = le.get_synset_id()

    if synset_id is not None:
        result_dict['synset_id'] = synset_id
        result_dict['synonyms'] = [les.get_lemma() for les in instance.les_all_les_of_one_synset(synset_id)]
        # A synset without recorded definitions yields an empty list.
        result_dict['synset_definitions'] = all_synset_definitions.get(synset_id, [])
    else:
        result_dict['synset_id'] = ""
        result_dict['synonyms'] = []
        result_dict['synset_definitions'] = []

    result_dict['found_in'] = found_in

    if found_in == 'sense_examples':
        result_dict['found_in_example'] = example

    return result_dict

In [None]:
# Search every query term (each lemma followed by its forms) in four places:
# lemmas, sense definitions, sense examples and synset definitions.
# The hits are collected per query term in `odwn_results_nl`.
odwn_results_nl = {}

for lemma, forms in query_terms_cont_nl.items():

    for query_term in [lemma, *forms]:

        # The hits depend only on the term itself, so a term that was already
        # searched (e.g. a form listed under two lemmas) would yield the same
        # list again; skip the repeated scan.
        if query_term in odwn_results_nl:
            continue

        # Whole-word, case-insensitive match. The term is escaped so that any
        # regex metacharacter in it is matched literally, and the pattern is
        # compiled once and reused for all three text searches below.
        term_pattern = re.compile(r'\b' + re.escape(query_term) + r'\b', re.IGNORECASE)

        results = []

        # searching in lemmas
        for le in instance.lemma_get_generator(query_term,ignore_case=True):
            results.append(search_results_generator(query_term,le,all_synset_definitions,"le"))

        # iterating over all lexical entries
        for le in instance.les_get_generator():

            # searching in sense definitions
            if term_pattern.search(le.get_definition()):
                results.append(search_results_generator(query_term,le,all_synset_definitions,"sense_definition"))

            # searching in sense examples (one hit per matching example)
            for example in le.get_sense_example():
                if term_pattern.search(example):
                    results.append(search_results_generator(query_term,le,all_synset_definitions,"sense_examples",example))

        # searching in synset definitions (one hit per matching definition)
        for synset_id, definitions in all_synset_definitions.items():
            for d in definitions:
                if term_pattern.search(d):
                    # results for synsets are different (there is no lexical
                    # entry), so the helper function is not used here
                    results.append({
                        'query_term': query_term,
                        'synset_id': synset_id,
                        'synonyms': [les.get_lemma() for les in instance.les_all_les_of_one_synset(synset_id)],
                        'synset_definitions': definitions,
                        'found_in': "synset_definitions",
                        'found_in_synset_definition': d,
                    })

        odwn_results_nl[query_term] = results

In [None]:
# Save the query results (query term -> list of hits) in a JSON file.
# The encoding is given explicitly, matching the loading cells, so the file
# does not depend on the platform's default locale.

with open('odwn_query_results.json', 'w', encoding='utf-8') as jf:
    json.dump(odwn_results_nl, jf)