In [None]:
import json
import csv
import pandas as pd

In [None]:
# Load the PWN 3.1 query results.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) instead of
# relying on the platform's locale default, which differs between systems.
with open("pwn31_query_results.json", "r", encoding="utf-8") as jf:
    pwn31_query_results = json.load(jf)

In [None]:
# Load the query terms; the cells below read the "en" entry, which maps each
# lemma to its word forms.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) instead of
# relying on the platform's locale default, which differs between systems.
with open("LODlit/query_terms.json", "r", encoding="utf-8") as jf:
    query_terms = json.load(jf)

### 1. N synsets by query term

In [None]:
# Write one row per English query term: its lemma and the number of distinct
# synsets among its hits.
# newline="" is what the csv module requires of the file object it writes to
# (otherwise rows can gain a blank line on Windows); UTF-8 matches the default
# encoding pd.read_csv uses when this file is read back.
with open("pwn31_synsets_by_query_term.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["lemma", "query_term", "n_synsets"])

    for term, results in pwn31_query_results.items():
        # Reset for every term so that a term missing from the English word
        # forms cannot silently inherit the lemma of the previous term.
        lemma = None
        # No early exit: if a term is listed under several lemmas, the last
        # one in query_terms["en"] is used.
        for candidate, wordforms in query_terms["en"].items():
            if term in wordforms:
                lemma = candidate
        if lemma is None:
            # taking only English terms
            continue

        n_synsets = len({hit["synset_id"] for hit in results})
        writer.writerow([lemma, term, n_synsets])

### 2. N synsets by lemma

In [None]:
# Read back the per-query-term synset counts (columns: lemma, query_term, n_synsets).
df = pd.read_csv("pwn31_synsets_by_query_term.csv")

In [None]:
# Aggregate the per-query-term synset counts in `df` to one row per lemma.
# NOTE(review): this adds up the per-term counts, so a synset hit by several
# word forms of the same lemma is counted once per word form — confirm that
# this is the intended figure.
# newline="" is what the csv module requires of the file object it writes to;
# the encoding is pinned so the output does not depend on the platform locale.
with open("pwn31_synsets_by_lemma.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["lemma", "n_synsets"])

    # groupby yields (key, sub-frame) pairs, ordered by lemma.
    for lemma, term_rows in df.groupby("lemma"):
        writer.writerow([lemma, term_rows["n_synsets"].sum()])

### 3. N hits (occurrences) by query term

In [None]:
# Write one row per English query term: how many of its hits were found in
# synset lemmata, in definitions and in examples, plus the sum of the three.
# newline="" is what the csv module requires of the file object it writes to
# (otherwise rows can gain a blank line on Windows); UTF-8 matches the default
# encoding pd.read_csv uses when this file is read back.
with open("pwn31_hits_by_query_term.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(
        ["lemma", "query_term", "synset_lemmas", "definitions", "examples", "total_per_query_term"]
    )

    for term, results in pwn31_query_results.items():
        # getting a lemma for the query term; reset for every term so that a
        # term missing from the English word forms cannot silently inherit the
        # lemma of the previous term
        lemma = None
        # No early exit: if a term is listed under several lemmas, the last
        # one in query_terms["en"] is used.
        for candidate, wordforms in query_terms["en"].items():
            if term in wordforms:
                lemma = candidate
        if lemma is None:
            # not an English query term
            continue

        # checking where the query term is found; hits with any other
        # "found_in" value are not counted and do not enter the total
        lemmata_count = sum(1 for hit in results if hit["found_in"] == "lemmata")
        definition_count = sum(1 for hit in results if hit["found_in"] == "definition")
        examples_count = sum(1 for hit in results if hit["found_in"] == "examples")
        total_count = lemmata_count + definition_count + examples_count

        writer.writerow(
            [lemma, term, lemmata_count, definition_count, examples_count, total_count]
        )

### 4. N hits (occurrences) by lemma

In [None]:
# Read back the per-query-term hit counts (columns: lemma, query_term,
# synset_lemmas, definitions, examples, total_per_query_term).
# Note: this rebinds `df`, which earlier held the synset counts.
df = pd.read_csv("pwn31_hits_by_query_term.csv")

In [None]:
# Aggregate the per-query-term hit counts in `df` to one row per lemma by
# summing each count column over the lemma's query terms.
# newline="" is what the csv module requires of the file object it writes to;
# the encoding is pinned so the output does not depend on the platform locale.
with open("pwn31_hits_by_lemma.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["lemma", "synset_lemmas", "definitions", "examples", "total_lemma"])

    count_columns = ["synset_lemmas", "definitions", "examples", "total_per_query_term"]
    # groupby yields (key, sub-frame) pairs, ordered by lemma.
    for lemma, term_rows in df.groupby("lemma"):
        writer.writerow([lemma] + [term_rows[column].sum() for column in count_columns])

## 5. All PWN 3.1
These numbers are used for Table 1

In [None]:
from nltk.corpus import wordnet as wn

In [None]:
# Show the version reported by the loaded WordNet corpus reader.
# NOTE(review): the counts below are labelled PWN 3.1 — confirm this prints 3.1.
wn.get_version()

In [None]:
# Count every lemma entry, over all synsets, whose name is set.
count_lemma_name = sum(
    1
    for synset in wn.all_synsets()
    for lemma_entry in synset.lemmas()
    if lemma_entry.name() is not None
)
print(count_lemma_name)

In [None]:
# Count the synsets whose definition is set.
count_synset_definition = sum(
    1 for synset in wn.all_synsets() if synset.definition() is not None
)
print(count_synset_definition)

In [None]:
# Count the synsets that come with at least one example.
count_synset_examples = sum(
    1 for synset in wn.all_synsets() if len(synset.examples()) > 0
)
print(count_synset_examples)