In [None]:
import nltk
# WordNet 3.1
from nltk.corpus import wordnet as wn
from collections import Counter
import csv

In [None]:
# The following relations (Synset methods) of PWN synsets are checked as potential explicit markers:
# usage_domains()
# hypernyms()
# hyponyms()
# in_topic_domains()
# in_region_domains()
# in_usage_domains()
# topic_domains()
# region_domains()
# also_sees()
# similar_tos()

In [None]:
def pwn_explicit_markers(synset: str) -> dict:
    """Collect the relations of one PWN synset that may act as explicit markers.

    Args:
        synset: Synset name in the form accepted by ``wn.synset``
            (for example ``"white.s.05"``).

    Returns:
        A dict whose keys are the relation names listed below (in that order)
        and whose values are whatever the same-named method of the synset
        returns.

    Raises:
        Whatever ``wn.synset`` raises for a name it cannot resolve; the error
        is not caught here.
    """
    # Resolve the synset once and reuse it for every relation instead of
    # repeating the lookup for each key.
    target = wn.synset(synset)

    # Order matters: it fixes the key order of the returned dict.
    relation_names = (
        "usage_domains",
        "in_usage_domains",
        "topic_domains",
        "in_topic_domains",
        "region_domains",
        "in_region_domains",
        "also_sees",
        "similar_tos",
        "hypernyms",
        "hyponyms",
    )

    return {relation: getattr(target, relation)() for relation in relation_names}

In [None]:
# Example: get all (potentially) explicit markers of a single synset.
# The returned dict is the last expression, so it is shown as the cell output.
pwn_explicit_markers("white.s.05")

In [None]:
### Gather the usage domains of every synset into one flat list
# A domain is appended once per synset that links to it, so repeats are kept
# on purpose (they are counted later).
all_usage_domains = [
    domain
    for synset in wn.all_synsets()
    for domain in synset.usage_domains()
]

In [None]:
### Gather the topic domains of every synset into one flat list
# A domain is appended once per synset that links to it, so repeats are kept
# on purpose (they are counted later).
all_topic_domains = [
    domain
    for synset in wn.all_synsets()
    for domain in synset.topic_domains()
]

In [None]:
# Count how many times each topic-domain synset occurs in the collected list
# (displayed as the cell output).
Counter(all_topic_domains)

In [None]:
# Map each usage-domain synset name to the number of times it occurs in
# all_usage_domains.
pwn_usage_domains = {
    domain.name(): occurrences
    for domain, occurrences in Counter(all_usage_domains).items()
}

In [None]:
# Show the usage-domain name -> occurrence count mapping as the cell output.
pwn_usage_domains

In [None]:
# Collect all synsets tagged with at least one of the selected usage domains
# and write them to a CSV file, one row per matching synset.

# Entries are compared against Synset.name() strings, so each one must be a
# complete synset name including the two-digit sense number (e.g. '.n.01').
usage_domains = ['disparagement.n.01','ethnic_slur.n.01','archaism.n.01',
                'colloquialism.n.01','slang.n.02','obscenity.n.02']

# newline='' is what the csv module asks for on files handed to csv.writer;
# without it, platforms that translate line endings can emit blank rows.
with open('explicit/pwn_usage_domain_synsets_all.csv','w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    header = ["synset_id","usage_domains"]
    writer.writerow(header)

    # Set copy of the selection for the overlap check below.
    selected_domains = set(usage_domains)

    for synset in wn.all_synsets():
        # Names of every usage domain attached to this synset.
        synset_usage_domains = [s.name() for s in synset.usage_domains()]
        # Write the synset if it shares at least one domain with the selection.
        if selected_domains.intersection(synset_usage_domains):
            # Second column holds the full list of the synset's usage-domain
            # names (not only the selected ones), written as the list's str().
            writer.writerow([synset.name(), synset_usage_domains])