## Competency questions

In [1]:
import rdflib
from rdflib import Graph
from rdflib.namespace import Namespace
from rdflib.namespace import SKOS, RDF

In [2]:
# Namespace objects for the vocabularies used by the glossary.
# NOTE(review): none of the cells visible in this notebook reference these three
# objects; the SPARQL strings use the culco:/skosxl:/dcterms: prefixes directly and
# no query passes initNs, so the prefixes presumably resolve from the bindings
# declared in the parsed Turtle file — confirm.
# NOTE(review): "http://example.nl/ontology#" looks like a placeholder IRI for
# culco — TODO confirm against the prefix declared in glossary.ttl.
culco = Namespace("http://example.nl/ontology#")
skosxl = Namespace("http://www.w3.org/2008/05/skos-xl#")
dcterms = Namespace("http://purl.org/dc/terms/")

In [3]:
# Build an empty graph and fill it with the glossary published as Turtle.
# NOTE(review): the URL points at the moving `main` branch, so the triple count and
# query outputs below may change between runs.
wm = Graph()
wm.parse(
    source="https://raw.githubusercontent.com/cultural-ai/wordsmatter/main/glossary.ttl",
    format="turtle",
)

<Graph identifier=Ne94c41d9873145108ee482037cf9f0ff (<class 'rdflib.graph.Graph'>)>

In [4]:
# Number of triples held in the glossary graph
len(wm)

2490

### 1. Which entries have references to other entries?

In [5]:
# Entries that reference other entries: one result row per referencing entry,
# with the IRIs of all referenced entries concatenated into a single string.
# ?cont_issue is listed in GROUP BY as well, because SPARQL 1.1 only allows
# grouped variables and aggregates to be projected from a grouped query.
q_1 = wm.query(
    """SELECT ?CI ?cont_issue (GROUP_CONCAT(?CI_2;SEPARATOR=",") AS ?CI_2_list)
        WHERE {
            ?CI dcterms:references ?CI_2 .
            ?CI dcterms:title ?cont_issue .
        }
        GROUP BY ?CI ?cont_issue
        """)

for row in q_1:
    # Keep only the local part of each IRI (the text after '#') for readability.
    entry_id = row.CI.split('#')[-1]
    referenced_ids = [iri.split('#')[-1] for iri in row.CI_2_list.split(',')]
    print(f"'{row.cont_issue}'({entry_id}): {referenced_ids}")

'Negro'(129): ['135', '98']
'Berber'(93): ['92']
'Bosneger'(102_nl): ['127_nl', '133_nl']
'Descent'(101): ['136', '91']
'Western'(143): ['139']
'Blank'(100_nl): ['147_nl', '149_nl', '109_nl']
'Mongool'(130_nl): ['124_nl', '133_nl']
'Indisch'(118): ['119']
'Bush Negro'(97): ['129', '122']
'Black'(94): ['129']
'Race'(135): ['98', '129']
'Westers'(146_nl): ['103_nl']
'Zwart'(149_nl): ['133_nl']
'Wit'(147_nl): ['100_nl']
'Neger'(133_nl): ['124_nl', '140_nl']
'Aboriginal'(90): ['105', '116']
'Koppensneller'(126_nl): ['137_nl']
'Blank'(95): ['144']
'Ras'(140_nl): ['124_nl', '133_nl']
'Etniciteit'(106_nl): ['140_nl']
'Halfbloed'(110_nl): ['140_nl']
'Mulat'(132_nl): ['110_nl']
'Half-blood'(110): ['135']
'Indisch'(117_nl): ['118_nl']
'Headhunter'(111): ['132']
'White'(144): ['95']
'Gay'(108_nl): ['113_nl']
'Berber'(99_nl): ['97_nl']
'Aboriginal'(94_nl): ['116_nl', '105_nl']
'Mulatto'(127): ['110']


### 2. How do entries in English and Dutch glossaries correspond to each other?

In [6]:
# Pair every Dutch entry with the English entry it is declared an exact match of,
# together with the titles of both.
q_2 = wm.query(
    """SELECT ?CI_en ?CI_en_title ?CI_nl ?CI_nl_title
    WHERE {
    ?CI_nl skos:exactMatch ?CI_en .
    ?CI_en dcterms:title ?CI_en_title .
    ?CI_nl dcterms:title ?CI_nl_title .
    }
       """)

# Result columns arrive in SELECT order, so each row unpacks positionally.
for en_iri, en_title, nl_iri, nl_title in q_2:
    english_side = f"{en_iri} ('{en_title}'({en_title.language}))"
    dutch_side = f"{nl_iri} ('{nl_title}'({nl_title.language}))"
    print(f"{english_side} = {dutch_side}")

https://w3id.org/culco/wordsmatter#94 ('Black'(en)) = https://w3id.org/culco/wordsmatter#149_nl ('Zwart'(nl))
https://w3id.org/culco/wordsmatter#116 ('Indian'(en)) = https://w3id.org/culco/wordsmatter#116_nl ('Indiaan'(nl))
https://w3id.org/culco/wordsmatter#135 ('Race'(en)) = https://w3id.org/culco/wordsmatter#140_nl ('Ras'(nl))
https://w3id.org/culco/wordsmatter#137 ('Servant'(en)) = https://w3id.org/culco/wordsmatter#98_nl ('Bediende'(nl))
https://w3id.org/culco/wordsmatter#104 ('Dwarf'(en)) = https://w3id.org/culco/wordsmatter#104_nl ('Dwerg'(nl))
https://w3id.org/culco/wordsmatter#90 ('Aboriginal'(en)) = https://w3id.org/culco/wordsmatter#94_nl ('Aboriginal'(nl))
https://w3id.org/culco/wordsmatter#112 ('Hermaphrodite'(en)) = https://w3id.org/culco/wordsmatter#112_nl ('Hermafrodiet'(nl))
https://w3id.org/culco/wordsmatter#126 ('Moor'(en)) = https://w3id.org/culco/wordsmatter#131_nl ('Moor'(nl))
https://w3id.org/culco/wordsmatter#92 ('Barbarian'(en)) = https://w3id.org/culco/wordsma

### 3. Get all suggested labels for every contentious label

In [7]:
# For each contentious label literal, gather the literal forms of all labels
# suggested for it into one comma-separated string.
q_3 = wm.query(
    """SELECT ?cont_label (GROUP_CONCAT(?sug_label;SEPARATOR=",") AS ?sug_label_list)
    WHERE {
    ?Suggestion culco:suggestedFor / skosxl:literalForm ?cont_label ;
        culco:hasSuggestedLabel / skosxl:literalForm ?sug_label .}
    GROUP BY ?cont_label """)

for cont_label, joined_suggestions in q_3:
    # Undo the GROUP_CONCAT: the separator used in the query is a comma.
    suggestions = joined_suggestions.split(',')
    print(f"'{cont_label}' ({cont_label.language}): {suggestions}")

'Negro' (en): ['Black']
'Jappenkamp' (nl): ['Japanese camp', 'Japans kamp voor krijgsgevangenen', 'Japanese internment camp', 'Japans kamp voor militaire gevangenen (in Birma en Thailand)', 'Japans concentratiekamp', 'Japans kamp', 'Japanese military prison camp (in Burma and Thailand)', 'Japanese concentration camp', 'Japanese prisoners of war camp', 'Japans interneringskamp']
'Jappenkampen' (nl): ['Japanese camp', 'Japans kamp voor krijgsgevangenen', 'Japanese internment camp', 'Japans kamp voor militaire gevangenen (in Birma en Thailand)', 'Japans concentratiekamp', 'Japans kamp', 'Japanese military prison camp (in Burma and Thailand)', 'Japanese concentration camp', 'Japanese prisoners of war camp', 'Japans interneringskamp']
'travestiet' (nl): ['crossdresser']
'Trans' (nl): ['crossdresser']
'Hottentot' (nl): ['Khoisan']
'Slaaf' (nl): ['Tot slaaf gemaakt', 'mensen in slavernij']
'Disabled' (en): ['Differently abled', 'People with disabilities', 'Disabled people']
'Slave' (en): ['En

### 4. Get all contentious labels without suggestions

In [8]:
# Literal forms of contentious labels that no suggestion is attached to
# (no resource has culco:suggestedFor pointing at the label).
q_4 = wm.query(
    """SELECT DISTINCT ?cont_label_lit 
    WHERE {
    ?CI culco:hasContentiousLabel ?cont_label .
    ?cont_label skosxl:literalForm ?cont_label_lit .
    FILTER NOT EXISTS {
    ?Suggestion culco:suggestedFor ?cont_label } }""")

# Each row carries a single column, hence the one-element unpacking.
for (label_literal,) in q_4:
    print(f"'{label_literal}' ({label_literal.language})")

'mestizo' (en)
'Zuid­-Rhodesië' (nl)
'etnische groep' (nl)
'Calcutta' (nl)
'Inuit' (en)
'Métis' (en)
'Handicap' (en)
'Burma' (en)
'Madras' (nl)
'Boslandcreool' (nl)
'Batavia' (nl)
'Jap' (nl)
'half-breed' (en)
'Southern Rhodesia' (en)
'immigrant' (en)
'Inuit' (nl)
'full blood' (en)
'Birma' (nl)
'volbloed' (nl)
'lilliputter' (nl)
'Métis' (nl)
'Batavia' (en)
'lilliputian' (en)
'Madras' (en)
'Calcutta' (en)


### 5. Which Contentious Issues are unique to the Dutch version of the glossary?

In [9]:
# Titles of entries that have a Dutch-language description and no
# skos:exactMatch link to another entry.
q_5 = wm.query(
    """SELECT ?CI_nl_title 
    WHERE {
    ?CI_nl dcterms:title ?CI_nl_title ;
           dcterms:description ?CI_nl_description .
    FILTER (lang(?CI_nl_description) = "nl")      
    FILTER NOT EXISTS {
    ?CI_nl skos:exactMatch ?CI_en .}}""")

# Each row carries a single column, hence the one-element unpacking.
for (dutch_title,) in q_5:
    print(f"'{dutch_title}'")

'Inlander'
'Islamiet'


### 6. Which contentious terms share similar suggestions?

In [10]:
# Group contentious label literals by the Suggestion resource they share.
q_6 = wm.query(
    """SELECT ?Suggestion (GROUP_CONCAT(?cont_label_lit;SEPARATOR=",") AS ?cont_label_lit_list)
    WHERE {
        ?Suggestion a culco:Suggestion ;
                    culco:suggestedFor ?cont_label .
        ?cont_label skosxl:literalForm ?cont_label_lit .
    }
    GROUP BY ?Suggestion
    """)

for suggestion, joined_labels in q_6:
    shared_labels = joined_labels.split(',')
    # A suggestion attached to a single label is not "shared"; skip it.
    if len(shared_labels) <= 1:
        continue
    print(f"{suggestion}: {shared_labels}")

https://w3id.org/culco/wordsmatter#110_nl_s_01: ['Mulat', 'Halfbloed', 'mesties']
https://w3id.org/culco/wordsmatter#137_nl_s_01: ['Primitief', 'Primitivisme']
https://w3id.org/culco/wordsmatter#98_nl_s_01: ['baboe', 'page', 'Bediende', 'knecht']
https://w3id.org/culco/wordsmatter#120_nl_s_01: ['Inboorling', 'Inlander']
https://w3id.org/culco/wordsmatter#98_nl_s_02: ['Bediende', 'knecht', 'baboe', 'page']
https://w3id.org/culco/wordsmatter#145_nl_s_02: ['travestiet', 'Trans']
https://w3id.org/culco/wordsmatter#3_nl_s: ['Gay', 'Trans', 'Queer', 'Homo']
https://w3id.org/culco/wordsmatter#110_nl_s_02: ['Mulat', 'Halfbloed', 'mesties']
https://w3id.org/culco/wordsmatter#147_nl_s_01: ['Blank', 'Wit']
https://w3id.org/culco/wordsmatter#139_s_02: ['Developing nations', 'First World', 'Second World', 'Third World', 'low-income countries']
https://w3id.org/culco/wordsmatter#0_s: ['Colored', 'Tribe', 'Pygmy', 'Allochtoon', 'Indian', 'Aboriginal', 'Descent']
https://w3id.org/culco/wordsmatter#104

### 7. Which Contentious Issues mention more than one contentious term?

In [11]:
# Contentious Issues together with the literal forms of all their contentious
# labels, concatenated into one comma-separated string per issue.
# The title is part of GROUP BY and the raw ?cont_label_lit is no longer projected:
# SPARQL 1.1 only allows grouped variables and aggregates in the SELECT clause of
# a grouped query.
q_7 = wm.query(
"""
SELECT ?CI ?CI_title (GROUP_CONCAT(?cont_label_lit;SEPARATOR=",") AS ?cont_label_lit_list)
    WHERE {
    ?CI a culco:ContentiousIssue ;
        dcterms:title ?CI_title ;
        culco:hasContentiousLabel / skosxl:literalForm ?cont_label_lit .
    }
    GROUP BY ?CI ?CI_title
""")

for row in q_7:
    labels = row.cont_label_lit_list.split(',')
    # Only issues mentioning more than one contentious term are of interest.
    if len(labels) > 1:
        print(f"'{row.CI_title}'({row.CI_title.language}): {labels}")

'Bediende'(nl): ['baboe', 'page', 'Bediende', 'knecht']
'(De) Derde Wereld'(nl): ['Tweede Wereld', 'Derde Wereld', 'Eerste Wereld', 'ontwikkelingslanden', 'lagelonenlanden']
'Jappenkampen'(nl): ['Jappenkamp', 'Jap', 'Jappenkampen']
'Inboorling'(nl): ['Inboorling', 'Inlander']
'Third World'(en): ['Third World', 'Developing nations', 'low-income countries', 'First World', 'Second World']
'Homo'(nl): ['Homoseksueel', 'Homo', 'Homoseksualiteit']
'Bush Negro'(en): ['Maroon', 'Bush Negro', 'Negro', 'Boslandcreool']
'Gay'(en): ['Gay', 'homo', 'Homosexual']
'Homosexual'(en): ['homo', 'Homosexual']
'Disabled'(en): ['Handicap', 'Disabled']
'Blank'(nl): ['Black', 'Blank', 'White']
'Headhunter'(en): ['Primitive', 'Headhunter']
'Oriental'(en): ['Oriental', 'Exotic']
'Handicap'(nl): ['Handicap', 'gehandicapten']
'Aboriginal'(nl): ['Indiaan', 'Métis', 'Inuit', 'Aboriginal']
'Allochtoon'(en): ['immigrant', 'Allochtoon']
'Dwarf'(en): ['lilliputian', 'Pygmy', 'Dwarf']
'Trans'(nl): ['travestiet', 'Trans'

### 8. Which Contentious Issues in the English glossary have titles in Dutch? 

In [12]:
# Contentious Issues whose title is tagged Dutch while their description is
# tagged English.
q_8 = wm.query(
"""
SELECT ?CI ?CI_title
WHERE {
?CI a culco:ContentiousIssue ;
    dcterms:title ?CI_title ;
    dcterms:description ?CI_description .
    
    FILTER(lang(?CI_title) = "nl")
    FILTER(lang(?CI_description) = "en")
}

""")

# Only the title column is printed; the issue IRI is unpacked but not shown.
for issue_iri, issue_title in q_8:
    print(f"'{issue_title}'({issue_title.language})")

'Indisch'(nl)
'Blank'(nl)
'Inboorling'(nl)
'Politionele actie'(nl)
'Jappenkampen'(nl)


### 9. Which contentious labels are used in the Contentious Issue "Aboriginal"(en) and its related CIs?

In [13]:
def get_cont_labels(title: str, lang: str = 'en') -> str:
    '''
    Get the contentious labels mentioned in a Contentious Issue and in the
    Contentious Issues it references.

    title: str, title of the Contentious Issue
    lang: str, language tag of the title: 'en' or 'nl'; default 'en'

    Returns a str of the form "<issue IRI>: {'label', ...}" with the distinct
    labels listed in sorted order, so the output is the same on every run.

    Raises IndexError when no Contentious Issue carries the given title and
    language (the same exception type the lookup raised before, now with a message).

    Reads the module-level graph `wm`.
    '''

    # The title is handed to the engine as a pre-bound RDF literal rather than
    # being spliced into the query text, so quotes or other special characters
    # in `title` cannot break (or alter) the query.
    title_literal = rdflib.Literal(title, lang=lang)

    query_str = """SELECT ?CI ?cont_label_lit ?cont_label_lit_referenced
    WHERE{
    ?CI a culco:ContentiousIssue ;
        dcterms:title ?CI_title ;
        culco:hasContentiousLabel / skosxl:literalForm ?cont_label_lit .

        OPTIONAL {?CI dcterms:references / culco:hasContentiousLabel / skosxl:literalForm ?cont_label_lit_referenced .}
    }"""

    rows = list(wm.query(query_str, initBindings={'CI_title': title_literal}))
    if not rows:
        raise IndexError(f"no Contentious Issue found with title '{title}'@{lang}")

    # NOTE: if several issues share this title, the labels of all of them are
    # merged below while only the IRI from the first row is reported.
    CI = rows[0].CI

    all_labels = set()
    for row in rows:
        all_labels.add(row.cont_label_lit.value)
        # The OPTIONAL part leaves this column unbound for issues without references.
        if row.cont_label_lit_referenced is not None:
            all_labels.add(row.cont_label_lit_referenced.value)

    # Render like a set literal, but in sorted order: the iteration order of a
    # set of strings is not stable between interpreter runs.
    rendered_labels = ', '.join(repr(label) for label in sorted(all_labels, key=str))
    result_str = f"{CI}: {{{rendered_labels}}}"

    return result_str

In [14]:
# Labels used in the English "Aboriginal" issue and in the issues it references
get_cont_labels(title='Aboriginal', lang='en')

"https://w3id.org/culco/wordsmatter#90: {'Métis', 'Inuit', 'Eskimo', 'Indian', 'Aboriginal'}"

### 10. Which Contentious Issues mention the term "white"(en) in the description text?

In [15]:
# Titles of Contentious Issues whose description matches 'white',
# case-insensitively (regex flag 'i').
q_10 = wm.query(
    """
    SELECT ?CI_title
    WHERE {
    ?CI a culco:ContentiousIssue ;
        dcterms:title ?CI_title ;
        dcterms:description ?CI_desc .
        
    FILTER (regex(?CI_desc,'white','i'))
    }
    """)

# Each row carries a single column, hence the one-element unpacking.
for (issue_title,) in q_10:
    print(f"'{issue_title}'")

'Race'
'Caucasian'
'Blank'
'Allochtoon'
'Half-blood'
'Colored'
'Roots'
'Mulatto'
'Black'
'White'
