In [60]:
from rdflib import SKOS, DCTERMS
from rdflib import RDF, Graph, Literal, URIRef
from qlit.thesaurus import Thesaurus

# Common URI prefix, used further down when building new resource URIs
BASE = 'https://queerlit.dh.gu.se/qlit/v1/'

# Read the compiled, version-controlled thesaurus data into `t`
t = Thesaurus().parse('qlit.nt')

# Report how many term references the thesaurus exposes via refs()
print(f'Terms: {len(t.refs())}')

Terms: 880


### Matchningar mot SAO/Barn

In [2]:
# List every skos:exactMatch pair whose target URI lives under id.kb.se/term/,
# one "subject <tab> target" line per match.
kb_matches = (
    (term, target)
    for term, target in t.subject_objects(SKOS.exactMatch)
    if str(target).startswith('https://id.kb.se/term/')
)
for term, target in kb_matches:
    print(term, '\t', target)

https://queerlit.dh.gu.se/qlit/v1/ac83ik65 	 https://id.kb.se/term/barn/Kl%C3%A4der
https://queerlit.dh.gu.se/qlit/v1/ac83ik65 	 https://id.kb.se/term/sao/Kl%C3%A4der
https://queerlit.dh.gu.se/qlit/v1/ad73kh62 	 https://id.kb.se/term/barn/Bordeller
https://queerlit.dh.gu.se/qlit/v1/ad73kh62 	 https://id.kb.se/term/sao/Bordeller
https://queerlit.dh.gu.se/qlit/v1/ai60cz27 	 https://id.kb.se/term/sao/Polygami
https://queerlit.dh.gu.se/qlit/v1/aq16rl33 	 https://id.kb.se/term/sao/Prostituerade
https://queerlit.dh.gu.se/qlit/v1/av87kj76 	 https://id.kb.se/term/sao/Fr%C3%A5nskilda%20personer
https://queerlit.dh.gu.se/qlit/v1/aw95bz40 	 https://id.kb.se/term/barn/Polyamori
https://queerlit.dh.gu.se/qlit/v1/aw95bz40 	 https://id.kb.se/term/sao/Polyamori
https://queerlit.dh.gu.se/qlit/v1/ax27ow53 	 https://id.kb.se/term/sao/Striptease
https://queerlit.dh.gu.se/qlit/v1/bk48ga51 	 https://id.kb.se/term/barn/Alkoholism
https://queerlit.dh.gu.se/qlit/v1/bk48ga51 	 https://id.kb.se/term/sao/Alkoholi

### Close/exact matchningar mot Homosaurus

In [16]:
# Collect every triple whose object is a Homosaurus URI. The filter looks only
# at the object, so triples with any predicate are included.
homosaurus_matches = [
    triple for triple in t
    if str(triple[2]).startswith('https://homosaurus.org/')
]
print('Homosaurus matches:', len(homosaurus_matches))

# Distinct subjects: a term with several Homosaurus links is counted once
n_has_homosaurus_match = len({subject for subject, _, _ in homosaurus_matches})
print('Terms with Homosaurus matches:', n_has_homosaurus_match)

Homosaurus matches: 774
Terms with Homosaurus matches: 757


### Set collection

In [45]:
# Parse the outline file into {collection label: [concept labels]}.
# A line that does not start with a tab opens a new collection; a line that
# starts with a tab is a concept label belonging to the most recent collection.
# Blank lines are ignored.
collection_map = dict()
collection = None
# The labels contain non-ASCII characters (e.g. "Övrigt"), so decode explicitly
# instead of relying on the platform's locale default.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open('../collection-kategorier.txt', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            continue
        if not line.startswith('\t'):
            collection = line.strip()
            collection_map[collection] = []
        elif collection is None:
            # An indented line before any heading has no collection to join;
            # fail with a readable message instead of `KeyError: None`.
            raise ValueError(
                f'Concept "{line.strip()}" appears before any collection heading'
            )
        else:
            collection_map[collection].append(line.strip())

print(collection_map.keys())
print(collection_map['Tema: Övrigt (HBTQI)'])

dict_keys(['Tema: Identiteter och praktiker (HBTQI)', 'Tema: Sex, intimitet och kroppslighet (HBTQI)', 'Tema: Medicin (HBTQI)', 'Tema: Rörelser och rättigheter (HBTQI)', 'Tema: Relationer (HBTQI)', 'Tema: Diskriminering, hat och våld (HBTQI)', 'Tema: Kultur och fritid (HBTQI)', 'Tema: Livsåskådning, tro och teorier (HBTQI)', 'Tema: Övrigt (HBTQI)'])
['Djur (HBTQI)', 'Droganvändare', 'Droger', 'Informationsförmedling', 'Juridik', 'Kläder', 'Offentliga hygieninrättningar', 'Poliser', 'Sexarbetare (HBTQI)', 'Sexarbete', 'Sexindustri', 'Sexköpare', 'Skolan (HBTQI)', 'Sociala medier', 'Sociala normer', 'Symbolik (HBTQI)', 'Äldreboenden (HBTQI)']


In [63]:
from rdflib import Graph

def concept_by_label(label):
    """Return the first concept whose skos:prefLabel equals ``label``.

    The lookup slices the thesaurus ``t`` on the predicate skos:prefLabel and
    the object ``Literal(label)``; the literal is built without a language tag
    or datatype. If several concepts match, the first one yielded is returned.

    Args:
        label: The preferred label to look up, as a plain string.

    Returns:
        The first result yielded by the slice.
        NOTE(review): presumably the concept's URIRef, assuming ``t`` slices
        like an rdflib Graph — confirm.

    Raises:
        IndexError: If no concept carries that label. The message names the
            missing label, which the bare ``refs[0]`` failure did not.
    """
    refs = list(t[:SKOS.prefLabel:Literal(label)])
    if not refs:
        raise IndexError(f'No concept has label "{label}"')
    return refs[0]

# Build one skos:Collection per theme and print each as its own serialized graph.
# Identifiers are sequential (c0, c1, ...) in the iteration order of collection_map.
for i, (collection_label, concept_labels) in enumerate(collection_map.items()):
    identifier = f'c{i}'
    ref = URIRef(BASE + identifier)
    # Resolve all member labels before the graph is created
    concept_refs = list(map(concept_by_label, concept_labels))

    g = Graph(base=BASE)
    # Descriptive statements about the collection itself
    description = [
        (DCTERMS.identifier, Literal(identifier)),
        (RDF.type, SKOS.Collection),
        (SKOS.inScheme, t.scheme),
        (SKOS.prefLabel, Literal(collection_label)),
    ]
    for predicate, value in description:
        g.add((ref, predicate, value))

    # One skos:member statement per resolved concept
    for concept_ref in concept_refs:
        g.add((ref, SKOS.member, concept_ref))

    print(g.serialize())

@base <https://queerlit.dh.gu.se/qlit/v1/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<c0> a skos:Collection ;
    dcterms:identifier "c0" ;
    skos:inScheme <https://queerlit.dh.gu.se/qlit/v1> ;
    skos:member <by72wh90>,
        <dl13tg08>,
        <dt87hy25>,
        <ga70uy71>,
        <gz65sa45>,
        <he13pb69>,
        <iy93lr17>,
        <oh06mw89>,
        <ot53zk29>,
        <pf53mb18>,
        <pj39xm88>,
        <pk82yt24>,
        <qj48lh67>,
        <ss55ax39>,
        <wv53po26>,
        <zq29sy82>,
        <zv66nv87> ;
    skos:prefLabel "Tema: Identiteter och praktiker (HBTQI)" .


@base <https://queerlit.dh.gu.se/qlit/v1/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<c1> a skos:Collection ;
    dcterms:identifier "c1" ;
    skos:inScheme <https://queerlit.dh.gu.se/qlit/v1> ;
    skos:member <ah90te46>,
        <cb66jw88>,
        <hi88il01>,
