In [1]:
from time import perf_counter

from rdflib import Graph, SKOS, DCTERMS
from rdflib import Literal, RDF, URIRef
from qlit.thesaurus import Thesaurus

# Base URI for QLIT identifiers.
BASE = 'https://queerlit.dh.gu.se/qlit/v1/'

# Read the compiled, version-controlled thesaurus data into `t`.
t = Thesaurus().parse('qlit.nt')

print(f'Terms: {len(t.refs())}')

Terms: 900


### Matchningar mot SAO/Barn

In [5]:
# One tab-separated row (KB term URI, QLIT URI, preferred label) for every
# exactMatch whose target lives under id.kb.se.
lines = [
    f'{o}\t{s}\t{t.value(s, SKOS.prefLabel)}'
    for s, o in t.subject_objects(SKOS.exactMatch)
    if str(o).startswith('https://id.kb.se/term/')
]
# Print in sorted order; `lines` itself is left in graph order.
for line in sorted(lines):
    print(line)

https://id.kb.se/term/barn/%C3%84ktenskap	https://queerlit.dh.gu.se/qlit/v1/vh55tv72	Äktenskap
https://id.kb.se/term/barn/Asexualitet	https://queerlit.dh.gu.se/qlit/v1/bw78bx64	Asexualitet
https://id.kb.se/term/barn/BDSM	https://queerlit.dh.gu.se/qlit/v1/oj77yj15	BDSM
https://id.kb.se/term/barn/Barn	https://queerlit.dh.gu.se/qlit/v1/gz65sa45	Barn
https://id.kb.se/term/barn/Bisexualitet	https://queerlit.dh.gu.se/qlit/v1/ty61wt09	Bisexualitet
https://id.kb.se/term/barn/Bloggar	https://queerlit.dh.gu.se/qlit/v1/uy98au05	Bloggar
https://id.kb.se/term/barn/Bordeller	https://queerlit.dh.gu.se/qlit/v1/ad73kh62	Bordeller
https://id.kb.se/term/barn/Br%C3%B6st	https://queerlit.dh.gu.se/qlit/v1/um68yd69	Bröst
https://id.kb.se/term/barn/Crossdressing	https://queerlit.dh.gu.se/qlit/v1/hb81xa89	Crossdressing
https://id.kb.se/term/barn/Feminism	https://queerlit.dh.gu.se/qlit/v1/kb69pe59	Feminism
https://id.kb.se/term/barn/Flickor	https://queerlit.dh.gu.se/qlit/v1/nf67vq64	Flickor
https://id.kb.se/ter

### Close/exact matchningar mot Homosaurus

In [6]:
# Counts only: collect every triple whose object is a Homosaurus URI,
# whatever the predicate, then tally triples and distinct subjects.
homosaurus_matches = [
    triple for triple in t
    if str(triple[2]).startswith('https://homosaurus.org/')
]
n_has_homosaurus_match = len({subject for subject, _, _ in homosaurus_matches})

print('Homosaurus matches:', len(homosaurus_matches))
print('Terms with Homosaurus matches:', n_has_homosaurus_match)


Homosaurus matches: 766
Terms with Homosaurus matches: 749


In [2]:
from itertools import chain

# Show mapping.
# Combine QLIT with Homosaurus so that preferred labels on both sides of a
# match can be looked up in a single graph.
homosaurus = Graph().parse('homosaurus.ttl')
qlit_homo = t + homosaurus
print(f'{len(t)} + {len(homosaurus)} = {len(qlit_homo)}')

# Tag each (subject, object) pair with the kind of match it came from.
exact_matches = ((s, 'exactMatch', o) for s, o in qlit_homo.subject_objects(SKOS.exactMatch))
close_matches = ((s, 'closeMatch', o) for s, o in qlit_homo.subject_objects(SKOS.closeMatch))
matches = list(chain(exact_matches, close_matches))
print(len(matches))

# Keep only links that go from a QLIT term to a Homosaurus term and render
# each as: Homosaurus URI, Homosaurus label, match kind, QLIT URI, QLIT label.
lines = [
    f'{o}\t{qlit_homo.value(o, SKOS.prefLabel)}\t{match}\t{s}\t{qlit_homo.value(s, SKOS.prefLabel)}'
    for s, match, o in matches
    if str(s).startswith('https://queerlit') and str(o).startswith('https://homosaurus')
]
print('\n'.join(lines))

13387 + 32575 = 45962
1771
https://homosaurus.org/v3/homoit0001055	Older gay men	exactMatch	https://queerlit.dh.gu.se/qlit/v1/me57jv39	Äldre bögar
https://homosaurus.org/v3/homoit0000377	Driver's licenses	exactMatch	https://queerlit.dh.gu.se/qlit/v1/up70lp68	Körkort
https://homosaurus.org/v3/homoit0001265	Self-insemination	exactMatch	https://queerlit.dh.gu.se/qlit/v1/wv84rg97	Självinseminering
https://homosaurus.org/v3/homoit0001306	Sexuality	exactMatch	https://queerlit.dh.gu.se/qlit/v1/jy58jm84	Sexualitet
https://homosaurus.org/v3/homoit0001007	Medication	exactMatch	https://queerlit.dh.gu.se/qlit/v1/ud60wo50	Läkemedel
https://homosaurus.org/v3/homoit0002289	Chemsex	exactMatch	https://queerlit.dh.gu.se/qlit/v1/yt55tr32	Kemsex
https://homosaurus.org/v3/homoit0000608	Handcuffs	exactMatch	https://queerlit.dh.gu.se/qlit/v1/an00td75	Handklovar
https://homosaurus.org/v3/homoit0001120	Police entrapment	exactMatch	https://queerlit.dh.gu.se/qlit/v1/rc46qr59	Brottsprovokation
https://homosaurus.

### Set collection

In [45]:
# Parse the category file into {collection label: [member concept labels]}.
# Format, as read below: a line that does not start with a tab opens a new
# collection; every following tab-indented line names one of its members.
# Blank lines are skipped.
collection_map = dict()
collection = None
# Explicit encoding: the labels contain non-ASCII characters (see the key
# printed below) and the platform default encoding is not UTF-8 everywhere.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open('../collection-kategorier.txt', encoding='utf-8') as f:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in f:
        entry = line.strip()
        if not entry:
            continue
        if not line.startswith('\t'):
            collection = entry
            collection_map[collection] = []
        elif collection is None:
            # A member line with no heading above it has nowhere to go;
            # fail with a readable message instead of `KeyError: None`.
            raise ValueError(f'Member line before any collection heading: {line!r}')
        else:
            collection_map[collection].append(entry)

print(collection_map.keys())
print(collection_map['Tema: Övrigt (HBTQI)'])

dict_keys(['Tema: Identiteter och praktiker (HBTQI)', 'Tema: Sex, intimitet och kroppslighet (HBTQI)', 'Tema: Medicin (HBTQI)', 'Tema: Rörelser och rättigheter (HBTQI)', 'Tema: Relationer (HBTQI)', 'Tema: Diskriminering, hat och våld (HBTQI)', 'Tema: Kultur och fritid (HBTQI)', 'Tema: Livsåskådning, tro och teorier (HBTQI)', 'Tema: Övrigt (HBTQI)'])
['Djur (HBTQI)', 'Droganvändare', 'Droger', 'Informationsförmedling', 'Juridik', 'Kläder', 'Offentliga hygieninrättningar', 'Poliser', 'Sexarbetare (HBTQI)', 'Sexarbete', 'Sexindustri', 'Sexköpare', 'Skolan (HBTQI)', 'Sociala medier', 'Sociala normer', 'Symbolik (HBTQI)', 'Äldreboenden (HBTQI)']


In [63]:
from rdflib import Graph

def concept_by_label(label):
    """Return the reference of a concept in `t` whose skos:prefLabel is `label`.

    The label is matched as a plain `Literal(label)`. If several concepts
    carry the label, the first one yielded by the graph is returned.

    Raises:
        IndexError: if no concept has the label. The message names the
            label, instead of the bare "list index out of range" that the
            lookup used to fail with after printing a warning.
    """
    # Slicing the graph as [:predicate:object] yields the matching subjects.
    match = next(iter(t[:SKOS.prefLabel:Literal(label)]), None)
    if match is None:
        raise IndexError(f'No concept has label "{label}"')
    return match

# Build and print one small graph per collection. Identifiers are "c0",
# "c1", ... in the iteration order of `collection_map`.
for index, (collection_label, member_labels) in enumerate(collection_map.items()):
    identifier = f'c{index}'
    ref = URIRef(BASE + identifier)
    # Resolve all member labels before the graph is created.
    concept_refs = list(map(concept_by_label, member_labels))

    g = Graph(base=BASE)
    triples = [
        (ref, RDF.type, SKOS.Collection),
        (ref, DCTERMS.identifier, Literal(identifier)),
        (ref, SKOS.inScheme, t.scheme),
        (ref, SKOS.prefLabel, Literal(collection_label)),
        *((ref, SKOS.member, concept_ref) for concept_ref in concept_refs),
    ]
    for triple in triples:
        g.add(triple)

    print(g.serialize())

@base <https://queerlit.dh.gu.se/qlit/v1/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<c0> a skos:Collection ;
    dcterms:identifier "c0" ;
    skos:inScheme <https://queerlit.dh.gu.se/qlit/v1> ;
    skos:member <by72wh90>,
        <dl13tg08>,
        <dt87hy25>,
        <ga70uy71>,
        <gz65sa45>,
        <he13pb69>,
        <iy93lr17>,
        <oh06mw89>,
        <ot53zk29>,
        <pf53mb18>,
        <pj39xm88>,
        <pk82yt24>,
        <qj48lh67>,
        <ss55ax39>,
        <wv53po26>,
        <zq29sy82>,
        <zv66nv87> ;
    skos:prefLabel "Tema: Identiteter och praktiker (HBTQI)" .


@base <https://queerlit.dh.gu.se/qlit/v1/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<c1> a skos:Collection ;
    dcterms:identifier "c1" ;
    skos:inScheme <https://queerlit.dh.gu.se/qlit/v1> ;
    skos:member <ah90te46>,
        <cb66jw88>,
        <hi88il01>,


## Profiling search

In [12]:
from time import time

class Stopwatch:
    """Context manager that prints how long its ``with`` body took.

    On exit it prints ``"<label>: <seconds>s"`` with two decimals. The
    measured duration is also stored in ``self.elapsed`` (seconds), which
    stays ``None`` until the block has exited.
    """

    def __init__(self, label):
        self.label = label
        self.elapsed = None  # seconds; set by __exit__

    def __enter__(self):
        # perf_counter is a monotonic, high-resolution clock, so the
        # measurement is not affected by wall-clock adjustments.
        self.tic = perf_counter()
        # Return the instance so `with Stopwatch(...) as sw:` binds it.
        return self

    def __exit__(self, type, value, tb):
        self.elapsed = perf_counter() - self.tic
        print("%s: %.2fs" % (self.label, self.elapsed))
        # Falls through returning None: exceptions from the body propagate.

# Time building a fresh Thesaurus from the QLIT and Homosaurus graphs.
with Stopwatch("Build graph"):
    th = Thesaurus()
    th += t + homosaurus

# Label predicates of interest.
fields = [SKOS.prefLabel, SKOS.altLabel, SKOS.hiddenLabel]

# Time a full scan of the graph that keeps the subject of every triple
# whose predicate is one of `fields`.
with Stopwatch("Iterate"):
    refs = [s for s, p, _ in th if p in fields]
    print(len(refs))

Build graph: 1.40s
6884
Iterate: 0.11s
