In [1]:
import rdflib, mimeparse, requests
from rdflib import URIRef
from rdflib.namespace import RDF, RDFS, VOID

In [2]:
# Namespace helpers for the dataset's base URI and for its 4.0
# classification / instance / work paths.
abr = rdflib.Namespace("https://wsburroughs.link/anything-but-routine/")
abrc = rdflib.Namespace("https://wsburroughs.link/anything-but-routine/4.0/classification/")
abri = rdflib.Namespace("https://wsburroughs.link/anything-but-routine/4.0/instance/")
abrw = rdflib.Namespace("https://wsburroughs.link/anything-but-routine/4.0/work/")
# Namespaces for the two external ontologies referenced by URI:
# BIBFRAME (bf) and the ARM core ontology 0.1 (arm).
bf = rdflib.Namespace("http://id.loc.gov/ontologies/bibframe/")
arm = rdflib.Namespace("https://w3id.org/arm/core/ontology/0.1/")

In [3]:
def initialize_abr_graph():
    """Create an empty rdflib graph with the notebook's prefixes bound.

    The prefixes are bound to the module-level ``Namespace`` objects
    (``abr``, ``abrc``, ``abri``, ``abrw``, ``bf``, ``arm``) instead of
    repeating their URI strings, so a prefix and the namespace constant of
    the same name cannot drift apart.

    Returns:
        A new ``rdflib.Graph`` containing no triples.
    """
    g = rdflib.Graph()
    # Same prefixes, in the same order, as the namespace constants above.
    prefix_bindings = (
        ("abr", abr),
        ("abrc", abrc),
        ("abri", abri),
        ("abrw", abrw),
        ("bf", bf),
        ("arm", arm),
    )
    for prefix, namespace in prefix_bindings:
        g.bind(prefix, namespace)
    return g

In [4]:
# Request headers sent with every fetch: ask for Turtle or RDF/XML.
headers = {
    "Accept": "text/turtle, application/rdf+xml",
}

In [5]:
def parse_response(graph, res):
    """Parse the body of an HTTP response into ``graph``.

    The serialization is chosen from the response's Content-Type header:
    its ``type/subtype`` part (parameters such as charset are dropped) is
    passed to ``graph.parse`` as the format name.

    Args:
        graph: Graph whose ``parse(data=..., format=...)`` receives the body.
        res: Response object exposing ``headers``, ``content`` and ``url``.

    Raises:
        ValueError: If the response has no Content-Type header.
    """
    content_type = res.headers.get('content-type')
    if not content_type:
        # Fail with a clear message instead of an obscure error from mimeparse.
        raise ValueError(f'No Content-Type header in response from {res.url}')
    media_type, media_subtype, _params = mimeparse.parse_mime_type(content_type)
    # Pass the raw bytes rather than res.text: requests decodes text/* bodies
    # that declare no charset as ISO-8859-1, which corrupts non-ASCII
    # characters in UTF-8 serializations such as Turtle.
    graph.parse(data=res.content, format=f'{media_type}/{media_subtype}')

In [6]:
def load_void_resources(graph, uri):
    """Load a dataset into ``graph`` starting from the description at ``uri``.

    The document at ``uri`` is fetched and parsed. If it gives a
    ``void:dataDump`` for ``uri``, that dump is fetched and parsed too;
    otherwise every ``void:rootResource`` of ``uri`` is followed with
    ``find_and_follow``. Responses whose status is not OK are skipped
    silently. The dump value (or ``None``) is printed.

    Args:
        graph: Graph that receives the parsed triples.
        uri: Address of the dataset description, as a string.
    """
    void_response = requests.get(uri, headers=headers)
    if void_response.status_code != requests.codes.ok:
        return
    parse_response(graph, void_response)

    dataset = URIRef(uri)
    dump_file = graph.value(dataset, VOID.dataDump)
    print(dump_file)

    if not dump_file:
        # No dump advertised: crawl from the declared root resources instead.
        for root in graph.objects(dataset, VOID.rootResource):
            find_and_follow(graph, root)
        return

    dump_response = requests.get(dump_file, headers=headers)
    if dump_response.status_code == requests.codes.ok:
        parse_response(graph, dump_response)

In [7]:
def find_and_follow(graph, uri, level=3):
    """Dereference ``uri`` into ``graph`` and recursively follow its links.

    Nothing is fetched when ``uri`` already occurs as a subject in
    ``graph``. Otherwise it is requested with the notebook's ``headers``
    and, on an OK response, parsed into ``graph``. While ``level`` is
    positive, every ``URIRef`` object of a triple whose subject is ``uri``
    is followed the same way with ``level - 1``. Responses whose status is
    not OK are skipped silently.

    Args:
        graph: Graph that is both consulted and extended.
        uri: Resource to dereference.
        level: Remaining number of link hops to follow from ``uri``.
    """
    # Test for a matching triple pattern instead of iterating over
    # graph.subjects(), which walks every triple on each recursive call.
    if (uri, None, None) in graph:
        return

    res = requests.get(uri, headers=headers)
    if res.status_code != requests.codes.ok:
        return
    parse_response(graph, res)

    if level <= 0:
        return
    # Snapshot the links before recursing: the recursive calls parse new
    # triples into the very graph that would otherwise still be iterated.
    linked = [o for o in graph.objects(uri, None) if isinstance(o, URIRef)]
    for o in linked:
        find_and_follow(graph, o, level=level - 1)

In [8]:
%%time
# Build a fresh prefixed graph and load the dataset starting from its base
# URI; the %%time cell magic reports how long the load takes.
graph = initialize_abr_graph()
load_void_resources(graph, 'https://wsburroughs.link/anything-but-routine/')

https://wsburroughs.link/anything-but-routine/dump
CPU times: user 4.02 s, sys: 150 ms, total: 4.17 s
Wall time: 7.57 s


In [9]:
# Total number of subjects typed as bf:Classification, bf:Work or bf:Instance
# (a subject carrying more than one of these types is counted once per type).
n_entities = sum(
    1
    for entity_class in (bf.Classification, bf.Work, bf.Instance)
    for subject in graph.subjects(RDF.type, entity_class)
)

n_entities

324

In [10]:
# Number of subjects typed as bf:Work.
len(list(graph.subjects(RDF.type, bf.Work)))

85

In [11]:
# Number of subjects typed as bf:Instance.
len(list(graph.subjects(RDF.type, bf.Instance)))

229

In [12]:
# Number of subjects typed as bf:Classification.
len(list(graph.subjects(RDF.type, bf.Classification)))

10