In [1]:
from pydbsp.algorithms import rdfs
from pydbsp.zset import ZSet
from typing import cast

def load_graph(file_path: str) -> rdfs.RDFGraph:
    """Read an integer-encoded triple file into a ZSet-backed RDF graph.

    Each non-blank line is split on whitespace and its first three fields are
    converted to ``int`` to form one edge tuple. Every distinct edge is stored
    with weight 1, so repeated lines collapse into a single entry.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``ZSet`` over the parsed edges, cast to ``rdfs.RDFGraph``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the first three fields of a line is not an integer.
    """
    edges: dict[tuple[int, ...], int] = {}

    with open(file_path, mode="r") as file:
        # Iterate the file lazily instead of materialising every line up front.
        for line in file:
            fields = line.split()
            if not fields:
                # A blank line (e.g. a trailing newline) would otherwise be
                # recorded as a bogus empty-tuple edge.
                continue
            edges[tuple(int(node) for node in fields[:3])] = 1

    return cast(rdfs.RDFGraph, ZSet(edges))

In [2]:
# Encoded input files for the two graphs (presumably the LUBM(1) TBox and
# ABox, going by the file names).
tbox_path = "data/lubm1tbox.ntenc"
abox_path = "data/lubm1abox.ntenc"

tbox = load_graph(tbox_path)
abox = load_graph(abox_path)

In [3]:
from pydbsp.stream.functions.linear import stream_introduction
from pydbsp.stream import StreamHandle
from pydbsp.algorithms.rdfs import IncrementalRDFSMaterialization, RDFTuple
from pydbsp.zset import ZSetAddition

# Turn each loaded graph into a stream, using a ZSetAddition over RDF tuples
# as the accompanying group argument.
tbox_stream = stream_introduction(tbox, ZSetAddition[RDFTuple]())
abox_stream = stream_introduction(abox, ZSetAddition[RDFTuple]())

# The reasoner takes handles rather than the streams themselves; each handle
# wraps a zero-argument callable that returns its stream.
tbox_stream_h = StreamHandle(lambda: tbox_stream)
abox_stream_h = StreamHandle(lambda: abox_stream)

rdfs_reasoner = IncrementalRDFSMaterialization(
    tbox_stream_h,
    abox_stream_h,
    None,
)

In [4]:
from pydbsp.stream import step_until_fixpoint

In [5]:
# Drive the RDFS reasoner with step_until_fixpoint; %time reports the CPU and
# wall-clock cost of the whole call.
%time step_until_fixpoint(rdfs_reasoner)

CPU times: user 616 ms, sys: 24.5 ms, total: 640 ms
Wall time: 638 ms


In [6]:
from pydbsp.stream.functions.linear import stream_elimination

# Apply stream_elimination to the reasoner's output, then count the entries
# of the result's `inner` mapping.
rdfs_materialization = stream_elimination(rdfs_reasoner.output())
len(rdfs_materialization.inner)

126417

In [7]:
from pydbsp.algorithms.datalog import IncrementalDatalog, Variable, Rule, Program
from pydbsp.algorithms.rdfs import SCO, SPO, TYPE, DOMAIN, RANGE

# RDFS entailment rules written as Datalog rules. Each rule is a tuple of
# atoms, read here as (head, body atom, body atom) -- TODO confirm the
# head-first convention against pydbsp's Rule definition. "T" atoms range over
# TBox triples and "A" atoms over ABox triples (see how the facts are tagged
# further down in this cell).
#
# NOTE: these are annotated assignments (`name: Rule = ...`). The previous
# chained form `name = Rule = ...` rebound the imported `Rule` name itself to
# a tuple.

# T(x, SCO, z) :- T(x, SCO, y), T(y, SCO, z)
sco_rec: Rule = (
    ("T", (Variable("x"), SCO, Variable("z"))),
    ("T", (Variable("x"), SCO, Variable("y"))),
    ("T", (Variable("y"), SCO, Variable("z"))),
)
# T(x, SPO, z) :- T(x, SPO, y), T(y, SPO, z)
spo_rec: Rule = (
    ("T", (Variable("x"), SPO, Variable("z"))),
    ("T", (Variable("x"), SPO, Variable("y"))),
    ("T", (Variable("y"), SPO, Variable("z"))),
)
# A(x, b, y) :- T(a, SPO, b), A(x, a, y)
prop_rec: Rule = (
    ("A", (Variable("x"), Variable("b"), Variable("y"))),
    ("T", (Variable("a"), SPO, Variable("b"))),
    ("A", (Variable("x"), Variable("a"), Variable("y"))),
)
# A(y, TYPE, x) :- T(a, DOMAIN, x), A(y, a, z)
domain_nonrec: Rule = (
    ("A", (Variable("y"), TYPE, Variable("x"))),
    ("T", (Variable("a"), DOMAIN, Variable("x"))),
    ("A", (Variable("y"), Variable("a"), Variable("z"))),
)
# A(z, TYPE, x) :- T(a, RANGE, x), A(y, a, z)
range_nonrec: Rule = (
    ("A", (Variable("z"), TYPE, Variable("x"))),
    ("T", (Variable("a"), RANGE, Variable("x"))),
    ("A", (Variable("y"), Variable("a"), Variable("z"))),
)
# A(z, TYPE, y) :- T(x, SCO, y), A(z, TYPE, x)
class_rec: Rule = (
    ("A", (Variable("z"), TYPE, Variable("y"))),
    ("T", (Variable("x"), SCO, Variable("y"))),
    ("A", (Variable("z"), TYPE, Variable("x"))),
)

# The program maps every rule to weight 1.
program = Program(
    {
        sco_rec: 1,
        spo_rec: 1,
        prop_rec: 1,
        domain_nonrec: 1,
        range_nonrec: 1,
        class_rec: 1,
    }
)
program_s = stream_introduction(program, ZSetAddition())
program_s_h = StreamHandle(lambda: program_s)

# Re-key every triple as a (relation name, triple) fact so that it lines up
# with the "A" / "T" atoms used by the rules; weights are carried over as-is.
abox_facts = ZSet({})
for triple, multiplicity in abox.items():
    abox_facts[("A", triple)] = multiplicity

tbox_facts = ZSet({})
for triple, multiplicity in tbox.items():
    tbox_facts[("T", triple)] = multiplicity

# Add both fact sets together and feed the sum to the reasoner as one stream.
rdf_facts = ZSetAddition().add(abox_facts, tbox_facts)
rdf_fact_s = stream_introduction(rdf_facts, ZSetAddition())
rdf_fact_s_h = StreamHandle(lambda: rdf_fact_s)

datalog_reasoner = IncrementalDatalog(
    rdf_fact_s_h,
    program_s_h,
    None,
)

In [8]:
# Drive the Datalog reasoner with step_until_fixpoint; %time reports the CPU
# and wall-clock cost of the whole call.
%time step_until_fixpoint(datalog_reasoner)

CPU times: user 25.4 s, sys: 112 ms, total: 25.5 s
Wall time: 25.5 s


In [9]:
# More triples are inferred here than by the RDFS reasoner because this
# output includes the TBox as well.
# Count the `inner` mapping directly rather than building a throwaway list of
# its keys.
len(stream_elimination(datalog_reasoner.output()).inner)

126732