In [None]:
import os

from owlready2 import *
import rdflib
from rdflib import URIRef
from tqdm import tqdm

from src.utils import *

In [None]:
# Base name of the dataset to process. It is interpolated into the input paths
# (`datasets/{dataset_name}.owl`, `datasets/{dataset_name}_TBOX.owl`) and into
# the names of the output directories written by the cells below.
# Alternative dataset (swap the assignment to use it):
# dataset_name = 'OWL2DL-1'
dataset_name = 'family'

**Step 1: Load ontology**

In [None]:
# Load the dataset's OWL file as an ontology object; its individuals are
# enumerated in Step 2.
ontology_file = f'datasets/{dataset_name}.owl'
ontology = get_ontology(ontology_file).load()

In [None]:
# Parse the same OWL file with rdflib so the data can be queried triple by triple.
ontology_file = f'datasets/{dataset_name}.owl'
g = rdflib.Graph()
g.parse(ontology_file)
print(f'# Triples: {len(g)}')

In [None]:
# Parse the separate `_TBOX` file; this graph is merged into the
# "2 hop with TBox" description graphs further down.
tbox_file = f'datasets/{dataset_name}_TBOX.owl'
g_tbox = rdflib.Graph()
g_tbox.parse(tbox_file)
print(f'# Triples: {len(g_tbox)}')

**Step 2: Get all subject-resources (individuals)**

In [None]:
# Materialise every individual of the ontology; one description graph is
# generated per entry in the cells below.
subject_resources = [individual for individual in ontology.individuals()]
print(f'# Subject-Resources: {len(subject_resources)}')

**Step 3: Generate a description graph for each subject-resource**

In [None]:
def get_classes_for_individual(individual):
    """Return the names of the classes listed in ``individual.is_a``.

    Args:
        individual: Object exposing an ``is_a`` iterable (in this notebook, an
            entry of ``ontology.individuals()``).

    Returns:
        list[str]: The ``name`` of every ``is_a`` entry that has one, in the
        original order. Entries without a usable ``name`` (presumably
        anonymous class expressions such as restrictions -- confirm against
        the dataset) are skipped instead of raising ``AttributeError``; they
        could not contribute to the output file name anyway.
    """
    # getattr with a default tolerates is_a entries that are not named classes.
    names = (getattr(cls, 'name', None) for cls in individual.is_a)
    return [name for name in names if name is not None]

In [None]:
# `get_individuals` is not defined in this notebook (presumably it comes from
# the `src.utils` star import -- TODO confirm). It is unpacked into three
# values, of which only the second is kept. `individuals` is used below for
# membership tests that restrict the second hop to triples whose subject and
# object are both individuals.
_, individuals, _ = get_individuals(g)

In [None]:
# 1 hop without TBox
for i, subject in tqdm(enumerate(subject_resources)):

    describe_graph = rdflib.Graph()

    # First hop
    for triple in g.triples((None, None, URIRef(subject.iri))):
        describe_graph.add(triple)
    for triple in g.triples((URIRef(subject.iri), None, None)):
        describe_graph.add(triple)

    classes = get_classes_for_individual(subject)
    classes = "_".join(classes)
    describe_graph.serialize(destination=f'datasets/{dataset_name}_input_graphs_filtered_1hop/{classes}_{subject.name}.ttl', format="ttl")

In [None]:
# 2 hop with TBox
for i, subject in enumerate(subject_resources):

    describe_graph = rdflib.Graph()

    # First hop
    for triple in g.triples((None, None, URIRef(subject.iri))):
        describe_graph.add(triple)
    for triple in g.triples((URIRef(subject.iri), None, None)):
        describe_graph.add(triple)

    # Second hop
    for triple in list(describe_graph.triples((None, None, None))):
        if triple[0] in individuals and triple[2] in individuals: # restrict subject and object to be an individual   
            for second_hop_triple in g.triples((triple[0], None, None)):
                describe_graph.add(second_hop_triple)
            for second_hop_triple in g.triples((triple[2], None, None)):
                describe_graph.add(second_hop_triple)
            for second_hop_triple in g.triples((None, None, triple[0])):
                describe_graph.add(second_hop_triple)
            for second_hop_triple in g.triples((None, None, triple[2])):
                describe_graph.add(second_hop_triple)

    # Add TBox
    describe_graph += g_tbox

    classes = get_classes_for_individual(subject)
    classes = "_".join(classes)
    describe_graph.serialize(destination=f'datasets/{dataset_name}_input_graphs/{classes}_{subject.name}.ttl', format="ttl")

In [None]:
# 2 hop without TBox
for i, subject in enumerate(subject_resources):

    describe_graph = rdflib.Graph()

    # First hop
    for triple in g.triples((None, None, URIRef(subject.iri))):
        describe_graph.add(triple)
    for triple in g.triples((URIRef(subject.iri), None, None)):
        describe_graph.add(triple)

    # Second hop
    for triple in list(describe_graph.triples((None, None, None))):
        if triple[0] in individuals and triple[2] in individuals: # restrict subject and object to be an individual   
            for second_hop_triple in g.triples((triple[0], None, None)):
                describe_graph.add(second_hop_triple)
            for second_hop_triple in g.triples((triple[2], None, None)):
                describe_graph.add(second_hop_triple)
            for second_hop_triple in g.triples((None, None, triple[0])):
                describe_graph.add(second_hop_triple)
            for second_hop_triple in g.triples((None, None, triple[2])):
                describe_graph.add(second_hop_triple)

    classes = get_classes_for_individual(subject)
    classes = "_".join(classes)
    describe_graph.serialize(destination=f'datasets/{dataset_name}_input_graphs_filtered/{classes}_{subject.name}.ttl', format="ttl")