In [37]:
import pandas as pd
from rdflib import Graph, RDF
from src.gnn import get_namespace

In [42]:
def get_statistics(dataset_name: str, data_dir: str = "datasets", verbose: bool = True) -> pd.DataFrame:
    """Summarise the triple composition of a dataset's train/test/val splits.

    For each split file ``{data_dir}/{dataset_name}_{train|test|val}.owl`` the
    graph is parsed and its triples are bucketed into three counts:

    * membership -- triples whose predicate is ``rdf:type``;
    * object property assertions -- triples whose predicate IRI starts with
      the namespace returned by ``get_namespace(dataset_name)``;
    * remaining -- everything else (total minus the two counts above).

    Parameters
    ----------
    dataset_name : str
        Base name of the dataset; used both to look up the namespace and to
        build the three split file names.
    data_dir : str, default "datasets"
        Directory that contains the split files.
    verbose : bool, default True
        When true, print a human-readable summary for each split.

    Returns
    -------
    pandas.DataFrame
        One row per split (in the order Train, Test, Val) with the raw counts
        and their proportions rounded to three decimals.
    """

    def stats_for_split(name, path, NS):
        """Parse one split file and return its statistics as a dict row."""
        g = Graph()
        g.parse(path)

        triples = len(g)

        # Count lazily instead of materialising the matches in a list.
        # NOTE(review): this counts *every* rdf:type triple, so declarations
        # such as class typings would be included if the file has any -- confirm
        # that is the intended meaning of "membership".
        membership = sum(1 for _ in g.triples((None, RDF.type, None)))

        # Convert the namespace once rather than once per triple.
        ns_prefix = str(NS)
        # NOTE(review): any predicate in the dataset namespace is counted here;
        # presumably these are all object properties -- verify that no data or
        # annotation properties share the namespace.
        obj_prop = sum(1 for _s, p, _o in g if str(p).startswith(ns_prefix))

        remaining = triples - (membership + obj_prop)

        # Proportions; an empty graph yields 0 rather than a ZeroDivisionError.
        prop_membership = membership / triples if triples else 0
        prop_obj_prop = obj_prop / triples if triples else 0
        prop_remaining = remaining / triples if triples else 0

        if verbose:
            print(f"--- {name} ---")
            print(f"Triples: {triples}")
            print(f"Membership: {membership} ({prop_membership:.3f})")
            print(f"Object Property Assertions: {obj_prop} ({prop_obj_prop:.3f})")
            print(f"Remaining Triples: {remaining} ({prop_remaining:.3f})")
            print()

        # Row for the summary DataFrame.
        return {
            "Split": name,
            "Triples": triples,
            "Membership": membership,
            "Membership %": round(prop_membership, 3),
            "Object Property Assertions": obj_prop,
            "Object Property Assertions %": round(prop_obj_prop, 3),
            "Remaining Triples": remaining,
            "Remaining %": round(prop_remaining, 3)
        }

    NS = get_namespace(dataset_name)

    if verbose:
        print(f"\nDataset: {dataset_name}\n")

    # Split label -> file-name suffix, in the order the rows are reported.
    splits = [("Train", "train"), ("Test", "test"), ("Val", "val")]
    stats = [
        stats_for_split(label, f"{data_dir}/{dataset_name}_{suffix}.owl", NS)
        for label, suffix in splits
    ]

    return pd.DataFrame(stats)

In [43]:
# Per-split triple statistics for the pizza dataset (also prints a summary).
df = get_statistics(dataset_name="pizza")


Dataset: pizza

--- Train ---
Triples: 3839
Membership: 1011 (0.263)
Object Property Assertions: 1288 (0.336)
Remaining Triples: 1540 (0.401)

--- Test ---
Triples: 2300
Membership: 592 (0.257)
Object Property Assertions: 168 (0.073)
Remaining Triples: 1540 (0.670)

--- Val ---
Triples: 2295
Membership: 589 (0.257)
Object Property Assertions: 166 (0.072)
Remaining Triples: 1540 (0.671)

