Copyright (c) 2025 Graphcore Ltd. All rights reserved.

# Compute KG topological metrics with KGTopologyToolbox

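This notebook uses [KGTopologyToolbox](https://github.com/graphcore-research/kg-topology-toolbox) to compute node-level and edge-level topological metrics for each preprocessed knowledge graph, and saves the results next to the dataset's triples.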

In [1]:
import os.path as osp
import pickle

import pandas as pd
import torch
from kg_topology_toolbox import KGTopologyToolbox

In [None]:
# For every benchmark KG, compute topological summaries with KGTopologyToolbox
# and pickle them into the dataset's own directory, next to its `triples.pt`.
for ds in ["hetionet", "openbiolink", "pharmkg", "pharmebinet", "primekg", "fb15k-237"]:
    data_dir = f"../datasets/data/{ds}"

    # Load KG triples (generated from preprocessing notebooks in ../datasets/).
    # weights_only=False makes torch fall back to full unpickling, which can run
    # arbitrary code: only load files produced by the local preprocessing step.
    # NOTE(review): columns presumably stand for head / relation / tail — confirm
    # against the preprocessing notebooks.
    df_triples = pd.DataFrame(
        torch.load(osp.join(data_dir, "triples.pt"), weights_only=False),
        columns=["h", "r", "t"],
    )

    # Initialize Topology Toolbox for the KG
    kgtt = KGTopologyToolbox(df_triples)

    # Compute and save node metrics (dict keyed by metric name)
    node_graph_metrics = {"node_degree_summary": kgtt.node_degree_summary()}
    pd.to_pickle(node_graph_metrics, osp.join(data_dir, "node_graph_metrics.pkl"))

    # Compute and save edge metrics (dict keyed by metric name)
    edge_graph_metrics = {
        "edge_degree_cardinality_summary": kgtt.edge_degree_cardinality_summary(),
        "edge_pattern_summary": kgtt.edge_pattern_summary(),
    }
    pd.to_pickle(edge_graph_metrics, osp.join(data_dir, "edge_graph_metrics.pkl"))

In [None]:
# Display the node-degree summary that the metrics loop saved for Hetionet.
# The file is opened in a `with` block so the handle is closed deterministically
# instead of being left to the garbage collector.
# NOTE: unpickling can execute arbitrary code; only load files written by this
# notebook (or another trusted source).
with open("../datasets/data/hetionet/node_graph_metrics.pkl", "rb") as metrics_file:
    hetionet_node_metrics = pickle.load(metrics_file)

# Bare last expression so the notebook renders the table.
hetionet_node_metrics["node_degree_summary"]

Unnamed: 0,h_degree,t_degree,tot_degree,h_unique_rel,t_unique_rel,n_loops
0,11869,7,11876,3,1,0
1,24,9,33,1,1,0
2,5015,6,5021,1,1,0
3,15961,10,15971,3,1,0
4,293,9,302,1,1,0
...,...,...,...,...,...,...
45153,0,2,2,0,1,0
45154,0,1,1,0,1,0
45155,0,1,1,0,1,0
45156,0,7,7,0,1,0
