In [5]:
# -------------------------------------------------------------
# Export a large TTL (≈125 k triples) to colored GraphML for Gephi
# -------------------------------------------------------------
# pip install rdflib networkx matplotlib             # first run only

import rdflib                             # RDF parser
import networkx as nx                     # Graph container + GraphML writer
from matplotlib import cm                 # Colormap to pick distinct colors
from matplotlib.colors import to_hex
import rdflib.namespace as rns

# --- 1. Load TTL -----------------------------------------------------------
TTL_FILE = "../linkml/data/rdf/epd_rdf_instance_datastore_canonical_skos_din_bki_shacl.ttl"

rdf = rdflib.Graph()
rdf.parse(TTL_FILE, format="turtle")

# --- 2. Build a directed NetworkX graph ------------------------------------
G = nx.DiGraph()

for s, p, o in rdf:
    # Parse URIs as plain strings; literals become str(literal)
    su, ob = str(s), str(o)

    G.add_node(su)
    G.add_node(ob)
    # predicate label kept if you want to filter later
    G.add_edge(su, ob, predicate=str(p))

# --- 3. Assign deterministic colors by namespace prefix --------------------
# Helper: get prefix (falls back to “misc”)
ns_mgr = rdf.namespace_manager
def qprefix(uri: str) -> str:
    try:
        return ns_mgr.compute_qname(rdflib.URIRef(uri))[0]
    except Exception:
        return "misc"

# List unique prefixes
prefixes = sorted({qprefix(n) for n in G.nodes})
# Map each prefix to a distinct color from matplotlib's tab20 colormap
cmap = cm.get_cmap("tab20", len(prefixes))
prefix2color = {pref: to_hex(cmap(i)) for i, pref in enumerate(prefixes)}

# Write the color attribute (hex) to each node
for n in G.nodes:
    G.nodes[n]["color"] = prefix2color[qprefix(n)]

# --- 4. Export to GraphML ---------------------------------------------------
OUT_FILE = "ttl_graph_ready_for_gephi.graphml"
nx.write_graphml(G, OUT_FILE)
print(f"✔ GraphML written to {OUT_FILE} — import in Gephi and run a layout.")


source data set does not look like a valid URI, trying to serialize this will break.
compressive strength does not look like a valid URI, trying to serialize this will break.
unit group data set does not look like a valid URI, trying to serialize this will break.
Building products made of concrete and concrete elements - Firmengruppe Max Bögl - Concrete C30/37 fly ash does not look like a valid URI, trying to serialize this will break.
Eutrophication potential - terrestrial (EP-terrestrial) does not look like a valid URI, trying to serialize this will break.
Product flow does not look like a valid URI, trying to serialize this will break.
Beton C20/25 X0 C1 22 L EcoPact, Rezept Nummer DU3312-AHZS, Transportbetonwerk Büttelborn, Germany does not look like a valid URI, trying to serialize this will break.
Beton C20/25 XC1 XC2 F3 16 M ECOPact, Rezept Nummer DI3234-BHFS Version 1, Transportbetonwerk Balve, Germany does not look like a valid URI, trying to serialize this will break.
Global 

✔ GraphML written to ttl_graph_ready_for_gephi.graphml — import in Gephi and run a layout.


In [6]:
# -------------------------------------------------------------
# TTL ➜ GraphML with hybrid coloring scheme for Gephi
#   • ilcd branches colored by section
#   • every other namespace gets its own color
# -------------------------------------------------------------
# pip install rdflib networkx matplotlib

import rdflib
import networkx as nx
from collections import deque
from matplotlib import cm
from matplotlib.colors import to_hex

# --- 1. File paths ---------------------------------------------------------
TTL_FILE = "../linkml/data/rdf/epd_rdf_instance_datastore_canonical_skos_din_bki_shacl.ttl"
OUT_FILE = "ttl_graph_hybrid_colors.graphml"

# --- 2. Section predicates (ilcd) -----------------------------------------
SECTION_PREDICATES = {
    "administrativeInformation": "admin",
    "exchanges": "exchanges",
    "lciaResults": "lcia",
    "modellingAndValidation": "modelling",
    "processInformation": "process",
}

# --- 3. Load RDF -----------------------------------------------------------
rdf = rdflib.Graph()
rdf.parse(TTL_FILE, format="turtle")

# Namespace URIs we care about
ns_mgr = rdf.namespace_manager
prefix_uri = {p: str(u) for p, u in ns_mgr.namespaces()}
ILCD_URI = prefix_uri["ilcd"]

# --- 4. Build NetworkX graph ----------------------------------------------
G = nx.DiGraph()
for s, p, o in rdf:
    G.add_edge(str(s), str(o), predicate=str(p))

# --- 5. Tag ilcd branches by section --------------------------------------
node_tag = {}                       # node ➜ tag string (section OR prefix)

# Collect start nodes for each ilcd section
start_nodes = []
for s, p, o in rdf:
    pred = str(p)
    if pred.startswith(ILCD_URI):
        local = pred[len(ILCD_URI):]
        if local in SECTION_PREDICATES:
            start_nodes.append((str(o), SECTION_PREDICATES[local]))

# Flood-fill from each start node (successors only)
for root, sec in start_nodes:
    if root in node_tag:            # already claimed by another section
        continue
    queue = deque([root])
    while queue:
        n = queue.popleft()
        if n in node_tag:           # visited via another root
            continue
        node_tag[n] = sec
        queue.extend(G.successors(n))

# --- 6. Tag everything else by namespace prefix ---------------------------
for n in G.nodes:
    if n in node_tag:               # already tagged by section
        continue
    if n.startswith("http"):
        try:
            pref = ns_mgr.compute_qname(rdflib.URIRef(n))[0]
        except Exception:
            pref = "misc"
    else:
        pref = "literal"
    node_tag[n] = pref

# --- 7. Build color palette ----------------------------------------------
all_tags = sorted(set(node_tag.values()))
cmap = cm.get_cmap("tab20", len(all_tags))
TAG2COLOR = {tag: to_hex(cmap(i)) for i, tag in enumerate(all_tags)}

# --- 8. Write node attributes ---------------------------------------------
for n in G.nodes:
    tag = node_tag[n]
    G.nodes[n]["group"] = tag       # human-readable
    G.nodes[n]["color"] = TAG2COLOR[tag]  # hex for Gephi partition

# --- 9. Export -------------------------------------------------------------
nx.write_graphml(G, OUT_FILE)
print(f"✔ GraphML with hybrid colors -> {OUT_FILE}")


  cmap = cm.get_cmap("tab20", len(all_tags))


✔ GraphML with hybrid colors -> ttl_graph_hybrid_colors.graphml
