# Offline Test: CDI Generation (No Service)

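This notebook imports `generate_cdi` directly (no running service and no wrapper layer around it), compacts the resulting JSON-LD with PyLD for inspection, and finally inlines blank nodes and writes the result to `/tmp/test.json`.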


In [1]:
import os
import sys
import json
from typing import Optional

# Import locations: the repository root (taken to be the parent of the current
# working directory, i.e. the notebook is run from notebooks/) and its api/ folder.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
API_DIR = os.path.join(REPO_ROOT, "api")

# Put each location at the front of sys.path unless it is already listed.
# The order matters: API_DIR is handled last, so it ends up ahead of REPO_ROOT.
for _import_dir in (REPO_ROOT, API_DIR):
    if _import_dir not in sys.path:
        sys.path.insert(0, _import_dir)
del _import_dir

print("Repo root:", REPO_ROOT)
print("API dir:", API_DIR)


Repo root: /Users/vyacheslavtykhonov/projects/cdi-xas
API dir: /Users/vyacheslavtykhonov/projects/cdi-xas/api


In [2]:
import importlib, cdi_generator
importlib.reload(cdi_generator)
from cdi_generator import generate_cdi

# Inputs: each default below can be overridden through the named environment variable.
SITE_URL = os.getenv("DV_SITE", "https://dataverse.dev.codata.org")
FILE_ID = os.getenv("DV_FILEID", "25")
PERSISTENT_ID = os.getenv("DV_PERSISTENT_ID", "doi:10.5072/FK2/4ZSKVU")

# File-access URL on the configured site; trailing "/" characters on SITE_URL
# are removed first so the joined path does not contain "//".
source_url = f"{SITE_URL.rstrip('/')}/api/access/datafile/{FILE_ID}"
print("Source URL:", source_url)
print("Persistent ID:", PERSISTENT_ID)

# Build graph. The keyword arguments are collected in a dict so they can be
# inspected before the call; they are passed through unchanged.
_cdi_options = {
    "source_url": source_url,
    "export_path": "/tmp/export.json",
    "export_format": "flattened",
    "resources_dir": None,
    "dataset_type": "xas",
    "datasetid": PERSISTENT_ID,
}
cdi_graph = generate_cdi(**_cdi_options)
# NOTE(review): len() is reported as a triple count, which presumes the return
# value is an RDF graph object — confirm against cdi_generator.
print("Triples:", len(cdi_graph))


Source URL: https://dataverse.dev.codata.org/api/access/datafile/25
Persistent ID: doi:10.5072/FK2/4ZSKVU
xas xas_metadata_amended.jsonld
xas xas_core_amended.jsonld
Compound: Spectrum.license = CC0-4.0
Compound: Publication.authors = Matthew Newville
Compound: Publication.affiliation = Center for Advanced Radiation Sources, University of Chicago
Compound: Sample.reference = none
Compound: Facility.name = APS
Compound: Sample.temperature = r.t.
Compound: Sample.prep = N/A
Compound: Sample.formula = Fe3O4
Compound: Publication.DOI__https = https://doi.org/10.71622/XASLIB_535X-8J62
Compound: Sample.name = Iron (II-III) oxide
Compound: Scan.number = Scan3
Compound: Sample.id = 32vmh2
Compound: Scan.start_time = 2002-03-16
Compound: Element.symbol = Fe
Compound: Facility.energy = 7.0 GeV
Compound: Facility.xray_source = Bending Magnet
Compound: Beamline.name = 13-BM-D (GSECARS)
Compound: Beamline.website_https = https://gsecars.uchicago.edu/gsecars-facility/sector-13-beamlines/13-bmd/
Comp

In [3]:
# Compact with PyLD context
from pyld import jsonld as jsonldlib

# Serialize the graph to JSON-LD text, then parse it into plain Python
# containers, which is the form handed to PyLD below.
_jsonld_text = cdi_graph.serialize(format="json-ld")
doc = json.loads(_jsonld_text)

# Compaction context. DDI-CDI is both the default vocabulary ("@vocab") and an
# explicit "cdi" prefix; every other entry maps a prefix to its namespace IRI.
context = {
    "@vocab": "http://ddialliance.org/Specification/DDI-CDI/1.0/RDF/",
    "schema": "https://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "geosparql": "http://www.opengis.net/ont/geosparql#",
    "spdx": "http://spdx.org/rdf/terms#",
    "cdi": "http://ddialliance.org/Specification/DDI-CDI/1.0/RDF/",
    "time": "http://www.w3.org/2006/time#",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "nx": "https://xas.org/dictionary/",
    "cdifq": "https://cdif.codata.org/concept/",
    "prov": "http://www.w3.org/ns/prov#",
}

compacted = jsonldlib.compact(doc, context)

# Pretty-print the compacted document; ensure_ascii=False keeps non-ASCII
# characters readable instead of \u-escaping them.
_compacted_text = json.dumps(compacted, indent=2, ensure_ascii=False)
print(_compacted_text)


{
  "@context": {
    "@vocab": "http://ddialliance.org/Specification/DDI-CDI/1.0/RDF/",
    "schema": "https://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "geosparql": "http://www.opengis.net/ont/geosparql#",
    "spdx": "http://spdx.org/rdf/terms#",
    "cdi": "http://ddialliance.org/Specification/DDI-CDI/1.0/RDF/",
    "time": "http://www.w3.org/2006/time#",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "nx": "https://xas.org/dictionary/",
    "cdifq": "https://cdif.codata.org/concept/",
    "prov": "http://www.w3.org/ns/prov#"
  },
  "@graph": [
    {
      "@id": "http://ddialliance.org/Specification/XAS/Sample.prep",
      "http://ddialliance.org/Specification/XAS/format": "free-format string",
      "http://ddialliance.org/Specification/XAS/units": "none",
      "skos:definition": "A string summarizing the method of sample preparation",
      "skos:prefLabel": "Sample.prep"
    },
    {
      "@id": "http://ddialliance.org/Specification/XAS/Detector.i

In [4]:
# Inline blank-node references in compacted JSON-LD
import json as _json

def _collect_nodes(obj, store):
    if isinstance(obj, dict):
        node_id = obj.get("@id")
        if node_id and isinstance(node_id, str) and node_id.startswith("_:"):
            store[node_id] = obj
        for v in obj.values():
            _collect_nodes(v, store)
    elif isinstance(obj, list):
        for v in obj:
            _collect_nodes(v, store)

def _deep_clone(o):
    try:
        return _json.loads(_json.dumps(o))
    except Exception:
        return o

def _inline_refs(obj, node_map, seen_ids):
    """Return a copy of ``obj`` with blank-node references expanded in place.

    A *reference* is a dict whose only key is ``"@id"`` and whose value is a
    string starting with ``"_:"``. Each reference whose id is present in
    ``node_map`` is replaced by a recursively expanded clone of the mapped
    node. A reference is returned unchanged when its id is missing from
    ``node_map`` (or maps to an empty value) or when that id is already being
    expanded further up the call chain, which stops cycles.

    Args:
        obj: JSON-like value to process; dicts and lists are rebuilt, any
            other value is returned as is.
        node_map: Mapping of blank-node id to node dict.
        seen_ids: Set of ids currently being expanded; it is updated during
            the call and each id is removed again once its expansion returns.
    """
    if isinstance(obj, list):
        return [_inline_refs(item, node_map, seen_ids) for item in obj]
    if not isinstance(obj, dict):
        return obj

    ref_id = obj.get("@id")
    is_blank_ref = (
        len(obj) == 1
        and isinstance(ref_id, str)
        and ref_id.startswith("_:")
    )
    if not is_blank_ref:
        # Ordinary node: keep its keys and process every value.
        return {
            key: _inline_refs(value, node_map, seen_ids)
            for key, value in obj.items()
        }

    target = node_map.get(ref_id)
    if not target or ref_id in seen_ids:
        return obj

    # Mark the id as in progress only for the duration of its own expansion,
    # so the same node can still be inlined again at sibling positions.
    seen_ids.add(ref_id)
    expanded = _inline_refs(_deep_clone(target), node_map, seen_ids)
    seen_ids.discard(ref_id)
    return expanded

# Index the blank nodes (dicts whose "@id" starts with "_:") found anywhere in
# the compacted document, keyed by that id.
_node_map = {}
_collect_nodes(compacted, _node_map)
# Build a new structure in which blank-node references are replaced by the
# indexed nodes; the fresh empty set is the "currently expanding" tracker that
# _inline_refs uses to stop cycles.
inlined_compacted = _inline_refs(compacted, _node_map, set())

# Remove any @id fields that are blank-node identifiers (e.g., "_:...")
def _strip_blank_ids(obj):
    if isinstance(obj, dict):
        new_obj = {}
        for k, v in obj.items():
            if k == "@id" and isinstance(v, str) and v.startswith("_:"):
                continue  # skip blank-node @id
            new_obj[k] = _strip_blank_ids(v)
        return new_obj
    if isinstance(obj, list):
        return [_strip_blank_ids(v) for v in obj]
    return obj

# Drop blank-node "@id" entries from the inlined document.
inlined_compacted = _strip_blank_ids(inlined_compacted)

# Write the result as UTF-8 explicitly: ensure_ascii=False emits non-ASCII
# characters verbatim, so relying on the platform's default encoding could
# raise UnicodeEncodeError or produce a file that is not valid UTF-8 JSON.
with open("/tmp/test.json", "w", encoding="utf-8") as _f:
    _json.dump(inlined_compacted, _f, indent=2, ensure_ascii=False)
print("Wrote /tmp/test.json")


Wrote /tmp/test.json
