# 02 — Build bridge ontology (PPMI ↔ PDON/PMDO)

This notebook creates a non-destructive **bridge ontology** that:

- imports PDON and PMDO (as-is)
- defines a minimal T-box for Subject/Visit/Observation
- records the PPMI→ontology mapping from `mapping/ppmi_pdon_pmdo_mapping.csv`
- serialises the bridge ontology to `ontologies/ppmi_bridge.ttl`

It is Drive-friendly and shareable: it will mount Drive and use the same `ppmi-ontology-alignment` folder.


In [None]:
# Quietly (-q) install the third-party packages for this notebook.
# NOTE(review): owlrl is not imported by any cell visible in this notebook — confirm it is still needed.
!pip -q install rdflib pandas owlrl


## 1) Project root (Drive recommended)

This ensures paths remain stable even if you open this notebook in a fresh Colab runtime.


In [None]:
from pathlib import Path
import os

# Toggle between a Drive-backed project folder and a runtime-local one.
USE_DRIVE = True  # set False to work in /content only

if not USE_DRIVE:
    PROJECT_DIR = Path('/content/ppmi-ontology-alignment')
else:
    # Imported lazily so the Drive dependency is only touched when requested.
    from google.colab import drive
    drive.mount('/content/drive')
    PROJECT_DIR = Path('/content/drive/MyDrive/ppmi-ontology-alignment')

# Sub-folders that live directly under the project root.
ONT_DIR = PROJECT_DIR / 'ontologies'
MAP_DIR = PROJECT_DIR / 'mapping'
OUT_DIR = PROJECT_DIR / 'output'

# Create the root first, then each sub-folder (no error if they already exist).
PROJECT_DIR.mkdir(parents=True, exist_ok=True)
for folder in (ONT_DIR, MAP_DIR, OUT_DIR):
    folder.mkdir(parents=True, exist_ok=True)

# Input ontologies, the mapping table, and the bridge file written in the last cell.
PDON_PATH = ONT_DIR / 'pdon.xrdf'
PMDO_PATH = ONT_DIR / 'pmdo.xrdf'
MAP_PATH = MAP_DIR / 'ppmi_pdon_pmdo_mapping.csv'
OUT_PATH = ONT_DIR / 'ppmi_bridge.ttl'

# Report which inputs are already in place.
print('PROJECT_DIR:', PROJECT_DIR)
for tag, location in (('PDON   ', PDON_PATH), ('PMDO   ', PMDO_PATH), ('mapping', MAP_PATH)):
    print(f'Exists {tag}:', location.exists(), location)


## 2) Load mapping CSV

If the file is missing, upload it and it will be moved into `mapping/`.


In [None]:
import pandas as pd
from google.colab import files
import shutil

# Prompt for an upload only when the mapping CSV is not already in mapping/.
if not MAP_PATH.exists():
    print('Mapping CSV not found. Please upload ppmi_pdon_pmdo_mapping.csv')
    uploaded = files.upload()
    for fname in uploaded.keys():
        src = Path('/content') / fname
        # Only a file with the exact expected name becomes the mapping CSV;
        # anything else is still moved into mapping/ under its own name.
        dst = MAP_PATH if fname == 'ppmi_pdon_pmdo_mapping.csv' else (MAP_DIR / fname)
        shutil.move(str(src), str(dst))
    print('Uploaded to:', MAP_DIR)

# Fail with an actionable message instead of a bare read_csv error when the
# upload was skipped or saved under a different name (e.g. a renamed duplicate).
if not MAP_PATH.exists():
    raise FileNotFoundError(
        f"Mapping CSV not found at {MAP_PATH}. "
        "Upload a file named exactly 'ppmi_pdon_pmdo_mapping.csv'."
    )

# Read every column as text: with dtype inference, codes such as '01' become 1
# and integer codes in a column that also has blanks become floats ('1.0'),
# which would then be written into the ontology in that altered form.
# Blank cells are still read as NaN.
mapping = pd.read_csv(MAP_PATH, dtype=str)
mapping.head(10)


## 3) Load PDON and PMDO graphs (robust parser)


In [None]:
from rdflib import Graph

def load_graph(path: Path):
    """Parse an RDF file, trying several serialisation formats in turn.

    Each attempt parses into a fresh ``Graph`` so that triples or prefixes a
    parser added before it failed cannot leak into the graph that is finally
    returned.

    Args:
        path: Location of the ontology file on disk.

    Returns:
        A tuple ``(graph, fmt, tried)``. ``fmt`` is the format name that parsed
        successfully, or ``'auto'`` when the explicit formats all failed and
        rdflib's own format detection was used. ``tried`` is a list of
        ``(format, error message truncated to 220 chars)`` for every explicit
        format that failed.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        Exception: Whatever the last-resort parse raises if it also fails.
    """
    path = Path(path)
    if not path.exists():
        # Surface the real problem immediately instead of after six failed parses.
        raise FileNotFoundError(f'Ontology file not found: {path}')

    tried = []
    for fmt in ['xml', 'application/rdf+xml', 'turtle', 'n3', 'nt']:
        g = Graph()  # fresh graph per attempt: never reuse a partially filled one
        try:
            g.parse(str(path), format=fmt)
        except Exception as e:
            tried.append((fmt, str(e)[:220]))
        else:
            return g, fmt, tried

    # Last resort: let rdflib pick the format itself.
    g = Graph()
    g.parse(str(path))
    return g, 'auto', tried

# Load both source ontologies; the per-format error log (third item) is discarded.
pdon_g, pdon_fmt, _ = load_graph(PDON_PATH)
pmdo_g, pmdo_fmt, _ = load_graph(PMDO_PATH)

# One summary line per ontology: triple count and the format that parsed.
for _name, _graph, _fmt in (('PDON', pdon_g, pdon_fmt), ('PMDO', pmdo_g, pmdo_fmt)):
    print(f'{_name} triples:', len(_graph), 'format:', _fmt)


## 4) Extract ontology IRI(s)

We use the IRI of the resource typed `owl:Ontology` when one is present (the first in sorted order if there are several); otherwise we fall back to a hard-coded default IRI for each ontology.


In [None]:
from rdflib.namespace import RDF, OWL

def get_ontology_iris(g: Graph):
    """Return the IRIs of all resources typed ``owl:Ontology`` in ``g``, sorted.

    Only named resources (``URIRef``) are returned. An anonymous ontology
    header is a blank node whose string form is an internal identifier, not an
    IRI, so it must not be offered as an ``owl:imports`` target.

    Args:
        g: The parsed ontology graph.

    Returns:
        A sorted list of IRI strings; empty when no named ontology is declared.
    """
    # Local import: URIRef is only imported at module level in a later cell.
    from rdflib import URIRef

    return sorted(
        {str(s) for s in g.subjects(RDF.type, OWL.Ontology) if isinstance(s, URIRef)}
    )

pdon_iris = get_ontology_iris(pdon_g)
pmdo_iris = get_ontology_iris(pmdo_g)

# Take the first declared IRI when there is one, otherwise the hard-coded default.
PDON_IMPORT_IRI = next(
    iter(pdon_iris),
    'http://www.semanticweb.org/ontologies/2011/1/Ontology1296772722296.owl',
)
PMDO_IMPORT_IRI = next(iter(pmdo_iris), 'http://www.case.edu/PMDO')

for _tag, _iri in (('PDON', PDON_IMPORT_IRI), ('PMDO', PMDO_IMPORT_IRI)):
    print(f'{_tag} import IRI:', _iri)


## 5) Create bridge ontology header + imports

Set your final namespace later; for now we keep a stable placeholder.


In [None]:
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, OWL, XSD

# Placeholder identifiers for the bridge vocabulary and the ontology itself.
PPMI = Namespace('http://example.org/ppmi-ontology-alignment#')  # replace later
BRIDGE_IRI = URIRef('http://example.org/ppmi-ontology-alignment')  # replace later

# New, empty graph with the prefixes used when serialising.
bridge = Graph()
for _prefix, _namespace in (
    ('ppmi', PPMI),
    ('owl', OWL),
    ('rdfs', RDFS),
    ('rdf', RDF),
    ('xsd', XSD),
):
    bridge.bind(_prefix, _namespace)

# Ontology header: type, label and comment.
for _predicate, _object in (
    (RDF.type, OWL.Ontology),
    (RDFS.label, Literal('PPMI–PDON/PMDO bridge ontology')),
    (RDFS.comment, Literal('Non-destructive bridge ontology for mapping PPMI longitudinal variables to PDON and PMDO concepts.')),
):
    bridge.add((BRIDGE_IRI, _predicate, _object))

# Reference both source ontologies through owl:imports.
for _imported in (PDON_IMPORT_IRI, PMDO_IMPORT_IRI):
    bridge.add((BRIDGE_IRI, OWL.imports, URIRef(_imported)))


## 6) Minimal T-box (Subject, Visit, Observation)

These are **your** classes/properties, independent of PDON/PMDO. PDON and PMDO themselves are reused through `owl:imports` rather than copied.


In [None]:
# Classes: local name -> rdfs:label
_CLASS_LABELS = {
    'Subject': 'Subject (PPMI participant)',
    'Visit': 'Visit (PPMI timepoint)',
    'Observation': 'Observation / measurement at a visit',
    'DiagnosisObservation': 'Diagnosis observation (coded)',
    'ImagingObservation': 'Imaging-derived observation',
}
for _local, _text in _CLASS_LABELS.items():
    bridge.add((PPMI[_local], RDF.type, OWL.Class))
    bridge.add((PPMI[_local], RDFS.label, Literal(_text)))

def add_objprop(local, domain, range_, label):
    """Declare an ``owl:ObjectProperty`` in the bridge graph.

    Args:
        local: Local name of the property inside the ``PPMI`` namespace.
        domain: Local name of the domain class inside the ``PPMI`` namespace.
        range_: Range term, used as given.
        label: Text stored as the property's ``rdfs:label``.

    Returns:
        The term of the declared property.
    """
    prop = PPMI[local]
    statements = (
        (RDF.type, OWL.ObjectProperty),
        (RDFS.domain, PPMI[domain]),
        (RDFS.range, range_),
        (RDFS.label, Literal(label)),
    )
    for predicate, value in statements:
        bridge.add((prop, predicate, value))
    return prop

# Object properties: (local name, domain class, range term, label)
for _local, _domain, _range, _label in (
    ('hasVisit', 'Subject', PPMI['Visit'], 'has visit'),
    ('hasObservation', 'Visit', PPMI['Observation'], 'has observation'),
    ('observesConcept', 'Observation', OWL.Thing, 'observes concept (PDON/PMDO/MDO class)'),
    ('refersToPdonConcept', 'DiagnosisObservation', OWL.Thing, 'refers to PDON concept'),
    ('relatesToRegion', 'ImagingObservation', OWL.Thing, 'relates to anatomical region (e.g., MDO)'),
):
    add_objprop(_local, _domain, _range, _label)

def add_dataprop(local, domain, range_, label):
    """Declare an ``owl:DatatypeProperty`` in the bridge graph.

    Args:
        local: Local name of the property inside the ``PPMI`` namespace.
        domain: Local name of the domain class inside the ``PPMI`` namespace.
        range_: Range term (a datatype in this notebook), used as given.
        label: Text stored as the property's ``rdfs:label``.

    Returns:
        The term of the declared property.
    """
    prop = PPMI[local]
    statements = (
        (RDF.type, OWL.DatatypeProperty),
        (RDFS.domain, PPMI[domain]),
        (RDFS.range, range_),
        (RDFS.label, Literal(label)),
    )
    for predicate, value in statements:
        bridge.add((prop, predicate, value))
    return prop

# Observation payload: kept as plain strings at bridge level, to be
# specialised per variable later.
add_dataprop(local='hasValue', domain='Observation', range_=XSD.string,
             label='has value (literal)')
add_dataprop(local='hasCode', domain='Observation', range_=XSD.string,
             label='has code (categorical/ordinal)')
add_dataprop(local='hasDecode', domain='Observation', range_=XSD.string,
             label='has decode (human label)')

# Visit metadata
add_dataprop(local='visitDate', domain='Visit', range_=XSD.string,
             label='visit date (raw)')
add_dataprop(local='visitYear', domain='Visit', range_=XSD.integer,
             label='visit year index')
add_dataprop(local='ageAtVisit', domain='Visit', range_=XSD.decimal,
             label='age at visit')


## 7) Embed mapping as annotation triples

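Each mapping row is stored as annotation triples on a resource whose IRI is the bridge namespace followed by `mapping/<Variable>`; rows that share a `Variable` are attached to the same resource.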

This keeps provenance and avoids forcing premature OWL modelling decisions.


In [None]:
# Annotation properties
def _declare_annotation(local, label):
    """Declare ``PPMI[local]`` as an owl:AnnotationProperty with the given rdfs:label and return it."""
    prop = PPMI[local]
    bridge.add((prop, RDF.type, OWL.AnnotationProperty))
    bridge.add((prop, RDFS.label, Literal(label)))
    return prop


mapsVariable = _declare_annotation('mapsVariable', 'maps PPMI variable to concept IRI')
ppmiVariable = _declare_annotation('ppmiVariable', 'PPMI variable name')
mappingBucket = _declare_annotation('mappingBucket', 'mapping bucket')
mappingConfidence = _declare_annotation('mappingConfidence', 'mapping confidence')
mappingHow = _declare_annotation('mappingHow', 'mapping notes / how')
mappingCode = _declare_annotation('mappingCode', 'PPMI code value (if categorical)')
mappingDecode = _declare_annotation('mappingDecode', 'PPMI decode label (if categorical)')

def _clean(v):
    if v is None:
        return ''
    s = str(v).strip()
    return '' if s.lower() == 'nan' else s

from urllib.parse import quote  # stdlib; used to percent-encode IRI-invalid characters

# Characters that make an IRI invalid; rdflib refuses to serialise a URIRef containing them.
_IRI_INVALID_CHARS = set('<>" {}|\\^`')


def _iri_segment(text):
    """Percent-encode only IRI-invalid, whitespace and control characters; leave the rest unchanged."""
    return ''.join(
        quote(ch, safe='')
        if (ch in _IRI_INVALID_CHARS or ch.isspace() or ord(ch) < 0x20)
        else ch
        for ch in text
    )


# Without a 'Variable' column every row would be skipped silently.
if 'Variable' not in mapping.columns:
    raise ValueError("Mapping table has no 'Variable' column; no mapping rows can be embedded.")

# Optional per-row annotations: (CSV column, annotation property).
_OPTIONAL_ANNOTATIONS = (
    ('MappingBucket', mappingBucket),
    ('Confidence', mappingConfidence),
    ('How', mappingHow),
    ('Code', mappingCode),
    ('Decode', mappingDecode),
)

for _, r in mapping.iterrows():
    var = _clean(r.get('Variable'))
    if not var:
        continue  # rows without a variable name carry nothing to anchor on

    # NOTE(review): rows that share a Variable (e.g. one row per code of a
    # categorical variable) are attached to the same node, so the pairing of
    # Code / Decode / TargetIRI within a row is not recoverable from the graph.
    # Confirm whether downstream consumers need per-row nodes.
    node = URIRef(f"{PPMI}mapping/{_iri_segment(var)}")
    bridge.add((node, ppmiVariable, Literal(var)))

    for column, prop in _OPTIONAL_ANNOTATIONS:
        value = _clean(r.get(column))
        if value:
            bridge.add((node, prop, Literal(value)))

    target = _clean(r.get('TargetIRI'))
    if target:
        bridge.add((node, mapsVariable, URIRef(target)))

print('Bridge triples:', len(bridge))


## 8) Serialise bridge ontology


In [None]:
# Write the bridge graph to OUT_PATH in Turtle syntax.
_destination = str(OUT_PATH)
bridge.serialize(format='turtle', destination=_destination)
print('Wrote:', OUT_PATH)
