# In [1]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, DCTERMS, VOID
import json
import re

# Custom namespaces
# MCRO: base IRI for "mcro:"-prefixed terms; this file also mints every
# unprefixed term under it (via MCRO[...]).
MCRO = Namespace("http://purl.obolibrary.org/obo/mcro.owl#")
# DUL: base IRI for "dul:"-prefixed terms and the dul:* predicates used below.
DUL = Namespace("http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#")

def load_triples(file_path):
    """Load the triples document from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document. ``create_ttl`` iterates it as a sequence
        of dicts carrying the keys ``'s'``, ``'p'`` and ``'o'``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which mangles non-ASCII terms on some hosts.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def create_ttl(triples):
    """Build an rdflib ``Graph`` from a collection of triple dicts.

    Args:
        triples: Re-iterable collection of dicts with the keys ``'s'``,
            ``'p'`` and ``'o'``. It is traversed twice.

    Returns:
        A ``Graph`` identified by the MCRO ontology IRI, with the mcro, rdf,
        rdfs, dul, dct and void prefixes bound and one statement added per
        input triple.
    """
    graph = Graph(identifier=URIRef("http://purl.obolibrary.org/obo/mcro.owl"))

    # Register the prefixes used when the graph is serialized.
    prefix_table = (
        ("mcro", MCRO),
        ("rdf", RDF),
        ("rdfs", RDFS),
        ("dul", DUL),
        ("dct", DCTERMS),
        ("void", VOID),
    )
    for prefix, namespace in prefix_table:
        graph.bind(prefix, namespace)

    # Pass 1: gather every term that is used as a class, i.e. the object of
    # rdf:type and both ends of rdfs:subClassOf.
    class_terms = set()
    for row in triples:
        relation = row['p']
        if relation == 'rdf:type':
            class_terms.add(row['o'])
        elif relation == 'rdfs:subClassOf':
            class_terms.update((row['o'], row['s']))

    # Pass 2: convert each triple to RDF terms and store it.
    for row in triples:
        subject = process_term(row['s'], class_terms)
        predicate = process_predicate(row['p'])
        obj = process_object(row['o'], class_terms)

        # dul:hasParameterDataValue must always carry a literal value.
        if predicate == DUL.hasParameterDataValue and not isinstance(obj, Literal):
            obj = Literal(str(obj))

        graph.add((subject, predicate, obj))

    return graph

def sanitize_uri(term):
    """Turn a free-text term into a string usable as a URI local name.

    Spaces become underscores; every remaining character other than ASCII
    letters, digits, ``-`` and ``_`` is dropped.

    Args:
        term: The raw term text.

    Returns:
        The sanitized string (possibly empty).
    """
    # Spaces have to be replaced *before* filtering, and '_' has to survive
    # the filter; otherwise the character class strips the spaces first and
    # the underscore substitution can never take effect.
    return re.sub(r'[^a-zA-Z0-9_-]', '', term.replace(' ', '_'))

def process_term(term, classes):
    """Convert a subject term string into a URI reference.

    Resolution order:
      1. strings starting with ``http`` are used verbatim as a ``URIRef``;
      2. ``mcro:`` / ``dul:`` prefixed strings are resolved in that namespace;
      3. anything else is sanitized and minted in the MCRO namespace.

    Args:
        term: The term text.
        classes: Collection of known class terms. Accepted for interface
            compatibility; class and non-class terms are minted identically,
            so it does not affect the result.

    Returns:
        The URI reference for ``term``.
    """
    if term.startswith('http'):
        return URIRef(term)

    for prefix, namespace in (('mcro:', MCRO), ('dul:', DUL)):
        if term.startswith(prefix):
            # Keep only the text after the last occurrence of the prefix.
            return namespace[term.rpartition(prefix)[2]]

    return MCRO[sanitize_uri(term)]

def process_predicate(predicate_str):
    """Convert a predicate string into a URI reference.

    Resolution order:
      1. strings starting with ``http`` are used verbatim as a ``URIRef``;
      2. strings starting with ``rdf:type`` / ``rdfs:subClassOf`` map to the
         corresponding RDF / RDFS term;
      3. ``dul:`` / ``mcro:`` prefixed strings are resolved in that namespace;
      4. a fixed table of known bare names maps to established vocabulary
         terms;
      5. anything else is sanitized and minted in the MCRO namespace.

    Args:
        predicate_str: The predicate text.

    Returns:
        The URI reference for the predicate.
    """
    if predicate_str.startswith('http'):
        return URIRef(predicate_str)

    builtin_terms = (
        ('rdf:type', RDF.type),
        ('rdfs:subClassOf', RDFS.subClassOf),
    )
    for marker, uri in builtin_terms:
        if predicate_str.startswith(marker):
            return uri

    for prefix, namespace in (('dul:', DUL), ('mcro:', MCRO)):
        if predicate_str.startswith(prefix):
            # Keep only the text after the last occurrence of the prefix.
            return namespace[predicate_str.rpartition(prefix)[2]]

    # Known bare predicate names and the vocabulary term each one maps to.
    # NOTE(review): void:Dataset is a class in the VoID vocabulary rather than
    # a property -- confirm this is the intended predicate for "hasDatasets".
    known_predicates = {
        "hasLibraries": MCRO.hasLibraries,
        "hasDatasets": VOID.Dataset,
        "hasModelSizes": DCTERMS.extent,
        "hasLicense": DCTERMS.license,
        "hasAuthors": DCTERMS.creator,
        "hasInputFormat": DUL.hasParameterDataValue,
        "hasTrainingData": DUL.hasParameterDataValue,
        "hasArchitecture": DUL.hasComponentPart,
        "hasPreprocessing": DUL.hasParameterDataValue,
        "hasHardware": DCTERMS.requires,
    }
    if predicate_str in known_predicates:
        return known_predicates[predicate_str]

    return MCRO[sanitize_uri(predicate_str)]

def process_object(obj_str, classes):
    """Convert an object string into a URI reference or a literal.

    Resolution order:
      1. strings starting with ``http`` are used verbatim as a ``URIRef``;
      2. strings starting with ``{`` are parsed as JSON and, on success,
         returned as a re-serialized ``rdf:JSON`` literal; if they cannot be
         parsed they are returned as a plain literal;
      3. ``mcro:`` prefixed strings are resolved in the MCRO namespace;
      4. strings listed in ``classes`` are sanitized and minted in MCRO;
      5. anything else becomes a plain literal.

    NOTE(review): unlike ``process_term``, ``dul:`` prefixed values are not
    resolved here; they fall through to rules 4/5 -- confirm that is intended.

    Args:
        obj_str: The object text.
        classes: Collection of terms known to be classes.

    Returns:
        A ``URIRef`` or ``Literal`` for the object.
    """
    if obj_str.startswith('http'):
        return URIRef(obj_str)

    if obj_str.startswith('{'):
        # Try the text unchanged first so that valid JSON containing
        # apostrophes (e.g. {"note": "it's"}) is not corrupted. Only when that
        # fails, fall back to swapping single quotes for double quotes to
        # accept dict-style strings such as {'a': 'b'}.
        for candidate in (obj_str, obj_str.replace("'", '"')):
            try:
                data = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            return Literal(json.dumps(data), datatype=RDF.JSON)
        return Literal(obj_str)

    if obj_str.startswith('mcro:'):
        return MCRO[obj_str.split('mcro:')[-1]]

    if obj_str in classes:
        return MCRO[sanitize_uri(obj_str)]

    return Literal(obj_str)

# Script entry point: read the aligned triples from JSON, build the graph,
# and write it to disk in Turtle format.
if __name__ == "__main__":
    # Input path is relative to the current working directory.
    triples = load_triples("ontology_aligned_triples.json")
    graph = create_ttl(triples)
    # Writes (or overwrites) model_card.ttl in the current working directory.
    graph.serialize(destination="model_card.ttl", format="turtle")