# Ontology Builder

A toolbox for extending the MIAPPE and SSN/SOSA ontologies with (1) new terms, (2) new relations between terms, and (3) new axioms that define terms.



I think the most complex part of this ontology is the integration of DataFiles and their content (without reading the files at this point).

# Change 
    <!-- http://purl.org/ppeo/PPEO.owl#hasDescription -->

    <owl:DatatypeProperty rdf:about="http://purl.org/ppeo/PPEO.owl#hasDescription">
        <rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string"/>
        <rdfs:label xml:lang="en">has description</rdfs:label>
    </owl:DatatypeProperty>



In [None]:
#!/usr/bin/env python3
"""
Convert OWL/RDF XML to JSON-LD template with placeholders.
This script extracts class definitions and creates JSON-LD templates
with appropriate structure for properties.
"""

import xml.etree.ElementTree as ET
import json
import re
from collections import defaultdict
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
import argparse

# Prefix -> namespace URI table handed to ElementTree find()/findall() so that
# prefixed names such as 'owl:Class' resolve to fully qualified tags.
NAMESPACES = dict(
    owl='http://www.w3.org/2002/07/owl#',
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    rdfs='http://www.w3.org/2000/01/rdf-schema#',
    xml='http://www.w3.org/XML/1998/namespace',
)

@dataclass
class PropertyRestriction:
    """One OWL property restriction attached to a class.

    Only ``property_uri`` and ``restriction_type`` are required; every other
    field defaults to ``None`` and is filled in when the corresponding piece
    of the restriction is present.
    """
    # URI of the restricted property (taken from owl:onProperty).
    property_uri: str
    # 'someValuesFrom', 'qualifiedCardinality' or 'maxQualifiedCardinality';
    # the parser in this file starts from '' before a kind is recognised.
    restriction_type: str
    # URI of the class the property points to, for object-valued restrictions.
    target_class: Optional[str] = None
    # URI of the datatype of the value, for data-valued restrictions.
    target_datatype: Optional[str] = None
    # Exact count taken from owl:qualifiedCardinality.
    cardinality: Optional[int] = None
    # Lower bound; not assigned by the parser in this file.
    min_cardinality: Optional[int] = None
    # Upper bound taken from owl:maxQualifiedCardinality.
    max_cardinality: Optional[int] = None

class OWLToJSONLDConverter:
    """Turn the property restrictions of OWL classes into JSON-LD templates.

    The converter reads an OWL/RDF-XML file with ElementTree, collects the
    ``owl:Restriction`` definitions attached to each named ``owl:Class`` and
    emits, per class, a JSON-LD skeleton whose properties carry empty
    placeholder values.
    """

    # Clark-notation names of the RDF attributes read from the XML.
    _RDF_ABOUT = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about'
    _RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
    # Namespace of the XML Schema datatypes, always shortened to ``xsd:``.
    _XSD_NAMESPACE = 'http://www.w3.org/2001/XMLSchema#'

    def __init__(self, base_context: Optional[Dict[str, str]] = None):
        """Initialize the converter.

        Args:
            base_context: Mapping of prefix to namespace URI used both as the
                ``@context`` of every template and for shortening URIs. When
                omitted (or empty) a default PPEO/RDFS/XSD/OWL context is used.
        """
        self.base_context = base_context or {
            "ppeo": "http://purl.org/ppeo/PPEO.owl#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
            "owl": "http://www.w3.org/2002/07/owl#"
        }

    def parse_owl_file(self, file_path: str) -> "Dict[str, List[PropertyRestriction]]":
        """Parse an OWL file and collect the property restrictions of each class.

        Restrictions are read from the ``rdfs:subClassOf`` children of every
        ``owl:Class`` that has an ``rdf:about`` URI, and from the
        ``owl:annotatedTarget`` of every ``owl:Axiom`` whose
        ``owl:annotatedSource`` points at that class.

        Args:
            file_path: Path of the OWL/RDF-XML file to read.

        Returns:
            A dictionary mapping class URIs to the list of their restrictions.
            Classes without any parsable restriction are not included.
        """
        tree = ET.parse(file_path)
        root = tree.getroot()

        # Register namespaces
        for prefix, uri in NAMESPACES.items():
            ET.register_namespace(prefix, uri)

        # Index annotated axioms by their source URI once. ElementTree's XPath
        # subset has no nested predicates, so a query of the form
        # owl:Axiom[owl:annotatedSource[@rdf:resource="..."]] is rejected as an
        # invalid predicate; filtering in Python also avoids interpolating the
        # class URI into an XPath string.
        axiom_targets = defaultdict(list)
        for axiom in root.findall('.//owl:Axiom', NAMESPACES):
            source_elem = axiom.find('owl:annotatedSource', NAMESPACES)
            target_elem = axiom.find('owl:annotatedTarget', NAMESPACES)
            if source_elem is None or target_elem is None:
                continue
            source_uri = source_elem.get(self._RDF_RESOURCE)
            if source_uri:
                axiom_targets[source_uri].append(target_elem)

        classes = defaultdict(list)
        for class_elem in root.findall('.//owl:Class', NAMESPACES):
            class_uri = class_elem.get(self._RDF_ABOUT)
            if not class_uri:
                continue

            candidates = list(class_elem.findall('rdfs:subClassOf', NAMESPACES))
            # pop() so that a class URI occurring on several owl:Class elements
            # does not collect the same axioms more than once.
            candidates.extend(axiom_targets.pop(class_uri, []))

            for candidate in candidates:
                restriction = self._parse_restriction(candidate)
                if restriction is not None:
                    classes[class_uri].append(restriction)

        return dict(classes)

    def _parse_restriction(self, elem: ET.Element) -> "Optional[PropertyRestriction]":
        """Extract the property restriction described by ``elem``.

        Args:
            elem: Either an element wrapping an ``owl:Restriction`` child
                (e.g. ``rdfs:subClassOf``) or the restriction element itself.

        Returns:
            The parsed restriction, or ``None`` when no ``owl:onProperty`` with
            an ``rdf:resource`` is present. If none of the supported kinds
            (someValuesFrom, qualifiedCardinality, maxQualifiedCardinality) is
            found, the returned restriction has an empty ``restriction_type``.
        """
        restriction_elem = elem.find('owl:Restriction', NAMESPACES)
        if restriction_elem is None:
            # No wrapper: treat the element itself as the restriction.
            restriction_elem = elem

        on_property_elem = restriction_elem.find('owl:onProperty', NAMESPACES)
        if on_property_elem is None:
            return None

        property_uri = on_property_elem.get(self._RDF_RESOURCE)
        if not property_uri:
            return None

        restriction = PropertyRestriction(property_uri=property_uri, restriction_type='')

        some_values_elem = restriction_elem.find('owl:someValuesFrom', NAMESPACES)
        if some_values_elem is not None:
            restriction.restriction_type = 'someValuesFrom'
            target = some_values_elem.get(self._RDF_RESOURCE)
            if target:
                restriction.target_class = target

        card_elem = restriction_elem.find('owl:qualifiedCardinality', NAMESPACES)
        if card_elem is not None:
            restriction.restriction_type = 'qualifiedCardinality'
            restriction.cardinality = self._parse_cardinality(card_elem)
            self._apply_qualified_target(restriction_elem, restriction)

        max_card_elem = restriction_elem.find('owl:maxQualifiedCardinality', NAMESPACES)
        if max_card_elem is not None:
            restriction.restriction_type = 'maxQualifiedCardinality'
            restriction.max_cardinality = self._parse_cardinality(max_card_elem)
            self._apply_qualified_target(restriction_elem, restriction)

        return restriction

    @staticmethod
    def _parse_cardinality(card_elem: ET.Element) -> int:
        """Return the integer text of a cardinality element, or 1 if it is missing or malformed."""
        try:
            return int(card_elem.text)
        except (ValueError, TypeError):
            return 1

    def _apply_qualified_target(self, restriction_elem: ET.Element, restriction: "PropertyRestriction") -> None:
        """Fill the target class or datatype of a qualified cardinality restriction.

        ``owl:onClass`` takes precedence over ``owl:onDataRange``. A data range
        without an ``rdf:resource`` is searched for an ``owl:unionOf``; for
        simplicity only the first ``rdf:Description`` of the union is used.
        """
        on_class_elem = restriction_elem.find('owl:onClass', NAMESPACES)
        if on_class_elem is not None:
            restriction.target_class = on_class_elem.get(self._RDF_RESOURCE)
            return

        on_datarange_elem = restriction_elem.find('owl:onDataRange', NAMESPACES)
        if on_datarange_elem is None:
            return

        datatype = on_datarange_elem.get(self._RDF_RESOURCE)
        if not datatype:
            union_elem = on_datarange_elem.find('.//owl:unionOf', NAMESPACES)
            if union_elem is not None:
                first_type = union_elem.find('.//rdf:Description', NAMESPACES)
                if first_type is not None:
                    datatype = first_type.get(self._RDF_ABOUT)
        if datatype:
            restriction.target_datatype = datatype

    def create_jsonld_template(self, class_uri: str, restrictions: "List[PropertyRestriction]") -> Dict[str, Any]:
        """Create a JSON-LD template for a class based on its restrictions.

        Args:
            class_uri: URI of the class; becomes the template's ``@type``.
            restrictions: Restrictions of that class. A later restriction on
                the same property overwrites an earlier one.

        Returns:
            A dictionary with ``@context``, ``@type`` and one entry per
            supported restriction:

            * someValuesFrom with a target class -> nested object placeholder;
            * (max)qualifiedCardinality with a datatype -> typed empty value;
            * qualifiedCardinality of exactly 1 with a target class -> nested
              object placeholder;
            * any other cardinality restriction with a target class -> a list
              holding one nested object placeholder.
        """
        template: Dict[str, Any] = {
            "@context": self.base_context.copy(),
            "@type": self._shorten_uri(class_uri),
        }

        for restriction in restrictions:
            prop_name = self._shorten_uri(restriction.property_uri)
            kind = restriction.restriction_type

            if kind == 'someValuesFrom':
                if restriction.target_class:
                    template[prop_name] = self._object_placeholder(restriction.target_class)
                continue

            if kind not in ('qualifiedCardinality', 'maxQualifiedCardinality'):
                continue

            if restriction.target_datatype:
                template[prop_name] = {
                    "@type": self._shorten_datatype_uri(restriction.target_datatype),
                    "@value": ""  # Empty string placeholder
                }
            elif restriction.target_class:
                placeholder = self._object_placeholder(restriction.target_class)
                exactly_one = kind == 'qualifiedCardinality' and restriction.cardinality == 1
                template[prop_name] = placeholder if exactly_one else [placeholder]

        return template

    def _object_placeholder(self, target_class: str) -> Dict[str, Any]:
        """Return the empty nested-object placeholder for a target class URI."""
        return {
            "@type": self._shorten_uri(target_class),
            "@graph": []  # Empty array for nested objects
        }

    def _shorten_uri(self, uri: str) -> str:
        """Shorten a full URI to ``prefix:local`` form using the context.

        Context entries that cannot act as a prefix are skipped: JSON-LD
        keywords (keys starting with ``@``) and values that are not non-empty
        strings, which can occur in a user-supplied context. If no prefix
        matches, the full URI is returned unchanged.
        """
        for prefix, base_uri in self.base_context.items():
            if not isinstance(base_uri, str) or not base_uri or prefix.startswith('@'):
                continue
            if uri.startswith(base_uri):
                return f"{prefix}:{uri[len(base_uri):]}"

        return uri

    def _shorten_datatype_uri(self, uri: str) -> str:
        """Shorten a datatype URI; XSD types always become ``xsd:<name>``."""
        if uri.startswith(self._XSD_NAMESPACE):
            # Slice by the namespace's real length so the local name survives.
            return f"xsd:{uri[len(self._XSD_NAMESPACE):]}"
        return self._shorten_uri(uri)

    def _extract_local_name(self, uri: str) -> str:
        """Return the part of ``uri`` after the last ``#``, else after the last ``/``."""
        if '#' in uri:
            return uri.rsplit('#', 1)[-1]
        if '/' in uri:
            return uri.rsplit('/', 1)[-1]
        return uri

    def convert_file(self, input_file: str, output_file: str, class_uri: Optional[str] = None):
        """Convert an OWL file to a JSON-LD template file.

        Nothing is written when the file yields no class with restrictions or
        when the requested class is not among them; a message is printed
        instead.

        Args:
            input_file: Path to input OWL file
            output_file: Path to output JSON file
            class_uri: Specific class URI to convert (if None, converts all
                classes into one object keyed by each class's local name)
        """
        classes = self.parse_owl_file(input_file)

        if not classes:
            print(f"No classes found in {input_file}")
            return

        if class_uri:
            if class_uri not in classes:
                print(f"Class {class_uri} not found in the ontology")
                return
            result = self.create_jsonld_template(class_uri, classes[class_uri])
        else:
            # A distinct loop name keeps the class_uri parameter intact.
            result = {
                self._extract_local_name(uri): self.create_jsonld_template(uri, restrictions)
                for uri, restrictions in classes.items()
            }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        print(f"Successfully converted to {output_file}")


def main():
    """Command-line entry point: parse arguments and run one conversion.

    Takes the input OWL file and the output JSON file as positional
    arguments. ``--class-uri`` limits the conversion to a single class, and
    ``--context`` names a JSON file whose parsed content is passed to the
    converter as its base context.
    """
    cli = argparse.ArgumentParser(description='Convert OWL/RDF XML to JSON-LD template')
    cli.add_argument('input', help='Input OWL/RDF XML file')
    cli.add_argument('output', help='Output JSON file')
    cli.add_argument('--class-uri', help='Specific class URI to convert (optional)')
    cli.add_argument('--context', help='Additional context JSON file (optional)')
    options = cli.parse_args()

    # Without --context the converter receives None and uses its own defaults.
    if options.context:
        with open(options.context, 'r', encoding='utf-8') as context_file:
            custom_context = json.load(context_file)
    else:
        custom_context = None

    OWLToJSONLDConverter(custom_context).convert_file(
        options.input,
        options.output,
        options.class_uri,
    )


if __name__ == "__main__":
    # Demonstration run with fixed file names. The argparse-based main() is
    # not invoked from here.
    print("OWL to JSON-LD Converter")
    print("========================")

    # The OWL content is expected to have been saved as example.owl beforehand.
    demo_source = "example.owl"
    demo_target = "study_template.json"
    study_class = "http://purl.org/ppeo/PPEO.owl#study"

    # Only the study class is converted.
    OWLToJSONLDConverter().convert_file(demo_source, demo_target, class_uri=study_class)