In [1]:
import argparse
import subprocess
import logging
import random
from typing import List, Dict, Any, Tuple, Set # Import types for better type hinting
# NOTE: the helper functions below reference owlready2's Restriction, And and Or.
# This cell does not import them, so they must be in scope before the helpers are called:
# from owlready2 import Restriction, And, Or
# from owlready2 import Thing, Ontology, Property  # add other owlready2 names as needed

import logging

# --- Simple Logger Setup to File ---

# Name of the log file the root logger writes to (a relative path).
LOG_FILE_PATH = 'app_output.log'

# Configure the root logger to write to LOG_FILE_PATH.
# No `filemode` is passed, so the file is opened in the default append mode ('a');
# pass filemode='w' to overwrite the file on every run instead.
# NOTE(review): basicConfig() is documented as a no-op when the root logger already
# has handlers (unless force=True) — confirm this matters for the notebook kernel in use.
logging.basicConfig(
    filename=LOG_FILE_PATH,
    level=logging.DEBUG,  # Root threshold is DEBUG: DEBUG messages and above are recorded
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# --- Module Logger ---
# Logger used by the functions defined below. It has no handler of its own here,
# so its records propagate to the root logger configured above.
logger = logging.getLogger(__name__)

# --- Utility Functions for Ontology Restriction Parsing ---

# Refactor the restriction parsing function for better encapsulation and standard practices.

def _extract_classes_from_restriction_expression(expr: Any, prop: Any, required_toppings: Set[Any]) -> None:
    """
    Walk an OWL expression and collect the values restricted through `prop`.

    The expression is treated as a small tree:

    * a ``Restriction`` whose ``property`` equals `prop` is a leaf; its ``value``
      is recorded when that value exposes a ``name`` attribute;
    * an ``And`` / ``Or`` node is descended through its ``Classes`` attribute;
    * a ``list`` / ``tuple`` is descended element by element;
    * anything else (including restrictions on other properties) is ignored.

    Results are accumulated in place; the function returns nothing.

    Args:
        expr (Any): The expression (or container of expressions) to inspect.
        prop (Any): The property object the restrictions must refer to.
        required_toppings (Set[Any]): Output set that receives the restriction values.
    """
    # Leaf case: a restriction on the property of interest.
    if isinstance(expr, Restriction) and getattr(expr, "property", None) == prop:
        # Values without a `name` attribute are skipped.
        if hasattr(expr.value, "name"):
            logger.debug(f"Found required restriction class: {expr.value.name}")
            required_toppings.add(expr.value)
        return

    # Container cases: work out which children to descend into, if any.
    if isinstance(expr, (And, Or)):
        children = expr.Classes
    elif isinstance(expr, (list, tuple)):
        children = expr
    else:
        # Not something this walker understands; nothing to collect.
        return

    for child in children:
        _extract_classes_from_restriction_expression(child, prop, required_toppings)

def properties_from_restrictions(sub_class: Any, prop: Any) -> List[Any]:
    """
    Collect the restriction targets that `sub_class` declares for `prop`.

    Every entry of ``sub_class.is_a`` is scanned with
    `_extract_classes_from_restriction_expression`. When an entry carries a
    non-None ``is_a`` attribute of its own, that attribute is scanned as well.

    Args:
        sub_class (Any): Object exposing an iterable ``is_a`` attribute
                         (an OWL class in the intended use).
        prop (Any): The property object the restrictions must refer to.

    Returns:
        List[Any]: The distinct restriction values found. They are gathered in a
                   set, so the order of the returned list is not meaningful.
    """
    found: Set[Any] = set()

    for eq in sub_class.is_a:
        logger.debug(f"Processing `is_a` expression: {eq}")

        # Restrictions stated directly in this `is_a` entry.
        _extract_classes_from_restriction_expression(eq, prop, found)

        # Entries that have their own `is_a` contribute that as well.
        nested = getattr(eq, 'is_a', None)
        if nested is not None:
            _extract_classes_from_restriction_expression(nested, prop, found)

    return [*found]

# --- Core Instance Generation Function ---

def generate_instances(onto: Any, class_config: Dict[str, int], relation_config: List[Tuple[str, str, str]]) -> Dict[str, List[Tuple[Any, Any]]]:
    """
    Populate an ontology with randomly generated individuals and relations.

    Phase 1 creates, for every entry of `class_config`, the requested number of
    individuals. For each one a subclass of the configured class is picked at random
    (or the class itself when it has no subclasses). The individual is created by
    calling the *configured* class, and the picked subclass supplies the name prefix
    and is remembered next to the individual.

    Phase 2 walks `relation_config`. For every stored (subclass, individual) pair of
    the subject class, the restriction targets of the remembered subclass are looked
    up with `properties_from_restrictions`, and the individual is linked to one
    object per target: a random existing individual stored under the target's name,
    or a freshly created one when none exists yet.

    NOTE(review): owlready2 documents ``subclasses()`` as returning direct subclasses
    only (``descendants()`` covers the whole subtree) — confirm which one is intended.

    Args:
        onto (Any): The owlready2 ontology to populate; used as a context manager
                    and queried with ``search_one``.
        class_config (Dict[str, int]): Class name -> number of individuals to create.
        relation_config (List[Tuple[str, str, str]]): Triples of
            (subject_class_name, property_name, object_class_name). The third element
            is ignored; object classes come from the subject's restrictions.

    Returns:
        Dict[str, List[Tuple[Any, Any]]]: Class name -> list of
            (class_object, individual) pairs. Besides the configured classes, the
            dictionary also gains one key per object class for which an individual
            had to be created on the fly.
    """
    instances: Dict[str, List[Tuple[Any, Any]]] = {}
    # Per-class running number used to name individuals created on the fly.
    instance_counter = {}

    # Everything that creates entities happens inside the ontology context.
    with onto:
        # ---- Phase 1: individuals -------------------------------------------------
        logger.info("--- Creating Individuals ---")
        for cls_name, n in class_config.items():
            main_class = onto.search_one(iri=f"*{cls_name}")
            if main_class is None:
                logger.warning(f"Main class '{cls_name}' not found in ontology. Skipping.")
                continue

            # Candidate classes to draw from; fall back to the class itself.
            classes = list(main_class.subclasses()) or [main_class]

            logger.info(f"Instantiating {n} individuals for {cls_name} across {len(classes)} subclasses.")

            for i in range(n):
                cls_to_instantiate = random.choice(classes)
                bucket = instances.setdefault(cls_name, [])

                # The individual is created from `main_class`; the drawn subclass
                # only shapes the name and is stored alongside for phase 2.
                new_individual = main_class(f"{cls_to_instantiate.name}_{i}")
                bucket.append((cls_to_instantiate, new_individual))
                logger.debug(f"Created individual '{new_individual.name}' of class '{cls_to_instantiate.name}'.")

        logger.info("Total individuals created across configured classes.")
        logger.debug(f"Instances: {instances}")

        # ---- Phase 2: relations ---------------------------------------------------
        logger.info("--- Assigning Relations ---")
        for subj_cls_name, prop_name, _ in relation_config:
            if subj_cls_name not in instances:
                logger.warning(f"Subject class '{subj_cls_name}' not instantiated. Skipping relation for '{prop_name}'.")
                continue

            subjects: List[Tuple[Any, Any]] = instances[subj_cls_name]

            rel = onto.search_one(iri=f"*{prop_name}")
            if rel is None:
                logger.warning(f"Property '{prop_name}' not found in ontology. Skipping.")
                continue

            logger.info(f"Processing relation '{prop_name}' for subjects of class '{subj_cls_name}'.")

            for subj_cls_obj, subj_individual in subjects:
                # Object classes are dictated by the restrictions of the drawn subclass.
                required_obj_classes = properties_from_restrictions(subj_cls_obj, rel)

                logger.debug(f"Subject class '{subj_cls_obj.name}' requires objects of classes: "
                             f"{[cls.name for cls in required_obj_classes]} for property '{prop_name}'.")

                # One link per restriction target.
                for obj_cls_to_use in required_obj_classes:
                    obj_cls_name = obj_cls_to_use.name
                    candidates = instances.get(obj_cls_name)

                    if candidates:
                        # Reuse a random individual already stored under this name.
                        _, new_obj_individual = random.choice(candidates)
                        logger.debug(f"Using existing object '{new_obj_individual.name}' of class '{obj_cls_name}'.")
                    else:
                        # Nothing stored yet: create one and remember it for reuse.
                        i = instance_counter.get(obj_cls_name, 0) + 1
                        instance_counter[obj_cls_name] = i
                        new_obj_individual = obj_cls_to_use(f"{obj_cls_name}_dynamic_{i}")

                        instances.setdefault(obj_cls_name, []).append((obj_cls_to_use, new_obj_individual))
                        logger.debug(f"Created new dynamic object '{new_obj_individual.name}' of class '{obj_cls_name}'.")

                    # Add the object to the subject's values for this property.
                    rel[subj_individual].append(new_obj_individual)

                    logger.info(f"Linked '{subj_individual.name}' to '{new_obj_individual.name}' via '{prop_name}'.")

    return instances

In [2]:
from owlready2 import get_ontology, sync_reasoner_pellet, default_world, Restriction, And, Or # Import all necessary owlready2 functions/classes explicitly
from rdflib import Graph # Used for RDF graph manipulation and counting triples
import logging
from typing import Dict, List, Tuple, Any

# Assuming the user's previously defined helper functions are available here:
# from your_utils_module import generate_instances # Example import if in a separate file

# --- Logging Setup ---
# Module logger for progress messages; its own threshold is set to INFO here.
# No handler is attached in this cell, so output depends on handlers configured elsewhere.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# --- Configuration ---
# Name of the dataset to process. It selects both the TBox file to load and the
# instance-generation settings below. Supported values: "pizza", "family", "OWL2DL-1".
dataset_name = "pizza"

logger.info(f"Configuration set for dataset: **{dataset_name}**")

# --- Instance Generation Configuration (ABox) ---
# Each configuration holds:
#   "class_config":    class name -> number of individuals to create
#   "relation_config": (subject class, property, object class) triples

# Settings for the 'pizza' ontology.
pizza_config: Dict[str, Any] = {
    "class_config": {"NamedPizza": 1000},
    "relation_config": [("NamedPizza", "hasTopping", "PizzaTopping")],
}

# Settings for the 'family' ontology.
family_config: Dict[str, Any] = {
    "class_config": {"Person": 10},
    "relation_config": [
        ("Person", "hasFather", "Person"),
        ("Person", "hasMother", "Person"),
        ("Person", "hasSex", "Sex"),
    ],
}

# Settings for the 'University' ontology.
university_config: Dict[str, Any] = {
    "class_config": {"University": 30, "Person": 100},
    "relation_config": [
        ("University", "hasDepartment", "Department"),
        ("Person", "hasDoctoralDegreeFrom", "University"),
        ("Person", "teachesCourse", "Course"),
        ("Person", "takesCourse", "Course"),
    ],
}

# Resolve the active configuration from the dataset name.
# "OWL2DL-1" is mapped to the university settings.
try:
    active_config = {
        "pizza": pizza_config,
        "family": family_config,
        "OWL2DL-1": university_config,
    }[dataset_name]
except KeyError:
    raise ValueError(f"No configuration defined for dataset '{dataset_name}'.") from None

class_config: Dict[str, int] = active_config["class_config"]
relation_config: List[Tuple[str, str, str]] = active_config["relation_config"]

# ==========================================================
# 1. Load Ontology (TBox)
# ==========================================================
# Path of the TBox (terminological box) file, derived from the dataset name and
# relative to the current working directory.
tbox_path: str = f"../ontologies/{dataset_name}_TBOX.owl"

try:
    # Create the ontology object for this path and load its contents.
    onto = get_ontology(tbox_path).load()
    logger.info(f"Loaded ontology successfully: **{onto.base_iri}** from path: {tbox_path}")
except FileNotFoundError:
    # Log the missing path, then re-raise so the script stops here.
    # Only FileNotFoundError gets this log line; any other load failure propagates as-is.
    logger.error(f"Error: Ontology file not found at {tbox_path}")
    raise

# ==========================================================
# 2. Generate Individuals (ABox)
# ==========================================================
# Create individuals and their relations in `onto` according to the selected
# `class_config` / `relation_config`.
logger.info("Generating individuals and initial ABox relations...")
# NOTE: `generate_instances` must already be defined in this session.
try:
    instances: Dict[str, List[Tuple[Any, Any]]] = generate_instances(onto, class_config, relation_config)
    # len(instances) counts the keys of the returned dictionary.
    logger.info(f"Finished generating ABox. Total classes instantiated: {len(instances)}")
except NameError:
    # NOTE(review): this also catches a NameError raised *inside* generate_instances
    # (e.g. an owlready2 name that was never imported), in which case the message
    # below is misleading. The exception is re-raised either way.
    logger.error("The `generate_instances` function is not defined or imported.")
    raise

# ==========================================================
# 3. Save and Analyze Ontology BEFORE Reasoning
# ==========================================================
# Write the populated ontology (no reasoner has been run in this section) as RDF/XML
# into the current working directory.
temp_file_before: str = f"{dataset_name}_withABox.owl"
onto.save(temp_file_before, format="rdfxml")
logger.info(f"Ontology state saved to '{temp_file_before}' for pre-reasoning analysis.")