In [1]:
from ontology_loader import OntologyLoader

source_address = 'data/mouse-human/source.xml'
target_address = 'data/mouse-human/target.xml'

source_ontology = OntologyLoader(source_address)
target_ontology = OntologyLoader(target_address)

source_ontology.load()
target_ontology.load()

Ontology loaded successfully from data/mouse-human/source.xml
Ontology loaded successfully from data/mouse-human/target.xml


get_ontology("http://human.owl#")

In [2]:
from logmap.ontology import Ontology
from logmap.inverted_index import InvertedIndexBuilder
from logmap.anchor_mapper import AnchorMapper

source_ont = Ontology(source_ontology.ontology)
target_ont = Ontology(target_ontology.ontology)

# Build inverted indexes
index_builder = InvertedIndexBuilder(external_lexicon=None)  # Provide an external lexicon if available
source_index = index_builder.build_index(source_ont)
target_index = index_builder.build_index(target_ont)

# Compute anchor mappings
anchor_mapper = AnchorMapper(source_ont, target_ont, source_index, target_index, isub_threshold=0.8)
anchors = anchor_mapper.compute_anchors()

print("Anchor mappings with confidence scores:")
for mapping, confidence in anchors.items():
    print(f"{mapping}: {confidence}")

Anchor mappings with confidence scores:
('2578', '1447'): 0.0
('7', '122'): 0.0
('268', '690'): 0.0
('917', '3038'): 0.0
('772', '2855'): 0.0
('2120', '2011'): 0.16666666666666666
('425', '62'): 0.2
('295', '548'): 0.0
('1739', '1601'): 0.25
('1671', '1720'): 0.0
('249', '1395'): 0.0
('189', '633'): 0.0
('410', '1745'): 0.0
('525', '44'): 0.0
('2364', '3179'): 0.5
('2569', '1545'): 0.0
('1921', '2861'): 0.1111111111111111
('130', '2963'): 0.0
('1420', '3123'): 0.0
('2475', '3300'): 0.0
('204', '207'): 0.16666666666666666
('45', '1113'): 0.0
('954', '313'): 0.0
('2459', '2558'): 0.0
('301', '1171'): 0.1
('2452', '3295'): 0.0
('1850', '1496'): 0.0
('2445', '3224'): 0.5
('127', '1583'): 0.0
('139', '272'): 0.058823529411764705
('813', '2900'): 0.0
('1358', '2353'): 0.0
('1986', '635'): 0.16666666666666666
('1320', '3165'): 0.3333333333333333
('1952', '2904'): 0.14285714285714285
('2363', '3178'): 0.5
('2404', '3195'): 0.5
('2260', '3255'): 1.0
('109', '702'): 0.0
('2409', '3289'): 0.5
('2

Repair Phase:

Horn Clause Generation: Convert ontologies and mappings into Horn clauses. Only active mappings (current iteration) are candidates for repair.

Unsatisfiability Detection: Use the Dowling-Gallier algorithm to find classes that are unsatisfiable due to logical conflicts.

Repair Calculation: Identify minimal subsets of active mappings causing conflicts. Remove mappings with the lowest confidence, prioritizing equivalence-to-subsumption weakening.

Discovery Phase:

Context Expansion: Expand each anchor's context by adding immediate neighbors (parents/children) from the ontology hierarchy.

Lexical Matching: Use ISUB to compute string similarity between classes in expanded contexts.

Structural Confidence: Validate mappings using the proportion of matching neighbors weighted by similarity scores.

Threshold Filtering: Retain mappings above confidence thresholds and add them as new active mappings.

In [98]:
from collections import defaultdict
import numpy as np
from jellyfish import jaro_winkler_similarity as isub  # Using Jaro-Winkler as ISUB substitute

def dowling_gallier(horn_clauses):
    """Linear-time Horn clause satisfiability checker (simplified)"""
    graph = defaultdict(list)
    literals = set()
    for clause in horn_clauses:
        if ' -> ' not in clause:
            continue
        body, head = clause.split(' -> ')
        if body == 'true':
            graph[head].append(None)  # Fact
        else:
            antecedents = body.split(' ∧ ')
            graph[head].append(antecedents)
            literals.update(antecedents)
        literals.add(head)
    
    marked = set()
    queue = [lit for lit in graph if None in graph[lit]]
    
    while queue:
        current = queue.pop(0)
        if current in marked:
            continue
        marked.add(current)
        for consequent in graph:
            for antecedents in graph[consequent]:
                if antecedents is None:
                    continue
                if all(a in marked for a in antecedents):
                    if consequent not in marked:
                        queue.append(consequent)
    
    return [lit for lit in literals if lit not in marked]

def precompute_interval_labels(ontology):
    """Precompute interval labels using depth-first traversal"""
    index = {}
    # Assume ontology has a root class; adjust as needed
    root = next(cls for cls in ontology.classes() if not ontology.get_parents_of(cls))
    
    # Descendants traversal (preorder)
    desc_counter = [1]
    def traverse_desc(cls):
        index[cls.iri] = {'desc_pre': desc_counter[0], 'desc_max': None}
        desc_counter[0] += 1
        for child in ontology.get_children_of(cls):
            traverse_desc(child)
        index[cls.iri]['desc_max'] = desc_counter[0] - 1
    traverse_desc(root)
    
    # Ancestors traversal (preorder)
    anc_counter = [1]
    def traverse_anc(cls):
        index[cls.iri]['anc_pre'] = anc_counter[0]
        anc_counter[0] += 1
        for parent in ontology.get_parents_of(cls):
            traverse_anc(parent)
        index[cls.iri]['anc_max'] = anc_counter[0] - 1
    traverse_anc(root)
    
    return index

class LogMapMapper:
    def __init__(self, ontology1, ontology2, raw_anchors):
        self.ontology1 = ontology1
        self.ontology2 = ontology2
        self.ontology1_list = list(ontology1.classes())
        self.ontology2_list = list(ontology2.classes())
        # Compute confidence for raw anchors using neighbor similarity
        self.active_mappings = {
            (c1, c2): self._structural_confidence(c1, c2)
            for (c1, c2) in raw_anchors
        }
        self.established_mappings = {}
        self.contexts = defaultdict(lambda: {'source': set(), 'target': set()})
        self.interval_index1 = precompute_interval_labels(ontology1)
        self.interval_index2 = precompute_interval_labels(ontology2)
        
        # Initialize contexts with anchor classes
        for (c1, c2) in self.active_mappings:
            self.contexts[(c1, c2)]['source'].add(c1)
            self.contexts[(c1, c2)]['target'].add(c2)

    def repair_mappings(self):
        """Implements repair process from Section 2.4"""
        horn_clauses = self._convert_to_horn()
        unsatisfiable = dowling_gallier(horn_clauses)
        
        # Sort by topological level (highest first)
        unsatisfiable_sorted = sorted(
            unsatisfiable,
            key=lambda cls: self._get_topological_level(cls),
            reverse=True
        )
        
        repair_plan = []
        for cls in unsatisfiable_sorted:
            involved = [m for m in self.active_mappings if m[0] == cls or m[1] == cls]
            if involved:
                # Find minimal repair: remove mapping with lowest confidence
                weakest = min(involved, key=lambda m: self.active_mappings[m])
                repair_plan.append(weakest)
        
        # Apply repairs
        for mapping in repair_plan:
            if mapping in self.active_mappings:
                del self.active_mappings[mapping]
        
        return repair_plan

    def discover_mappings(self, expansion_thresh=0.7, mapping_thresh=0.95):
        """Implements context expansion and discovery"""
        new_mappings = {}
        new_active = defaultdict(lambda: {'source': set(), 'target': set()})
        
        for anchor, ctx in self.contexts.items():
            # Expand to immediate neighbors only
            source_expanded = self._get_neighbors(ctx['source'], self.ontology1)
            target_expanded = self._get_neighbors(ctx['target'], self.ontology2)
            print(source_expanded)
            # Match classes in expanded contexts
            for src in source_expanded:
                for tgt in target_expanded:
                    sim = isub(self._get_label(src, self.ontology1), 
                              self._get_label(tgt, self.ontology2))
                    if sim >= expansion_thresh:
                        new_active[anchor]['source'].add(src)
                        new_active[anchor]['target'].add(tgt)
                        if sim >= mapping_thresh:
                            struct_conf = self._structural_confidence(src, tgt)
                            if struct_conf >= 0.6:
                                new_mappings[(src, tgt)] = sim * struct_conf
        
        # Update contexts and active mappings
        self._update_contexts(new_active)
        self.active_mappings.update(new_mappings)
        return new_mappings

    def _convert_to_horn(self):
        """Convert ontology axioms and mappings to Horn clauses"""
        clauses = []
        # Ontology axioms
        for ontology in [self.ontology1, self.ontology2]:
            for cls in ontology.classes():
                for parent in cls.is_a:
                    clauses.append(f"{parent.iri} -> {cls.iri}")
                for disjoint in cls.disjoint_with():
                    clauses.append(f"{cls.iri} ∧ {disjoint.iri} -> false")
        # Mappings (active + established)
        for mapping in {**self.active_mappings, **self.established_mappings}:
            c1, c2 = mapping
            clauses.append(f"{c1} -> {c2}")  # Treat as directional
        return clauses

    def _structural_confidence(self, c1, c2):
        """Compute structural confidence for an anchor (Section 2.3)"""
        # FIX: Use _get_neighbors and pass class IDs as a list
        src_neighbors = self._get_neighbors([c1], self.ontology1)
        tgt_neighbors = self._get_neighbors([c2], self.ontology2)
        
        total_sim, matches = 0.0, 0
        for s_n in src_neighbors:
            print("hereee")
            print("heree2")
            max_sim = max(
                (
                    isub(
                        self._get_label(s_n, self.ontology1),
                        self._get_label(t_n, self.ontology2)
                    )
                    for t_n in tgt_neighbors
                ),
                default=0.0
            )
            print(max_sim)
            if max_sim >= 0.7:
                total_sim += max_sim
                matches += 1
        
        if not src_neighbors:
            return 0.0
        return (total_sim / len(src_neighbors)) * (matches / len(src_neighbors))

    def _get_neighbors(self, class_ids, ontology):
        """Get immediate parents and children, safely handling missing IRIs"""
        neighbors = set()
        ontology_list = list(ontology.classes())
        
        for cls_id in class_ids:
            cls = ontology_list[int(cls_id)]
            # Get parents with IRI check
            try:
                for p in ontology.get_parents_of(cls):
                    if hasattr(p, 'iri') and p.iri:  # Ensure IRI exists
                        neighbors.add(p.iri)
            except AttributeError:
                pass  # Handle missing get_parents_of() method
                
            # Get children with IRI check
            try:
                for c in ontology.get_children_of(cls):
                    if hasattr(c, 'iri') and c.iri:  # Ensure IRI exists
                        neighbors.add(c.iri)
            except AttributeError:
                pass
        print(neighbors)
        return neighbors

    # Helper methods
    def _get_label(self, cls_id, ontology):
        ontology_list = list(ontology.classes())
        for o in ontology_list:
            # print(cls_id)
            if o.iri == cls_id:
                if hasattr(o, 'label') and o.label:
                    print(o.label)
                    return o.label
                else: print("no label")
        print("not found")
        return ""

In [99]:
mapper = LogMapMapper(source_ontology.ontology, target_ontology.ontology, anchors)

MAX_ITERATIONS = 5
for iteration in range(MAX_ITERATIONS):
    print(f"\n=== Iteration {iteration+1} ===")
    
    # Repair phase
    repaired = mapper.repair_mappings()
    print(f"Repaired {len(repaired)} mappings")
    
    # Discovery phase
    new_mappings = mapper.discover_mappings()
    print(f"Discovered {len(new_mappings)} new mappings")
    
    # Merge active mappings into established
    mapper.established_mappings.update(mapper.active_mappings)
    mapper.active_mappings = {}
    
    # Check termination condition
    if not new_mappings:
        break

print("\nFinal mappings:", len(mapper.established_mappings))

{'http://mouse.owl#MA_0002505'}
{'http://human.owl#NCI_C13236'}
hereee
heree2
['mammary gland fluid/secretion']
['Body_Fluid_or_Substance']


TypeError: argument 'a': 'IndividualValueList' object cannot be converted to 'PyString'