In this note book we procede to axiomatisation.

In [None]:
import os
import spacy
import pandas as pd
from typing import Set, List
from olaf import Pipeline
from olaf.commons.logging_config import logger
from olaf.data_container import CandidateTerm, Relation, Concept
from olaf.data_container.knowledge_representation_schema import KnowledgeRepresentation
from olaf.pipeline.pipeline_component.term_extraction import (
    POSTermExtraction,
    TFIDFTermExtraction,
    ManualCandidateTermExtraction
    )

from olaf.pipeline.pipeline_component.concept_relation_extraction import (
    CTsToConceptExtraction, CTsToRelationExtraction,
    SynonymRelationExtraction, SynonymConceptExtraction,
    AgglomerativeClusteringRelationExtraction, AgglomerativeClusteringConceptExtraction
)
from olaf.pipeline.pipeline_component.axiom_extraction import OWLAxiomExtraction

from olaf.commons.kr_to_rdf_tools import (
    kr_concepts_to_owl_classes, kr_relations_to_owl_obj_props, 
    kr_metarelations_to_owl, kr_relations_to_anonymous_some_parent, 
    kr_relations_to_anonymous_some_equivalent,kr_relations_to_domain_range_obj_props,
    concept_lrs_to_owl_individuals, all_individuals_different
)

from olaf.commons.spacy_processing_tools import is_not_punct, is_not_stopword, select_on_pos

from olaf.pipeline.pipeline_component.candidate_term_enrichment import SemanticBasedEnrichment

from olaf.repository.corpus_loader.text_corpus_loader import TextCorpusLoader

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from matplotlib_venn import venn2, venn3

In [3]:
import torch, gc
def free_gpu():
    """
    frees up the GPU cache for other calculations. 
    """
    gc.collect()
    torch.cuda.empty_cache()

free_gpu()

In [4]:
nlp = spacy.load("en_core_web_lg")

# Select Corpus

In [5]:
corpus_path = "../data/GC10-DET_doc.txt"
corpus_loader = TextCorpusLoader(corpus_path)

# Concept and Relation extraction

## Concept Extraction

In [6]:
# a list of candidate terms by a LLM to create concepts
concepts = [
    "defect type",
    "steel strip surface",
    "punching",
    "mechanical failure",
    "welding line",
    "coil",
    "weld line",
    "crescent gap",
    "cutting",
    "water spot",
    "drying",
    "oil spot",
    "mechanical lubricant",
    "silk spot",
    "plaque",
    "strip surface",
    "roller",
    "pressure",
    "inclusion",
    "metal surface",
    "spots",
    "fish scale shape",
    "block irregular distribution",
    "rolled pit",
    "bulges",
    "pits",
    "steel plate",
    "work roll",
    "tension roll",
    "damage",
    "crease",
    "fold",
    "uncoiling process",
    "waist folding",
    "deformation",
    "low-carbon"
]

ct_concept_label = { concept : {concept} for concept in concepts}


## Relation Extraction

Used an enriched context to creation realtion triples using this following prompt:

You are an helpful assistant helping building an ontology from technical documentation of quality defects.
   Based on the context given, create relation as triples based on extracted relation candidates and extrated concepts
        we will use this list of relations to extract relations between concepts : 
'has abnormal'
'has appearance'
'is caused by'
'is part of'
'is produced by'
    Here is an example. Concepts: animal, gum, tooth , back spot, caries
    [["tooth","is part of","gum"], ["tooth","has appearence","black sopt"], ["back spot","is caused by","carrries"]]

    Context:(corpus)
    Concepts : (list of concept)

In [7]:
from spacy.matcher import Matcher

relation_llm_output = [
    ["water spot", "is produced by", "drying"],
    ["water spot", "is produced by", "production"],
    ["oil spot", "is caused by", "contamination"],
    ["contamination", "is caused by", "mechanical lubricant"],
    ["oil spot", "has appearance", "product"],
    ["crescent gap", "is caused by", "cutting"],
    ["weld line", "is part of", "strip"],
    ["inclusion", "has appearance", "small spots"],
    ["inclusion", "has appearance", "fish scale shape"],
    ["inclusion", "has appearance", "strip shape"],
    ["inclusion", "has appearance", "block irregular distribution"],
    ["inclusion", "is part of", "upper surface"],
    ["inclusion", "is part of", "lower surface"],
    ["inclusion", "is accompanied by", "rough pockmarked surfaces"],
    ["crease", "has appearance", "vertical transverse fold"],
    ["crease", "has abnormal", "spacing"],
    ["crease", "is caused by", "local yield"],
    ["crease", "is part of", "strip"],
    ["silk spot", "has appearance", "plaque"],
    ["silk spot", "is caused by", "uneven temperature"],
    ["silk spot", "is caused by", "uneven pressure"],
    ["waist folding", "has appearance", "obvious folds"],
    ["waist folding", "has appearance", "wrinkles"],
    ["waist folding", "is caused by", "local deformation"],
    ["waist folding", "is caused by", "low-carbon"],
    ["punching", "is produced by", "production line"],
    ["punching", "is produced by", "strip"],
    ["punching", "is caused by", "mechanical failure"],
    ["punctate", "is part of", "rolled pit"],
    ["rolled pit", "has appearance", "bulges"],
    ["rolled pit", "has appearance", "pits"],
    ["rolled pit", "is caused by", "work roll"],
    ["rolled pit", "is caused by", "tension roll"],
    ["rolled pit", "is part of", "steel plate"]
]

found_relations = {
   Relation(
       relation[1], 
       Concept(relation[0]), 
       Concept(relation[2])
       ) for relation in relation_llm_output
}


## Hierarchisation

In [35]:
from ast import Dict
from typing import Any
from sentence_transformers import SentenceTransformer, util
from olaf.data_container.metarelation_schema import Metarelation
from olaf.pipeline.pipeline_component.pipeline_component_schema import PipelineComponent


model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
llm_output = [
    ["crease", "is_generalised_by", "defect type"],
    ["waist folding", "is_generalised_by", "defect type"],
    ["water spot", "is_generalised_by", "defect type"],
    ["oil spot", "is_generalised_by", "defect type"],
    ["crescent gap", "is_generalised_by", "defect type"],
    ["silk spot", "is_generalised_by", "defect type"],
    ["inclusion", "is_generalised_by", "defect type"],
    ["punctate", "is_generalised_by", "defect type"],
    ["punching defects", "is_generalised_by", "defect type"],
    ["rolled pit", "is_generalised_by", "defect type"],
    ["crease", "is_generalised_by", "fold"],
    ["waist folding", "is_generalised_by", "fold"],
    ["pits", "is_generalised_by", "bulges"],
    ["fish scale shape", "is_generalised_by", "strip shape"],
    ["block irregular distribution", "is_generalised_by", "strip shape"],
    ["spots", "is_generalised_by", "strip shape"],
    ["rough pockmarked surfaces", "is_generalised_by", "strip shape"],
    ["punching defects", "is_generalised_by", "punching"],
    ["rolled pit", "is_generalised_by", "strip shape"],
    ["strip surface", "is_generalised_by", "metal surface"],
    ["weld line", "is_generalised_by", "welding line"],
    ["work roll", "is_generalised_by", "roller"],
    ["tension roll", "is_generalised_by", "roller"],
    ["product specifications", "is_generalised_by", "product"],
    ["water spot", "is_generalised_by", "spots"],
    ["oil spot", "is_generalised_by", "spots"]
]
"""
You are an helpful assistant helping building an ontology from technical documentation of quality defects.
        Based on the context given, define if there is a hierarchy between the listed concepts.
        The high level of concept super classes are ['Cause', 'Defect Type', 'Product', 'Factory', 'Appearance'] and concepts have hierarchy between them.
        The result should be given as a python list of list of string with double quotes.

        Here is an example. Concepts: animal, mammal, dog(canine), flower
        [["mammal","is_generalised_by","animal"], ["dog","is_generalised_by","mammal"], ["dog","is_generalised_by","animal"]]
Only 'is_generalised_by' is possible.
    
        Context: 
        Concepts:"""

def are_similar(embedding_a : np.ndarray, embedding_b: np.ndarray, threshold=.6):
    """compute the cosine similarity between two vectors
    """
    return util.pytorch_cos_sim(embedding_a, embedding_b) > threshold

def are_similar_label(label_a : str, label_b: str, threshold=.7, model=model):
    """compute the cosine similarity between two string after computing their embeddings.
    """
    embedding_a, embedding_b = model.encode([label_a, label_b])
    return util.pytorch_cos_sim(embedding_a, embedding_b) > threshold

class CustomHierarchisationComponent(PipelineComponent):
    """
    A custom LLM hierarchisation componnet that creates metarelation from a llm output
    """

    def __init__(self, llm_output :List) -> None:
        """Initialise PipelineComponent instance."""
        self.llm_output = llm_output
    
    def check_resources(self) -> None:
        """Method to check that the component has access to all its required resources."""
        pass
    
    def optimise(self) -> None:
        """A method to optimise the pipeline component by tuning the options."""
        pass
    
    def _compute_metrics(self) -> None:
        """A method to compute component performance metrics. It is used by the optimise
        method to update the options.
        """
        pass
    
    def get_performance_report(self) :
        """A getter for the pipeline component performance report.
            If the component has been optimised, it only returns the best performance.
            Otherwise, it returns the results obtained with the set parameters.

        Returns
        -------
        Dict[str, Any]
            The pipeline component performance report.
        """
        pass


    def find_concept_by_label(self, label: str, concepts: Set[Concept], model=model) -> Concept:
        """Find a concept based on its label.

        Parameters
        ----------
        label: str
            The label of the wanted concept.
        concepts: Set[Concept]
            The set of concepts to be searched.

        Returns
        -------
        Concept
            The concept with the wanted label.
        """
        selected_concept = None
        concept_embedings = [model.encode(concept.label) for concept in concepts]
        label_embeding = model.encode(label)
        for concept in concepts:
            if are_similar(label_embeding, model.encode(concept.label)):
                selected_concept = concept
                break
        return selected_concept
    
    def run(self, pipeline: Pipeline) -> None:
        """Method that is responsible for the execution of the component.

        Parameters
        ----------
        pipeline : Pipeline
            The pipeline running
        """
        metarelations = set()
        
        try:
            for meta_tuple in self.llm_output:
                source_concept = self.find_concept_by_label(meta_tuple[0], pipeline.kr.concepts,)
                destination_concept = self.find_concept_by_label(
                    meta_tuple[2], pipeline.kr.concepts
                )
                if source_concept is not None and destination_concept is not None and meta_tuple[1] == "is_generalised_by":
                    new_metarelation = Metarelation(
                        source_concept, destination_concept, "is_generalised_by"
                    )
                    metarelations.add(new_metarelation)
        except (SyntaxError, ValueError):
            logger.error(
                """LLM generator output is not in the expected format. 
                The metarelations can not be extracted."""
            )
        pipeline.kr.metarelations.update(metarelations)



In [26]:

axiom_generators = {    
        kr_concepts_to_owl_classes,
        kr_relations_to_owl_obj_props,
        kr_metarelations_to_owl,
        kr_relations_to_anonymous_some_parent,
        concept_lrs_to_owl_individuals
}

owl_axiom_extraction = OWLAxiomExtraction(
    owl_axiom_generators=axiom_generators,
    base_uri="https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#"
)

In [10]:

pipeline = Pipeline(
    spacy_model=nlp,
    pipeline_components=[
    ManualCandidateTermExtraction(
        ct_label_strings_map=ct_concept_label
    ),
    AgglomerativeClusteringConceptExtraction(
        distance_threshold=.3
    ),
    CustomHierarchisationComponent(
        llm_output=llm_output
    ),
    owl_axiom_extraction
    ],
    corpus_loader=corpus_loader
)
pipeline.kr.relations = found_relations


free_gpu() 
pipeline.run()



In [11]:
pipeline.kr.concepts

{block irregular distribution,
 bulges,
 crease,
 crescent gap,
 cutting,
 damage,
 defect type,
 deformation,
 drying,
 fish scale shape,
 fold,
 inclusion,
 low-carbon,
 mechanical failure,
 mechanical lubricant,
 metal surface,
 oil spot,
 pits,
 plaque,
 pressure,
 punching,
 rolled pit,
 roller,
 silk spot,
 spots,
 steel plate,
 steel strip surface,
 tension roll,
 uncoiling process,
 waist folding,
 water spot,
 weld line,
 work roll}

In [12]:
from olaf.repository.serialiser.kr_serialisers import KRJSONSerialiser


kr_ttl_graph_path = os.path.join("defect_onto_kr_ttl_graph.ttl")
kr_rdf_graph_path = os.path.join("defect_onto_kr_rdf_graph.rdf")
kr_json_graph_path = os.path.join("defect_onto_kr_json_graph.json")


pipeline.kr.rdf_graph.serialize(kr_ttl_graph_path, format="ttl")

kr_json_serialiser = KRJSONSerialiser()
kr_json_serialiser.serialise(kr=pipeline.kr, file_path=kr_json_graph_path)

# Performing hierarchisation

In [42]:
metarelation_llm_output = [
    ["block irregular distribution", "is_generalised_by", "Appearance"],
    ["bulges", "is_generalised_by", "Appearance"],
    ["crease", "is_generalised_by", "Defect Type"],
    ["crescent gap", "is_generalised_by", "Defect Type"],
    ["cutting", "is_generalised_by", "Cause"],
    ["damage", "is_generalised_by", "Cause"],
    ["deformation", "is_generalised_by", "Cause"],
    ["drying", "is_generalised_by", "Cause"],
    ["fish scale shape", "is_generalised_by", "Appearance"],
    ["fold", "is_generalised_by", "Appearance"],
    ["inclusion", "is_generalised_by", "Defect Type"],
    ["low-carbon", "is_generalised_by", "Cause"],
    ["mechanical failure", "is_generalised_by", "Cause"],
    ["mechanical lubricant", "is_generalised_by", "Cause"],
    ["metal surface", "is_generalised_by", "Product"],
    ["oil spot", "is_generalised_by", "Defect Type"],
    ["pits", "is_generalised_by", "Appearance"],
    ["plaque", "is_generalised_by", "Appearance"],
    ["pressure", "is_generalised_by", "Cause"],
    ["punching", "is_generalised_by", "Defect Type"],
    ["rolled pit", "is_generalised_by", "Defect Type"],
    ["roller", "is_generalised_by", "Factory"],
    ["silk spot", "is_generalised_by", "Defect Type"],
    ["spots", "is_generalised_by", "Appearance"],
    ["steel plate", "is_generalised_by", "Product"],
    ["steel strip surface", "is_generalised_by", "Product"],
    ["tension roll", "is_generalised_by", "Factory"],
    ["uncoiling process", "is_generalised_by", "Factory"],
    ["waist folding", "is_generalised_by", "Defect Type"],
    ["water spot", "is_generalised_by", "Defect Type"],
    ["welding line", "is_generalised_by", "Defect Type"],
    ["work roll", "is_generalised_by", "Factory"]
]

# In this seed Knowledge representation we add our five unmissable super classes that might not appear in the corpus.
seed_kr = KnowledgeRepresentation(
    concepts={
        Concept(label) for label in ['cause', 'defect type', 'product', 'factory', 'appearance'] 
    },
    relations=found_relations
)

pipeline = Pipeline(
    spacy_model=nlp,
    seed_kr=seed_kr,
    pipeline_components=[
    ManualCandidateTermExtraction(
        ct_label_strings_map=ct_concept_label
    ),
    AgglomerativeClusteringConceptExtraction(
        distance_threshold=.3
    ),
    CustomHierarchisationComponent(
        llm_output=metarelation_llm_output
    ),
     OWLAxiomExtraction(
        owl_axiom_generators={
            kr_concepts_to_owl_classes,
            kr_relations_to_owl_obj_props,
            kr_metarelations_to_owl,
            kr_relations_to_anonymous_some_parent,
            concept_lrs_to_owl_individuals
        },
        base_uri="https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#"
    )
    ],
    corpus_loader=corpus_loader
)


free_gpu() 
pipeline.run()


kr_ttl_graph_path = os.path.join("../results/defect_onto_kr_ttl_graph_cheat.ttl")

pipeline.kr.rdf_graph.serialize(kr_ttl_graph_path, format="ttl")





<Graph identifier=Nb7bf766a35194d769503feec5ca87103 (<class 'rdflib.graph.Graph'>)>

In [14]:
pipeline.kr.metarelations

{meta : (low-carbon, is_generalised_by, cause),
 meta : (oil spot, is_generalised_by, defect type),
 meta : (inclusion, is_generalised_by, defect type),
 meta : (weld line, is_generalised_by, defect type),
 meta : (spots, is_generalised_by, appearance),
 meta : (punching, is_generalised_by, defect type),
 meta : (rolled pit, is_generalised_by, defect type),
 meta : (water spot, is_generalised_by, defect type),
 meta : (plaque, is_generalised_by, appearance),
 meta : (steel strip surface, is_generalised_by, product),
 meta : (mechanical lubricant, is_generalised_by, cause),
 meta : (block irregular distribution, is_generalised_by, appearance),
 meta : (pressure, is_generalised_by, cause),
 meta : (cutting, is_generalised_by, cause),
 meta : (fold, is_generalised_by, defect type),
 meta : (fish scale shape, is_generalised_by, appearance),
 meta : (uncoiling process, is_generalised_by, factory),
 meta : (metal surface, is_generalised_by, product),
 meta : (crescent gap, is_generalised_by,

# performing hierarchisation with a another version of llm prompt ( becausse of an another version of prompt)

we make undertand the model that all provided concepts can be  super classes (OWL Classes)

prompt :

You are an helpful assistant helping building an ontology from technical documentation of quality defects.
        Based on the context given, define if there is a hierarchy between the listed concepts.
        The high level of concept super classes are ['Cause', 'Defect Type', 'Product', 'Factory', 'Appearance'] and all provided concepts can be super class.
        The result should be given as a python list of list of string with double quotes.

        Here is an example. Concepts: animal, mammal, dog(canine), flower
        [["mammal","is_generalised_by","animal"], ["dog","is_generalised_by","mammal"], ["dog","is_generalised_by","animal"]]
Only 'is_generalised_by' is possible.
    
        Context:    
        Concepts:

In [36]:
metarelation_llm_output = [
    ["block irregular distribution", "is_generalised_by", "inclusion"],
    ["bulges", "is_generalised_by", "rolled pit"],
    ["crease", "is_generalised_by", "Defect Type"],
    ["crescent gap", "is_generalised_by", "Defect Type"],
    ["cutting", "is_generalised_by", "Cause"],
    ["damage", "is_generalised_by", "Cause"],
    ["deformation", "is_generalised_by", "waist folding"],
    ["drying", "is_generalised_by", "Cause"],
    ["fish scale shape", "is_generalised_by", "inclusion"],
    ["fold", "is_generalised_by", "crease"],
    ["inclusion", "is_generalised_by", "Defect Type"],
    ["low-carbon", "is_generalised_by", "Cause"],
    ["mechanical failure", "is_generalised_by", "Cause"],
    ["mechanical lubricant", "is_generalised_by", "Cause"],
    ["metal surface", "is_generalised_by", "Product"],
    ["oil spot", "is_generalised_by", "Defect Type"],
    ["pits", "is_generalised_by", "rolled pit"],
    ["plaque", "is_generalised_by", "silk spot"],
    ["pressure", "is_generalised_by", "Cause"],
    ["punching", "is_generalised_by", "Defect Type"],
    ["rolled pit", "is_generalised_by", "Defect Type"],
    ["roller", "is_generalised_by", "Factory"],
    ["silk spot", "is_generalised_by", "Defect Type"],
    ["spots", "is_generalised_by", "inclusion"],
    ["steel plate", "is_generalised_by", "Product"],
    ["steel strip surface", "is_generalised_by", "Product"],
    ["tension roll", "is_generalised_by", "Factory"],
    ["uncoiling process", "is_generalised_by", "Factory"],
    ["waist folding", "is_generalised_by", "Defect Type"],
    ["water spot", "is_generalised_by", "Defect Type"],
    ["welding line", "is_generalised_by", "Defect Type"],
    ["work roll", "is_generalised_by", "Factory"]
]


seed_kr = KnowledgeRepresentation(
    concepts={
        Concept(label) for label in ['cause', 'defect type', 'product', 'factory', 'appearance'] 
    },
    relations=found_relations
)

pipeline = Pipeline(
    spacy_model=nlp,
    seed_kr=seed_kr,
    pipeline_components=[
    ManualCandidateTermExtraction(
        ct_label_strings_map=ct_concept_label
    ),
    AgglomerativeClusteringConceptExtraction(
        distance_threshold=.3
    ),
    CustomHierarchisationComponent(
        llm_output=metarelation_llm_output
    ),
    OWLAxiomExtraction(
        owl_axiom_generators={    
            kr_concepts_to_owl_classes,
            kr_relations_to_owl_obj_props,
            kr_metarelations_to_owl,
            kr_relations_to_domain_range_obj_props,
            concept_lrs_to_owl_individuals
        },
        base_uri="https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#"
    )
    ],
    corpus_loader=corpus_loader
)
# pipeline.kr.relations = found_relations


free_gpu() 
pipeline.run()


kr_ttl_graph_path = os.path.join("../results/defect_onto_kr_ttl_graph_cheat_v2.ttl")

pipeline.kr.rdf_graph.serialize(kr_ttl_graph_path, format="ttl")



<Graph identifier=Na186084015b74197802d517f06190ae7 (<class 'rdflib.graph.Graph'>)>

## axiomatisation enhancement

### added relation equivalent

In [40]:
metarelation_llm_output = [
    ["block irregular distribution", "is_generalised_by", "inclusion"],
    ["bulges", "is_generalised_by", "rolled pit"],
    ["crease", "is_generalised_by", "Defect Type"],
    ["crescent gap", "is_generalised_by", "Defect Type"],
    ["cutting", "is_generalised_by", "Cause"],
    ["damage", "is_generalised_by", "Cause"],
    ["deformation", "is_generalised_by", "waist folding"],
    ["drying", "is_generalised_by", "Cause"],
    ["fish scale shape", "is_generalised_by", "inclusion"],
    ["fold", "is_generalised_by", "crease"],
    ["inclusion", "is_generalised_by", "Defect Type"],
    ["low-carbon", "is_generalised_by", "Cause"],
    ["mechanical failure", "is_generalised_by", "Cause"],
    ["mechanical lubricant", "is_generalised_by", "Cause"],
    ["metal surface", "is_generalised_by", "Product"],
    ["oil spot", "is_generalised_by", "Defect Type"],
    ["pits", "is_generalised_by", "rolled pit"],
    ["plaque", "is_generalised_by", "silk spot"],
    ["pressure", "is_generalised_by", "Cause"],
    ["punching", "is_generalised_by", "Defect Type"],
    ["rolled pit", "is_generalised_by", "Defect Type"],
    ["roller", "is_generalised_by", "Factory"],
    ["silk spot", "is_generalised_by", "Defect Type"],
    ["spots", "is_generalised_by", "inclusion"],
    ["steel plate", "is_generalised_by", "Product"],
    ["steel strip surface", "is_generalised_by", "Product"],
    ["tension roll", "is_generalised_by", "Factory"],
    ["uncoiling process", "is_generalised_by", "Factory"],
    ["waist folding", "is_generalised_by", "Defect Type"],
    ["water spot", "is_generalised_by", "Defect Type"],
    ["welding line", "is_generalised_by", "Defect Type"],
    ["work roll", "is_generalised_by", "Factory"]
]


seed_kr = KnowledgeRepresentation(
    concepts={
        Concept(label) for label in ['cause', 'defect type', 'product', 'factory', 'appearance'] 
    },
    relations=found_relations
)



pipeline = Pipeline(
    spacy_model=nlp,
    seed_kr=seed_kr,
    pipeline_components=[
    ManualCandidateTermExtraction(
        ct_label_strings_map=ct_concept_label
    ),
    AgglomerativeClusteringConceptExtraction(
        distance_threshold=.3
    ),
    CustomHierarchisationComponent(
        llm_output=metarelation_llm_output
    ),
    OWLAxiomExtraction(
    owl_axiom_generators={    
        kr_concepts_to_owl_classes,
        kr_relations_to_owl_obj_props,
        kr_metarelations_to_owl,
        kr_relations_to_domain_range_obj_props,
        kr_relations_to_anonymous_some_equivalent,
        concept_lrs_to_owl_individuals
    },
    base_uri="https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#"
    )
    ],
    corpus_loader=corpus_loader
)
pipeline.kr.relations = found_relations


free_gpu() 
pipeline.run()


kr_ttl_graph_path = os.path.join("../results/defect_onto_kr_ttl_graph_cheat_equi.ttl")

pipeline.kr.rdf_graph.serialize(kr_ttl_graph_path, format="ttl")



<Graph identifier=N3f05a4a675c34c6aafac09b65572df78 (<class 'rdflib.graph.Graph'>)>

### add distinct class

In [18]:
from rdflib.collection import Collection
from rdflib import OWL, RDF, RDFS, BNode, Graph, Literal, URIRef

from olaf.commons.kr_to_rdf_tools import owl_class_uri, owl_instance_uri


def find_superclass(kr :  KnowledgeRepresentation, concept : Concept) -> Concept:

    super_classes = {metarelation.destination_concept for metarelation in kr.metarelations}
    
    related_metarelations = {
        metarelation for metarelation in kr.metarelations 
        if are_similar_label(concept.label, metarelation.source_concept.label) or 
        are_similar_label(concept.label, metarelation.destination_concept.label)
        }
    pass

def find_superclasses(kr :  KnowledgeRepresentation, concept : Concept):
    superclasses = []

    current_concept = concept
    while True:
        found = False
        for meta_relation in kr.metarelations:
            if meta_relation.source_concept.label == current_concept.label:
                superclasses.append(meta_relation.destination_concept)
                current_concept = meta_relation.destination_concept
                found = True
                break
        if not found:
            break

    return superclasses


def find_family_classes(kr :  KnowledgeRepresentation, concept: Concept, super_class : Concept):
    family_classes = []
    return {
        metarelation.source_concept
        for metarelation in kr.metarelations
        if metarelation.destination_concept.label == super_class.label
        and metarelation.source_concept.label != concept.label
    }


def all_classes_distinct(kr: KnowledgeRepresentation, base_uri: URIRef) -> Graph:
    """Create the RDF triples corresponding to make each KR concepts linguistic representation an
    OWL named instance and making each instance different.

    Parameters
    ----------
    kr : KnowledgeRepresentation
        The Knowledge Representation containing the concepts.
    base_uri : URIRef
        The base URI to use when creating the class URIs.

    Returns
    -------
    Graph
        The constructed RDF triples.
    """
    rdf_graph = Graph()

    concepts_uris = set()
    instance_uris = set()

    for concept in kr.concepts:
       
        concept_uri = owl_class_uri(label=concept.label, base_uri=base_uri)
        rdf_graph.add((concept_uri, RDF.type, OWL.Class))
        rdf_graph.add((concept_uri, RDFS.label, Literal(concept.label)))
        concepts_uris.add(concept_uri)

        super_class = find_superclasses(kr, concept)
        # super_class_uri = owl_class_uri(label=super_class.label, base_uri=base_uri)

        disjoinct_classes = kr.concepts - super_class
        for disjoinct_class in disjoinct_classes:
            disjoinct_class_uri = owl_class_uri(label=disjoinct_class.label, base_uri=base_uri)
            rdf_graph.add((
                concept_uri, 
                OWL.disjointWith, 
                disjoinct_class_uri))
            
            rdf_graph.add((
                disjoinct_class_uri, 
                OWL.disjointWith, 
                concept_uri))


        for c_lr in concept.linguistic_realisations:
            instance_uri = owl_instance_uri(label=c_lr.label, base_uri=base_uri)
            rdf_graph.add((instance_uri, RDF.type, concept_uri))
            rdf_graph.add((instance_uri, RDF.type, OWL.NamedIndividual))
            rdf_graph.add((instance_uri, RDFS.label, Literal(c_lr.label)))
            instance_uris.add(instance_uri)
    
    
    instance_node = BNode()

    concepts_uris_collection = Collection(graph=rdf_graph, uri=BNode(), seq=list(concepts_uris))
    instance_uris_collection = Collection(graph=rdf_graph, uri=instance_node, seq=list(instance_uris))
    
    # concept_node = BNode()
    # rdf_graph.add((concept_node, RDF.type, OWL.AllDisjointClasses))
    # rdf_graph.add((concept_node, OWL.members, concepts_uris_collection.uri))

    # rdf_graph.add((instance_node, RDF.type, OWL.AllDifferent))
    # rdf_graph.add((instance_node, OWL.distinctMembers, instance_uris_collection.uri))

    return rdf_graph



def all_sub_classes_distinct(kr: KnowledgeRepresentation, base_uri: URIRef) -> Graph:
#     """Create the RDF triples corresponding to make each KR concepts linguistic representation an
#     OWL named instance and making each instance different.

#     Parameters
#     ----------
#     kr : KnowledgeRepresentation
#         The Knowledge Representation containing the concepts.
#     base_uri : URIRef
#         The base URI to use when creating the class URIs.

#     Returns
#     -------
#     Graph
#         The constructed RDF triples.
#     """

    rdf_graph = Graph()

    concepts_uris = set()
    instance_uris = set()

    for concept in kr.concepts:
       
        concept_uri = owl_class_uri(label=concept.label, base_uri=base_uri)
        rdf_graph.add((concept_uri, RDF.type, OWL.Class))
        rdf_graph.add((concept_uri, RDFS.label, Literal(concept.label)))
        concepts_uris.add(concept_uri)

        super_classes = find_superclasses(kr, concept)
        if len(super_classes):
            super_class= super_classes[-1]
            class_family  = {
                metarelation.source_concept
                for metarelation in kr.metarelations
                if metarelation.destination_concept.label == super_class.label
                and metarelation.source_concept.label != concept.label
            }


        
            for disjoinct_class in class_family:
                disjoinct_class_uri = owl_class_uri(label=disjoinct_class.label, base_uri=base_uri)
                rdf_graph.add((
                    concept_uri, 
                    OWL.disjointWith, 
                    disjoinct_class_uri))
                
                rdf_graph.add((
                    disjoinct_class_uri, 
                    OWL.disjointWith, 
                    concept_uri))


        for c_lr in concept.linguistic_realisations:
            instance_uri = owl_instance_uri(label=c_lr.label, base_uri=base_uri)
            rdf_graph.add((instance_uri, RDF.type, concept_uri))
            rdf_graph.add((instance_uri, RDF.type, OWL.NamedIndividual))
            rdf_graph.add((instance_uri, RDFS.label, Literal(c_lr.label)))
            instance_uris.add(instance_uri)


    return rdf_graph

    

In [19]:
find_family_classes(pipeline.kr, Concept("water spot"), Concept("defect type"))

{crease,
 crescent gap,
 inclusion,
 oil spot,
 punching,
 rolled pit,
 silk spot,
 waist folding,
 weld line}

In [20]:
pipeline.kr.metarelations

{meta : (silk spot, is_generalised_by, defect type),
 meta : (block irregular distribution, is_generalised_by, inclusion),
 meta : (inclusion, is_generalised_by, defect type),
 meta : (steel plate, is_generalised_by, product),
 meta : (weld line, is_generalised_by, defect type),
 meta : (mechanical lubricant, is_generalised_by, cause),
 meta : (deformation, is_generalised_by, waist folding),
 meta : (cutting, is_generalised_by, cause),
 meta : (spots, is_generalised_by, inclusion),
 meta : (crease, is_generalised_by, defect type),
 meta : (oil spot, is_generalised_by, defect type),
 meta : (metal surface, is_generalised_by, product),
 meta : (damage, is_generalised_by, cause),
 meta : (water spot, is_generalised_by, defect type),
 meta : (roller, is_generalised_by, factory),
 meta : (punching, is_generalised_by, defect type),
 meta : (crescent gap, is_generalised_by, defect type),
 meta : (bulges, is_generalised_by, rolled pit),
 meta : (uncoiling process, is_generalised_by, factory),


In [21]:
metarelation_llm_output = [
    ["block irregular distribution", "is_generalised_by", "inclusion"],
    ["bulges", "is_generalised_by", "rolled pit"],
    ["crease", "is_generalised_by", "Defect Type"],
    ["crescent gap", "is_generalised_by", "Defect Type"],
    ["cutting", "is_generalised_by", "Cause"],
    ["damage", "is_generalised_by", "Cause"],
    ["deformation", "is_generalised_by", "waist folding"],
    ["drying", "is_generalised_by", "Cause"],
    ["fish scale shape", "is_generalised_by", "inclusion"],
    ["fold", "is_generalised_by", "crease"],
    ["inclusion", "is_generalised_by", "Defect Type"],
    ["low-carbon", "is_generalised_by", "Cause"],
    ["mechanical failure", "is_generalised_by", "Cause"],
    ["mechanical lubricant", "is_generalised_by", "Cause"],
    ["metal surface", "is_generalised_by", "Product"],
    ["oil spot", "is_generalised_by", "Defect Type"],
    ["pits", "is_generalised_by", "rolled pit"],
    ["plaque", "is_generalised_by", "silk spot"],
    ["pressure", "is_generalised_by", "Cause"],
    ["punching", "is_generalised_by", "Defect Type"],
    ["rolled pit", "is_generalised_by", "Defect Type"],
    ["roller", "is_generalised_by", "Factory"],
    ["silk spot", "is_generalised_by", "Defect Type"],
    ["spots", "is_generalised_by", "inclusion"],
    ["steel plate", "is_generalised_by", "Product"],
    ["steel strip surface", "is_generalised_by", "Product"],
    ["tension roll", "is_generalised_by", "Factory"],
    ["uncoiling process", "is_generalised_by", "Factory"],
    ["waist folding", "is_generalised_by", "Defect Type"],
    ["water spot", "is_generalised_by", "Defect Type"],
    ["welding line", "is_generalised_by", "Defect Type"],
    ["work roll", "is_generalised_by", "Factory"]
]


seed_kr = KnowledgeRepresentation(
    concepts={
        Concept(label) for label in ['cause', 'defect type', 'product', 'factory', 'appearance'] 
    }
)



pipeline = Pipeline(
    spacy_model=nlp,
    seed_kr=seed_kr,
    pipeline_components=[
    ManualCandidateTermExtraction(
        ct_label_strings_map=ct_concept_label
    ),
    AgglomerativeClusteringConceptExtraction(
        distance_threshold=.3
    ),
    CustomHierarchisationComponent(
        llm_output=metarelation_llm_output
    ),
    OWLAxiomExtraction(
        owl_axiom_generators={    
            kr_concepts_to_owl_classes,
            kr_relations_to_owl_obj_props,
            kr_metarelations_to_owl,
            kr_relations_to_domain_range_obj_props,
            kr_relations_to_anonymous_some_equivalent,
            all_classes_distinct
        },
        base_uri="https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#"
    )
    ],
    corpus_loader=corpus_loader
)
pipeline.kr.relations = found_relations


free_gpu() 
pipeline.run()


kr_ttl_graph_path = os.path.join("../results/defect_onto_graph_distinct_classes_v2.ttl")

pipeline.kr.rdf_graph.serialize(kr_ttl_graph_path, format="ttl")

                                    Reasoner output: 
                                   2024-07-23 14:24:01,477 ERROR org.obolibrary.robot.ReasonerHelper - The ontology is inconsistent. TIP: use a tool like Protege to find explanations
.
                                ]
                                    Reasoner output: 
                                   2024-07-23 14:24:03,315 ERROR org.obolibrary.robot.ReasonerHelper - The ontology is inconsistent. TIP: use a tool like Protege to find explanations
.
                                ]
                                    Reasoner output: 
                                   2024-07-23 14:24:04,889 ERROR org.obolibrary.robot.ReasonerHelper - The ontology is inconsistent. TIP: use a tool like Protege to find explanations
.
                                ]
                                    Reasoner output: 
                                   2024-07-23 14:24:06,397 ERROR org.obolibrary.robot.ReasonerHelper - The ontology is inconsis

<Graph identifier=N891ee82875ca4381aee7754cdf31adf6 (<class 'rdflib.graph.Graph'>)>

# The best version of axiomation

In this vresion we created individual from concepts.

we created classe so that subclasses are distinct from each other.

In [44]:
metarelation_llm_output = [
    ["block irregular distribution", "is_generalised_by", "Appearance"],
    ["bulges", "is_generalised_by", "Appearance"],
    ["crease", "is_generalised_by", "Defect Type"],
    ["crescent gap", "is_generalised_by", "Defect Type"],
    ["cutting", "is_generalised_by", "Cause"],
    ["damage", "is_generalised_by", "Cause"],
    ["deformation", "is_generalised_by", "Cause"],
    ["drying", "is_generalised_by", "Cause"],
    ["fish scale shape", "is_generalised_by", "Appearance"],
    ["fold", "is_generalised_by", "Appearance"],
    ["inclusion", "is_generalised_by", "Defect Type"],
    ["low-carbon", "is_generalised_by", "Cause"],
    ["mechanical failure", "is_generalised_by", "Cause"],
    ["mechanical lubricant", "is_generalised_by", "Cause"],
    ["metal surface", "is_generalised_by", "Product"],
    ["oil spot", "is_generalised_by", "Defect Type"],
    ["pits", "is_generalised_by", "Appearance"],
    ["plaque", "is_generalised_by", "Appearance"],
    ["pressure", "is_generalised_by", "Cause"],
    ["punching", "is_generalised_by", "Defect Type"],
    ["rolled pit", "is_generalised_by", "Defect Type"],
    ["roller", "is_generalised_by", "Factory"],
    ["silk spot", "is_generalised_by", "Defect Type"],
    ["spots", "is_generalised_by", "Appearance"],
    ["steel plate", "is_generalised_by", "Product"],
    ["steel strip surface", "is_generalised_by", "Product"],
    ["tension roll", "is_generalised_by", "Factory"],
    ["uncoiling process", "is_generalised_by", "Factory"],
    ["waist folding", "is_generalised_by", "Defect Type"],
    ["water spot", "is_generalised_by", "Defect Type"],
    ["welding line", "is_generalised_by", "Defect Type"],
    ["work roll", "is_generalised_by", "Factory"]
]

seed_kr = KnowledgeRepresentation(
    concepts={
        Concept(label) for label in ['cause', 'defect type', 'product', 'factory', 'appearance'] 
    }
)

pipeline = Pipeline(
    spacy_model=nlp,
    seed_kr=seed_kr,
    pipeline_components=[
    ManualCandidateTermExtraction(
        ct_label_strings_map=ct_concept_label
    ),
    AgglomerativeClusteringConceptExtraction(
        distance_threshold=.3
    ),
    CustomHierarchisationComponent(
        llm_output=metarelation_llm_output
    ),
     OWLAxiomExtraction(
        owl_axiom_generators={
            kr_concepts_to_owl_classes,
            kr_relations_to_owl_obj_props,
            kr_metarelations_to_owl,
            kr_relations_to_anonymous_some_parent,
            concept_lrs_to_owl_individuals,
            all_sub_classes_distinct
        },
        base_uri="https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#"
    )
    ],
    corpus_loader=corpus_loader
)
pipeline.kr.relations = found_relations


free_gpu() 
pipeline.run()


kr_ttl_graph_path = os.path.join("../results/defect_onto_kr_ttl_graph_cheat.ttl")

pipeline.kr.rdf_graph.serialize(kr_ttl_graph_path, format="ttl")





<Graph identifier=Nddff7bea6317475c8f9af97294b87206 (<class 'rdflib.graph.Graph'>)>