# DEMO-INTER-AutomatedTool (Google Colab)

This Google Colab notebook provides an automated tool for comparing ontology classes.  
It is designed to allow users to:
- Load an ontology from a URL or an uploaded file.
- Extract ontology class information, including IRIs, labels, synonyms, and definitions.
- Display matching classes in a structured table and generate a downloadable CSV.

**How to Use This Notebook:**
Simply run each cell in order and follow the on-screen prompts.

## Find matching class IRIs

In [5]:
# Install necessary packages
!pip install rdflib requests pandas

import io
import base64
import csv
import requests
import rdflib
import pandas as pd
from rdflib import Graph
from google.colab import files
from IPython.display import display, HTML

def load_ontology_from_url(ontology_url, timeout=60):
    """
    Fetches an ontology file from a URL and loads it into an RDF graph.

    Args:
        ontology_url: Address of an RDF/XML (OWL) document.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the notebook cell indefinitely.

    Returns:
        An rdflib Graph populated from the downloaded document.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    response = requests.get(ontology_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"❌ Error fetching ontology from {ontology_url}: {response.status_code}")

    graph = Graph()
    # Pass the raw bytes: the XML parser then honours the encoding declared in
    # the document instead of the charset requests guesses from HTTP headers.
    graph.parse(data=response.content, format="xml")
    print(f"✅ Successfully fetched ontology from: {ontology_url}")
    return graph

def load_ontology_from_file(file_path):
    """
    Parses the RDF/XML ontology stored at ``file_path`` and returns the graph.

    A confirmation line naming the file is printed once parsing succeeds.
    """
    ontology_graph = Graph()
    ontology_graph.parse(file_path, format="xml")
    confirmation = f"✅ Successfully loaded ontology from local file: {file_path}"
    print(confirmation)
    return ontology_graph

def extract_filtered_classes(graph, excluded_base_uris):
    """
    Extracts class IRIs, labels, and definitions from the ontology while filtering out upper-level ontology classes.

    Args:
        graph: Object with a ``query(sparql)`` method (an rdflib Graph) whose
            result rows can be indexed by "class", "label" and "definition".
        excluded_base_uris: Iterable of IRI prefixes; any class whose IRI starts
            with one of them is left out. May be empty or None, in which case
            nothing is excluded.

    Returns:
        Dict mapping each class IRI (str) to ``{"label": ..., "definition": ...}``.
        Missing values are replaced by "(no label)" / "(no definition)".
    """
    # Escape backslashes and single quotes so a prefix cannot break out of the
    # SPARQL string literal it is embedded in.
    exclusion_tests = [
        "STRSTARTS(STR(?class), '{}')".format(
            base_uri.replace("\\", "\\\\").replace("'", "\\'")
        )
        for base_uri in (excluded_base_uris or ())
    ]

    # With nothing to exclude the clause must be dropped entirely: an empty
    # "!()" expression is a SPARQL syntax error.
    exclusion_clause = ""
    if exclusion_tests:
        joined_tests = " || ".join(exclusion_tests)
        exclusion_clause = f"FILTER (!({joined_tests}))  # Exclude specified base URIs"

    query = f"""
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX iao: <http://purl.obolibrary.org/obo/IAO_>

    SELECT DISTINCT ?class ?label ?definition WHERE {{
        ?class a owl:Class .
        OPTIONAL {{ ?class rdfs:label ?label . }}
        OPTIONAL {{ ?class iao:0000115 ?definition . }}
        OPTIONAL {{ ?class rdfs:definition ?definition . }}
        OPTIONAL {{ ?class rdfs:comment ?definition . }}
        {exclusion_clause}
    }}
    """
    results = graph.query(query)
    # If a class yields several rows, the last row seen determines its entry.
    return {
        str(row["class"]): {
            "label": str(row["label"]) if row["label"] else "(no label)",
            "definition": str(row["definition"]) if row["definition"] else "(no definition)",
        }
        for row in results
    }

def find_common_class_iris(classes1, classes2):
    """
    Returns the set of class IRIs that appear as keys in both ontology dictionaries.
    """
    shared_iris = {iri for iri in classes1 if iri in classes2}
    return shared_iris

def save_results_to_csv(common_class_iris, classes1, classes2, filename='matching_IRIs.csv'):
    """
    Saves common class IRIs, labels, and definitions to a CSV file and provides a download link.

    Args:
        common_class_iris: Iterable of class IRIs present in both ontologies.
        classes1: Dict of IRI -> {"label", "definition"} for ontology 1.
        classes2: Dict of IRI -> {"label", "definition"} for ontology 2.
        filename: Name of the CSV file written to disk and offered for download.

    Side effects:
        Writes ``filename``, prints a confirmation, and displays both an HTML
        table and a download link in the notebook output.
    """
    from html import escape  # local import: only needed for the download link

    columns = [
        'Class IRI',
        'Label in Ontology 1',
        'Label in Ontology 2',
        'Definition in Ontology 1',
        'Definition in Ontology 2',
    ]
    # Sort the IRIs so the table and CSV come out in the same order on every
    # run; iterating a set directly gives an arbitrary order.
    rows = [
        {
            'Class IRI': iri,
            'Label in Ontology 1': classes1[iri]['label'],
            'Label in Ontology 2': classes2[iri]['label'],
            'Definition in Ontology 1': classes1[iri]['definition'],
            'Definition in Ontology 2': classes2[iri]['definition'],
        }
        for iri in sorted(common_class_iris)
    ]
    # Explicit columns keep the header row even when there are no matches.
    df = pd.DataFrame(rows, columns=columns)

    df.to_csv(filename, index=False)

    print(f"\n✅ Matching class IRIs have been saved to: {filename}")

    # Display in Colab
    display(HTML(df.to_html(index=False)))

    # Provide a download link. "text/csv" is the registered MIME type, and the
    # explicit charset keeps non-ASCII characters intact when the file is opened.
    csv_data = df.to_csv(index=False)
    b64 = base64.b64encode(csv_data.encode("utf-8")).decode("ascii")
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        f'download="{escape(filename)}">Download CSV</a>'
    )
    display(HTML(href))

# --- MAIN EXECUTION ---

def _prompt_for_ontology(ordinal):
    """
    Asks the user how to supply one ontology and returns the loaded RDF graph.

    Args:
        ordinal: Word used in the prompts, e.g. "first" or "second".

    Raises:
        Exception: If the menu choice is not 1 or 2, or no file was uploaded.
    """
    print(f"Choose an option for the {ordinal} ontology:")
    print("1: Provide a URL")
    print("2: Upload a file")
    # Strip the answer so stray spaces around "1"/"2" are not rejected.
    choice = input("Enter 1 or 2: ").strip()

    if choice == "1":
        ontology_url = input(f"Enter the URL of the {ordinal} ontology: ").strip()
        return load_ontology_from_url(ontology_url)
    if choice == "2":
        print(f"Upload the {ordinal} ontology file:")
        uploaded_files = files.upload()
        # Guard against an empty result (presumably a cancelled upload dialog)
        # so the user gets a clear message rather than an IndexError.
        if not uploaded_files:
            raise Exception("❌ No file was uploaded.")
        return load_ontology_from_file(next(iter(uploaded_files)))
    raise Exception("❌ Invalid selection. Please enter either 1 or 2.")


ontology1 = _prompt_for_ontology("first")
print()  # blank line between the two prompts
ontology2 = _prompt_for_ontology("second")

# Define base URIs to exclude (e.g., upper-level ontology classes)
excluded_base_uris = [
    "http://purl.obolibrary.org/obo/IAO_",
    "http://purl.obolibrary.org/obo/BFO_"
]

# Extract class details while filtering out excluded base URIs
classes1 = extract_filtered_classes(ontology1, excluded_base_uris)
classes2 = extract_filtered_classes(ontology2, excluded_base_uris)

# Identify common class IRIs
common_class_iris = find_common_class_iris(classes1, classes2)

# Display the number of common class IRIs found
print(f"\n🔍 Total common class IRIs found: {len(common_class_iris)}")

# Save results to CSV, display in Colab, and provide download link
if common_class_iris:
    save_results_to_csv(common_class_iris, classes1, classes2, filename="matching_IRIs.csv")
else:
    print("❌ No common class IRIs found.")


Choose an option for the first ontology:
1: Provide a URL
2: Upload a file


Enter 1 or 2:  1
Enter the URL of the first ontology:  https://raw.githubusercontent.com/HumanBehaviourChangeProject/ontologies/refs/heads/master/Behaviour/bcio_behaviour.owl


✅ Successfully fetched ontology from: https://raw.githubusercontent.com/HumanBehaviourChangeProject/ontologies/refs/heads/master/Behaviour/bcio_behaviour.owl

Choose an option for the second ontology:
1: Provide a URL
2: Upload a file


Enter 1 or 2:  1
Enter the URL of the second ontology:  https://raw.githubusercontent.com/HumanBehaviourChangeProject/ontologies/refs/heads/master/bcio.owl


✅ Successfully fetched ontology from: https://raw.githubusercontent.com/HumanBehaviourChangeProject/ontologies/refs/heads/master/bcio.owl

🔍 Total common class IRIs found: 238

✅ Matching class IRIs have been saved to: matching_IRIs.csv


Class IRI,Label in Ontology 1,Label in Ontology 2,Definition in Ontology 1,Definition in Ontology 2
http://humanbehaviourchange.org/ontology/BCIO_050824,knowledge acquisition about reducing harmful behaviours,knowledge acquisition about reducing harmful behaviours,A knowledge development behaviour where the knowledge is about how to reduce or avoid a harmful behaviour.,A knowledge development behaviour where the knowledge is about how to reduce or avoid a harmful behaviour.
http://humanbehaviourchange.org/ontology/BCIO_050435,behavioural attribute,behavioural attribute,A process attribute of an individual human behaviour.,A process attribute of an individual human behaviour.
http://humanbehaviourchange.org/ontology/BCIO_050361,making a referral to another health care service,making a referral to another health care service,"Providing healthcare that involves directing a person to another healthcare professional or organisation to assess, monitor, improve or maintain an aspect of this person’s health or wellbeing.","Providing healthcare that involves directing a person to another healthcare professional or organisation to assess, monitor, improve or maintain an aspect of this person’s health or wellbeing."
http://humanbehaviourchange.org/ontology/BCIO_050467,low emotional management exertion expended on a behaviour,low emotional management exertion expended on a behaviour,Emotional management exertion expended on a behaviour that is low.,Emotional management exertion expended on a behaviour that is low.
http://humanbehaviourchange.org/ontology/BCIO_050209,individual human behaviour change,individual human behaviour change,A process that results in a difference in enactment of some individual human behaviour or individual human behaviour pattern from what would have been the case otherwise.,A process that results in a difference in enactment of some individual human behaviour or individual human behaviour pattern from what would have been the case otherwise.
http://humanbehaviourchange.org/ontology/BCIO_050424,non-linguistic communication behaviour using body language,non-linguistic communication behaviour using body language,A non-linguistic communication behaviour that involves expressing thoughts or feelings through bodily movement or posture.,A non-linguistic communication behaviour that involves expressing thoughts or feelings through bodily movement or posture.
http://humanbehaviourchange.org/ontology/BCIO_042000,human behaviour,human behaviour,A process that is an individual human behaviour or a population behaviour.,A process that is an individual human behaviour or a population behaviour.
http://humanbehaviourchange.org/ontology/BCIO_050815,emotionally expressive behaviour,emotionally expressive behaviour,An expressive behaviour that conveys some emotion.,An expressive behaviour that conveys some emotion.
http://humanbehaviourchange.org/ontology/BCIO_050827,past occurrence of a target behaviour,past occurrence of a target behaviour,An individual human behaviour that is the target of an intervention and has been previously enacted.,An individual human behaviour that is the target of an intervention and has been previously enacted.
http://humanbehaviourchange.org/ontology/BCIO_050809,coping behaviour,coping behaviour,An individual human behaviour that has the goal to reduce harm or discomfort.,An individual human behaviour that has the goal to reduce harm or discomfort.


## Compare two ontologies and identify synonyms or the same labels where class IRIs are different

In [None]:
# Install necessary packages
!pip install rdflib requests pandas

import io
import base64
import csv
import requests
import os
import rdflib
import pandas as pd
from rdflib import Graph
from google.colab import files
from IPython.display import display, HTML

def load_ontology_from_url(ontology_url, timeout=60):
    """
    Fetches an ontology file from a URL and loads it into an RDF graph.

    Args:
        ontology_url: Address of an RDF/XML (OWL) document.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the notebook cell indefinitely.

    Returns:
        An rdflib Graph populated from the downloaded document.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    response = requests.get(ontology_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"❌ Error fetching ontology from {ontology_url}: {response.status_code}")

    graph = Graph()
    # Parse the raw bytes so the encoding declared inside the XML document is
    # used, not the charset requests infers from the HTTP headers.
    graph.parse(data=response.content, format="xml")
    print(f"✅ Successfully fetched ontology from: {ontology_url}")
    return graph

def load_ontology_from_file(file_path):
    """
    Reads the RDF/XML ontology at ``file_path`` (a locally uploaded file) into a new graph.

    Prints a confirmation naming the file and returns the populated graph.
    """
    parsed = Graph()
    parsed.parse(file_path, format="xml")
    status_line = f"✅ Successfully loaded ontology from local file: {file_path}"
    print(status_line)
    return parsed

def extract_class_details(graph):
    """
    Extracts class labels, synonyms, and definitions from the ontology.

    Args:
        graph: Object with a ``query(sparql)`` method (an rdflib Graph) whose
            result rows can be indexed by "class", "label", "synonym" and
            "definition".

    Returns:
        Dict mapping each class IRI (str) to
        ``{"label": str | None, "synonyms": set[str], "definition": str | None}``.
        Synonyms are lower-cased; label and definition come from the first row
        seen for the class.
    """
    # All four oboInOwl synonym properties are matched by ONE optional pattern
    # using a property-path alternation. Separate OPTIONAL blocks sharing
    # ?synonym would not work: once the first block binds ?synonym, later
    # blocks can only match the same value, so other synonym kinds get dropped.
    query = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX iao: <http://purl.obolibrary.org/obo/IAO_>
    PREFIX go: <http://www.geneontology.org/formats/oboInOwl#>

    SELECT ?class ?label ?synonym ?definition
    WHERE {
        ?class a owl:Class .
        OPTIONAL { ?class rdfs:label ?label . }
        OPTIONAL { ?class iao:0000115 ?definition . }
        OPTIONAL { ?class rdfs:definition ?definition . }
        OPTIONAL { ?class rdfs:comment ?definition . }
        OPTIONAL { ?class go:hasBroadSynonym|go:hasNarrowSynonym|go:hasExactSynonym|go:hasRelatedSynonym ?synonym . }
    }
    """
    results = graph.query(query)
    class_info = {}

    # Each class can produce several rows (one per synonym); fold them together.
    for row in results:
        class_uri = str(row['class'])
        label = str(row['label']) if row['label'] else None
        synonym = str(row['synonym']) if row['synonym'] else None
        definition = str(row['definition']) if row['definition'] else None

        if class_uri not in class_info:
            class_info[class_uri] = {'label': label, 'synonyms': set(), 'definition': definition}

        # Store synonyms as a lowercase set to ensure case-insensitive matching
        if synonym:
            class_info[class_uri]['synonyms'].add(synonym.lower())

    return class_info

def find_matching_classes(classes1, classes2):
    """
    Compares classes between two ontologies based on labels and synonyms to find matches.

    A pair (class1, class2) with different URIs matches when, ignoring case,
    the labels are equal, or the label of one appears among the synonyms of
    the other (synonyms are expected to be stored lower-cased).

    Args:
        classes1: Dict of URI -> {"label", "synonyms", "definition"} for ontology 1.
        classes2: Same structure for ontology 2.

    Returns:
        List of tuples ``(uri1, label1, definition1, uri2, label2, definition2)``,
        ordered by the position of uri1 in ``classes1`` and then of uri2 in
        ``classes2``. Each pair appears at most once.
    """
    # Index ontology 2 once so each class of ontology 1 is resolved by lookup
    # instead of scanning every class of ontology 2.
    position2 = {uri2: pos for pos, uri2 in enumerate(classes2)}
    uris_by_label2 = {}
    uris_by_synonym2 = {}
    for uri2, info2 in classes2.items():
        if info2['label']:
            uris_by_label2.setdefault(info2['label'].lower(), []).append(uri2)
        for synonym in info2['synonyms'] or ():
            uris_by_synonym2.setdefault(synonym, []).append(uri2)

    matches = []
    for uri1, info1 in classes1.items():
        candidates = set()

        if info1['label']:
            key = info1['label'].lower()
            # Same label, or label of class 1 listed as a synonym in ontology 2.
            candidates.update(uris_by_label2.get(key, ()))
            candidates.update(uris_by_synonym2.get(key, ()))

        # Label of a class in ontology 2 listed as a synonym of class 1.
        for synonym in info1['synonyms'] or ():
            candidates.update(uris_by_label2.get(synonym, ()))

        # Skip identical class URIs, as they are already matched
        candidates.discard(uri1)

        # Emit in the order ontology 2 was given so results are reproducible.
        for uri2 in sorted(candidates, key=position2.__getitem__):
            info2 = classes2[uri2]
            matches.append((uri1, info1['label'], info1['definition'], uri2, info2['label'], info2['definition']))

    return matches

def save_matches_to_csv(matching_classes, filename='matching_classes.csv'):
    """
    Saves matching classes to a CSV file and provides a download link.

    Args:
        matching_classes: Sequence of 6-tuples
            ``(uri1, label1, definition1, uri2, label2, definition2)``.
        filename: Name of the CSV file written to disk and offered for download.

    Side effects:
        Writes ``filename``, prints a confirmation, and displays both an HTML
        table and a download link in the notebook output.
    """
    from html import escape  # local import: only needed for the download link

    df = pd.DataFrame(matching_classes, columns=[
        'Ontology 1 Class URI', 'Ontology 1 Class Label', 'Ontology 1 Class Definition',
        'Ontology 2 Class URI', 'Ontology 2 Class Label', 'Ontology 2 Class Definition'
    ])

    # Save matches to a CSV file
    df.to_csv(filename, index=False)

    print(f"\n✅ Matching classes have been saved to: {filename}")

    # Display the matches as a table in Google Colab
    display(HTML(df.to_html(index=False)))

    # Provide a download link for the CSV file. "text/csv" is the registered
    # MIME type; the explicit charset keeps non-ASCII text intact on download.
    csv_data = df.to_csv(index=False)
    b64 = base64.b64encode(csv_data.encode("utf-8")).decode("ascii")
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        f'download="{escape(filename)}">Download CSV</a>'
    )
    display(HTML(href))

# --- MAIN EXECUTION ---

def _prompt_for_ontology(ordinal):
    """
    Asks the user how to supply one ontology and returns the loaded RDF graph.

    Args:
        ordinal: Word used in the prompts, e.g. "first" or "second".

    Raises:
        Exception: If the menu choice is not 1 or 2, or no file was uploaded.
    """
    print(f"Choose an option for the {ordinal} ontology:")
    print("1: Provide a URL")
    print("2: Upload a file")
    # Strip the answer so stray spaces around "1"/"2" are not rejected.
    choice = input("Enter 1 or 2: ").strip()

    if choice == "1":
        ontology_url = input(f"Enter the URL of the {ordinal} ontology: ").strip()
        return load_ontology_from_url(ontology_url)
    if choice == "2":
        print(f"Upload the {ordinal} ontology file:")
        uploaded_files = files.upload()
        # Guard against an empty result (presumably a cancelled upload dialog)
        # so the user gets a clear message rather than an IndexError.
        if not uploaded_files:
            raise Exception("❌ No file was uploaded.")
        return load_ontology_from_file(next(iter(uploaded_files)))
    raise Exception("❌ Invalid selection. Please enter either 1 or 2.")


ontology1 = _prompt_for_ontology("first")
print()  # blank line between the two prompts
ontology2 = _prompt_for_ontology("second")

# Extract class details from both ontologies
classes1 = extract_class_details(ontology1)
classes2 = extract_class_details(ontology2)

# Find matches between ontologies
matching_classes = find_matching_classes(classes1, classes2)

# Display the number of matches found
print(f"\n🔍 Number of matching class labels/synonyms found: {len(matching_classes)}")

# Save results to CSV, display in Colab, and provide download link
save_matches_to_csv(matching_classes)


## Find matching definitions for target classes using a language model
This script automates the process of identifying semantically similar class definitions between a list of target classes provided in a CSV file and class definitions extracted from an ontology file. It uses a combination of techniques: semantic pre-filtering with cosine similarity and prompt-based evaluation using a LLaMA language model. The output is provided as a longlist for human review. This tool:

- ✅ Allows users to provide ontology files via URL or file upload
- ✅ Uses a language model to compare definitions
- ✅ Applies pre-filtering using cosine similarity
- ✅ Displays results in a table and provides a download link for a CSV file

In [None]:
import time
import csv
import requests
import base64
import pandas as pd
import torch
from rdflib import Graph
from google.colab import files
from IPython.display import display, HTML
from transformers import pipeline
from huggingface_hub import login

# --- Hugging Face Authentication ---
from getpass import getpass

# Read the token with a hidden prompt: input() would echo the secret into the
# cell output, where it is stored with the notebook and easily shared.
token = getpass("🔑 Enter your Hugging Face API Token: ").strip()
login(token)
print("✅ Successfully logged in to Hugging Face!")

# Build the Llama 3 text-generation pipeline used by generateFromPrompt
pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct")

# --- Function to generate text from Hugging Face API ---
def generateFromPrompt(promptStr, maxTokens=50):
    """
    Runs a prompt through the module-level text-generation pipeline (`pipe`)
    and returns the text the model generated in reply.

    Args:
        promptStr: Prompt text passed to the pipeline.
        maxTokens: Maximum number of NEW tokens to generate. The prompt's own
            length does not count against this budget.

    Returns:
        The generated continuation as a string, without the prompt.
    """
    response = pipe(
        promptStr,
        # max_length would also count the prompt tokens, so a prompt longer
        # than the limit would leave no room for an answer.
        max_new_tokens=maxTokens,
        temperature=0.7,
        # Return only the continuation; the prompt itself contains "(yes/no)"
        # and would make any "yes" check on the full text always succeed.
        return_full_text=False,
    )
    return response[0]['generated_text']

# --- Function to Load Ontology from URL ---
def load_ontology_from_url(url, timeout=60):
    """
    Downloads an RDF/XML ontology from ``url`` and parses it into a graph.

    Args:
        url: Address of the ontology document.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the notebook cell indefinitely.

    Returns:
        An rdflib Graph populated from the downloaded document.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Include the status code so the cause (404, 403, ...) is visible.
        raise Exception(f"❌ Failed to fetch ontology from {url}: {response.status_code}")

    g = Graph()
    # Parse the raw bytes so the encoding declared inside the XML document is
    # used, not the charset requests infers from the HTTP headers.
    g.parse(data=response.content, format="xml")
    print(f"✅ Successfully loaded ontology from: {url}")
    return g

# --- Function to Load Ontology from Uploaded File ---
def load_ontology_from_file():
    """
    Opens the Colab upload dialog and parses the first uploaded file as RDF/XML.

    Returns:
        An rdflib Graph populated from the uploaded file.

    Raises:
        Exception: If the upload dialog returned no files.
    """
    uploaded = files.upload()
    # Guard against an empty result (presumably a cancelled dialog) so the
    # user gets a clear message rather than an IndexError.
    if not uploaded:
        raise Exception("❌ No file was uploaded.")
    file_path = next(iter(uploaded))
    g = Graph()
    g.parse(file_path, format="xml")
    print(f"✅ Successfully loaded ontology from file: {file_path}")
    return g

# --- Function to Extract Class Details ---
def extract_class_definitions(graph):
    """
    Queries the graph for every owl:Class together with its optional
    rdfs:label and IAO_0000115 definition.

    Returns a list with one dict per result row, holding 'class_uri', 'label'
    (None when absent) and 'definition' ("No definition provided" when absent).
    """
    sparql = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX iao: <http://purl.obolibrary.org/obo/IAO_>

    SELECT ?class ?label ?definition
    WHERE {
        ?class a owl:Class .
        OPTIONAL { ?class rdfs:label ?label . }
        OPTIONAL { ?class iao:0000115 ?definition . }
    }
    """

    def _as_record(result_row):
        # Convert one result row into the plain-dict form used downstream.
        raw_label = result_row['label']
        raw_definition = result_row['definition']
        return {
            'class_uri': str(result_row['class']),
            'label': str(raw_label) if raw_label else None,
            'definition': str(raw_definition) if raw_definition else "No definition provided",
        }

    return [_as_record(result_row) for result_row in graph.query(sparql)]

# --- Function to Compare Definitions using Llama 3 ---
def process_prompts(prompts):
    """
    Sends every prompt to the language model and keeps the pairs it judges equivalent.

    Args:
        prompts: Iterable of ``(prompt, target, compared)`` triples, where
            ``prompt`` is the text sent to the model and ``target`` /
            ``compared`` are the two class records being compared.

    Returns:
        List of ``(target, compared)`` pairs for which the model's answer
        contained "yes" (case-insensitive).
    """
    total_time = 0.0
    matches = []
    for prompt, target, compared in prompts:
        # generateFromPrompt returns only the generated text, so the elapsed
        # time is measured here rather than unpacked from its return value.
        started = time.perf_counter()
        response = generateFromPrompt(prompt)
        total_time += time.perf_counter() - started

        # If the model output echoes the prompt, judge only what follows it:
        # the prompt itself contains "(yes/no)" and would always match.
        answer = response[len(prompt):] if response.startswith(prompt) else response
        if "yes" in answer.lower():
            matches.append((target, compared))
    print(f"\n⏳ Total processing time: {total_time:.2f} seconds")
    return matches

# --- Function to Display Results and Provide Download Link ---
def display_results(matches, filename="matching_definitions.csv"):
    """
    Shows the matches as an HTML table, writes them to CSV, and displays a download link.

    Args:
        matches: Sequence whose items are either ``(target, compared)`` pairs
            of class records (dicts with 'class_uri', 'label', 'definition'),
            as returned by process_prompts, or already-flat 6-item rows.
        filename: Name of the CSV file written to disk and offered for download.
    """
    from html import escape  # local import: only needed for the download link

    record_keys = ('class_uri', 'label', 'definition')
    rows = []
    for match in matches:
        if len(match) == 2:
            # Flatten a (target, compared) pair of dicts into the six table
            # columns; passing the pairs straight to DataFrame would fail
            # because two items cannot fill six columns.
            target, compared = match
            rows.append(
                tuple(target.get(key) for key in record_keys)
                + tuple(compared.get(key) for key in record_keys)
            )
        else:
            rows.append(tuple(match))

    df = pd.DataFrame(rows, columns=[
        'Target Class URI', 'Target Class Label', 'Target Class Definition',
        'Matched Class URI', 'Matched Class Label', 'Matched Class Definition'
    ])
    display(HTML(df.to_html(index=False)))

    # Save and create download link. "text/csv" is the registered MIME type;
    # the explicit charset keeps non-ASCII text intact on download.
    df.to_csv(filename, index=False)
    csv_data = df.to_csv(index=False)
    b64 = base64.b64encode(csv_data.encode("utf-8")).decode("ascii")
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        f'download="{escape(filename)}">📥 Download Results</a>'
    )
    display(HTML(href))

# --- Main Execution ---
print("\n🔍 Select Ontology Source:")
print("1: Provide a URL")
print("2: Upload a file")
# Strip answers so stray spaces around the typed value are not rejected.
option1 = input("Enter 1 or 2: ").strip()

if option1 == "1":
    url = input("Enter URL for the Ontology: ").strip()
    ontology = load_ontology_from_url(url)
elif option1 == "2":
    ontology = load_ontology_from_file()
else:
    raise Exception("❌ Invalid option. Please enter 1 or 2.")

# Extract class definitions
defs = extract_class_definitions(ontology)

# --- Option to Upload a CSV File for Target Classes ---
print("\n🔍 Would you like to upload a CSV file with multiple target classes?")
print("1: Yes, upload a CSV")
print("2: No, enter details manually")
option2 = input("Enter 1 or 2: ").strip()

if option2 == "1":
    print("📂 Upload your CSV file (must have columns: 'class_uri', 'label', 'definition'):")
    uploaded_files = files.upload()
    if not uploaded_files:
        raise Exception("❌ No CSV file was uploaded.")
    csv_filename = next(iter(uploaded_files))
    target_frame = pd.read_csv(csv_filename)

    # Fail early with a readable message instead of a KeyError while the
    # prompts are being built further down.
    required_columns = ['class_uri', 'label', 'definition']
    missing_columns = [name for name in required_columns if name not in target_frame.columns]
    if missing_columns:
        raise Exception(f"❌ CSV is missing required column(s): {', '.join(missing_columns)}")

    target_classes = target_frame.to_dict('records')
else:
    # Any answer other than "1" falls through to manual entry.
    target_classes = [{
        'class_uri': input("Enter IRI of the target class: ").strip(),
        'label': input("Enter label of the target class: ").strip(),
        'definition': input("Enter definition of the target class: ").strip()
    }]

# Create prompts for Llama 3: one per (target class, ontology class) pair
prompts = [
    (
        f"Target Label: {target['label']}\nTarget Definition: {target['definition']}\n"
        f"Compared Label: {def_['label']}\nCompared Definition: {def_['definition']}\n"
        "Same meaning? (yes/no)",
        target, def_
    )
    for target in target_classes for def_ in defs
]

# Run Llama 3 comparisons
matches = process_prompts(prompts)

# Display and allow download of results
display_results(matches)
