## Preamble

In [8]:
import matplotlib.pyplot as plt
import networkx as nx
from owlready2 import get_ontology, Thing, IRIS

## Load ontologies

In [9]:
# Load the ENVO ontology with owlready2 from the path exposed by the
# project's ontologies_loader module.
# NOTE(review): envo.file_path is presumably a local copy of envo.owl — confirm in ontologies_loader.
from ontologies_loader import envo
onto = get_ontology(envo.file_path).load()

In [10]:
# For the envo ontology the class of interest is "environmental system process".
# This IRI identifies the root class whose subclasses are extracted below.
env_sys_process_iri = "http://purl.obolibrary.org/obo/ENVO_02500000"

# Sanity check: the class found by searching the loaded ontology must be the
# same object that the global IRIS dictionary returns for this IRI.
env_sys_process_iris = IRIS[env_sys_process_iri]

try:
    env_sys_process = onto.search(iri=env_sys_process_iri)[0]
except IndexError as err:
    # An empty search result would otherwise surface as a bare IndexError
    # with no hint about which IRI was missing.
    raise LookupError(
        f"No class with IRI '{env_sys_process_iri}' was found in the loaded ontology"
    ) from err

print(env_sys_process_iris.label)
print(env_sys_process.label)

if env_sys_process_iris == env_sys_process:
    print("Local copy corresponding to IRIS lookup")
    print(f"environment system process class can be accessed at env_sys_process. Type: {(type(env_sys_process))}")
else:
    # Fail loudly: a silent mismatch would let the following cells run
    # against a class that has not been verified.
    raise RuntimeError(
        f"IRIS lookup and ontology search disagree for '{env_sys_process_iri}': "
        f"{env_sys_process_iris!r} != {env_sys_process!r}"
    )

[locstr('environmental system process', 'en')]
[locstr('environmental system process', 'en')]
Local copy corresponding to IRIS lookup
environment system process class can be accessed at env_sys_process. Type: <class 'owlready2.entity.ThingClass'>


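## Get unique terms from the environmental system process class
An ontology can list the same term in several places, because a class can be a subclass of more than one parent. Walking the hierarchy therefore returns some terms more than once, so the extracted terms are de-duplicated before export.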

### Function to get subclasses

In [11]:
# Adapted from Cell Line Ontology term extraction in https://colab.research.google.com/drive/1JJnBhsSkdNFFcvML1h97tpnwEyK0Pqmz#scrollTo=siZ3ddILxXJq. Accessed 20240810

from owlready2 import ThingClass
import pandas as pd

def extract_subclasses_with_details(ontology_class, expected_class_iri=None):
    """Collect IRI, label and seeAlso values for every descendant of a class.

    The hierarchy below ``ontology_class`` is walked depth-first. The root
    class itself is not included in the result. A class that is reachable
    through several parents is emitted once per path, so callers that need
    unique terms should drop duplicates afterwards.

    Args:
        ontology_class: The owlready2 class (``ThingClass``) whose
            descendants are extracted.
        expected_class_iri: Optional IRI string. When given, it must equal
            ``ontology_class.iri``; this guards against passing the wrong
            root class.

    Returns:
        A pandas DataFrame with one row per visited subclass and the columns
        ``'owl:Class'`` (IRI), ``'rdfs:label'`` (first label, or ``None``
        when the class has no label) and ``'rdfs:seeAlso'`` (all seeAlso
        values joined with ``'|'``; empty string when there are none).

    Raises:
        TypeError: If ``ontology_class`` is not a ``ThingClass``.
        ValueError: If ``expected_class_iri`` is given and does not match
            the IRI of ``ontology_class``.
    """
    if not isinstance(ontology_class, ThingClass):
        raise TypeError(f"Expected an instance of owlready2.entity.ThingClass, got {type(ontology_class)}")

    if expected_class_iri is not None and expected_class_iri != ontology_class.iri:
        raise ValueError(f"Expected class IRI '{expected_class_iri}', but got '{ontology_class.iri}'")

    column_names = ['owl:Class', 'rdfs:label', 'rdfs:seeAlso']
    records = []

    # Classes on the current root-to-node path. A subclass that is already on
    # this path would close a cycle (e.g. two classes declared as subclasses
    # of each other); following it would recurse until RecursionError.
    on_path = {ontology_class}

    def collect(parent_cls):
        for subclass in parent_cls.subclasses():
            if subclass in on_path:
                # Back-edge to an ancestor: skip it, the ancestor has already
                # been handled higher up on this path.
                continue

            label = subclass.label.first() if subclass.label else None
            records.append((
                subclass.iri,
                label,
                '|'.join(map(str, subclass.seeAlso)),
            ))

            on_path.add(subclass)
            collect(subclass)
            # Leave the path again so the same class can still be reported
            # when it is reached through a different parent.
            on_path.discard(subclass)

    collect(ontology_class)

    return pd.DataFrame(records, columns=column_names)


# Reuse the IRI constant defined with the root class instead of repeating the
# literal, so the expected IRI cannot drift from the one used for the lookup.
subclasses_df = extract_subclasses_with_details(env_sys_process, expected_class_iri=env_sys_process_iri)


### Process Entities

In [12]:
# Remove repeated rows (a class reachable through several parents is listed
# once per path), then order by label for ease of data inspection
subclasses_df = (
    subclasses_df
    .drop_duplicates()
    .sort_values(by='rdfs:label')
)

subclasses_df

Unnamed: 0,owl:Class,rdfs:label,rdfs:seeAlso
507,http://purl.obolibrary.org/obo/ENVO_03000135,NSIDC blizzard,
506,http://purl.obolibrary.org/obo/ENVO_03000134,WMO blizzard,
624,http://purl.obolibrary.org/obo/ENVO_21001217,X-ray radiation,
600,http://purl.obolibrary.org/obo/ENVO_01001217,X-ray stellar radiation,
370,http://purl.obolibrary.org/obo/ENVO_01001394,accumulation of matter on aerosolised particles,
...,...,...,...
43,http://purl.obolibrary.org/obo/ENVO_01000787,wildfire,
333,http://purl.obolibrary.org/obo/ENVO_01001860,wind gust,
516,http://purl.obolibrary.org/obo/ENVO_01001749,wind storm,
508,http://purl.obolibrary.org/obo/ENVO_03000136,winter blizzard,


### Export results 

In [15]:
from config_loader import cfg
from pathlib import Path

# Build the input and output locations from the configured base directories
onto_path = Path(cfg.DATA_DIR).joinpath("raw", "ontologies", "envo.owl")
output_file_path = Path(cfg.RESULTS_DIR).joinpath(
    "ontologies", "envo_env_sys_process_unique_entities.tsv"
)

In [16]:
# Make sure the target directory exists; to_csv does not create missing
# parent directories and would fail on a fresh results tree.
output_file_path.parent.mkdir(parents=True, exist_ok=True)

# Save the DataFrame to a TSV file (tab-separated, without the index column)
subclasses_df.to_csv(output_file_path, sep='\t', index=False)