In [5]:
def save_ontologies_concepts(base_path, res_folder, ontologies):
    """Save each ontology's concepts as a ``concept_id, concept_label`` CSV file.

    For every ontology id, ``{base_path}/{ont_name}.owl`` is loaded, one
    ``(id, label)`` row is collected for each label of each class, duplicate
    rows are dropped, and the table is written to
    ``{res_folder}/{ont_name}_labels.csv`` (without the index column).
    Classes that carry more than one label are printed for inspection.

    Parameters
    ----------
    base_path : str
        Path to where the .owl ontology files are located.

    res_folder : str
        Where to save the preprocessed concept mapping files. The folder is
        created if it does not exist yet.

    ontologies : List[str]
        List of ontology ids to transform.

    Returns
    -------
    df : DataFrame
        The dataframe with two columns (id, label) built for the LAST
        ontology in ``ontologies``. When ``ontologies`` is empty, an empty
        dataframe with those two columns is returned.
    """
    from owlready2 import get_ontology
    import pandas as pd
    import os

    # Make sure the output folder exists so that to_csv does not fail on a
    # fresh checkout. An empty string means "current directory": skip it,
    # because os.makedirs('') raises.
    if res_folder:
        os.makedirs(res_folder, exist_ok=True)

    # Bound before the loop so that an empty `ontologies` list returns an
    # empty frame instead of raising UnboundLocalError at `return df`.
    df = pd.DataFrame(columns=['id', 'label'])

    for ont_name in ontologies:
        # Load the ontology file
        ontology = get_ontology(f"{base_path}/{ont_name}.owl").load()
        ids = []
        names = []
        for cls in ontology.classes():
            # Print the class name of concepts with more than one label
            if len(cls.label) > 1:
                print(cls.name)
                print(cls.label)
            # One row per label; classes without any label contribute no row
            for label in cls.label:
                ids.append(cls.name)
                names.append(label)

        df = pd.DataFrame({
            'id': ids,
            'label': names
        }).drop_duplicates()
        df.to_csv(os.path.join(res_folder, f'{ont_name}_labels.csv'), index=False)

    return df

In [6]:
# Run configuration. The ontology .owl files must already be present in the
# `base_path` folder; the GO ontology, for example, can be downloaded from:
# http://geneontology.org/docs/download-ontology/
res_folder = '../../0.RESULTS/preprocessing/_ontologies_mappings/'
base_path = '.'
ontologies = ['go']
# Uncomment the next line to run for all ontologies instead of GO only:
# ontologies = ['chebi', 'cl', 'go', 'mondo', 'mop', 'ncbitaxon', 'pr', 'so', 'uberon']

# Last expression of the cell, so the returned dataframe is displayed.
save_ontologies_concepts(
    base_path=base_path,
    res_folder=res_folder,
    ontologies=ontologies,
)


Unnamed: 0,id,label
0,GO_0000001,mitochondrion inheritance
1,GO_0048308,organelle inheritance
2,GO_0048311,mitochondrion distribution
3,GO_0000002,mitochondrial genome maintenance
4,GO_0007005,mitochondrion organization
...,...,...
47412,GO_2001312,lysobisphosphatidic acid biosynthetic process
47413,GO_2001313,UDP-4-deoxy-4-formamido-beta-L-arabinopyranose...
47414,GO_2001314,UDP-4-deoxy-4-formamido-beta-L-arabinopyranose...
47415,GO_2001315,UDP-4-deoxy-4-formamido-beta-L-arabinopyranose...
