### Path to Lung Ontology File

In [1]:
# Relative path to the lung ontology OWL file; the "Load Ontology" cell passes it to Ontospy.
LOCAL_ONTOLOGY_FILE = '../lung_ontology.owl'

### Installed Imports

In [2]:
import ontospy

### Load Ontology

In [6]:
# Load the local ontology file, telling Ontospy to read it as RDF/XML.
# Every SPARQL query further down in this notebook is run through `onto.query`.
onto = ontospy.Ontospy(uri_or_path=LOCAL_ONTOLOGY_FILE, rdf_format='xml')

### Get Ontology Proteins with Synonyms (non-flattened)

In [10]:
# SPARQL: every direct sub-class of :Protein, paired with its rdfs:label.
# The query text is assembled line by line; the leading and trailing empty
# entries reproduce the newline at the start and end of the query string.
sparql_proteins_query = "\n".join([
    "",
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
    "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
    "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
    "SELECT ?p ?p_label WHERE {",
    "    ?p rdfs:subClassOf :Protein .",
    "    ?p rdfs:label ?p_label",
    "}",
    "",
])

onto_proteins = onto.query(sparql_proteins_query)

In [11]:
# Map each protein label (plain string) to the protein's URI.
# Each result row is (?p, ?p_label); a later row with the same label replaces
# the earlier entry, exactly as repeated assignment would.
probe_uri_dict = {
    label_literal.value: protein_uri
    for protein_uri, label_literal in onto_proteins
}

In [12]:
# List every probe label found in the ontology, one per line.
for probe_label in probe_uri_dict:
    print(probe_label)

calcitonin
alpha-actin-2
CD34
ATP-binding_cassette_sub-family_A_member_3
uteroglobin
tubulin_alpha-1A_chain
lymphatic_vessel_endothelial_hyaluronic_acid_receptor_1
endomucin
Vimentin
homeodomain-only_protein
platelet_endothelial_cell_adhesion_molecule
advanced_glycosylation_end_product-specific_receptor
chondroitin_sulfate_proteoglycan_4
histone_H3.3C
homeobox_protein_Nkx-2.1
transcription_factor_SOX-2
transcription_factor_SOX-9
ADP-ribosylation factor-like protein 13B
pulmonary_surfactant-associated_protein_C


### Choose 1 of the Probes and Find Related Cells

In [13]:
# SPARQL: classes under :cell (at any depth) that carry a
# `has_part some <protein>` restriction for the SOX-9 probe.
sparql_probe_cell_query = "\n".join([
    "",
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
    "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
    "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
    "SELECT ?c WHERE {",
    "    ?c rdfs:subClassOf* :cell . ",
    "    ?c rdfs:subClassOf ?restriction .",
    "    ?restriction owl:onProperty :has_part ; owl:someValuesFrom ?p .",
    "    VALUES ?p { <%s> }",
    "}",
    "",
]) % probe_uri_dict['transcription_factor_SOX-9']

probe_cells = onto.query(sparql_probe_cell_query)

In [15]:
# Show the rows returned by the probe -> cell query (one cell URI per row).
probe_cells

[(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#distal_acinar_tubule_epithelial_cell')),
 (rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#immature_type_II_pneumocyte')),
 (rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#unclassified_fibroblast'))]

### Choose 1 of the Cells and Find Related Tissues

In [16]:
# The probe -> cell query has no ORDER BY, so the order of `probe_cells` is not
# guaranteed to be the same on every run. Select the cell by its local name
# (the part of the URI after '#') instead of by position, and fail loudly if
# the query did not return it.
chosen_cell_name = 'immature_type_II_pneumocyte'
chosen_cell_uri = next(
    (row[0] for row in probe_cells if row[0].split('#')[-1] == chosen_cell_name),
    None,
)
if chosen_cell_uri is None:
    raise LookupError(
        "cell %r was not returned by the probe -> cell query" % chosen_cell_name
    )

# SPARQL: classes under :tissue (at any depth) that the chosen cell is
# declared `part_of some` of.
sparql_cell_tissue_query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>
SELECT ?t WHERE {
    ?t rdfs:subClassOf* :tissue .
    ?c rdfs:subClassOf ?restriction .
    ?restriction owl:onProperty :part_of ; owl:someValuesFrom ?t .
    VALUES ?c { <%s> }
}
""" % chosen_cell_uri

cell_tissues = onto.query(sparql_cell_tissue_query)

In [17]:
# Show the rows returned by the cell -> tissue query (one tissue URI per row).
cell_tissues

[(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#distal_acinar_tubule_epithelium'))]

### Choose 1 of the Tissues and Find Related Structures

In [18]:
# SPARQL: classes under :complex_anatomical_structure (at any depth), with
# their :lungmap_preferred_label, that the first tissue found above is declared
# `part_of some` of.
sparql_tissue_structure_query = "\n".join([
    "",
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
    "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
    "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
    "SELECT ?s ?label WHERE {",
    "    ?s rdfs:subClassOf* :complex_anatomical_structure .",
    "    ?s :lungmap_preferred_label ?label . ",
    "    ?t rdfs:subClassOf ?restriction .",
    "    ?restriction owl:onProperty :part_of ; owl:someValuesFrom ?s .",
    "    VALUES ?t { <%s> }",
    "}",
    "",
]) % cell_tissues[0][0]

tissue_structures = onto.query(sparql_tissue_structure_query)

In [19]:
# Show the rows returned by the tissue -> structure query: (structure URI, label literal).
tissue_structures

[(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#distal_acinar_tubule_bud'),
  rdflib.term.Literal('distal acinar tubule bud'))]

In [20]:
# Second column of the first result row, i.e. the ?label literal of that structure.
s = tissue_structures[0][1]

In [21]:
# `.value` unwraps the rdflib Literal into a plain Python value (a string here).
s.value

'distal acinar tubule bud'

### Success with a single protein! Now putting it all together for all of them.

We successfully mapped the SOX-9 protein to the distal acinar tubule bud structure. Now we'll wrap the queries we've built in functions and chain them together to build a complete dictionary that maps each probe label to the set of structures related to it.

In [22]:
def get_onto_proteins():
    """Return every direct sub-class of ``:Protein`` together with its label.

    Runs a SPARQL query through the notebook-level ``onto`` object and returns
    whatever ``onto.query`` returns; each row holds the selected ``?p``
    (protein URI) and ``?p_label`` (``rdfs:label``) values, in that order.
    """
    # The leading and trailing empty entries give the query text its opening
    # and closing newline.
    query_lines = [
        "",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
        "SELECT ?p ?p_label WHERE {",
        "    ?p rdfs:subClassOf :Protein .",
        "    ?p rdfs:label ?p_label",
        "}",
        "",
    ]

    return onto.query("\n".join(query_lines))

def get_onto_cells_by_protein(protein_uri):
    """Return the cell classes that have the given protein as a part.

    The query selects every ``?c`` that is a sub-class of ``:cell`` (at any
    depth, including ``:cell`` itself) and has a
    ``has_part some <protein_uri>`` restriction as a direct super-class.

    :param protein_uri: URI of the protein class; interpolated into the query
        text with ``%s``.
    :return: the result of ``onto.query`` on the notebook-level ``onto``
        object; each row holds the single selected ``?c`` value.
    """
    query_lines = [
        "",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
        "SELECT ?c WHERE {",
        "    ?c rdfs:subClassOf* :cell . ",
        "    ?c rdfs:subClassOf ?restriction .",
        "    ?restriction owl:onProperty :has_part ; owl:someValuesFrom ?p .",
        "    VALUES ?p { <%s> }",
        "}",
        "",
    ]
    query_text = "\n".join(query_lines) % protein_uri

    return onto.query(query_text)

def get_onto_tissues_by_cell(cell_uri):
    """Return the tissue classes that the given cell is declared part of.

    The query selects every ``?t`` that is a sub-class of ``:tissue`` (at any
    depth, including ``:tissue`` itself) and is the filler of a
    ``part_of some ?t`` restriction that is a direct super-class of the cell.

    :param cell_uri: URI of the cell class; interpolated into the query text
        with ``%s``.
    :return: the result of ``onto.query`` on the notebook-level ``onto``
        object; each row holds the single selected ``?t`` value.
    """
    query_lines = [
        "",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
        "SELECT ?t WHERE {",
        "    ?t rdfs:subClassOf* :tissue .",
        "    ?c rdfs:subClassOf ?restriction .",
        "    ?restriction owl:onProperty :part_of ; owl:someValuesFrom ?t .",
        "    VALUES ?c { <%s> }",
        "}",
        "",
    ]
    query_text = "\n".join(query_lines) % cell_uri

    return onto.query(query_text)

def get_onto_structures_by_related_uri(uri):
    """Return the anatomical structures that the given class is declared part of.

    The query selects every ``?s`` that is a sub-class of
    ``:complex_anatomical_structure`` (at any depth, including the class
    itself), has a ``:lungmap_preferred_label``, and is the filler of a
    ``part_of some ?s`` restriction that is a direct super-class of ``uri``.

    :param uri: URI of the related class (the notebook passes both cell and
        tissue URIs); interpolated into the query text with ``%s``.
    :return: the result of ``onto.query`` on the notebook-level ``onto``
        object; each row holds the selected ``?s`` and ``?label`` values, in
        that order.
    """
    query_lines = [
        "",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
        "SELECT ?s ?label WHERE {",
        "    ?s rdfs:subClassOf* :complex_anatomical_structure .",
        "    ?s :lungmap_preferred_label ?label . ",
        "    ?t rdfs:subClassOf ?restriction .",
        "    ?restriction owl:onProperty :part_of ; owl:someValuesFrom ?s .",
        "    VALUES ?t { <%s> }",
        "}",
        "",
    ]
    query_text = "\n".join(query_lines) % uri

    return onto.query(query_text)

def get_onto_sub_classes(uri):
    """Return the direct sub-classes of the given class.

    The query matches a single ``rdfs:subClassOf`` step (no ``*`` path), so
    only immediate children are selected.

    :param uri: URI of the parent class; interpolated into the query text with
        ``%s``.
    :return: the result of ``onto.query`` on the notebook-level ``onto``
        object; each row holds the single selected ``?sub`` value.
    """
    query_lines = [
        "",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX : <http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#>",
        "SELECT ?sub WHERE {",
        "    ?sub rdfs:subClassOf ?uri . ",
        "    VALUES ?uri { <%s> }",
        "}",
        "",
    ]
    query_text = "\n".join(query_lines) % uri

    return onto.query(query_text)

In [23]:
# Re-run the protein query through the helper and rebuild the
# label -> protein URI lookup. Each row is (?p, ?p_label); a later row with the
# same label replaces the earlier entry.
onto_proteins = get_onto_proteins()

probe_uri_dict = {
    label_literal.value: protein_uri
    for protein_uri, label_literal in onto_proteins
}

In [24]:
# Accumulator for the final mapping: probe label -> set of structure labels.
# It is filled by the loop in the following cell.
probe_structure_dict = {}

In [25]:
# For every probe, walk probe -> cells -> tissues -> structures and record each
# structure label in `probe_structure_dict[label]`. The tab-indented prints are
# a trace of the walk; the trailing number on each line is the row length.
for label, protein_uri in probe_uri_dict.items():

    print(label)

    # Cells carrying the probe, followed by the direct sub-classes of each.
    # The comprehension is fully built before `extend` runs, so the list is not
    # modified while it is being iterated.
    cell_rows = get_onto_cells_by_protein(protein_uri)
    cell_rows.extend([
        sub_row
        for cell_row in cell_rows
        for sub_row in get_onto_sub_classes(cell_row[0])
    ])

    for cell_row in cell_rows:
        cell_uri = cell_row[0]
        print('\t', cell_uri.split('#')[1], len(cell_row))

        # First, structures the cell itself is declared part of.
        for structure_row in get_onto_structures_by_related_uri(cell_uri):
            print('\t\t\t', structure_row[0].split('#')[1], structure_row[1], len(structure_row))

            probe_structure_dict.setdefault(label, set()).add(structure_row[1].value)

        # Then the tissues the cell is part of, plus their direct sub-classes.
        tissue_rows = get_onto_tissues_by_cell(cell_uri)
        tissue_rows.extend([
            sub_row
            for tissue_row in tissue_rows
            for sub_row in get_onto_sub_classes(tissue_row[0])
        ])

        for tissue_row in tissue_rows:
            tissue_uri = tissue_row[0]
            print('\t\t', tissue_uri.split('#')[1], len(tissue_row))

            for structure_row in get_onto_structures_by_related_uri(tissue_uri):
                print('\t\t\t', structure_row[0].split('#')[1], structure_row[1], len(structure_row))

                # Diagnostic dump of the raw row; structure rows carry two
                # columns (?s, ?label), so this prints for every row.
                if len(structure_row) > 1:
                    print(structure_row)

                probe_structure_dict.setdefault(label, set()).add(structure_row[1].value)

calcitonin
	 lung_neuroendocrine_cell 1
	 Kultschitzky_cell_of_bronchiole 1
alpha-actin-2
	 myofibroblast 1
	 smooth_muscle_cell 1
	 bronchiolar-associated_smooth_muscle_cell 1
		 epithelium_of_bronchiole 1
			 bronchiole bronchiole 2
(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#bronchiole'), rdflib.term.Literal('bronchiole'))
		 terminal_bronchiole_epithelium 1
		 epithelium_of_respiratory_bronchiole 1
	 vascular_associated_smooth_muscle_cell 1
			 respiratory_system_blood_vessel respiratory system blood vessel 2
		 arterial_smooth_muscle 1
CD34
	 endothelial_cell 1
	 endothelial_cell_of_vascular_tree 1
		 vascular_tree_endothelium 1
ATP-binding_cassette_sub-family_A_member_3
	 immature_type_II_pneumocyte 1
			 neuroepithelial_body neuroepithelial body 2
		 distal_acinar_tubule_epithelium 1
			 distal_acinar_tubule_bud distal acinar tubule bud 2
(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#

		 distal_acinar_tubule_epithelium 1
			 distal_acinar_tubule_bud distal acinar tubule bud 2
(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#distal_acinar_tubule_bud'), rdflib.term.Literal('distal acinar tubule bud'))
ADP-ribosylation factor-like protein 13B
	 bronchiolar-associated_smooth_muscle_cell 1
		 epithelium_of_bronchiole 1
			 bronchiole bronchiole 2
(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#bronchiole'), rdflib.term.Literal('bronchiole'))
		 terminal_bronchiole_epithelium 1
		 epithelium_of_respiratory_bronchiole 1
	 epithelial_cell_of_the_lung 1
		 lung_epithelium 1
		 acinar_tubule_epithelium 1
		 epithelium_of_bronchiole 1
			 bronchiole bronchiole 2
(rdflib.term.URIRef('http://www.semanticweb.org/am175/ontologies/2017/1/untitled-ontology-79#bronchiole'), rdflib.term.Literal('bronchiole'))
	 acinar_epithelial_cell 1
		 acinar_tubule_epithelium 1
		 distal_acinar_tubule_epitheli

In [26]:
# Print the final mapping: each probe label in sorted order, followed by its
# structure labels (also sorted), one per tab-indented line.
for probe_label in sorted(probe_structure_dict):
    print(probe_label)

    structure_labels = sorted(probe_structure_dict[probe_label])
    for structure_label in structure_labels:
        print("\t%s" % (structure_label,))

ADP-ribosylation factor-like protein 13B
	bronchial artery
	bronchiole
	distal acinar tubule bud
	neuroepithelial body
	proximal acinar tubule
ATP-binding_cassette_sub-family_A_member_3
	distal acinar tubule bud
	neuroepithelial body
advanced_glycosylation_end_product-specific_receptor
	proximal acinar tubule
alpha-actin-2
	bronchiole
	respiratory system blood vessel
histone_H3.3C
	bronchiole
	distal acinar tubule bud
homeobox_protein_Nkx-2.1
	bronchial artery
	bronchiole
	distal acinar tubule bud
	neuroepithelial body
	proximal acinar tubule
homeodomain-only_protein
	bronchial artery
	proximal acinar tubule
lymphatic_vessel_endothelial_hyaluronic_acid_receptor_1
	pulmonary lymphatic vessel
pulmonary_surfactant-associated_protein_C
	bronchial artery
	distal acinar tubule bud
	neuroepithelial body
	proximal acinar tubule
transcription_factor_SOX-2
	bronchiole
transcription_factor_SOX-9
	distal acinar tubule bud
	neuroepithelial body
uteroglobin
	bronchiole
