## Loading EDAM into an RDFlib graph

In [1]:
from rdflib import ConjunctiveGraph

Here we initialize the graph. 

In [2]:
kg = ConjunctiveGraph()

def print_size():
    print(f"The knowledge graph has {len(kg)} triples")

Here we load the EDAM ontology into the graph. 

In [3]:
#kg.parse('http://edamontology.org/EDAM.owl', format='xml')
#print_size()
edam_version = 'https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl'
kg = ConjunctiveGraph()
kg.parse(edam_version, format='xml')
#kg.bind('edam', Namespace('http://edamontology.org/'))
print(str(len(kg)) + ' triples in the EDAM triple store')


38279 triples in the EDAM triple store


In [4]:
kg.serialize("edam.json", format="json-ld")
kg.serialize("edam.ttl", format="turtle")

<Graph identifier=N561854ad78394981a29188bdf9e484fb (<class 'rdflib.graph.ConjunctiveGraph'>)>

In [5]:
# a single function to load EDAM and get the graph object as a result
def load_EDAM():
    g = ConjunctiveGraph()
    g.parse(edam_version, format='xml')
    return g

G = load_EDAM()
print(len(G))

38279


## Listing the 100 first triples  

In [6]:
i = 0

for subject,predicate,obj in kg:
    print(f'({subject}, {predicate}, {obj})')
    i+=1
    
    if i > 99:
        break
    

(http://edamontology.org/data_0874, http://www.geneontology.org/formats/oboInOwl#inSubset, http://edamontology.org/data)
(http://edamontology.org/format_1980, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://www.w3.org/2002/07/owl#Class)
(http://edamontology.org/topic_3415, http://edamontology.org/isdebtag, true)
(http://edamontology.org/operation_0004, http://www.w3.org/2002/07/owl#disjointWith, http://edamontology.org/topic_0003)
(http://edamontology.org/topic_2828, http://www.w3.org/2000/01/rdf-schema#label, X-ray diffraction)
(http://edamontology.org/operation_3431, http://www.geneontology.org/formats/oboInOwl#hasExactSynonym, Database submission)
(http://edamontology.org/format_2182, http://www.geneontology.org/formats/oboInOwl#hasDefinition, A text format resembling FASTQ short read format.)
(http://edamontology.org/data_1656, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://www.w3.org/2002/07/owl#Class)
(http://edamontology.org/data_2198, http://www.w3.org/1999/02/

In [7]:
from rdflib.namespace import RDF, RDFS, OWL 

i = 0


for s in kg.triples((None, RDF.type, OWL.Class)):
    for label in kg.triples((s, RDFS.label, None)):
        print(kg.value(s, RDFS.label))
        i +=1
    
    if i > 99:
        break



## Evaluating SPARQL queries for dashboard
Aim: getting topics that have no wikipedia url in their properties.

In [8]:
query = """
PREFIX edam: <http://edamontology.org/>

SELECT (count(?term) as ?nb_no_wikipedia) WHERE {
    ?c rdfs:subClassOf+ edam:topic_0003 ;
                rdfs:label ?term .
        FILTER NOT EXISTS {
        ?c rdfs:seeAlso ?seealso .
        FILTER (regex(str(?seealso), "wikipedia.org", "i"))
       } .
}
"""

results = kg.query(query)

for r in results :
    print(f"There are {r['nb_no_wikipedia']} topics without a URL (seeAlso property).") 

There are 14 topics without a URL (seeAlso property).


In [10]:
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>


SELECT ?term WHERE {
    ?c rdfs:subClassOf+ edam:topic_0003 ;
                rdfs:label ?term .
        FILTER NOT EXISTS {
        ?c rdfs:seeAlso ?seealso .
        FILTER (regex(str(?seealso), "wikipedia.org", "i"))
       } .
}
"""
results = kg.query(query)


for r in results :
    print(f"Topic '{r['term']}' has no wikipedia link ('seeAlso' property).") 

Topic 'Literature and language' has no wikipedia link ('seeAlso' property).
Topic 'Data submission, annotation, and curation' has no wikipedia link ('seeAlso' property).
Topic 'Data identity and mapping' has no wikipedia link ('seeAlso' property).
Topic 'Genome resequencing' has no wikipedia link ('seeAlso' property).
Topic 'Simulation experiment' has no wikipedia link ('seeAlso' property).
Topic 'Nucleic acid sites, features and motifs' has no wikipedia link ('seeAlso' property).
Topic 'Protein properties' has no wikipedia link ('seeAlso' property).
Topic 'Protein sites, features and motifs' has no wikipedia link ('seeAlso' property).
Topic 'Sequence composition, complexity and repeats' has no wikipedia link ('seeAlso' property).
Topic 'Probes and primers' has no wikipedia link ('seeAlso' property).
Topic 'Sequence sites, features and motifs' has no wikipedia link ('seeAlso' property).
Topic 'Biomolecular simulation' has no wikipedia link ('seeAlso' property).
Topic 'Biotherapeutics' 

In [15]:
import os

query = os.path.dirname(".")  + "../queries/no_wikipedia_link_topic.rq"

with open(query, "r") as f:
    query = f.read()
    
    results = kg.query(query)
    nb_err = len(results)
f.close()

print(nb_err)

for r in results :
    print(f"Topic '{r['term']}' has no wikipedia link ('seeAlso' property).") 

14
Topic 'Literature and language' has no wikipedia link ('seeAlso' property).
Topic 'Data submission, annotation, and curation' has no wikipedia link ('seeAlso' property).
Topic 'Data identity and mapping' has no wikipedia link ('seeAlso' property).
Topic 'Genome resequencing' has no wikipedia link ('seeAlso' property).
Topic 'Simulation experiment' has no wikipedia link ('seeAlso' property).
Topic 'Nucleic acid sites, features and motifs' has no wikipedia link ('seeAlso' property).
Topic 'Protein properties' has no wikipedia link ('seeAlso' property).
Topic 'Protein sites, features and motifs' has no wikipedia link ('seeAlso' property).
Topic 'Sequence composition, complexity and repeats' has no wikipedia link ('seeAlso' property).
Topic 'Probes and primers' has no wikipedia link ('seeAlso' property).
Topic 'Sequence sites, features and motifs' has no wikipedia link ('seeAlso' property).
Topic 'Biomolecular simulation' has no wikipedia link ('seeAlso' property).
Topic 'Biotherapeutic

In [ ]:
query = os.path.dirname(".")  + "../queries/no_wikipedia_link_topic.rq"

with open(query, "r") as f:
    query = f.read()
    
    results = kg.query(query)
    nb_err = len(results)
f.close()

print(nb_err)

for r in results :
    print(f"Topic '{r['term']}' has no wikipedia link ('seeAlso' property).") 

In [18]:
## Query for synonyms
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>


SELECT ?term ?c WHERE {
    ?c rdfs:subClassOf+ edam:topic_0003 ;
                rdfs:label ?term .
        FILTER NOT EXISTS {
        ?c oboInOwl:hasBroadSynonym  ?hasBroadSynonym .
       } .
}
"""
results = kg.query(query)

for r in results :
    print(f"Topic '{r['term']}' has no broad synonym") 

Topic 'Informatics' has no broad synonym
Topic 'Ontology and terminology' has no broad synonym
Topic 'Bioinformatics' has no broad synonym
Topic 'Laboratory information management' has no broad synonym
Topic 'Cheminformatics' has no broad synonym
Topic 'Chemometrics' has no broad synonym
Topic 'Medical informatics' has no broad synonym
Topic 'Immunoinformatics' has no broad synonym
Topic 'Literature and language' has no broad synonym
Topic 'Natural language processing' has no broad synonym
Topic 'Data management' has no broad synonym
Topic 'Data submission, annotation, and curation' has no broad synonym
Topic 'Workflows' has no broad synonym
Topic 'Data acquisition' has no broad synonym
Topic 'Data security' has no broad synonym
Topic 'Data identity and mapping' has no broad synonym
Topic 'Data architecture, analysis and design' has no broad synonym
Topic 'Data integration and warehousing' has no broad synonym
Topic 'Data governance' has no broad synonym
Topic 'Data quality management'

In [38]:
## Query for topics with no definition (ERROR level)
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

SELECT ?term ?concept WHERE {
    ?concept rdfs:subClassOf+ edam:topic_0003 ;
                rdfs:label ?term .
                
    FILTER NOT EXISTS {
        ?concept oboInOwl:hasDefinition ?def 
    } .
}
"""
results = kg.query(query)

for r in results :
    print(f"Topic '{r['term']}' and '{r['concept']}' has no def ") 

In [42]:
## Query for format with no definition (ERROR level)
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

SELECT ?term ?concept ?def WHERE {
    ?concept rdfs:subClassOf+ edam:format_1915 ;
                rdfs:label ?term ;
                
         oboInOwl:hasDefinition ?def .

}
"""
results = kg.query(query)

for r in results :
    print(f"Format term  '{r['term']}' has  def '{r['def']}' ") 

Data term  'Textual format' has  def 'Textual format.' 
Data term  'SMILES' has  def 'Chemical structure specified in Simplified Molecular Input Line Entry System (SMILES) line notation.' 
Data term  'smarts' has  def 'SMILES ARbitrary Target Specification (SMARTS) format for chemical structure specification, which is a subset of the SMILES line notation.' 
Data term  'InChI' has  def 'Chemical structure specified in IUPAC International Chemical Identifier (InChI) line notation.' 
Data term  'mf' has  def 'Chemical structure specified by Molecular Formula (MF), including a count of each element in a compound.' 
Data term  'InChIKey' has  def 'The InChIKey (hashed InChI) is a fixed length (25 character) condensed digital representation of an InChI chemical structure specification. It uniquely identifies a chemical compound.' 
Data term  'nucleotide' has  def 'Alphabet for a nucleotide sequence with possible ambiguity, unknown positions and non-sequence characters.' 
Data term  'pure nuc

In [57]:
## Query for hybrid ID with no regex (WARNING level) (does this apply to all sorts of IDs?)
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

SELECT ?term ?concept ?regex WHERE {
    ?concept rdfs:subClassOf+ edam:data_2109 ;
            rdfs:label ?term .
                
    FILTER NOT EXISTS {               
        ?concept edam:regex ?regex 
        } .

}
"""
results = kg.query(query)

for r in results :
    print(f"ID hybrid  '{r['concept']}'  '{r['term']}' has  regex '{r['regex']}' ") 

ID hybrid  'http://edamontology.org/data_1036'  'TIGR identifier' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2112'  'FlyBase primary identifier' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2174'  'FlyBase secondary identifier' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_1154'  'KEGG object identifier' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2102'  'KEGG organism code' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_1891'  'iHOP symbol' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2104'  'BioCyc ID' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_1157'  'Pathway ID (BioCyc)' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2105'  'Compound ID (BioCyc)' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2106'  'Reaction ID (BioCyc)' has  regex 'None' 
ID hybrid  'http://edamontology.org/data_2107'  'Enzyme ID (BioCyc)' has  regex 'None' 
ID hybrid  'http://edamo

## Queries from Caseologue

In [10]:
query = """
PREFIX edam: <http://edamontology.org/>

SELECT * WHERE {
    ?x ?property ?value .
    VALUES ?x {edam:topic_0622}
}
"""

results = kg.query(query)

for r in results :
    print(f"prop '{r['property']}' has value: '{r['value']}'.") 

prop 'http://www.w3.org/2000/01/rdf-schema#seeAlso' has value: 'https://en.wikipedia.org/wiki/Genomics'.
prop 'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym' has value: 'Whole genomes'.
prop 'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym' has value: 'Genome annotation'.
prop 'http://www.w3.org/2000/01/rdf-schema#subClassOf' has value: 'http://edamontology.org/topic_3391'.
prop 'http://www.geneontology.org/formats/oboInOwl#inSubset' has value: 'http://edamontology.org/topics'.
prop 'http://edamontology.org/created_in' has value: 'beta12orEarlier'.
prop 'http://www.w3.org/2000/01/rdf-schema#seeAlso' has value: 'http://purl.bioontology.org/ontology/MSH/D023281'.
prop 'http://www.geneontology.org/formats/oboInOwl#inSubset' has value: 'http://edamontology.org/bio'.
prop 'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym' has value: 'Exomes'.
prop 'http://www.geneontology.org/formats/oboInOwl#hasDefinition' has value: 'Whole genomes of one or more 

In [11]:
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>


SELECT ?term ?def WHERE {
    ?c rdfs:subClassOf+ edam:topic_0003 ;
       rdfs:label ?term ;
    OPTIONAL {?c oboInOwl:hasDefinition ?def} .

    FILTER NOT EXISTS {
        #?c rdfs:seeAlso [] .
        ?c oboInOwl:hasDefinition [] .
    } .
}
"""

results = kg.query(query)

for r in results :
    print(f"Topic '{r['term']}' has  def  '{r['def']}'.") 

In [12]:
query = """
PREFIX edam: <http://edamontology.org/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>


SELECT ?term ?syno WHERE {
    ?c rdfs:subClassOf+ edam:topic_0003 ;
       rdfs:label ?term ;
    OPTIONAL {?c oboInOwl:hasExactSynonym ?syno} .

    FILTER NOT EXISTS {
        ?c oboInOwl:hasExactSynonym [] .
    } .
}
"""

results = kg.query(query)

#for r in results :
    #print(f"Topic '{r['term']}' has  no exactSynonym.") 

Topic 'Ontology and terminology' has  no exactSynonym.
Topic 'Bioinformatics' has  no exactSynonym.
Topic 'Laboratory information management' has  no exactSynonym.
Topic 'Chemometrics' has  no exactSynonym.
Topic 'Database management' has  no exactSynonym.
Topic 'Data management' has  no exactSynonym.
Topic 'Data submission, annotation, and curation' has  no exactSynonym.
Topic 'Data identity and mapping' has  no exactSynonym.
Topic 'Data architecture, analysis and design' has  no exactSynonym.
Topic 'Data integration and warehousing' has  no exactSynonym.
Topic 'Data governance' has  no exactSynonym.
Topic 'Data quality management' has  no exactSynonym.
Topic 'Data rescue' has  no exactSynonym.
Topic 'Chemistry' has  no exactSynonym.
Topic 'Microfluidics' has  no exactSynonym.
Topic 'Computational chemistry' has  no exactSynonym.
Topic 'Drug discovery' has  no exactSynonym.
Topic 'Compound libraries and screening' has  no exactSynonym.
Topic 'Analytical chemistry' has  no exactSynonym

Queries from Caseologue

In [11]:
query="""
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?label WHERE {
    
    ?entity a owl:Class .
    ?entity rdfs:label ?label .
    FILTER NOT EXISTS { ?entity owl:deprecated true }
                 
    FILTER NOT EXISTS {
        FILTER REGEX(str(?entity), "^http://edamontology.org/(data|topic|operation|format)_[0-9]{4}$")
    }
    FILTER ( ?entity != <http://www.geneontology.org/formats/oboInOwl#ObsoleteClass>)

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [12]:
#Query aim = “check for missing wikipedia link in topic ”

query="""
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?label ?seealso WHERE {
    ?entity rdfs:subClassOf+ <http://edamontology.org/topic_0003> .
    ?entity rdfs:label ?label .
    
    FILTER NOT EXISTS {
        ?entity rdfs:seeAlso ?seealso .
        FILTER (regex(str(?seealso), "wikipedia", "i")) .
    }
}
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/topic_3068' - Label 'Literature and language'
Entity 'http://edamontology.org/topic_0219' - Label 'Data submission, annotation, and curation'
Entity 'http://edamontology.org/topic_3345' - Label 'Data identity and mapping'
Entity 'http://edamontology.org/topic_3923' - Label 'Genome resequencing'
Entity 'http://edamontology.org/topic_3524' - Label 'Simulation experiment'
Entity 'http://edamontology.org/topic_3511' - Label 'Nucleic acid sites, features and motifs'
Entity 'http://edamontology.org/topic_0123' - Label 'Protein properties'
Entity 'http://edamontology.org/topic_3510' - Label 'Protein sites, features and motifs'
Entity 'http://edamontology.org/topic_0157' - Label 'Sequence composition, complexity and repeats'
Entity 'http://edamontology.org/topic_0632' - Label 'Probes and primers'
Entity 'http://edamontology.org/topic_0160' - Label 'Sequence sites, features and motifs'
Entity 'http://edamontology.org/topic_3892' - Label 'Biomolecular simulation'


In [14]:
query="""
SELECT DISTINCT ?entity ?label WHERE {

    ?entity a owl:Class;
            owl:deprecated true .
    FILTER NOT EXISTS { ?entity oboInOwl:replacedBy|oboInOwl:consider ?repl1 .} 
    ?entity rdfs:label ?label .
}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/data_0005' - Label 'Resource type'
Entity 'http://edamontology.org/data_0007' - Label 'Tool'
Entity 'http://edamontology.org/data_0581' - Label 'Database'
Entity 'http://edamontology.org/data_0583' - Label 'Directory metadata'
Entity 'http://edamontology.org/data_0831' - Label 'MeSH vocabulary'
Entity 'http://edamontology.org/data_0832' - Label 'HGNC vocabulary'
Entity 'http://edamontology.org/data_0835' - Label 'UMLS vocabulary'
Entity 'http://edamontology.org/data_0843' - Label 'Database entry'
Entity 'http://edamontology.org/data_0848' - Label 'Raw sequence'
Entity 'http://edamontology.org/data_0851' - Label 'Sequence mask character'
Entity 'http://edamontology.org/data_0852' - Label 'Sequence mask type'
Entity 'http://edamontology.org/data_0853' - Label 'DNA sense specification'
Entity 'http://edamontology.org/data_0854' - Label 'Sequence length specification'
Entity 'http://edamontology.org/data_0855' - Label 'Sequence metadata'
Entity 'http://edamo

In [15]:
query="""
SELECT DISTINCT ?entity ?label ?property ?replacement WHERE {
    VALUES ?property { oboInOwl:replacedBy
                       oboInOwl:consider      }
    ?entity  ?property ?replacement .
    ?entity rdfs:label ?label .
    ?replacement owl:deprecated true .
    
}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/data_2539' - Label 'Alignment data'
Entity 'http://edamontology.org/topic_0747' - Label 'Nucleic acid sites and features'
Entity 'http://edamontology.org/topic_0748' - Label 'Protein sites and features'
Entity 'http://edamontology.org/topic_0751' - Label 'Phosphorylation sites'
Entity 'http://edamontology.org/topic_2280' - Label 'Nucleic acid structure comparison'


In [16]:
query="""
SELECT DISTINCT ?entity ?label ?property ?entity2 ?label2 ?property2 ?value WHERE { 
    VALUES ?property {
                      oboInOwl:hasDefinition
                      oboInOwl:hasExactSynonym
                      oboInOwl:hasNarrowSynonym
                      #oboInOwl:hasBroadSynonym
                      #rdfs:comment
                      rdfs:label
                        }
    VALUES ?property2 {
                      oboInOwl:hasDefinition
                      oboInOwl:hasExactSynonym
                      oboInOwl:hasNarrowSynonym
                      #oboInOwl:hasBroadSynonym
                      #rdfs:comment
                      #rdfs:label
                        }
        
    
    ?entity a owl:Class . 
    ?entity2 a owl:Class . 
    ?entity ?property ?value .
    
    ?entity2 ?property2 ?value .

    #?entity2 ?property2 ?value2 .    
    #FILTER ( UCASE(str(?value)) = UCASE(str(?value2)) )      
    
    FILTER (?entity != ?entity2)
    ?entity rdfs:label ?label .
    ?entity2 rdfs:label ?label2 .
    
}
ORDER BY ?value
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x105d86b60>>
Traceback (most recent call last):
  File "/Users/rioualen/mambaforge/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 770, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


KeyboardInterrupt: 

In [18]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT DISTINCT ?entity ?property ?property2 ?value ?label WHERE {
    VALUES ?property {oboInOwl:hasDefinition
                      oboInOwl:hasExactSynonym
                      oboInOwl:hasNarrowSynonym
                      oboInOwl:hasBroadSynonym
                      rdfs:comment
                      rdfs:label
                      edam:documentation
                        }
    VALUES ?property2 {oboInOwl:hasDefinition
                      oboInOwl:hasExactSynonym
                      oboInOwl:hasNarrowSynonym
                      oboInOwl:hasBroadSynonym
                      rdfs:comment
                      rdfs:label
                      edam:documentation
                        }

    ?entity a owl:Class . 
    ?entity ?property ?value .
    ?entity ?property2 ?value2 .
#    FILTER (?value = ?value2)
    FILTER ( UCASE(str(?value)) = UCASE(str(?value2)) )

    FILTER (?property != ?property2)    
    ?entity rdfs:label ?label .
  
}
ORDER BY ?value
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [22]:
query="""
SELECT DISTINCT ?entity ?property ?value ?label WHERE {
?entity a owl:Class .
?entity ?property ?value .    
    FILTER REGEX (?value, "^(.{0})$").
?entity rdfs:label ?label
}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [20]:
query="""
SELECT DISTINCT ?entity ?property ?def ?label WHERE {
   VALUES ?property {oboInOwl:hasDefinition}   
   ?entity ?property ?def .
   ?entity rdfs:label ?label .

   FILTER NOT EXISTS {
       FILTER REGEX(str(?def), "['.']+$")
   }
   FILTER (!isBlank(?def))

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [21]:
query="""
SELECT DISTINCT ?entity ?property ?value ?label WHERE {
   VALUES ?property {rdfs:label}   
   ?entity ?property ?value .
   ?entity rdfs:label ?label .

   FILTER REGEX(str(?value), "['.']+$")
   FILTER (!isBlank(?entity))

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [26]:
query="""
SELECT DISTINCT ?entity ?property ?value ?label WHERE {
   ?entity ?property ?value .
   ?entity rdfs:label ?label .

   #FILTER REGEX(str(?value), "[\\s\r\n]+$")
   #FILTER (!isBlank(?entity))

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

ParseException: Expected SelectQuery, found ']'  (at char 155), (line:7, col:1)

In [28]:
query="""
SELECT DISTINCT ?entity ?property ?value ?label 
WHERE {
   ?entity ?property ?value .
   ?entity rdfs:label ?label .    
   FILTER regex(?value, "\n")
   FILTER (!isBlank(?entity))

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

ParseException: Expected SelectQuery, found 'FILTER'  (at char 126), (line:6, col:4)

In [30]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT ?entity ?label ?property WHERE
{
  
    ?entity rdfs:subClassOf+ edam:format_1915 .
    ?entity rdfs:label ?label .
    {
        VALUES ?property { edam:documentation }
        FILTER NOT EXISTS {?entity ?property ?value .}
    }
    UNION
    {
        VALUES ?property { edam:is_format_of }
        FILTER NOT EXISTS {    
            ?entity rdfs:subClassOf ?restriction . 
            ?restriction rdf:type owl:Restriction ; 
                owl:onProperty ?property  ; 
                owl:someValuesFrom ?data.}
    }
}ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/format_1196' - Label 'SMILES'
Entity 'http://edamontology.org/format_1197' - Label 'InChI'
Entity 'http://edamontology.org/format_1197' - Label 'InChI'
Entity 'http://edamontology.org/format_1198' - Label 'mf'
Entity 'http://edamontology.org/format_1198' - Label 'mf'
Entity 'http://edamontology.org/format_1199' - Label 'InChIKey'
Entity 'http://edamontology.org/format_1199' - Label 'InChIKey'
Entity 'http://edamontology.org/format_1200' - Label 'smarts'
Entity 'http://edamontology.org/format_1200' - Label 'smarts'
Entity 'http://edamontology.org/format_1206' - Label 'unambiguous pure'
Entity 'http://edamontology.org/format_1206' - Label 'unambiguous pure'
Entity 'http://edamontology.org/format_1207' - Label 'nucleotide'
Entity 'http://edamontology.org/format_1207' - Label 'nucleotide'
Entity 'http://edamontology.org/format_1208' - Label 'protein'
Entity 'http://edamontology.org/format_1208' - Label 'protein'
Entity 'http://edamontology.org/format_1209' -

In [32]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT ?entity ?property ?value ?label ?id WHERE {
    
    VALUES ?property { edam:next_id }
    #retrieve the last ID
    <http://edamontology.org> ?property ?value .
    
    #match all IDs
    ?entity rdf:type owl:Class .
    FILTER ( ?entity != <http://www.geneontology.org/formats/oboInOwl#ObsoleteClass>) .

    ?entity rdfs:label ?label .    
    BIND(strafter(str(?entity), "_") AS ?id) .
} 
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/data_0005' - Label 'Resource type'
Entity 'http://edamontology.org/data_0006' - Label 'Data'
Entity 'http://edamontology.org/data_0007' - Label 'Tool'
Entity 'http://edamontology.org/data_0581' - Label 'Database'
Entity 'http://edamontology.org/data_0582' - Label 'Ontology'
Entity 'http://edamontology.org/data_0583' - Label 'Directory metadata'
Entity 'http://edamontology.org/data_0831' - Label 'MeSH vocabulary'
Entity 'http://edamontology.org/data_0832' - Label 'HGNC vocabulary'
Entity 'http://edamontology.org/data_0835' - Label 'UMLS vocabulary'
Entity 'http://edamontology.org/data_0842' - Label 'Identifier'
Entity 'http://edamontology.org/data_0843' - Label 'Database entry'
Entity 'http://edamontology.org/data_0844' - Label 'Molecular mass'
Entity 'http://edamontology.org/data_0845' - Label 'Molecular charge'
Entity 'http://edamontology.org/data_0846' - Label 'Chemical formula'
Entity 'http://edamontology.org/data_0847' - Label 'QSAR descriptor'
Entit

In [33]:
query="""
SELECT DISTINCT ?entity ?label WHERE {
    
    ?entity a owl:Class .
    OPTIONAL  {?entity rdfs:label ?label .}
    
}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'N135c4b5b1ace416c880ccdef24bdefe6' - Label 'None'
Entity 'Nc0796c735a0249f3a51f0e3cb8920cb4' - Label 'None'
Entity 'http://edamontology.org/data_0005' - Label 'Resource type'
Entity 'http://edamontology.org/data_0006' - Label 'Data'
Entity 'http://edamontology.org/data_0007' - Label 'Tool'
Entity 'http://edamontology.org/data_0581' - Label 'Database'
Entity 'http://edamontology.org/data_0582' - Label 'Ontology'
Entity 'http://edamontology.org/data_0583' - Label 'Directory metadata'
Entity 'http://edamontology.org/data_0831' - Label 'MeSH vocabulary'
Entity 'http://edamontology.org/data_0832' - Label 'HGNC vocabulary'
Entity 'http://edamontology.org/data_0835' - Label 'UMLS vocabulary'
Entity 'http://edamontology.org/data_0842' - Label 'Identifier'
Entity 'http://edamontology.org/data_0843' - Label 'Database entry'
Entity 'http://edamontology.org/data_0844' - Label 'Molecular mass'
Entity 'http://edamontology.org/data_0845' - Label 'Molecular charge'
Entity 'http://edamontology.

In [35]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT ?entity ?label ?property WHERE
{
    ?entity rdfs:subClassOf+ edam:data_2091 .
    ?entity rdfs:label ?label .
        VALUES ?property { edam:regex               
                        }
    FILTER NOT EXISTS {?entity ?property ?value .}
    
}ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/data_1002' - Label 'CAS number'
Entity 'http://edamontology.org/data_1003' - Label 'Chemical registry number (Beilstein)'
Entity 'http://edamontology.org/data_1004' - Label 'Chemical registry number (Gmelin)'
Entity 'http://edamontology.org/data_1027' - Label 'Gene ID (NCBI)'
Entity 'http://edamontology.org/data_1031' - Label 'Gene ID (CGD)'
Entity 'http://edamontology.org/data_1032' - Label 'Gene ID (DictyBase)'
Entity 'http://edamontology.org/data_1033' - Label 'Ensembl gene ID'
Entity 'http://edamontology.org/data_1036' - Label 'TIGR identifier'
Entity 'http://edamontology.org/data_1039' - Label 'SCOP domain identifier'
Entity 'http://edamontology.org/data_1040' - Label 'CATH domain ID'
Entity 'http://edamontology.org/data_1041' - Label 'SCOP concise classification string (sccs)'
Entity 'http://edamontology.org/data_1042' - Label 'SCOP sunid'
Entity 'http://edamontology.org/data_1043' - Label 'CATH node ID'
Entity 'http://edamontology.org/data_1066' -

In [39]:
query="""
PREFIX edam: <http://edamontology.org/>

CONSTRUCT { 
    ?children_format rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:is_format_of  ; 
            owl:someValuesFrom ?data. 
}
WHERE {
    ?parent_format rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:is_format_of  ; 
            owl:someValuesFrom ?data.
    ?children_format rdfs:subClassOf+ ?parent_format .
    }
"""

#results = kg.query(query)

#for r in results :
#    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [40]:
query="""
SELECT DISTINCT ?entity ?property ?label ?value WHERE {
    VALUES ?property {
                    <http://edamontology.org/citation>
                    <http://edamontology.org/documentation>
                    <http://edamontology.org/information_standard>
                    <http://edamontology.org/oldParent>
                    <http://edamontology.org/ontology_used>
                    <http://edamontology.org/organisation>
                    <http://www.geneontology.org/formats/oboInOwl#consider>
                    <http://www.geneontology.org/formats/oboInOwl#inSubset>
                    <http://www.geneontology.org/formats/oboInOwl#replacedBy>
                    <http://www.w3.org/2000/01/rdf-schema#seeAlso>
                    <http://www.w3.org/2000/01/rdf-schema#subClassOf>
                    <http://www.w3.org/2000/01/rdf-schema#subPropertyOf>
                    <http://www.w3.org/2002/07/owl#annotatedProperty>
                    <http://www.w3.org/2002/07/owl#inverseOf>
                    <http://www.w3.org/2002/07/owl#disjointWith>
                    <http://www.w3.org/2000/01/rdf-schema#domain>
                    <http://www.w3.org/2000/01/rdf-schema#range>


}

?entity ?property ?value .

FILTER isLiteral(?value) 
?entity rdfs:label ?label .

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/data_0849' - Label 'Sequence record'
Entity 'http://edamontology.org/data_0863' - Label 'Sequence alignment'
Entity 'http://edamontology.org/data_0863' - Label 'Sequence alignment'
Entity 'http://edamontology.org/data_0871' - Label 'Phylogenetic character data'
Entity 'http://edamontology.org/data_0872' - Label 'Phylogenetic tree'
Entity 'http://edamontology.org/data_0872' - Label 'Phylogenetic tree'
Entity 'http://edamontology.org/data_0883' - Label 'Structure'
Entity 'http://edamontology.org/data_1255' - Label 'Sequence features'
Entity 'http://edamontology.org/data_1354' - Label 'Sequence profile'
Entity 'http://edamontology.org/data_1381' - Label 'Pair sequence alignment'
Entity 'http://edamontology.org/data_2044' - Label 'Sequence'
Entity 'http://edamontology.org/data_2044' - Label 'Sequence'
Entity 'http://edamontology.org/data_2048' - Label 'Report'
Entity 'http://edamontology.org/data_2091' - Label 'Accession'
Entity 'http://edamontology.org/data

In [42]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT DISTINCT ?entity ?property ?label ?property_subs_edam WHERE {

    VALUES ?property {oboInOwl:hasDefinition
                       edam:created_in
                       #oboInOwl:inSubset
                       rdfs:label
                       rdfs:subClassOf }
    ?entity a owl:Class .

    FILTER NOT EXISTS {?entity owl:deprecated true .}
    OPTIONAL {?entity rdfs:label ?label .}
    FILTER ( ?entity != <http://edamontology.org/data_0006>)
    FILTER ( ?entity != <http://edamontology.org/topic_0003>)
    FILTER ( ?entity != <http://www.w3.org/2002/07/owl#DeprecatedClass>)
    FILTER ( ?entity != <http://edamontology.org/operation_0004>)
    FILTER ( ?entity != <http://edamontology.org/format_1915>)

    FILTER NOT EXISTS {?entity ?property ?value .
                       MINUS { ?value rdf:type owl:Restriction .} #to prevent concept with rdfs:subClassOf property being only restriction (e.g. has_topic)
                       }
    FILTER (!isBlank(?entity))
    # UNION
    # {
    # VALUES ?property { oboInOwl:inSubset
    #                     }
    # FILTER NOT EXISTS {?entity ?property <http://purl.obolibrary.org/obo/edam#edam> .}
    # }
}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

KeyboardInterrupt: 

In [44]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT DISTINCT ?entity ?property ?label WHERE {
    
    ?entity owl:deprecated true .
    ?entity rdfs:label ?label .
    FILTER ( ?entity != <http://www.geneontology.org/formats/oboInOwl#ObsoleteClass>)
    
    {VALUES ?property { edam:obsolete_since
                       edam:oldParent                
                        }
        FILTER NOT EXISTS {?entity ?property ?value .}
    }
    # UNION
    # {VALUES ?property { oboInOwl:inSubset            
    #                     }
    #     FILTER NOT EXISTS {?entity  ?property <http://purl.obolibrary.org/obo/edam#obsolete> .}
    # }
    UNION
    {VALUES ?property { rdfs:subClassOf             
                        }
        FILTER NOT EXISTS {?entity ?property <http://www.w3.org/2002/07/owl#DeprecatedClass>.}}

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [45]:
query="""
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?target ?label ?property
WHERE {
    VALUES ?property { edam:has_topic
                       edam:has_input
                       edam:has_output
                       edam:is_format_of
                       edam:is_identifier_of
                       
                        }
    ?entity ?p ?value .
    #{?entity rdfs:subClassOf+ edam:operation_0004 .}
    #UNION
    #{
    #?entity rdfs:subClassOf+ edam:data_0006 .
    #}
    ?entity rdfs:label ?label .
    ?entity rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty ?property  ; 
            owl:someValuesFrom ?target.
    ?target owl:deprecated ?deprecated . 
    FILTER (?deprecated = true)
}
ORDER BY ?entity 
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [46]:
query="""
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?label ?value ?property WHERE {
    
    ?entity rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ;
        owl:onProperty ?property  ; 
        owl:someValuesFrom ?value .
    ?value edam:notRecommendedForAnnotation true .
    
    FILTER ( ?entity != <http://edamontology.org/data_0842>)
    FILTER ( ?entity != <http://edamontology.org/operation_3357>)
    FILTER ( ?entity != <http://edamontology.org/operation_2422>)
    ?entity rdfs:label ?label .

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [48]:
query="""
PREFIX edam:<http://edamontology.org/>

SELECT DISTINCT ?entity ?property ?value ?label WHERE {
   ?entity ?property ?value .
   ?entity rdfs:label ?label .        
   FILTER REGEX(str(?value), "^[\\s\r\n]+")
   FILTER (!isBlank(?entity))

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

ParseException: Expected SelectQuery, found 'FILTER'  (at char 169), (line:7, col:4)

In [49]:
query="""
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?entity ?label ?id ?subset ?superclass ?label_sc ?id_sc ?subset_sc WHERE {

    ?entity a owl:Class .
    ?entity rdfs:label ?label .    
    BIND(strafter(str(?entity), "org/") AS ?id) .
    BIND(strbefore(str(?id), "_") AS ?subset) .
    
    ?entity rdfs:subClassOf ?superclass .
    ?superclass rdfs:label ?label_sc .    
    BIND(strafter(str(?superclass), "org/") AS ?id_sc) .
    BIND(strbefore(str(?id_sc), "_") AS ?subset_sc) .
    
    FILTER ( ?subset_sc != ?subset) .
} 
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [50]:
query="""
SELECT DISTINCT ?entity ?label ?property ?value WHERE {
        VALUES ?property { rdfs:subClassOf }
        ?entity ?property ?value
        FILTER (?entity = ?value)
	    ?entity rdfs:label ?label 
	 

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [51]:
query="""
SELECT DISTINCT ?entity ?property ?value ?label WHERE {
   ?entity ?property ?value .
   ?entity rdfs:label ?label .        
    
   FILTER regex(?value, "\t")
   FILTER (!isBlank(?entity))

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

In [52]:
query="""
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?property ?label ?reference WHERE {
    {
        VALUES ?property { rdfs:subClassOf
                           oboInOwl:replacedBy
                           oboInOwl:consider  }
        ?entity ?property ?reference .
        ?entity rdfs:label ?label .
        MINUS {?reference rdf:type owl:Restriction . }
    }
    UNION
    {
        ?entity rdfs:subClassOf ?restriction . 
            ?restriction rdf:type owl:Restriction ; 
            owl:onProperty ?property  ; 
            owl:someValuesFrom ?reference.
        ?entity rdfs:label ?label .
    }

}
ORDER BY ?entity
"""

results = kg.query(query)

for r in results :
    print(f"Entity '{r['entity']}' - Label '{r['label']}'") 

Entity 'http://edamontology.org/data_0005' - Label 'Resource type'
Entity 'http://edamontology.org/data_0005' - Label 'Resource type'
Entity 'http://edamontology.org/data_0007' - Label 'Tool'
Entity 'http://edamontology.org/data_0007' - Label 'Tool'
Entity 'http://edamontology.org/data_0581' - Label 'Database'
Entity 'http://edamontology.org/data_0581' - Label 'Database'
Entity 'http://edamontology.org/data_0582' - Label 'Ontology'
Entity 'http://edamontology.org/data_0582' - Label 'Ontology'
Entity 'http://edamontology.org/data_0583' - Label 'Directory metadata'
Entity 'http://edamontology.org/data_0583' - Label 'Directory metadata'
Entity 'http://edamontology.org/data_0831' - Label 'MeSH vocabulary'
Entity 'http://edamontology.org/data_0831' - Label 'MeSH vocabulary'
Entity 'http://edamontology.org/data_0832' - Label 'HGNC vocabulary'
Entity 'http://edamontology.org/data_0832' - Label 'HGNC vocabulary'
Entity 'http://edamontology.org/data_0835' - Label 'UMLS vocabulary'
Entity 'http: