# Loading Data

In [1]:
from rdflib import ConjunctiveGraph

In [2]:
# Start from an empty graph and fill it with the contents of ebaii.ttl.
kg = ConjunctiveGraph()
kg.parse(source="ebaii.ttl")

# Report the graph size as a quick check that parsing produced triples.
print(f"Loaded {len(kg)} triples")

Loaded 3088 triples


In [3]:
# List every (entity, type) pair found in the graph.
# No PREFIX is declared for rdf: here; the query relies on the graph already knowing it.
q = """
SELECT * WHERE {
    ?x rdf:type ?t .
}
"""

res = kg.query(q)

for row in res:
    entity, rdf_class = row["x"], row["t"]
    print(f"{entity} has type {rdf_class}")

https://academic.oup.com/bioinformatics/article/25/14/1754/225615 has type http://schema.org/ScholarlyArticle
n66fbc1fde9dc4768bc50d4d992339b51b160 has type http://schema.org/ScholarlyArticle
n66fbc1fde9dc4768bc50d4d992339b51b210 has type http://schema.org/ScholarlyArticle
n66fbc1fde9dc4768bc50d4d992339b51b294 has type http://schema.org/ScholarlyArticle
n66fbc1fde9dc4768bc50d4d992339b51b326 has type http://schema.org/ScholarlyArticle
n66fbc1fde9dc4768bc50d4d992339b51b1 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b2 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b39 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b40 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b41 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b42 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b43 has type http://schema.org/Person
n66fbc1fde9dc4768bc50d4d992339b51b44 has type http://schema.org/Per

In [4]:
# List every entity whose type is schema.org Event.
q = """
PREFIX sc: <http://schema.org/> 

SELECT * WHERE {
    ?x rdf:type sc:Event .
}
"""

res = kg.query(q)

for row in res:
    event = row["x"]
    print(f"{event} has type Event")

https://tess.elixir-europe.org/events/12eme-ecole-de-bioinformatique-aviesan-ifb-inserm has type Event
n66fbc1fde9dc4768bc50d4d992339b51b125 has type Event
n66fbc1fde9dc4768bc50d4d992339b51b126 has type Event
n66fbc1fde9dc4768bc50d4d992339b51b141 has type Event
n66fbc1fde9dc4768bc50d4d992339b51b142 has type Event


In [5]:
# Dump every (property, object) pair attached to one specific event node.
# The subject IRI is fixed in the query; only ?p and ?o are returned.
q = """
PREFIX sc: <http://schema.org/> 

SELECT * WHERE {
    <https://tess.elixir-europe.org/events/12eme-ecole-de-bioinformatique-aviesan-ifb-inserm> ?p ?o .
}
"""

res = kg.query(q)

# Rows are printed raw, so the rdflib term types (URIRef / Literal) stay visible.
for predicate_object in res:
    print(predicate_object)

(rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://schema.org/Event'))
(rdflib.term.URIRef('http://ogp.me/ns#description'), rdflib.term.Literal('<p>La formation s’adresse à des biologistes directement impliqués dans des projets “Next Generation Sequencing” (NGS). Cette édition de l’école aborde les nouveaux enjeux technologiques: elle s’articulera autour de quatres ateliers thématiques en session parallèle (RNA-seq, ChIP-seq/ATAC-seq, variants DNA-seq, single-cell), et inclura une introduction à l’intégration des données, une ouverture aux technologies “long reads”.</p>\n\n<p>L’école vise à introduire les concepts et à manipuler les outils informatiques et à en interpréter les résultats. Elle est basée sur une alternance de courtes sessions théoriques et d’ateliers pratiques. Les participants bénéficieront d’un tutorat personnalisé pour élaborer leur plan d’analyse, et effectuer les premières étapes de traitement de leurs propres données 

In [6]:
# List the name of every Event together with its start date.
# The start date sits in an OPTIONAL clause, so events without one are still
# returned; their ?d is unbound and shows up as None in the printed line.
q = """
PREFIX sc: <http://schema.org/> 

SELECT DISTINCT * WHERE {
    ?x rdf:type sc:Event ;
       sc:name ?name .
    OPTIONAL {?x sc:startDate ?d} .
}
"""

res = kg.query(q)

for row in res:
    event_name, start_date = row["name"], row["d"]
    print(f"{event_name} is an Event, started at {start_date}")

12ème Ecole de Bioinformatique AVIESAN-IFB-Inserm is an Event, started at 2023-11-05 00:00:00 UTC
Docker and Singularity for Reproducible Research: Getting Started with Containers is an Event, started at None
Docker and Singularity for Reproducible Research: Getting Started with Containers is an Event, started at None
Docker and Singularity for Reproducible Research: Getting Started with Containers is an Event, started at None
Docker and Singularity for Reproducible Research: Getting Started with Containers is an Event, started at None


# Use EDAM to filter KG

In [7]:
!wget https://edamontology.org/EDAM_1.25.owl

--2024-01-19 17:24:25--  https://edamontology.org/EDAM_1.25.owl
Résolution de edamontology.org (edamontology.org)… 129.177.16.242
Connexion à edamontology.org (edamontology.org)|129.177.16.242|:443… connecté.
requête HTTP transmise, en attente de la réponse… 200 OK
Taille : 3239950 (3,1M) [application/rdf+xml]
Sauvegarde en : « EDAM_1.25.owl.1 »


2024-01-19 17:24:26 (6,67 MB/s) — « EDAM_1.25.owl.1 » sauvegardé [3239950/3239950]



In [8]:
# Merge the EDAM ontology into the graph that already holds the ebaii data, so
# later queries can join the annotations with EDAM's class hierarchy.
# The graph is not empty at this point: remember its size so the message can
# report what this parse contributed rather than the cumulative total.
triples_before = len(kg)

# The file is served as application/rdf+xml; state the format explicitly
# instead of relying on it being guessed from the ".owl" extension.
kg.parse("EDAM_1.25.owl", format="xml")

triples_total = len(kg)
print(f"Added {triples_total - triples_before} triples from EDAM ({triples_total} triples in total)")

Loaded 39972 triples


In [9]:
# List the labels of the direct sub-classes of EDAM topic_3361 (Laboratory techniques).
# The pattern commented out inside the query targets topic_3168 (Sequencing) instead;
# swap the two lines to explore that branch.
q = """
SELECT * WHERE {
    #?x rdfs:subClassOf <http://edamontology.org/topic_3168> .
    ?x rdfs:subClassOf <http://edamontology.org/topic_3361> .
    ?x rdfs:label ?class_label
}
"""

res = kg.query(q)

for row in res:
    label = row["class_label"]
    print(f"{label}")

Sequencing
Imaging
Genotyping experiment
Microarray experiment
PCR experiment
Proteomics experiment
RNAi experiment
Simulation experiment
Immunoprecipitation experiment
Cytometry
Chromosome conformation capture
Protein interaction experiment


In [10]:
# List the tools whose application sub-category is an EDAM topic located under
# topic_3361 (Laboratory Technique) or topic_3391 (Omics).
# subClassOf* is a zero-or-more path, so each root topic matches itself as well.
# ?truc is whatever resource points to the same topic through sc:url.
q = """
PREFIX sc: <http://schema.org/> 

SELECT * WHERE {
    {
        ?x rdfs:subClassOf* <http://edamontology.org/topic_3361> .
        ?x rdfs:label ?class_label .
    } UNION {
        ?x rdfs:subClassOf* <http://edamontology.org/topic_3391> .
        ?x rdfs:label ?class_label .
    }
    
    ?tool sc:applicationSubCategory ?x .
    ?truc sc:url ?x .
}
"""

res = kg.query(q)

# Print the whole ?tool column first, then the whole ?truc column.
for column in ("tool", "truc"):
    for row in res:
        print(f"{row[column]}")

https://bio.tools/fastqc
https://bio.tools/macs
https://bio.tools/bowtie2
http://edamontology.org/topic_3168
http://edamontology.org/topic_3169
http://edamontology.org/topic_0622


In [19]:
# List the entities linked through sc:url or sc:about to an IRI containing
# 'topic_', together with their type and, when present, their name.
# sc:name is OPTIONAL, so an entity without a name is printed with [None].
q = """
PREFIX sc: <http://schema.org/> 

SELECT DISTINCT ?truc ?name ?t WHERE {
    VALUES ?p {sc:url sc:about} .
    
    ?truc ?p ?x .
    FILTER (regex(str(?x),'topic_')) .
    ?truc rdf:type ?t 
    
    OPTIONAL {?truc sc:name ?name}
}
"""

res = kg.query(q)

for row in res:
    entity, label, rdf_class = row["truc"], row["name"], row["t"]
    print(f"{entity} [{label}] is a {rdf_class}")

http://edamontology.org/topic_3173 [None] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3697 [None] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_0089 [Ontology and terminology] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_0199 [Genetic variation] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3063 [Medical informatics] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3170 [RNA-Seq] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3673 [Whole genome sequencing] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_0622 [Genomics] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3168 [Sequencing] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3169 [ChIP-seq] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_3174 [Metagenomics] is a http://schema.org/DefinedTerm
http://edamontology.org/topic_0091 [Bioinformatics] is a http

# Hands-on

Find all persons (and their names, when available) who are authors of tools or learning resources annotated with an EDAM topic.