Skip to content

JBrowse on Sparqlify on Chado

rbuels edited this page Nov 14, 2014 · 2 revisions

by Robert Buels

To drive my explorations in running a SPARQL endpoint on top of a relational database, I set out to build a JBrowse instance that browses genomic annotations stored in a GMOD Chado database. This would not be just any Chado database, though: it would be published as a SPARQL endpoint and queried by JBrowse through its SPARQL data backend (a product of the 2012 and 2013 BioHackathons).

Here is the more-or-less working technology and configuration stack I eventually arrived at:

Sparqlify Mapping Configuration

 Prefix ex: <http://ex.org/>
Prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
Prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
Prefix xsd: <http://www.w3.org/2001/XMLSchema#>
Prefix chado: <http://biohackathon.org/chado/>

// Publishes every row of the `feature` table as a chado:feature resource.
// The row's name becomes its rdfs:label, and its type_id is emitted as an
// additional rdf:type pointing at the corresponding cvterm URI.
CREATE VIEW feature AS
    CONSTRUCT {
        ?feature a chado:feature .
        ?feature a ?type .
        ?feature rdfs:label ?label .
    }
    WITH
        ?type    = uri( chado:cvterm, ?type_id )
        ?label   = plainLiteral( ?name )
        ?feature = uri( chado:feature, ?feature_id )
    FROM
        feature


// Publishes every row of the `featureloc` table as a chado:featureloc resource
// carrying its fmin/fmax coordinates, strand and source feature, and links the
// located feature to it via chado:located_at.
CREATE VIEW featureloc AS
    CONSTRUCT {
        ?floc a chado:featureloc;
           chado:fmin ?fmin ;
           chado:fmax ?fmax ;
           chado:source_feature ?srcfeature ;
           chado:strand ?strand
        .
        ?feature chado:located_at ?floc
        .
    }
    WITH
        ?strand = plainLiteral( ?strand )
        ?srcfeature = uri( chado:feature, ?srcfeature_id )
        ?feature = uri( chado:feature, ?feature_id )
        ?floc = uri( chado:featureloc, ?featureloc_id )
        // Coordinates are emitted as xsd:int rather than plain literals so that
        // range filters comparing them against integers (e.g. ?start > 1000)
        // are evaluated numerically instead of as string/untyped comparisons.
        ?fmin = typedLiteral( ?fmin, xsd:int )
        ?fmax = typedLiteral( ?fmax, xsd:int )
    FROM
        featureloc

// Publishes every row of the `cvterm` table as a chado:cvterm resource with
// its name as rdfs:label, its is_obsolete flag, and a link to the cv
// (controlled vocabulary) row it belongs to.
CREATE VIEW cvterm AS
    CONSTRUCT {
        ?term a chado:cvterm .
        ?term rdfs:label ?label .
        ?term chado:from_cv ?vocabulary .
        ?term chado:is_obsolete ?obsolete .
    }
    WITH
        ?term       = uri( chado:cvterm, ?cvterm_id )
        ?vocabulary = uri( chado:cv, ?cv_id )
        ?label      = plainLiteral( ?name )
        ?obsolete   = plainLiteral( ?is_obsolete )
    FROM
        cvterm

// Publishes every row of the `cv` table as a chado:cv resource labelled with
// the row's name.
CREATE VIEW cv AS
    CONSTRUCT {
        ?vocabulary a chado:cv .
        ?vocabulary rdfs:label ?label .
    }
    WITH
        ?vocabulary = uri( chado:cv, ?cv_id )
        ?label      = plainLiteral( ?name )
    FROM
        cv


// Publishes every row of the `feature_relationship` table as a reified
// statement: the relationship resource points at its subject feature, its
// object feature, and (as rdf:predicate) the cvterm naming the relationship type.
CREATE VIEW feature_relationship AS
    CONSTRUCT {
        ?rel rdf:subject ?subj .
        ?rel rdf:predicate ?reltype .
        ?rel rdf:object ?obj .
    }
    WITH
        ?rel     = uri( chado:feature_relationship, ?feature_relationship_id )
        ?reltype = uri( chado:cvterm, ?type_id )
        ?subj    = uri( chado:feature, ?subject_id )
        ?obj     = uri( chado:feature, ?object_id )
    FROM
        feature_relationship

JBrowse tracks.conf

# JBrowse track "genes" (displayed as "SPARQL Genes"). Features are fetched by
# sending the queryTemplate below to the SPARQL endpoint at /sparql.
# The query returns features that are the rdf:subject of a relationship whose
# predicate is labelled 'part_of', keeps only those whose type label is 'mRNA'
# or 'exon', and reports the relationship's object as ?parentUniqueID.
# {ref}, {start} and {end} are template placeholders; presumably JBrowse fills
# them in with the region being displayed (TODO confirm).
[tracks.genes]
label = genes
key = SPARQL Genes
storeClass = JBrowse/Store/SeqFeature/SPARQL
type = JBrowse/View/Track/CanvasFeatures
glyph = JBrowse/View/FeatureGlyph/Segments
urlTemplate = /sparql
queryTemplate =
 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 prefix xsd: <http://www.w3.org/2001/XMLSchema#>
 prefix chado: <http://biohackathon.org/chado/>
 SELECT ?ref ?start ?end ?name ?strand
      ( ?typeLabel as ?type )
      ( ?feature as ?uniqueID )
      ( ?parentFeature as ?parentUniqueID )
 {
        ?feature a chado:feature ;
                 rdfs:label ?name ;
                 chado:located_at ?floc ;
                 a ?typeUri
        .
        ?relationship rdf:subject ?feature ;
            rdf:predicate ?relType ;
            rdf:object ?parentFeature
        .
        ?relType rdfs:label 'part_of' .
        ?parentFeature a chado:feature .
        ?floc chado:fmin ?start ;
              chado:fmax ?end ;
              chado:source_feature ?source_feature ;
              chado:strand ?strand
        .
        ?source_feature rdfs:label ?ref
        .
        ?typeUri rdfs:label ?typeLabel
        .
        FILTER(
                !(?start > {end} || ?end < {start})
                && ?ref = '{ref}'
                && ( ?typeLabel = 'mRNA'
                     || ?typeLabel = 'exon'
                   )
              )
 }