JBrowse on Sparqlify on Chado
rbuels edited this page Nov 14, 2014
·
2 revisions
by Robert Buels
To drive my explorations in running a SPARQL endpoint on top of a relational database, I set out to build a JBrowse instance that browses genomic annotations stored inside a GMOD Chado database. But not just any Chado database: this one would be published as a SPARQL endpoint and queried by JBrowse using its SPARQL data backend (a product of the 2012 and 2013 BioHackathons).
Here is the more-or-less working technology and configuration stack I eventually arrived at:
- Tomato genome data (ITAG2.4 release)
- Loaded into a Chado schema inside PostgreSQL
- Exposed as a SPARQL endpoint using Sparqlify with the mapping configuration shown below
- Viewed in JBrowse using the JBrowse configuration shown after the mapping
Prefix ex: <http://ex.org/>
Prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
Prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
Prefix xsd: <http://www.w3.org/2001/XMLSchema#>
Prefix chado: <http://biohackathon.org/chado/>
// Publishes every row of the `feature` table as a chado:feature resource.
// Each resource is labelled with the row's `name` column and is additionally
// typed by the cvterm resource that its `type_id` column refers to.
CREATE VIEW feature AS
  CONSTRUCT {
    ?feature a chado:feature .
    ?feature a ?type .
    ?feature rdfs:label ?label .
  }
  WITH
    ?feature = uri( chado:feature, ?feature_id )
    ?type    = uri( chado:cvterm, ?type_id )
    ?label   = plainLiteral( ?name )
  FROM
    feature
// Publishes every row of the `featureloc` table as a chado:featureloc resource
// carrying its fmin, fmax, strand and source feature, and links the located
// feature to that resource through chado:located_at.
// NOTE(review): fmin, fmax and strand are emitted as plain (untyped) literals;
// confirm that consumers comparing them numerically cope with that.
CREATE VIEW featureloc AS
  CONSTRUCT {
    ?loc a chado:featureloc .
    ?loc chado:fmin ?min .
    ?loc chado:fmax ?max .
    ?loc chado:source_feature ?source .
    ?loc chado:strand ?direction .
    ?located chado:located_at ?loc .
  }
  WITH
    ?loc       = uri( chado:featureloc, ?featureloc_id )
    ?min       = plainLiteral( ?fmin )
    ?max       = plainLiteral( ?fmax )
    ?source    = uri( chado:feature, ?srcfeature_id )
    ?direction = plainLiteral( ?strand )
    ?located   = uri( chado:feature, ?feature_id )
  FROM
    featureloc
// Publishes every row of the `cvterm` table as a chado:cvterm resource with
// its name as rdfs:label, its is_obsolete value, and a chado:from_cv link to
// the cv resource identified by the row's `cv_id` column.
CREATE VIEW cvterm AS
  CONSTRUCT {
    ?term a chado:cvterm .
    ?term rdfs:label ?label .
    ?term chado:is_obsolete ?retired .
    ?term chado:from_cv ?vocabulary .
  }
  WITH
    ?term       = uri( chado:cvterm, ?cvterm_id )
    ?label      = plainLiteral( ?name )
    ?retired    = plainLiteral( ?is_obsolete )
    ?vocabulary = uri( chado:cv, ?cv_id )
  FROM
    cvterm
// Publishes every row of the `cv` table as a chado:cv resource labelled with
// the row's `name` column.
CREATE VIEW cv AS
  CONSTRUCT {
    ?vocabulary a chado:cv .
    ?vocabulary rdfs:label ?label .
  }
  WITH
    ?vocabulary = uri( chado:cv, ?cv_id )
    ?label      = plainLiteral( ?name )
  FROM
    cv
// Publishes every row of the `feature_relationship` table as a statement-like
// resource: rdf:subject and rdf:object point at the two feature resources and
// rdf:predicate points at the cvterm resource given by the row's `type_id`.
CREATE VIEW feature_relationship AS
  CONSTRUCT {
    ?link rdf:subject ?child .
    ?link rdf:predicate ?kind .
    ?link rdf:object ?parent .
  }
  WITH
    ?link   = uri( chado:feature_relationship, ?feature_relationship_id )
    ?child  = uri( chado:feature, ?subject_id )
    ?kind   = uri( chado:cvterm, ?type_id )
    ?parent = uri( chado:feature, ?object_id )
  FROM
    feature_relationship
# Track "genes": draws features fetched from the SPARQL endpoint at /sparql.
# The query returns features labelled mRNA or exon that overlap the requested
# region and that have a 'part_of' relationship to a parent feature; the parent
# is reported as parentUniqueID. The {ref}, {start} and {end} placeholders are
# expected to be filled in by JBrowse for the region being displayed.
# The lines of queryTemplate are indented so that the .conf parser reads them
# as continuations of the value. Do not add '#' comment lines inside the value
# (NOTE(review): they appear to terminate a multi-line value; confirm).
[tracks.genes]
label = genes
key = SPARQL Genes
storeClass = JBrowse/Store/SeqFeature/SPARQL
type = JBrowse/View/Track/CanvasFeatures
glyph = JBrowse/View/FeatureGlyph/Segments
urlTemplate = /sparql
queryTemplate =
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
    prefix chado: <http://biohackathon.org/chado/>
    SELECT ?ref ?start ?end ?name ?strand
           ( ?typeLabel as ?type )
           ( ?feature as ?uniqueID )
           ( ?parentFeature as ?parentUniqueID )
    {
      ?feature a chado:feature ;
               rdfs:label ?name ;
               chado:located_at ?floc ;
               a ?typeUri
               .
      ?relationship rdf:subject ?feature ;
                    rdf:predicate ?relType ;
                    rdf:object ?parentFeature
                    .
      ?relType rdfs:label 'part_of' .
      ?parentFeature a chado:feature .
      ?floc chado:fmin ?start ;
            chado:fmax ?end ;
            chado:source_feature ?source_feature ;
            chado:strand ?strand
            .
      ?source_feature rdfs:label ?ref
            .
      ?typeUri rdfs:label ?typeLabel
            .
      FILTER(
         !(?start > {end} || ?end < {start})
         && ?ref = '{ref}'
         && ( ?typeLabel = 'mRNA'
              || ?typeLabel = 'exon'
            )
      )
    }