diff --git a/CHANGELOG.md b/CHANGELOG.md index ba6b771e..f36a7ae7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ This changelog was started for release 4.2.0. - Added 'custom distance' option for faldo relation (instead of just 'included_in' and 'overlap_with') - Store 'version' value when storing results. Not used for now, but might be used in deprecation warnings later - Removed some lines from coverage computation +- Added 'indirect relations': Two entities will be linked on the graph, without a direct relation between them (same as faldo relations). This is intended to be used with 'linked attributes' (ex: get all entities B where B.color is the same as A.color, without a direct relation between A and B) ### Fixed @@ -51,6 +52,7 @@ This changelog was started for release 4.2.0. - 'Same strand' queries will now match 'BothStrand' with a forward or reverse strand - Use '+', '-' and '.' for strand values in CSV instead of raw value (for homogenization) - Now allows 'infinite' recursive blocks (ie, a Minus block inside a Union block, or the opposite) +- Overhaul faldo relation: add 'direct shortcut' triples between entities and values, to avoid using slow property paths when using faldo queries ### Security diff --git a/askomics/api/file.py b/askomics/api/file.py index 6a92ebb3..69cd82e4 100644 --- a/askomics/api/file.py +++ b/askomics/api/file.py @@ -410,7 +410,7 @@ def get_column_types(): types: list of available column types """ - data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "label"] + data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "indirect_relation", "label"] return jsonify({ "types": data diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index dc4d6512..dc2aba3e 100644 --- a/askomics/libaskomics/BedFile.py +++ 
b/askomics/libaskomics/BedFile.py @@ -172,7 +172,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri(feature.chrom)] faldo_reference = attribute self.faldo_abstraction["reference"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "reference" not in attribute_list: attribute_list.append("reference") @@ -195,7 +195,7 @@ def generate_rdf_content(self): attribute = rdflib.Literal(self.convert_type(feature.start + 1)) # +1 because bed is 0 based faldo_start = attribute self.faldo_abstraction["start"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "start" not in attribute_list: attribute_list.append("start") @@ -212,7 +212,7 @@ def generate_rdf_content(self): attribute = rdflib.Literal(self.convert_type(feature.end)) faldo_end = attribute self.faldo_abstraction["end"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "end" not in attribute_list: attribute_list.append("end") @@ -233,7 +233,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri("+")] faldo_strand = self.get_faldo_strand("+") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "+" elif feature.strand == "-": @@ -242,7 +242,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri("-")] faldo_strand = self.get_faldo_strand("-") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "-" else: @@ -251,7 +251,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri(".")] faldo_strand = self.get_faldo_strand(".") 
self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "." @@ -271,7 +271,7 @@ def generate_rdf_content(self): if feature.score != '.': relation = self.namespace_data[self.format_uri("score")] attribute = rdflib.Literal(self.convert_type(feature.score)) - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "score" not in attribute_list: attribute_list.append("score") @@ -283,6 +283,8 @@ def generate_rdf_content(self): "range": rdflib.XSD.decimal }) + # Triples respecting faldo ontology + location = BNode() begin = BNode() end = BNode() @@ -297,14 +299,23 @@ def generate_rdf_content(self): self.graph_chunk.add((begin, self.faldo.position, faldo_start)) self.graph_chunk.add((end, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((end, self.faldo.position, faldo_end)) + self.graph_chunk.add((end, self.faldo.end, faldo_end)) self.graph_chunk.add((begin, self.faldo.reference, faldo_reference)) - self.graph_chunk.add((end, self.faldo.reference, faldo_reference)) if faldo_strand: - self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) - self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((begin, self.faldo.strand, faldo_strand)) + + # Shortcut triple for faldo queries + self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) + self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) + + if faldo_strand: + self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand)) + strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, None) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) # blocks block_base = 
self.settings.getint("triplestore", "block_size") diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index 6e7450cb..ff142f71 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -416,8 +416,9 @@ def set_rdf_abstraction(self): blank = BNode() # Relation - if self.columns_type[index] in ('general_relation', 'symetric_relation'): + if self.columns_type[index] in ('general_relation', 'symetric_relation', 'indirect_relation'): symetric_relation = True if self.columns_type[index] == 'symetric_relation' else False + indirect_relation = True if self.columns_type[index] == 'indirect_relation' else False splitted = attribute_name.split('@') attribute = self.rdfize(splitted[0]) @@ -439,6 +440,8 @@ def set_rdf_abstraction(self): if symetric_relation: self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, rdf_range)) self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, entity)) + if indirect_relation: + self.graph_abstraction_dk.add((blank, self.namespace_internal["isIndirectRelation"], rdflib.Literal("true", datatype=rdflib.XSD.boolean))) continue @@ -597,6 +600,10 @@ def generate_rdf_content(self): if current_type == "label" and column_number == 1: continue + # We ignore all data for indirect relations + if current_type == "indirect_relation": + continue + # Skip entity and blank cells if column_number == 0 or (not cell and not current_type == "strand"): continue @@ -675,6 +682,9 @@ def generate_rdf_content(self): self.graph_chunk.add((attribute, relation, entity)) if self.faldo_entity and faldo_start and faldo_end: + + # Triples respecting faldo ontology + location = BNode() begin_node = BNode() end_node = BNode() @@ -699,6 +709,19 @@ def generate_rdf_content(self): self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand)) self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand)) + # Shortcut triple for faldo queries + self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], 
faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) + if faldo_reference: + self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) + if faldo_strand: + strand_ref = self.get_reference_strand_uri(reference, faldo_strand, None) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) + + if faldo_strand: + self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand)) + # blocks block_base = self.settings.getint("triplestore", "block_size") block_start = int(start) // block_base diff --git a/askomics/libaskomics/File.py b/askomics/libaskomics/File.py index 6bfda7d6..3ab01700 100644 --- a/askomics/libaskomics/File.py +++ b/askomics/libaskomics/File.py @@ -458,12 +458,17 @@ def get_faldo_strand_label(self, raw_strand): return "." - def get_reference_strand_uri(self, reference, strand, block): + def get_reference_strand_uri(self, reference, strand, block=None): faldo_dict = { self.faldo.ForwardStrandPosition: "ForwardStrand", self.faldo.ReverseStrandPosition: "ReverseStrand", self.faldo.BothStrandPosition: "BothStrand" } + if block is not None: + if strand == self.faldo.BothStrandPosition: + return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()] + return [self.rdfize(self.format_uri("{}_s{}".format(reference, faldo_dict[strand])))] + if strand == self.faldo.BothStrandPosition: return [self.rdfize(self.format_uri("{}_s{}_{}".format(reference, dstrand, block))) for dstrand in faldo_dict.values()] diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index d6ba863a..dcadac28 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -388,6 +388,8 @@ def generate_rdf_content(self): self.graph_chunk.add((entity, relation, attribute)) + # Triples respecting faldo ontology + location = BNode() begin = BNode() end = 
BNode() @@ -411,6 +413,17 @@ def generate_rdf_content(self): self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) + # Shortcut triple for faldo queries + self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) + self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) + + if faldo_strand: + self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand)) + strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, None) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) + # blocks block_base = self.settings.getint("triplestore", "block_size") block_start = int(self.convert_type(feature.location.start)) // block_base diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index fbae2b45..a2ee56d4 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1261,21 +1261,22 @@ def build_query_from_json(self, preview=False, for_editor=False): elif link["sameStrand"]: block_uri = "includeInStrand" - self.store_triple({ - "subject": source, - "predicate": "askomics:{}".format(block_uri), - "object": common_block, - "optional": False + if link["uri"] in ('included_in', 'overlap_with'): + self.store_triple({ + "subject": source, + "predicate": "askomics:{}".format(block_uri), + "object": common_block, + "optional": False - }, block_id, sblock_id, pblock_ids, depth) + }, block_id, sblock_id, pblock_ids, depth) - self.store_triple({ - "subject": target, - "predicate": "askomics:{}".format(block_uri), - "object": common_block, - "optional": False + self.store_triple({ + "subject": target, + "predicate": "askomics:{}".format(block_uri), + "object": common_block, + "optional": False - }, block_id, sblock_id, pblock_ids, 
depth) + }, block_id, sblock_id, pblock_ids, depth) equal_sign = "" if link["strict"] else "=" @@ -1297,6 +1298,57 @@ def build_query_from_json(self, preview=False, for_editor=False): equalsign=equal_sign ), block_id, sblock_id, pblock_ids, depth) else: + if link["sameRef"]: + if link['sameStrand']: + self.store_triple({ + "subject": source, + "predicate": "askomics:referenceStrand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + self.store_triple({ + "subject": target, + "predicate": "askomics:referenceStrand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + else: + self.store_triple({ + "subject": source, + "predicate": "askomics:faldoReference", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + self.store_triple({ + "subject": target, + "predicate": "askomics:faldoReference", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + elif link["sameStrand"]: + self.store_triple({ + "subject": source, + "predicate": "askomics:faldoStrand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + self.store_triple({ + "subject": target, + "predicate": "askomics:faldoStrand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + for filter in link.get('faldoFilters', []): modifier_string = "" if filter['filterValue']: @@ -1330,7 +1382,8 @@ def build_query_from_json(self, preview=False, for_editor=False): "optional": False } - self.store_triple(triple, block_id, sblock_id, pblock_ids, depth) + if not link.get('indirect', False): + self.store_triple(triple, block_id, sblock_id, pblock_ids, depth) # Store linked attributes for attribute in self.json["attr"]: diff --git a/askomics/libaskomics/TriplestoreExplorer.py b/askomics/libaskomics/TriplestoreExplorer.py index ced8a54e..b62b5131 100644 --- 
a/askomics/libaskomics/TriplestoreExplorer.py +++ b/askomics/libaskomics/TriplestoreExplorer.py @@ -519,7 +519,7 @@ def get_abstraction_relations(self, single_tenant=False): query_builder = SparqlQuery(self.app, self.session) query = ''' - SELECT DISTINCT ?graph ?entity_uri ?entity_faldo ?entity_label ?attribute_uri ?attribute_faldo ?attribute_label ?attribute_range ?property_uri ?property_faldo ?property_label ?range_uri ?category_value_uri ?category_value_label + SELECT DISTINCT ?graph ?entity_uri ?entity_faldo ?entity_label ?attribute_uri ?attribute_faldo ?attribute_label ?attribute_range ?property_uri ?property_faldo ?property_label ?range_uri ?category_value_uri ?category_value_label ?indirect_relation WHERE {{ # Graphs ?graph askomics:public ?public . @@ -530,6 +530,7 @@ def get_abstraction_relations(self, single_tenant=False): ?node a askomics:AskomicsRelation . ?node rdfs:label ?property_label . ?node rdfs:range ?range_uri . + OPTIONAL {{ ?node askomics:isIndirectRelation ?indirect_relation . 
}} # Retrocompatibility OPTIONAL {{?node askomics:uri ?new_property_uri}} BIND( IF(isBlank(?node), ?new_property_uri, ?node) as ?property_uri) @@ -563,7 +564,8 @@ def get_abstraction_relations(self, single_tenant=False): "label": result["property_label"], "graphs": [result["graph"], ], "source": result["entity_uri"], - "target": result["range_uri"] + "target": result["range_uri"], + "indirect": result.get("indirect_relation", False) } relations.append(relation) else: diff --git a/askomics/react/src/routes/integration/csvtable.jsx b/askomics/react/src/routes/integration/csvtable.jsx index dddd55ba..9e4270fa 100644 --- a/askomics/react/src/routes/integration/csvtable.jsx +++ b/askomics/react/src/routes/integration/csvtable.jsx @@ -129,6 +129,7 @@ export default class CsvTable extends Component { + {ontoInput} @@ -158,6 +159,7 @@ export default class CsvTable extends Component { + {ontoInput} diff --git a/askomics/react/src/routes/query/query.jsx b/askomics/react/src/routes/query/query.jsx index b2263471..e28ab09f 100644 --- a/askomics/react/src/routes/query/query.jsx +++ b/askomics/react/src/routes/query/query.jsx @@ -605,7 +605,8 @@ export default class Query extends Component { selected: false, suggested: true, directed: true, - faldoFilters: this.defaultFaldoFilters + faldoFilters: this.defaultFaldoFilters, + indirect: relation.indirect }) incrementSpecialNodeGroupId ? specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ @@ -654,7 +655,8 @@ export default class Query extends Component { selected: false, suggested: true, directed: true, - faldoFilters: this.defaultFaldoFilters + faldoFilters: this.defaultFaldoFilters, + indirect: relation.indirect }) incrementSpecialNodeGroupId ? 
specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ @@ -702,7 +704,8 @@ export default class Query extends Component { selected: false, suggested: true, directed: true, - faldoFilters: this.defaultFaldoFilters + faldoFilters: this.defaultFaldoFilters, + indirect: false }) incrementSpecialNodeGroupId ? specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ @@ -744,7 +747,8 @@ export default class Query extends Component { selected: false, suggested: false, directed: link.directed, - faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters + faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters, + indirect: link.indirect ? link.indirect : false } } @@ -764,7 +768,8 @@ export default class Query extends Component { selected: false, suggested: false, directed: link.directed, - faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters + faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters, + indirect: link.indirect ? 
link.indirect : false } } }) @@ -1051,7 +1056,8 @@ export default class Query extends Component { selected: link.selected, suggested: link.suggested, directed: link.directed, - faldoFilters: link.faldoFilters + faldoFilters: link.faldoFilters, + indirect: link.indirect } } }) @@ -1071,7 +1077,8 @@ export default class Query extends Component { target: node2.id, selected: false, suggested: false, - directed: false + directed: false, + indirect: false } this.graphState.links.push(link) } @@ -1646,6 +1653,9 @@ export default class Query extends Component { if (!link.faldoFilters) { link.faldoFilters = this.defaultFaldoFilters } + if (!link.indirect){ + link.indirect = false + } }) this.graphState.nodes.map(node => { if (!node.depth) { diff --git a/askomics/react/src/routes/query/visualization.jsx b/askomics/react/src/routes/query/visualization.jsx index c2a62360..68bae8bf 100644 --- a/askomics/react/src/routes/query/visualization.jsx +++ b/askomics/react/src/routes/query/visualization.jsx @@ -30,6 +30,7 @@ export default class Visualization extends Component { this.colorDarkGrey = '#404040' this.colorFirebrick = '#cc0000' this.colorGreen = '#005500FF' + this.colorBlue = "#343aeb" this.lineWidth = 0.5 this.nodeSize = 3 this.blankNodeSize = 1 @@ -168,9 +169,14 @@ export default class Visualization extends Component { // link style link.suggested ? ctx.setLineDash([this.lineWidth, this.lineWidth]) : ctx.setLineDash([]) - let greenArray = ["included_in", "overlap_with"] + let greenArray = ["included_in", "overlap_with", "distance_from"] let unselectedColor = greenArray.indexOf(link.uri) >= 0 || link.type == "ontoLink" ? this.colorGreen : this.colorGrey let unselectedColorText = greenArray.indexOf(link.uri) >= 0 ? this.colorGreen : this.colorDarkGrey + if (link.indirect){ + unselectedColor = this.colorBlue + unselectedColorText = this.colorBlue + } + ctx.strokeStyle = link.selected ? this.colorFirebrick : unselectedColor ctx.fillStyle = link.selected ? 
this.colorFirebrick : greenArray.indexOf(link.uri) >= 0 ? this.colorGreen : this.colorGrey diff --git a/docs/abstraction.md b/docs/abstraction.md index 208ff7ad..4bdb23ca 100644 --- a/docs/abstraction.md +++ b/docs/abstraction.md @@ -1,9 +1,9 @@ -During integration of TSV/CSV, GFF and BED files, AskOmics create RDF triples that describe the data. This set of triple are called *Abstraction*. *Abstraction* is a set of RDF triples who describes the data. This triples define *Entities*, *Attributes* and *Relations*. Abstraction is used to build the *Query builder*. +During integration of TSV/CSV, GFF and BED files, AskOmics creates RDF triples that describe the data. This set of triples is called the *Abstraction*. These triples define *Entities*, *Attributes* and *Relations*. The abstraction is used to build the *Query builder*. -Raw RDF can be integrated into AskOmics. In this case, abstraction have to be built manually. The following documentation explain how to write manually write an AskOmics abstraction in turtle format. +Raw RDF can be integrated into AskOmics. In this case, the abstraction has to be built manually. The following documentation explains how to manually write an AskOmics abstraction in turtle format. !!! warning - Starting from 4.4, attributes & relations are defined using blank nodes, to avoid overriding information + Starting from 4.4, attributes & relations are defined using blank nodes, to avoid overriding information. They are linked to the correct node using askomics:uri. # Namespaces @@ -85,7 +85,6 @@ _:blank rdfs:domain :EntityName . _:blank rdfs:range :category_attributeCategory . _:blank askomics:uri :category_attribute_uri - :category_attributeCategory askomics:category :value_1 . :category_attributeCategory askomics:category :value_2 . @@ -100,7 +99,7 @@ _:blank askomics:uri :category_attribute_uri [FALDO](https://bioportal.bioontology.org/ontologies/FALDO) is a simple ontology to describe sequence feature positions and regions. 
AskOmics can use FALDO to describe this kind of entities. GFF, BED and some CSV/TSV are converted with FALDO. -A FALDO entity have to be declared as FALDO on the abstraction. If attribute are decribed as FALDO in the abstractio, The data triples have to use FALDO to describe the data. +A FALDO entity has to be declared as FALDO in the abstraction. If attributes are described as FALDO in the abstraction, the data triples have to use FALDO to describe the data. ```turtle :FaldoEntity rdf:type askomics:entity . @@ -112,6 +111,19 @@ A FALDO entity have to be declared as FALDO on the abstraction. If attribute are Four FALDO attributes are supported by AskOmics: reference, strand, start and end. +!!! warning + AskOmics expects FALDO entities to follow the FALDO ontology for triple definition. Ex: + ```turtle + # Reference + :Entity faldo:location/faldo:begin/faldo:reference "value" + # Strand + :Entity faldo:location/faldo:begin/rdf:type "value" + # Start + :Entity faldo:location/faldo:begin/faldo:position "value" + # End + :Entity faldo:location/faldo:end/faldo:position "value" + ``` + ### faldo:reference A faldo:reference attribute derive from a Category attribute. @@ -124,6 +136,17 @@ _:blank rdfs:label "reference_attribute" . _:blank rdfs:domain :EntityName . _:blank rdfs:range :reference_attributeCategory. _:blank askomics:uri :reference_attribute + + +:reference_attributeCategory askomics:category :value_1 . +:reference_attributeCategory askomics:category :value_2 . + +:value_1 rdf:type :reference_attributeCategoryValue . +:value_1 rdfs:label "value_1" . + +:value_2 rdf:type :reference_attributeCategoryValue . +:value_2 rdfs:label "value_2" . + ``` ### faldo:strand @@ -136,10 +159,22 @@ _:blank rdf:type askomics:AskomicsCategory . _:blank rdf:type owl:ObjectProperty . _:blank rdfs:label "strand_attribute" . _:blank rdfs:domain :EntityName . -_:blank rdfs:range :strand_attributeCategory. +_:blank rdfs:range :strand_attributeCategory . 
_:blank askomics:uri :strand_attribute + +:strand_attributeCategory askomics:category faldo:ForwardStrandPosition . +:strand_attributeCategory askomics:category faldo:ReverseStrandPosition . + +faldo:ForwardStrandPosition rdf:type :strand_attributeCategoryValue . +faldo:ForwardStrandPosition rdfs:label "+" . + +faldo:ReverseStrandPosition rdf:type :strand_attributeCategoryValue . +faldo:ReverseStrandPosition rdfs:label "-" . ``` +!!! note "Info" + For homogeneity with GFF and BED integration, it's better to use '+', '-' or '.' as the strand label. + ### faldo:start and faldo:end faldo:start and faldo:end are numeric attributes. @@ -162,6 +197,37 @@ _:blank rdfs:range xsd:decimal . _:blank askomics:uri :end_attribute ``` +### *Shortcut* faldo triples + +The default faldo ontology uses a chain of triples to describe the position (ex, faldo:location/faldo:begin/faldo:position). +This makes *faldo queries* (included_in/overlap_with/distance_from) extremely slow. To improve query time, AskOmics can use 'shortcut triples', direct relations between the Entity and the reference/strand, to quickly filter entities on the same reference/strand/both. For example: + +```turtle +:EntityName askomics:faldoReference reference_uri . +:EntityName askomics:faldoBegin begin_value . +:EntityName askomics:faldoEnd end_value . +:EntityName askomics:faldoStrand strand_uri . +:EntityName askomics:referenceStrand reference_strand_uri . +``` + +To improve query times further, AskOmics will break down the entity genomic position into blocks (the block size is defined in the configuration file). +This improves query time by filtering all entities having 'common blocks'. Each entity will span at least two blocks. Additional blocks will be created to include the reference and the strand. +For instance: + +```turtle +:EntityName askomics:includeIn block1_uri . +:EntityName askomics:includeIn block2_uri . +:EntityName askomics:includeInReference block1_reference_uri . 
+:EntityName askomics:includeInReference block2_reference_uri . +:EntityName askomics:includeInReferenceStrand block1_reference_strand_uri . +:EntityName askomics:includeInReferenceStrand block2_reference_strand_uri . +:EntityName askomics:includeInStrand block1_strand_uri . +:EntityName askomics:includeInStrand block2_strand_uri . +``` + +!!! note "Info" + When using 'BothStrand', make sure to add 'ForwardStrandPosition' and 'ReverseStrandPosition' to these additional triples, or they won't be matched on the 'same strand' query. + # Relations Entities are linked between them with relations. Relations are displayed with arrows between nodes on the query builder. The following turtle explain how relations are described. To avoid overwriting information, relations are described using a blank node. The relation `:RelationExample`, linking `EntitySource` to `EntityTarget`, with the label *relation_example*, will be defined as follows: @@ -178,6 +244,10 @@ _:blank dcat:endpointURL . _:blank dcat:dataset . ``` +!!! note "Info" + If defining an 'indirect relation', you can add a `_:blank askomics:isIndirectRelation true` triple. + + # Federation To describe a remote dataset, you can either fill out the "Distant endpoint" and optionally the "Distant graph" fields when integrating an RDF dataset, or you could add description triples in your dataset, as follows: diff --git a/tests/results/abstraction.json b/tests/results/abstraction.json index cd05e8cb..c29f2383 100644 --- a/tests/results/abstraction.json +++ b/tests/results/abstraction.json @@ -575,6 +575,7 @@ "graphs": [ "urn:sparql:askomics_test:1_jdoe:gene.gff3_###GFF_TIMESTAMP###" ], + "indirect": false, "label": "Parent", "source": "http://askomics.org/test/data/transcript", "target": "http://askomics.org/test/data/gene",