From f978a92f8d60760fcb038d1e6f6d4af5fe7e67e5 Mon Sep 17 00:00:00 2001 From: mboudet Date: Mon, 7 Aug 2023 11:04:51 +0200 Subject: [PATCH 01/15] Test indirect relations --- askomics/api/file.py | 2 +- askomics/libaskomics/CsvFile.py | 5 ++++- askomics/libaskomics/SparqlQuery.py | 3 ++- askomics/libaskomics/TriplestoreExplorer.py | 6 ++++-- askomics/react/src/routes/integration/csvtable.jsx | 1 + 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/askomics/api/file.py b/askomics/api/file.py index 6a92ebb3..69cd82e4 100644 --- a/askomics/api/file.py +++ b/askomics/api/file.py @@ -410,7 +410,7 @@ def get_column_types(): types: list of available column types """ - data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "label"] + data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "indirect_relation", "label"] return jsonify({ "types": data diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index 6e7450cb..5aba0fca 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -416,8 +416,9 @@ def set_rdf_abstraction(self): blank = BNode() # Relation - if self.columns_type[index] in ('general_relation', 'symetric_relation'): + if self.columns_type[index] in ('general_relation', 'symetric_relation', 'indirect_relation'): symetric_relation = True if self.columns_type[index] == 'symetric_relation' else False + indirect_relation = True if self.columns_type[index] == 'indirect_relation' else False splitted = attribute_name.split('@') attribute = self.rdfize(splitted[0]) @@ -439,6 +440,8 @@ def set_rdf_abstraction(self): if symetric_relation: self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, rdf_range)) self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, entity)) + if indirect_relation: + self.graph_abstraction_dk.add((blank, 
self.namespace_internal["isIndirectRelation"], True)) continue diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index fbae2b45..52e4dc76 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1330,7 +1330,8 @@ def build_query_from_json(self, preview=False, for_editor=False): "optional": False } - self.store_triple(triple, block_id, sblock_id, pblock_ids, depth) + if not link.get('indirect', False): + self.store_triple(triple, block_id, sblock_id, pblock_ids, depth) # Store linked attributes for attribute in self.json["attr"]: diff --git a/askomics/libaskomics/TriplestoreExplorer.py b/askomics/libaskomics/TriplestoreExplorer.py index ced8a54e..b62b5131 100644 --- a/askomics/libaskomics/TriplestoreExplorer.py +++ b/askomics/libaskomics/TriplestoreExplorer.py @@ -519,7 +519,7 @@ def get_abstraction_relations(self, single_tenant=False): query_builder = SparqlQuery(self.app, self.session) query = ''' - SELECT DISTINCT ?graph ?entity_uri ?entity_faldo ?entity_label ?attribute_uri ?attribute_faldo ?attribute_label ?attribute_range ?property_uri ?property_faldo ?property_label ?range_uri ?category_value_uri ?category_value_label + SELECT DISTINCT ?graph ?entity_uri ?entity_faldo ?entity_label ?attribute_uri ?attribute_faldo ?attribute_label ?attribute_range ?property_uri ?property_faldo ?property_label ?range_uri ?category_value_uri ?category_value_label ?indirect_relation WHERE {{ # Graphs ?graph askomics:public ?public . @@ -530,6 +530,7 @@ def get_abstraction_relations(self, single_tenant=False): ?node a askomics:AskomicsRelation . ?node rdfs:label ?property_label . ?node rdfs:range ?range_uri . + OPTIONAL {{ ?node askomics:isIndirectRelation ?indirect_relation . 
}} # Retrocompatibility OPTIONAL {{?node askomics:uri ?new_property_uri}} BIND( IF(isBlank(?node), ?new_property_uri, ?node) as ?property_uri) @@ -563,7 +564,8 @@ def get_abstraction_relations(self, single_tenant=False): "label": result["property_label"], "graphs": [result["graph"], ], "source": result["entity_uri"], - "target": result["range_uri"] + "target": result["range_uri"], + "indirect": result.get("indirect_relation", False) } relations.append(relation) else: diff --git a/askomics/react/src/routes/integration/csvtable.jsx b/askomics/react/src/routes/integration/csvtable.jsx index dddd55ba..cade2eb7 100644 --- a/askomics/react/src/routes/integration/csvtable.jsx +++ b/askomics/react/src/routes/integration/csvtable.jsx @@ -129,6 +129,7 @@ export default class CsvTable extends Component { + {ontoInput} From 950a9bd0eb49dc8ebbacf4da5fc2a314dad245cc Mon Sep 17 00:00:00 2001 From: mboudet Date: Mon, 7 Aug 2023 15:26:43 +0200 Subject: [PATCH 02/15] Fix test & add indirect to link --- askomics/react/src/routes/query/query.jsx | 21 ++++++++++++++------- tests/results/abstraction.json | 1 + 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/askomics/react/src/routes/query/query.jsx b/askomics/react/src/routes/query/query.jsx index b2263471..ef4f1f55 100644 --- a/askomics/react/src/routes/query/query.jsx +++ b/askomics/react/src/routes/query/query.jsx @@ -605,7 +605,8 @@ export default class Query extends Component { selected: false, suggested: true, directed: true, - faldoFilters: this.defaultFaldoFilters + faldoFilters: this.defaultFaldoFilters, + indirect: relation.indirect }) incrementSpecialNodeGroupId ? 
specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ @@ -654,7 +655,8 @@ export default class Query extends Component { selected: false, suggested: true, directed: true, - faldoFilters: this.defaultFaldoFilters + faldoFilters: this.defaultFaldoFilters, + indirect: relation.indirect }) incrementSpecialNodeGroupId ? specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ @@ -702,7 +704,8 @@ export default class Query extends Component { selected: false, suggested: true, directed: true, - faldoFilters: this.defaultFaldoFilters + faldoFilters: this.defaultFaldoFilters, + indirect: relation.indirect }) incrementSpecialNodeGroupId ? specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ @@ -744,7 +747,8 @@ export default class Query extends Component { selected: false, suggested: false, directed: link.directed, - faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters + faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters, + indirect: link.indirect ? link.indirect : false } } @@ -764,7 +768,8 @@ export default class Query extends Component { selected: false, suggested: false, directed: link.directed, - faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters + faldoFilters: link.faldoFilters ? link.faldoFilters : this.defaultFaldoFilters, + indirect: link.indirect ? 
link.indirect : false } } }) @@ -1051,7 +1056,8 @@ export default class Query extends Component { selected: link.selected, suggested: link.suggested, directed: link.directed, - faldoFilters: link.faldoFilters + faldoFilters: link.faldoFilters, + indirect: link.indirect } } }) @@ -1071,7 +1077,8 @@ export default class Query extends Component { target: node2.id, selected: false, suggested: false, - directed: false + directed: false, + indirect: false } this.graphState.links.push(link) } diff --git a/tests/results/abstraction.json b/tests/results/abstraction.json index cd05e8cb..c29f2383 100644 --- a/tests/results/abstraction.json +++ b/tests/results/abstraction.json @@ -575,6 +575,7 @@ "graphs": [ "urn:sparql:askomics_test:1_jdoe:gene.gff3_###GFF_TIMESTAMP###" ], + "indirect": false, "label": "Parent", "source": "http://askomics.org/test/data/transcript", "target": "http://askomics.org/test/data/gene", From 24c4cf053329f4b45d89abea6f07218241ba79e3 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 7 Aug 2023 14:58:32 +0000 Subject: [PATCH 03/15] some fixes --- askomics/libaskomics/CsvFile.py | 6 +++++- askomics/react/src/routes/integration/csvtable.jsx | 1 + askomics/react/src/routes/query/query.jsx | 2 +- askomics/react/src/routes/query/visualization.jsx | 8 +++++++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index 5aba0fca..fb69f566 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -441,7 +441,7 @@ def set_rdf_abstraction(self): self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, rdf_range)) self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, entity)) if indirect_relation: - self.graph_abstraction_dk.add((blank, self.namespace_internal["isIndirectRelation"], True)) + self.graph_abstraction_dk.add((blank, self.namespace_internal["isIndirectRelation"], rdflib.Literal("true", datatype=rdflib.XSD.boolean))) continue @@ -600,6 +600,10 @@ def 
generate_rdf_content(self): if current_type == "label" and column_number == 1: continue + # We ignore all data for indirect relations + if current_type == "indirect_relation": + continue + # Skip entity and blank cells if column_number == 0 or (not cell and not current_type == "strand"): continue diff --git a/askomics/react/src/routes/integration/csvtable.jsx b/askomics/react/src/routes/integration/csvtable.jsx index cade2eb7..9e4270fa 100644 --- a/askomics/react/src/routes/integration/csvtable.jsx +++ b/askomics/react/src/routes/integration/csvtable.jsx @@ -159,6 +159,7 @@ export default class CsvTable extends Component { + {ontoInput} diff --git a/askomics/react/src/routes/query/query.jsx b/askomics/react/src/routes/query/query.jsx index ef4f1f55..f5b96513 100644 --- a/askomics/react/src/routes/query/query.jsx +++ b/askomics/react/src/routes/query/query.jsx @@ -705,7 +705,7 @@ export default class Query extends Component { suggested: true, directed: true, faldoFilters: this.defaultFaldoFilters, - indirect: relation.indirect + indirect: false }) incrementSpecialNodeGroupId ? specialNodeGroupId += 1 : specialNodeGroupId = specialNodeGroupId if (incrementSpecialNodeGroupId){ diff --git a/askomics/react/src/routes/query/visualization.jsx b/askomics/react/src/routes/query/visualization.jsx index c2a62360..68bae8bf 100644 --- a/askomics/react/src/routes/query/visualization.jsx +++ b/askomics/react/src/routes/query/visualization.jsx @@ -30,6 +30,7 @@ export default class Visualization extends Component { this.colorDarkGrey = '#404040' this.colorFirebrick = '#cc0000' this.colorGreen = '#005500FF' + this.colorBlue = "#343aeb" this.lineWidth = 0.5 this.nodeSize = 3 this.blankNodeSize = 1 @@ -168,9 +169,14 @@ export default class Visualization extends Component { // link style link.suggested ? 
ctx.setLineDash([this.lineWidth, this.lineWidth]) : ctx.setLineDash([]) - let greenArray = ["included_in", "overlap_with"] + let greenArray = ["included_in", "overlap_with", "distance_from"] let unselectedColor = greenArray.indexOf(link.uri) >= 0 || link.type == "ontoLink" ? this.colorGreen : this.colorGrey let unselectedColorText = greenArray.indexOf(link.uri) >= 0 ? this.colorGreen : this.colorDarkGrey + if (link.indirect){ + unselectedColor = this.colorBlue + unselectedColorText = this.colorBlue + } + ctx.strokeStyle = link.selected ? this.colorFirebrick : unselectedColor ctx.fillStyle = link.selected ? this.colorFirebrick : greenArray.indexOf(link.uri) >= 0 ? this.colorGreen : this.colorGrey From 8a4539a762d0e086535e65e97ff15473ed61c879 Mon Sep 17 00:00:00 2001 From: mboudet Date: Mon, 7 Aug 2023 17:10:20 +0200 Subject: [PATCH 04/15] Starting to update the documentation --- CHANGELOG.md | 1 + askomics/react/src/routes/query/query.jsx | 3 +++ docs/abstraction.md | 6 +++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba6b771e..8e34d082 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ This changelog was started for release 4.2.0. - Added 'custom distance' option for faldo relation (instead of just 'included_in' and 'overlap_with') - Store 'version' value when storing results. Not used for now, but might be used in deprecation warnings later - Removed some lines from coverage computation +- Added 'indirect relations': Two entities will be linked on the graph, without a direct relation between them (same as faldo relations). 
This is intended to be used with 'linked attributes' (ex: get all entities B where B.color is the same as A.color, without a direct relation between A and B) ### Fixed diff --git a/askomics/react/src/routes/query/query.jsx b/askomics/react/src/routes/query/query.jsx index f5b96513..e28ab09f 100644 --- a/askomics/react/src/routes/query/query.jsx +++ b/askomics/react/src/routes/query/query.jsx @@ -1653,6 +1653,9 @@ export default class Query extends Component { if (!link.faldoFilters) { link.faldoFilters = this.defaultFaldoFilters } + if (!link.indirect){ + link.indirect = false + } }) this.graphState.nodes.map(node => { if (!node.depth) { diff --git a/docs/abstraction.md b/docs/abstraction.md index 208ff7ad..37d41a30 100644 --- a/docs/abstraction.md +++ b/docs/abstraction.md @@ -100,7 +100,7 @@ _:blank askomics:uri :category_attribute_uri [FALDO](https://bioportal.bioontology.org/ontologies/FALDO) is a simple ontology to describe sequence feature positions and regions. AskOmics can use FALDO to describe this kind of entities. GFF, BED and some CSV/TSV are converted with FALDO. -A FALDO entity have to be declared as FALDO on the abstraction. If attribute are decribed as FALDO in the abstractio, The data triples have to use FALDO to describe the data. +A FALDO entity have to be declared as FALDO on the abstraction. If attribute are described as FALDO in the abstraction, The data triples have to use FALDO to describe the data. ```turtle :FaldoEntity rdf:type askomics:entity . @@ -178,6 +178,10 @@ _:blank dcat:endpointURL . _:blank dcat:dataset . ``` +!!! note "Info" + If defining an 'indirect relation', you can add a `_:blank askomics:isIndirectRelation true` triple. 
+ + # Federation To describe a remote dataset, you can either fill out the "Distant endpoint" and optionally the "Distant graph" fields when integrating an RDF dataset, or you could add description triples in your dataset, as follows: From 813f9b0350a436a62170fb7bd31c8ebb2bf7dd24 Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 8 Aug 2023 15:38:30 +0200 Subject: [PATCH 05/15] Testing some stuff for faldo relations --- askomics/libaskomics/BedFile.py | 36 +++++++++------------------ askomics/libaskomics/CsvFile.py | 23 +++++------------ askomics/libaskomics/GffFile.py | 24 +++++------------- askomics/libaskomics/SparqlQuery.py | 6 ++--- docs/abstraction.md | 38 ++++++++++++++++++++++++----- 5 files changed, 59 insertions(+), 68 deletions(-) diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index dc4d6512..2aeb907f 100644 --- a/askomics/libaskomics/BedFile.py +++ b/askomics/libaskomics/BedFile.py @@ -195,7 +195,7 @@ def generate_rdf_content(self): attribute = rdflib.Literal(self.convert_type(feature.start + 1)) # +1 because bed is 0 based faldo_start = attribute self.faldo_abstraction["start"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "start" not in attribute_list: attribute_list.append("start") @@ -212,7 +212,7 @@ def generate_rdf_content(self): attribute = rdflib.Literal(self.convert_type(feature.end)) faldo_end = attribute self.faldo_abstraction["end"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "end" not in attribute_list: attribute_list.append("end") @@ -233,7 +233,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri("+")] faldo_strand = self.get_faldo_strand("+") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = 
"+" elif feature.strand == "-": @@ -242,7 +242,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri("-")] faldo_strand = self.get_faldo_strand("-") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "-" else: @@ -251,7 +251,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri(".")] faldo_strand = self.get_faldo_strand(".") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "." @@ -271,7 +271,7 @@ def generate_rdf_content(self): if feature.score != '.': relation = self.namespace_data[self.format_uri("score")] attribute = rdflib.Literal(self.convert_type(feature.score)) - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "score" not in attribute_list: attribute_list.append("score") @@ -283,28 +283,16 @@ def generate_rdf_content(self): "range": rdflib.XSD.decimal }) - location = BNode() - begin = BNode() - end = BNode() - - self.graph_chunk.add((entity, self.faldo.location, location)) - - self.graph_chunk.add((location, rdflib.RDF.type, self.faldo.region)) - self.graph_chunk.add((location, self.faldo.begin, begin)) - self.graph_chunk.add((location, self.faldo.end, end)) - - self.graph_chunk.add((begin, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((begin, self.faldo.position, faldo_start)) + self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) - self.graph_chunk.add((end, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((end, self.faldo.position, faldo_end)) + self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((entity, 
self.faldo.end, faldo_end)) - self.graph_chunk.add((begin, self.faldo.reference, faldo_reference)) - self.graph_chunk.add((end, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) if faldo_strand: - self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) - self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) # blocks block_base = self.settings.getint("triplestore", "block_size") diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index fb69f566..f0619a8b 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -682,29 +682,18 @@ def generate_rdf_content(self): self.graph_chunk.add((attribute, relation, entity)) if self.faldo_entity and faldo_start and faldo_end: - location = BNode() - begin_node = BNode() - end_node = BNode() - self.graph_chunk.add((entity, self.faldo.location, location)) + self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) - self.graph_chunk.add((location, rdflib.RDF.type, self.faldo.region)) - self.graph_chunk.add((location, self.faldo.begin, begin_node)) - self.graph_chunk.add((location, self.faldo.end, end_node)) - - self.graph_chunk.add((begin_node, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((begin_node, self.faldo.position, faldo_start)) - - self.graph_chunk.add((end_node, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((end_node, self.faldo.position, faldo_end)) + self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((entity, self.faldo.end, faldo_end)) if faldo_reference: - self.graph_chunk.add((begin_node, self.faldo.reference, faldo_reference)) - self.graph_chunk.add((end_node, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((entity, self.faldo.reference, 
faldo_reference)) if faldo_strand: - self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand)) - self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) # blocks block_base = self.settings.getint("triplestore", "block_size") diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index d6ba863a..887fb7a9 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -388,28 +388,16 @@ def generate_rdf_content(self): self.graph_chunk.add((entity, relation, attribute)) - location = BNode() - begin = BNode() - end = BNode() + self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) - self.graph_chunk.add((entity, self.faldo.location, location)) + self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((entity, self.faldo.end, faldo_end)) - self.graph_chunk.add((location, rdflib.RDF.type, self.faldo.region)) - self.graph_chunk.add((location, self.faldo.begin, begin)) - self.graph_chunk.add((location, self.faldo.end, end)) - - self.graph_chunk.add((begin, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((begin, self.faldo.position, faldo_start)) - - self.graph_chunk.add((end, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((end, self.faldo.position, faldo_end)) - - self.graph_chunk.add((begin, self.faldo.reference, faldo_reference)) - self.graph_chunk.add((end, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) if faldo_strand: - self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) - self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) # blocks block_base = self.settings.getint("triplestore", "block_size") diff --git 
a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index 52e4dc76..12c4e62a 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1486,7 +1486,7 @@ def build_query_from_json(self, preview=False, for_editor=False): if attribute["visible"] or Utils.check_key_in_list_of_dict(attribute["filters"], "filterValue") or attribute["id"] in start_end or attribute["id"] in linked_attributes: subject = self.format_sparql_variable("{}{}_uri".format(attribute["entityLabel"], attribute["nodeId"])) if attribute["faldo"]: - predicate = "faldo:location/faldo:{}/faldo:position".format("begin" if attribute["faldo"].endswith("faldoStart") else "end") + predicate = "faldo:{}".format("begin" if attribute["faldo"].endswith("faldoStart") else "end") else: predicate = "<{}>".format(attribute["uri"]) obj = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) @@ -1571,7 +1571,7 @@ def build_query_from_json(self, preview=False, for_editor=False): category_value_uri = self.format_sparql_variable("{}{}_{}Category".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) category_label = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["humanNodeId"], attribute["label"])) if attribute["faldo"] and attribute["faldo"].endswith("faldoReference"): - category_name = 'faldo:location/faldo:begin/faldo:reference' + category_name = 'faldo:reference' self.store_triple({ "subject": node_uri, "predicate": category_name, @@ -1588,7 +1588,7 @@ def build_query_from_json(self, preview=False, for_editor=False): "nested_end": True if attribute["optional"] else False }, block_id, sblock_id, pblock_ids, depth) elif attribute["faldo"] and attribute["faldo"].endswith("faldoStrand"): - category_name = 'faldo:location/faldo:begin/rdf:type' + category_name = 'faldo:strand' self.store_triple({ "subject": node_uri, "predicate": category_name, diff 
--git a/docs/abstraction.md b/docs/abstraction.md index 37d41a30..a1879687 100644 --- a/docs/abstraction.md +++ b/docs/abstraction.md @@ -1,9 +1,9 @@ -During integration of TSV/CSV, GFF and BED files, AskOmics create RDF triples that describe the data. This set of triple are called *Abstraction*. *Abstraction* is a set of RDF triples who describes the data. This triples define *Entities*, *Attributes* and *Relations*. Abstraction is used to build the *Query builder*. +During integration of TSV/CSV, GFF and BED files, AskOmics create RDF triples that describe the data. This set of triple are called *Abstraction*. These triples define *Entities*, *Attributes* and *Relations*. The abstraction is used to build the *Query builder*. -Raw RDF can be integrated into AskOmics. In this case, abstraction have to be built manually. The following documentation explain how to write manually write an AskOmics abstraction in turtle format. +Raw RDF can be integrated into AskOmics. In this case, the abstraction have to be built manually. The following documentation explain how to write manually write an AskOmics abstraction in turtle format. !!! warning - Starting from 4.4, attributes & relations are defined using blank nodes, to avoid overriding information + Starting from 4.4, attributes & relations are defined using blank nodes, to avoid overriding information. They are linked to the correct node using askomics:uri # Namespaces @@ -85,7 +85,6 @@ _:blank rdfs:domain :EntityName . _:blank rdfs:range :category_attributeCategory . _:blank askomics:uri :category_attribute_uri - :category_attributeCategory askomics:category :value_1 . :category_attributeCategory askomics:category :value_2 . @@ -100,7 +99,7 @@ _:blank askomics:uri :category_attribute_uri [FALDO](https://bioportal.bioontology.org/ontologies/FALDO) is a simple ontology to describe sequence feature positions and regions. AskOmics can use FALDO to describe this kind of entities. 
GFF, BED and some CSV/TSV are converted with FALDO. -A FALDO entity have to be declared as FALDO on the abstraction. If attribute are described as FALDO in the abstraction, The data triples have to use FALDO to describe the data. +A FALDO entity have to be declared as FALDO on the abstraction. If attribute are described as FALDO in the abstraction, the data triples have to use FALDO to describe the data. ```turtle :FaldoEntity rdf:type askomics:entity . @@ -110,6 +109,10 @@ A FALDO entity have to be declared as FALDO on the abstraction. If attribute are :FaldoEntity rdfs:label "FaldoEntity" . ``` +!!! warning + Faldo triples should use "faldo:begin", "faldo:end", "faldo:reference" or "faldo:strand" as their relation. + + Four FALDO attributes are supported by AskOmics: reference, strand, start and end. ### faldo:reference @@ -124,6 +127,17 @@ _:blank rdfs:label "reference_attribute" . _:blank rdfs:domain :EntityName . _:blank rdfs:range :reference_attributeCategory. _:blank askomics:uri :reference_attribute + + +:reference_attributeCategory askomics:category :value_1 . +:reference_attributeCategory askomics:category :value_2 . + +:value_1 rdf:type :reference_attributeCategoryValue . +:value_1 rdfs:label "value_1" . + +:value_2 rdf:type :reference_attributeCategoryValue . +:value_2 rdfs:label "value_2" . + ``` ### faldo:strand @@ -136,10 +150,22 @@ _:blank rdf:type askomics:AskomicsCategory . _:blank rdf:type owl:ObjectProperty . _:blank rdfs:label "strand_attribute" . _:blank rdfs:domain :EntityName . -_:blank rdfs:range :strand_attributeCategory. +_:blank rdfs:range :strand_attributeCategory . _:blank askomics:uri :strand_attribute + +:strand_attributeCategory askomics:category faldo:ForwardStrandPosition . +:strand_attributeCategory askomics:category faldo:ReverseStrandPosition . + +faldo:ForwardStrandPosition rdf:type :strand_attributeCategoryValue . +faldo:ForwardStrandPosition rdfs:label "+" . 
+ +faldo:ReverseStrandPosition rdf:type :strand_attributeCategoryValue . +faldo:ReverseStrandPosition rdfs:label "-" . ``` +!!! note "Info" + For homogeneity with GFF and BED integration, it's better to use '+', '-' or '.' as the strand label. + ### faldo:start and faldo:end faldo:start and faldo:end are numeric attributes. From 4ac970f3ec0a04b54770d8f26b03a53ac6ef92dd Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 8 Aug 2023 17:20:52 +0200 Subject: [PATCH 06/15] Actually keep the faldo stuff for compatibility, but add new triples for speed --- askomics/libaskomics/BedFile.py | 25 ++++++++++++++----- askomics/libaskomics/CsvFile.py | 13 ++++++++++ askomics/libaskomics/File.py | 5 ++++ askomics/libaskomics/GffFile.py | 13 ++++++++++ askomics/libaskomics/SparqlQuery.py | 4 +-- docs/abstraction.md | 38 ++++++++++++++++++++++++----- 6 files changed, 84 insertions(+), 14 deletions(-) diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index dc4d6512..7cd04eb6 100644 --- a/askomics/libaskomics/BedFile.py +++ b/askomics/libaskomics/BedFile.py @@ -172,7 +172,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri(feature.chrom)] faldo_reference = attribute self.faldo_abstraction["reference"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "reference" not in attribute_list: attribute_list.append("reference") @@ -195,7 +195,7 @@ def generate_rdf_content(self): attribute = rdflib.Literal(self.convert_type(feature.start + 1)) # +1 because bed is 0 based faldo_start = attribute self.faldo_abstraction["start"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "start" not in attribute_list: attribute_list.append("start") @@ -212,7 +212,7 @@ def generate_rdf_content(self): attribute = rdflib.Literal(self.convert_type(feature.end)) faldo_end = attribute 
self.faldo_abstraction["end"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) if "end" not in attribute_list: attribute_list.append("end") @@ -233,7 +233,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri("+")] faldo_strand = self.get_faldo_strand("+") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "+" elif feature.strand == "-": @@ -242,7 +242,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri("-")] faldo_strand = self.get_faldo_strand("-") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "-" else: @@ -251,7 +251,7 @@ def generate_rdf_content(self): attribute = self.namespace_data[self.format_uri(".")] faldo_strand = self.get_faldo_strand(".") self.faldo_abstraction["strand"] = relation - self.graph_chunk.add((entity, relation, attribute)) + # self.graph_chunk.add((entity, relation, attribute)) strand = True strand_type = "." 
@@ -283,6 +283,8 @@ def generate_rdf_content(self): "range": rdflib.XSD.decimal }) + # Triples respecting faldo ontology + location = BNode() begin = BNode() end = BNode() @@ -306,6 +308,17 @@ def generate_rdf_content(self): self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) + # Shortcut triple for faldo queries + self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) + self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + + if faldo_strand: + self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, None) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) + # blocks block_base = self.settings.getint("triplestore", "block_size") block_start = int(self.convert_type(feature.start + 1)) // block_base diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index fb69f566..0aadae8f 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -706,6 +706,19 @@ def generate_rdf_content(self): self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand)) self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand)) + # Shortcut triple for faldo queries + self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) + self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + if faldo_reference: + self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + if faldo_strand: + strand_ref = self.get_reference_strand_uri(reference, faldo_strand, None) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) + + if faldo_strand: + self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + # blocks block_base = self.settings.getint("triplestore", "block_size") 
block_start = int(start) // block_base diff --git a/askomics/libaskomics/File.py b/askomics/libaskomics/File.py index 6bfda7d6..be02c14f 100644 --- a/askomics/libaskomics/File.py +++ b/askomics/libaskomics/File.py @@ -464,6 +464,11 @@ def get_reference_strand_uri(self, reference, strand, block): self.faldo.ReverseStrandPosition: "ReverseStrand", self.faldo.BothStrandPosition: "BothStrand" } + if not block: + if strand == self.faldo.BothStrandPosition: + return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()] + return [self.rdfize(self.format_uri("{}_s{}".format(reference, faldo_dict[strand])))] + if strand == self.faldo.BothStrandPosition: return [self.rdfize(self.format_uri("{}_s{}_{}".format(reference, dstrand, block))) for dstrand in faldo_dict.values()] diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index d6ba863a..46cfa598 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -388,6 +388,8 @@ def generate_rdf_content(self): self.graph_chunk.add((entity, relation, attribute)) + # Triples respecting faldo ontology + location = BNode() begin = BNode() end = BNode() @@ -411,6 +413,17 @@ def generate_rdf_content(self): self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) + # Shortcut triple for faldo queries + self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) + self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + + if faldo_strand: + self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, None) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) + # blocks block_base = self.settings.getint("triplestore", "block_size") block_start = 
int(self.convert_type(feature.location.start)) // block_base diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index 52e4dc76..20a15128 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1571,7 +1571,7 @@ def build_query_from_json(self, preview=False, for_editor=False): category_value_uri = self.format_sparql_variable("{}{}_{}Category".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) category_label = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["humanNodeId"], attribute["label"])) if attribute["faldo"] and attribute["faldo"].endswith("faldoReference"): - category_name = 'faldo:location/faldo:begin/faldo:reference' + category_name = 'faldo:reference' self.store_triple({ "subject": node_uri, "predicate": category_name, @@ -1588,7 +1588,7 @@ def build_query_from_json(self, preview=False, for_editor=False): "nested_end": True if attribute["optional"] else False }, block_id, sblock_id, pblock_ids, depth) elif attribute["faldo"] and attribute["faldo"].endswith("faldoStrand"): - category_name = 'faldo:location/faldo:begin/rdf:type' + category_name = 'faldo:strand' self.store_triple({ "subject": node_uri, "predicate": category_name, diff --git a/docs/abstraction.md b/docs/abstraction.md index 37d41a30..a1879687 100644 --- a/docs/abstraction.md +++ b/docs/abstraction.md @@ -1,9 +1,9 @@ -During integration of TSV/CSV, GFF and BED files, AskOmics create RDF triples that describe the data. This set of triple are called *Abstraction*. *Abstraction* is a set of RDF triples who describes the data. This triples define *Entities*, *Attributes* and *Relations*. Abstraction is used to build the *Query builder*. +During integration of TSV/CSV, GFF and BED files, AskOmics create RDF triples that describe the data. This set of triple are called *Abstraction*. These triples define *Entities*, *Attributes* and *Relations*. 
The abstraction is used to build the *Query builder*. -Raw RDF can be integrated into AskOmics. In this case, abstraction have to be built manually. The following documentation explain how to write manually write an AskOmics abstraction in turtle format. +Raw RDF can be integrated into AskOmics. In this case, the abstraction has to be built manually. The following documentation explains how to manually write an AskOmics abstraction in turtle format. !!! warning - Starting from 4.4, attributes & relations are defined using blank nodes, to avoid overriding information + Starting from 4.4, attributes & relations are defined using blank nodes, to avoid overriding information. They are linked to the correct node using askomics:uri # Namespaces @@ -85,7 +85,6 @@ _:blank rdfs:domain :EntityName . _:blank rdfs:range :category_attributeCategory . _:blank askomics:uri :category_attribute_uri - :category_attributeCategory askomics:category :value_1 . :category_attributeCategory askomics:category :value_2 . @@ -100,7 +99,7 @@ _:blank askomics:uri :category_attribute_uri [FALDO](https://bioportal.bioontology.org/ontologies/FALDO) is a simple ontology to describe sequence feature positions and regions. AskOmics can use FALDO to describe this kind of entities. GFF, BED and some CSV/TSV are converted with FALDO. -A FALDO entity have to be declared as FALDO on the abstraction. If attribute are described as FALDO in the abstraction, The data triples have to use FALDO to describe the data. +A FALDO entity have to be declared as FALDO on the abstraction. If attribute are described as FALDO in the abstraction, the data triples have to use FALDO to describe the data. ```turtle :FaldoEntity rdf:type askomics:entity . @@ -110,6 +109,10 @@ A FALDO entity have to be declared as FALDO on the abstraction. If attribute are :FaldoEntity rdfs:label "FaldoEntity" . ``` +!!! warning + Faldo triples should use "faldo:begin", "faldo:end", "faldo:reference" or "faldo:strand" as their relation.
+ + Four FALDO attributes are supported by AskOmics: reference, strand, start and end. ### faldo:reference @@ -124,6 +127,17 @@ _:blank rdfs:label "reference_attribute" . _:blank rdfs:domain :EntityName . _:blank rdfs:range :reference_attributeCategory. _:blank askomics:uri :reference_attribute + + +:reference_attributeCategory askomics:category :value_1 . +:reference_attributeCategory askomics:category :value_2 . + +:value_1 rdf:type :reference_attributeCategoryValue . +:value_1 rdfs:label "value_1" . + +:value_2 rdf:type :reference_attributeCategoryValue . +:value_2 rdfs:label "value_2" . + ``` ### faldo:strand @@ -136,10 +150,22 @@ _:blank rdf:type askomics:AskomicsCategory . _:blank rdf:type owl:ObjectProperty . _:blank rdfs:label "strand_attribute" . _:blank rdfs:domain :EntityName . -_:blank rdfs:range :strand_attributeCategory. +_:blank rdfs:range :strand_attributeCategory . _:blank askomics:uri :strand_attribute + +:strand_attributeCategory askomics:category faldo:ForwardStrandPosition . +:strand_attributeCategory askomics:category faldo:ReverseStrandPosition . + +faldo:ForwardStrandPosition rdf:type :strand_attributeCategoryValue . +faldo:ForwardStrandPosition rdfs:label "+" . + +faldo:ReverseStrandPosition rdf:type :strand_attributeCategoryValue . +faldo:ReverseStrandPosition rdfs:label "-" . ``` +!!! note "Info" + For homogeneity with GFF and BED integration, it's better to use '+', '-' or '.' as the strand label. + ### faldo:start and faldo:end faldo:start and faldo:end are numeric attributes. 
From 35f2e5e6a85bf78ec4b5c044f0088016dba2ac91 Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 8 Aug 2023 17:39:37 +0200 Subject: [PATCH 07/15] Some stuff --- askomics/libaskomics/GffFile.py | 19 ++++++++++--- askomics/libaskomics/SparqlQuery.py | 43 +++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index 0f1bd8be..46cfa598 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -394,13 +394,24 @@ def generate_rdf_content(self): begin = BNode() end = BNode() - self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + self.graph_chunk.add((entity, self.faldo.location, location)) - self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((location, rdflib.RDF.type, self.faldo.region)) + self.graph_chunk.add((location, self.faldo.begin, begin)) + self.graph_chunk.add((location, self.faldo.end, end)) + + self.graph_chunk.add((begin, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((begin, self.faldo.position, faldo_start)) + + self.graph_chunk.add((end, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((end, self.faldo.position, faldo_end)) + + self.graph_chunk.add((begin, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((end, self.faldo.reference, faldo_reference)) if faldo_strand: - self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) # Shortcut triple for faldo queries self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index 12c4e62a..c3c0c732 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ 
-1261,21 +1261,22 @@ def build_query_from_json(self, preview=False, for_editor=False): elif link["sameStrand"]: block_uri = "includeInStrand" - self.store_triple({ - "subject": source, - "predicate": "askomics:{}".format(block_uri), - "object": common_block, - "optional": False + if link["uri"] in ('included_in', 'overlap_with'): + self.store_triple({ + "subject": source, + "predicate": "askomics:{}".format(block_uri), + "object": common_block, + "optional": False - }, block_id, sblock_id, pblock_ids, depth) + }, block_id, sblock_id, pblock_ids, depth) - self.store_triple({ - "subject": target, - "predicate": "askomics:{}".format(block_uri), - "object": common_block, - "optional": False + self.store_triple({ + "subject": target, + "predicate": "askomics:{}".format(block_uri), + "object": common_block, + "optional": False - }, block_id, sblock_id, pblock_ids, depth) + }, block_id, sblock_id, pblock_ids, depth) equal_sign = "" if link["strict"] else "=" @@ -1297,6 +1298,24 @@ def build_query_from_json(self, preview=False, for_editor=False): equalsign=equal_sign ), block_id, sblock_id, pblock_ids, depth) else: + if link["sameRef"]: + if link['sameStrand']: + self.store_triple({ + "subject": source, + "predicate": "askomics:referenceStrand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + self.store_triple({ + "subject": target, + "predicate": "askomics:referenceStrand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + for filter in link.get('faldoFilters', []): modifier_string = "" if filter['filterValue']: From 4910f2bf7fb804595618efdab73ab8e8a1225770 Mon Sep 17 00:00:00 2001 From: mboudet Date: Wed, 9 Aug 2023 15:58:51 +0200 Subject: [PATCH 08/15] Typo, doc and some more stuff --- askomics/libaskomics/CsvFile.py | 26 +++++++++++++++++------ askomics/libaskomics/SparqlQuery.py | 33 +++++++++++++++++++++++++++++ docs/abstraction.md | 33 +++++++++++++++++++++++++---- 3 files 
changed, 82 insertions(+), 10 deletions(-) diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index 8ce47725..5b13a930 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -683,17 +683,31 @@ def generate_rdf_content(self): if self.faldo_entity and faldo_start and faldo_end: - self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) + # Triples respecting faldo ontology - self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + location = BNode() + begin_node = BNode() + end_node = BNode() + + self.graph_chunk.add((entity, self.faldo.location, location)) + + self.graph_chunk.add((location, rdflib.RDF.type, self.faldo.region)) + self.graph_chunk.add((location, self.faldo.begin, begin_node)) + self.graph_chunk.add((location, self.faldo.end, end_node)) + + self.graph_chunk.add((begin_node, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((begin_node, self.faldo.position, faldo_start)) + + self.graph_chunk.add((end_node, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((end_node, self.faldo.position, faldo_end)) if faldo_reference: - self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((begin_node, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((end_node, self.faldo.reference, faldo_reference)) if faldo_strand: - self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand)) # Shortcut triple for faldo queries self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index c3c0c732..56a3ae6e 100644 --- a/askomics/libaskomics/SparqlQuery.py 
+++ b/askomics/libaskomics/SparqlQuery.py @@ -1315,6 +1315,39 @@ def build_query_from_json(self, preview=False, for_editor=False): "optional": False }, block_id, sblock_id, pblock_ids, depth) + else: + self.store_triple({ + "subject": source, + "predicate": "faldo:reference", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + self.store_triple({ + "subject": target, + "predicate": "faldo:reference", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + elif link["sameStrand"]: + self.store_triple({ + "subject": source, + "predicate": "faldo:strand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) + + self.store_triple({ + "subject": target, + "predicate": "faldo:strand", + "object": common_block, + "optional": False + + }, block_id, sblock_id, pblock_ids, depth) for filter in link.get('faldoFilters', []): modifier_string = "" diff --git a/docs/abstraction.md b/docs/abstraction.md index a1879687..d4f782b5 100644 --- a/docs/abstraction.md +++ b/docs/abstraction.md @@ -109,10 +109,6 @@ A FALDO entity have to be declared as FALDO on the abstraction. If attribute are :FaldoEntity rdfs:label "FaldoEntity" . ``` -!!! warning - Faldo triples should use "faldo:begin", "faldo:end", "faldo:reference" or "faldo:strand" as their relation. - - Four FALDO attributes are supported by AskOmics: reference, strand, start and end. ### faldo:reference @@ -188,6 +184,35 @@ _:blank rdfs:range xsd:decimal . _:blank askomics:uri :end_attribute ``` +### *Shortcut* faldo triples + +The default faldo ontology uses a chain of triple to describe the position (ex, faldo:location/faldo:begin/faldo:position). +This make *faldo queries* (included_in/overlap_with/distant_from) extremely slow. 
To improve query time, AskOmics can use 'shortcut triples', direct relations between the Entity and the reference/strand, to quickly filter entities on the same reference/strand/both. For example: + +```turtle +:EntityName faldo:reference reference_uri . +:EntityName faldo:strand strand_uri . +:EntityName askomics:referenceStrand reference_strand_uri . +``` + +To improve query times further, AskOmics will break down the entity genomic position in blocks (block size is defined in the configuration file). +This improves query time by filtering all entities having 'common blocks'. Each entity will span at least two blocks. Additional blocks will be created to include the reference and the strand. +For instance: + +```turtle +:EntityName askomics:includeIn block1_uri . +:EntityName askomics:includeIn block2_uri . +:EntityName askomics:includeInReference block1_reference_uri . +:EntityName askomics:includeInReference block2_reference_uri . +:EntityName askomics:includeInReferenceStrand block1_reference_strand_uri . +:EntityName askomics:includeInReferenceStrand block2_reference_strand_uri . +:EntityName askomics:includeInStrand block1_strand_uri . +:EntityName askomics:includeInStrand block2_strand_uri . +``` + +!!! note "Info" + When using 'BothStrand', make sure to add 'ForwardStrandPosition' and 'ReverseStrandPosition' to these additional triples, or they won't be matched on the 'same strand' query. + # Relations Entities are linked between them with relations. Relations are displayed with arrows between nodes on the query builder. The following turtle explain how relations are described. To avoid overwriting information, relations are described using a blank node.
The relation `:RelationExample`, linking `EntitySource` to `EntityTarget`, with the label *relation_example*, will be defined as follows: From 2c1a4d6ae0ed61334be781a9df732596a4368bba Mon Sep 17 00:00:00 2001 From: mboudet Date: Thu, 10 Aug 2023 10:08:24 +0200 Subject: [PATCH 09/15] More additional faldo triple under askomics namespace --- askomics/libaskomics/BedFile.py | 8 ++++---- askomics/libaskomics/CsvFile.py | 8 ++++---- askomics/libaskomics/GffFile.py | 8 ++++---- askomics/libaskomics/SparqlQuery.py | 14 +++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index 62cfd30b..c8ff874a 100644 --- a/askomics/libaskomics/BedFile.py +++ b/askomics/libaskomics/BedFile.py @@ -307,12 +307,12 @@ def generate_rdf_content(self): self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) # Shortcut triple for faldo queries - self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) - self.graph_chunk.add((entity, self.faldo.end, faldo_end)) - self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) + self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) if faldo_strand: - self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand)) strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, None) for sref in strand_ref: self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index 5b13a930..ff142f71 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -710,17 +710,17 @@ def generate_rdf_content(self): 
self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand)) # Shortcut triple for faldo queries - self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) - self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) if faldo_reference: - self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) if faldo_strand: strand_ref = self.get_reference_strand_uri(reference, faldo_strand, None) for sref in strand_ref: self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) if faldo_strand: - self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand)) # blocks block_base = self.settings.getint("triplestore", "block_size") diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index 46cfa598..b86f29f0 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -414,12 +414,12 @@ def generate_rdf_content(self): self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) # Shortcut triple for faldo queries - self.graph_chunk.add((entity, self.faldo.begin, faldo_start)) - self.graph_chunk.add((entity, self.faldo.end, faldo_end)) - self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((entity, self.namespace_internal["faldobegin"], faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) + self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) if faldo_strand: - self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand)) strand_ref = 
self.get_reference_strand_uri(rec.id, faldo_strand, None) for sref in strand_ref: self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref)) diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index 56a3ae6e..222f9128 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1318,7 +1318,7 @@ def build_query_from_json(self, preview=False, for_editor=False): else: self.store_triple({ "subject": source, - "predicate": "faldo:reference", + "predicate": "askomics:faldoReference", "object": common_block, "optional": False @@ -1326,7 +1326,7 @@ def build_query_from_json(self, preview=False, for_editor=False): self.store_triple({ "subject": target, - "predicate": "faldo:reference", + "predicate": "askomics:faldoReference", "object": common_block, "optional": False @@ -1335,7 +1335,7 @@ def build_query_from_json(self, preview=False, for_editor=False): elif link["sameStrand"]: self.store_triple({ "subject": source, - "predicate": "faldo:strand", + "predicate": "askomics:faldoStrand", "object": common_block, "optional": False @@ -1343,7 +1343,7 @@ def build_query_from_json(self, preview=False, for_editor=False): self.store_triple({ "subject": target, - "predicate": "faldo:strand", + "predicate": "askomics:faldoStrand", "object": common_block, "optional": False @@ -1538,7 +1538,7 @@ def build_query_from_json(self, preview=False, for_editor=False): if attribute["visible"] or Utils.check_key_in_list_of_dict(attribute["filters"], "filterValue") or attribute["id"] in start_end or attribute["id"] in linked_attributes: subject = self.format_sparql_variable("{}{}_uri".format(attribute["entityLabel"], attribute["nodeId"])) if attribute["faldo"]: - predicate = "faldo:{}".format("begin" if attribute["faldo"].endswith("faldoStart") else "end") + predicate = "askomics:{}".format("faldoBegin" if attribute["faldo"].endswith("faldoStart") else "faldoEnd") else: predicate = 
"<{}>".format(attribute["uri"]) obj = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) @@ -1623,7 +1623,7 @@ def build_query_from_json(self, preview=False, for_editor=False): category_value_uri = self.format_sparql_variable("{}{}_{}Category".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) category_label = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["humanNodeId"], attribute["label"])) if attribute["faldo"] and attribute["faldo"].endswith("faldoReference"): - category_name = 'faldo:reference' + category_name = 'askomics:faldoReference' self.store_triple({ "subject": node_uri, "predicate": category_name, @@ -1640,7 +1640,7 @@ def build_query_from_json(self, preview=False, for_editor=False): "nested_end": True if attribute["optional"] else False }, block_id, sblock_id, pblock_ids, depth) elif attribute["faldo"] and attribute["faldo"].endswith("faldoStrand"): - category_name = 'faldo:strand' + category_name = 'askomics:faldoStrand' self.store_triple({ "subject": node_uri, "predicate": category_name, From 631277510fc820047fb07a268461aa33d1594225 Mon Sep 17 00:00:00 2001 From: mboudet Date: Thu, 10 Aug 2023 17:19:22 +0200 Subject: [PATCH 10/15] Move default query back to faldo ontology --- CHANGELOG.md | 1 + askomics/libaskomics/SparqlQuery.py | 6 +++--- docs/abstraction.md | 19 +++++++++++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e34d082..f36a7ae7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ This changelog was started for release 4.2.0. - 'Same strand' queries will now match 'BothStrand' with a forward or reverse strand - Use '+', '-' and '.' 
for strand values in CSV instead of raw value (for homogenization) - Now allows 'infinite' recursive blocks (ie, a Minus block inside a Union block, or the opposite) +- Overhaul faldo relation: add 'direct shortcut' triples between entities and values, to avoid using slow property paths when using faldo queries ### Security diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index 222f9128..a2ee56d4 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1538,7 +1538,7 @@ def build_query_from_json(self, preview=False, for_editor=False): if attribute["visible"] or Utils.check_key_in_list_of_dict(attribute["filters"], "filterValue") or attribute["id"] in start_end or attribute["id"] in linked_attributes: subject = self.format_sparql_variable("{}{}_uri".format(attribute["entityLabel"], attribute["nodeId"])) if attribute["faldo"]: - predicate = "askomics:{}".format("faldoBegin" if attribute["faldo"].endswith("faldoStart") else "faldoEnd") + predicate = "faldo:location/faldo:{}/faldo:position".format("begin" if attribute["faldo"].endswith("faldoStart") else "end") else: predicate = "<{}>".format(attribute["uri"]) obj = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) @@ -1623,7 +1623,7 @@ def build_query_from_json(self, preview=False, for_editor=False): category_value_uri = self.format_sparql_variable("{}{}_{}Category".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"])) category_label = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["humanNodeId"], attribute["label"])) if attribute["faldo"] and attribute["faldo"].endswith("faldoReference"): - category_name = 'askomics:faldoReference' + category_name = 'faldo:location/faldo:begin/faldo:reference' self.store_triple({ "subject": node_uri, "predicate": category_name, @@ -1640,7 +1640,7 @@ def build_query_from_json(self, 
preview=False, for_editor=False): "nested_end": True if attribute["optional"] else False }, block_id, sblock_id, pblock_ids, depth) elif attribute["faldo"] and attribute["faldo"].endswith("faldoStrand"): - category_name = 'askomics:faldoStrand' + category_name = 'faldo:location/faldo:begin/rdf:type' self.store_triple({ "subject": node_uri, "predicate": category_name, diff --git a/docs/abstraction.md b/docs/abstraction.md index d4f782b5..4bdb23ca 100644 --- a/docs/abstraction.md +++ b/docs/abstraction.md @@ -111,6 +111,19 @@ A FALDO entity have to be declared as FALDO on the abstraction. If attribute are Four FALDO attributes are supported by AskOmics: reference, strand, start and end. +!!! warning + AskOmics expect faldo entities to follow the faldo ontology for triple definition. Ex: + ```turtle + # Reference + :Entity faldo:location/faldo:begin/faldo:reference "value" + # strand + :Entity faldo:location/faldo:begin/rdf:type "value" + # Start + :Entity faldo:location/faldo:begin/faldo:position "value" + # Stop. + :Entity faldo:location/faldo:end/faldo:position "value" + ``` + ### faldo:reference A faldo:reference attribute derive from a Category attribute. @@ -190,8 +203,10 @@ The default faldo ontology uses a chain of triple to describe the position (ex, This make *faldo queries* (included_in/overlap_with/distant_from) extremely slow. To improve query time, AskOmics can use 'shortcut triples', direct relations between the Entity and the reference/strand, to quickly filter entities on the same reference/strand/both. For example: ```turtle -:EntityName faldo:reference reference_uri . -:EntityName faldo:strand strand_uri . +:EntityName askomics:faldoReference reference_uri . +:EntityName askomics:faldoBegin begin_value . +:EntityName askomics:faldoEnd end_value . +:EntityName askomics:faldoStrand strand_uri . :EntityName askomics:referenceStrand reference_strand_uri . 
``` From e46a21611f046d699f115891e6ba7ff7c33b97a7 Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 11 Aug 2023 10:55:27 +0200 Subject: [PATCH 11/15] typo --- askomics/libaskomics/GffFile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index b86f29f0..dcadac28 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -414,7 +414,7 @@ def generate_rdf_content(self): self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) # Shortcut triple for faldo queries - self.graph_chunk.add((entity, self.namespace_internal["faldobegin"], faldo_start)) + self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end)) self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference)) From 9c3cae1f51695b2ab74ec98d1b2af9942ccd29c5 Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 11 Aug 2023 11:18:53 +0200 Subject: [PATCH 12/15] fix typo --- askomics/libaskomics/BedFile.py | 8 ++++---- askomics/libaskomics/File.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index c8ff874a..dc2aba3e 100644 --- a/askomics/libaskomics/BedFile.py +++ b/askomics/libaskomics/BedFile.py @@ -298,13 +298,13 @@ def generate_rdf_content(self): self.graph_chunk.add((begin, rdflib.RDF.type, self.faldo.ExactPosition)) self.graph_chunk.add((begin, self.faldo.position, faldo_start)) - self.graph_chunk.add((entity, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((entity, self.faldo.end, faldo_end)) + self.graph_chunk.add((end, rdflib.RDF.type, self.faldo.ExactPosition)) + self.graph_chunk.add((end, self.faldo.end, faldo_end)) - self.graph_chunk.add((entity, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((begin, self.faldo.reference, faldo_reference)) if 
faldo_strand: - self.graph_chunk.add((entity, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((begin, self.faldo.strand, faldo_strand)) # Shortcut triple for faldo queries self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) diff --git a/askomics/libaskomics/File.py b/askomics/libaskomics/File.py index be02c14f..3ab01700 100644 --- a/askomics/libaskomics/File.py +++ b/askomics/libaskomics/File.py @@ -458,13 +458,13 @@ def get_faldo_strand_label(self, raw_strand): return "." - def get_reference_strand_uri(self, reference, strand, block): + def get_reference_strand_uri(self, reference, strand, block=None): faldo_dict = { self.faldo.ForwardStrandPosition: "ForwardStrand", self.faldo.ReverseStrandPosition: "ReverseStrand", self.faldo.BothStrandPosition: "BothStrand" } - if not block: + if block is not None: if strand == self.faldo.BothStrandPosition: return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()] return [self.rdfize(self.format_uri("{}_s{}".format(reference, faldo_dict[strand])))] From 4937323617883a5fe7d651b07ba9ca5ecb34f2eb Mon Sep 17 00:00:00 2001 From: root Date: Fri, 11 Aug 2023 10:11:22 +0000 Subject: [PATCH 13/15] actually revert --- askomics/libaskomics/BedFile.py | 6 ++++-- askomics/libaskomics/File.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index dc2aba3e..0d3be5ff 100644 --- a/askomics/libaskomics/BedFile.py +++ b/askomics/libaskomics/BedFile.py @@ -299,12 +299,14 @@ def generate_rdf_content(self): self.graph_chunk.add((begin, self.faldo.position, faldo_start)) self.graph_chunk.add((end, rdflib.RDF.type, self.faldo.ExactPosition)) - self.graph_chunk.add((end, self.faldo.end, faldo_end)) + self.graph_chunk.add((end, self.faldo.position, faldo_end)) self.graph_chunk.add((begin, self.faldo.reference, faldo_reference)) + self.graph_chunk.add((end, 
self.faldo.reference, faldo_reference)) if faldo_strand: - self.graph_chunk.add((begin, self.faldo.strand, faldo_strand)) + self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand)) + self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand)) # Shortcut triple for faldo queries self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start)) diff --git a/askomics/libaskomics/File.py b/askomics/libaskomics/File.py index 3ab01700..ea32ea71 100644 --- a/askomics/libaskomics/File.py +++ b/askomics/libaskomics/File.py @@ -464,7 +464,7 @@ def get_reference_strand_uri(self, reference, strand, block=None): self.faldo.ReverseStrandPosition: "ReverseStrand", self.faldo.BothStrandPosition: "BothStrand" } - if block is not None: + if block is None: if strand == self.faldo.BothStrandPosition: return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()] return [self.rdfize(self.format_uri("{}_s{}".format(reference, faldo_dict[strand])))] From 855f2869d05a996b2220460bf64ff75def5c9f2d Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 11 Aug 2023 12:16:53 +0200 Subject: [PATCH 14/15] Actually include block in includeInStrand --- askomics/libaskomics/BedFile.py | 4 +++- askomics/libaskomics/CsvFile.py | 7 +++---- askomics/libaskomics/File.py | 6 ++++++ askomics/libaskomics/GffFile.py | 4 +++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/askomics/libaskomics/BedFile.py b/askomics/libaskomics/BedFile.py index 0d3be5ff..0c4a9ee5 100644 --- a/askomics/libaskomics/BedFile.py +++ b/askomics/libaskomics/BedFile.py @@ -329,9 +329,11 @@ def generate_rdf_content(self): block_reference = self.rdfize(self.format_uri("{}_{}".format(feature.chrom, slice_block))) self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference)) if faldo_strand: - self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand)) strand_ref = 
self.get_reference_strand_uri(feature.chrom, faldo_strand, slice_block) for sref in strand_ref: self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref)) + strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref)) yield diff --git a/askomics/libaskomics/CsvFile.py b/askomics/libaskomics/CsvFile.py index ff142f71..064c814a 100644 --- a/askomics/libaskomics/CsvFile.py +++ b/askomics/libaskomics/CsvFile.py @@ -737,9 +737,8 @@ def generate_rdf_content(self): for sref in strand_ref: self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref)) if faldo_strand: - self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand)) - if faldo_strand == self.faldo.BothStrandPosition: - self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], self.faldo.ForwardStrandPosition)) - self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], self.faldo.ReverseStrandPosition)) + strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref)) yield diff --git a/askomics/libaskomics/File.py b/askomics/libaskomics/File.py index ea32ea71..83276fb0 100644 --- a/askomics/libaskomics/File.py +++ b/askomics/libaskomics/File.py @@ -464,6 +464,12 @@ def get_reference_strand_uri(self, reference, strand, block=None): self.faldo.ReverseStrandPosition: "ReverseStrand", self.faldo.BothStrandPosition: "BothStrand" } + if reference is None: + if strand == self.faldo.BothStrandPosition: + return [self.rdfize(self.format_uri("s{}_{}".format(dstrand, block))) for dstrand in faldo_dict.values()] + + return [self.rdfize(self.format_uri("s{}_{}".format(faldo_dict[strand], block)))] + if block is None: if strand == self.faldo.BothStrandPosition: 
return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()] diff --git a/askomics/libaskomics/GffFile.py b/askomics/libaskomics/GffFile.py index dcadac28..d87d7d9c 100644 --- a/askomics/libaskomics/GffFile.py +++ b/askomics/libaskomics/GffFile.py @@ -434,10 +434,12 @@ def generate_rdf_content(self): block_reference = self.rdfize(self.format_uri("{}_{}".format(rec.id, slice_block))) self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference)) if faldo_strand: - self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand)) strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, slice_block) for sref in strand_ref: self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref)) + strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block) + for sref in strand_ref: + self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref)) yield From d708e8fc07fa68708543c662e9d4f3787c1d7bca Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 11 Aug 2023 16:31:00 +0200 Subject: [PATCH 15/15] Use shortcut in 'distant_from' query --- askomics/libaskomics/SparqlQuery.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/askomics/libaskomics/SparqlQuery.py b/askomics/libaskomics/SparqlQuery.py index a2ee56d4..33c36e93 100644 --- a/askomics/libaskomics/SparqlQuery.py +++ b/askomics/libaskomics/SparqlQuery.py @@ -1539,6 +1539,9 @@ def build_query_from_json(self, preview=False, for_editor=False): subject = self.format_sparql_variable("{}{}_uri".format(attribute["entityLabel"], attribute["nodeId"])) if attribute["faldo"]: predicate = "faldo:location/faldo:{}/faldo:position".format("begin" if attribute["faldo"].endswith("faldoStart") else "end") + # Use faldo shortcut for faldo queries + if attribute["id"] in start_end or attribute["id"] in linked_attributes: + predicate = "askomics:{}".format("faldoBegin" if 
attribute["faldo"].endswith("faldoStart") else "faldoEnd") else: predicate = "<{}>".format(attribute["uri"]) obj = self.format_sparql_variable("{}{}_{}".format(attribute["entityLabel"], attribute["nodeId"], attribute["label"]))