Skip to content

Commit

Permalink
Merge d708e8f into c114108
Browse files Browse the repository at this point in the history
  • Loading branch information
mboudet committed Aug 11, 2023
2 parents c114108 + d708e8f commit 90a9e40
Show file tree
Hide file tree
Showing 13 changed files with 257 additions and 45 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -25,6 +25,7 @@ This changelog was started for release 4.2.0.
- Added 'custom distance' option for faldo relation (instead of just 'included_in' and 'overlap_with')
- Store 'version' value when storing results. Not used for now, but might be used in deprecation warnings later
- Removed some lines from coverage computation
- Added 'indirect relations': two entities are linked in the graph without a direct relation between them (as with faldo relations). This is intended to be used with 'linked attributes' (e.g., get all entities B where B.color is the same as A.color, without a direct relation between A and B)

### Fixed

Expand All @@ -51,6 +52,7 @@ This changelog was started for release 4.2.0.
- 'Same strand' queries will now match 'BothStrand' with a forward or reverse strand
- Use '+', '-' and '.' for strand values in CSV instead of raw value (for homogenization)
- Now allows arbitrarily nested ('infinite') recursive blocks (i.e., a Minus block inside a Union block, or vice versa)
- Overhauled faldo relations: added 'direct shortcut' triples between entities and values, to avoid slow property paths in faldo queries

### Security

Expand Down
2 changes: 1 addition & 1 deletion askomics/api/file.py
Expand Up @@ -410,7 +410,7 @@ def get_column_types():
types: list of available column types
"""

data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "label"]
data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "indirect_relation", "label"]

return jsonify({
"types": data
Expand Down
31 changes: 23 additions & 8 deletions askomics/libaskomics/BedFile.py
Expand Up @@ -172,7 +172,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri(feature.chrom)]
faldo_reference = attribute
self.faldo_abstraction["reference"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "reference" not in attribute_list:
attribute_list.append("reference")
Expand All @@ -195,7 +195,7 @@ def generate_rdf_content(self):
attribute = rdflib.Literal(self.convert_type(feature.start + 1)) # +1 because bed is 0 based
faldo_start = attribute
self.faldo_abstraction["start"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "start" not in attribute_list:
attribute_list.append("start")
Expand All @@ -212,7 +212,7 @@ def generate_rdf_content(self):
attribute = rdflib.Literal(self.convert_type(feature.end))
faldo_end = attribute
self.faldo_abstraction["end"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "end" not in attribute_list:
attribute_list.append("end")
Expand All @@ -233,7 +233,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri("+")]
faldo_strand = self.get_faldo_strand("+")
self.faldo_abstraction["strand"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))
strand = True
strand_type = "+"
elif feature.strand == "-":
Expand All @@ -242,7 +242,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri("-")]
faldo_strand = self.get_faldo_strand("-")
self.faldo_abstraction["strand"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))
strand = True
strand_type = "-"
else:
Expand All @@ -251,7 +251,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri(".")]
faldo_strand = self.get_faldo_strand(".")
self.faldo_abstraction["strand"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))
strand = True
strand_type = "."

Expand All @@ -271,7 +271,7 @@ def generate_rdf_content(self):
if feature.score != '.':
relation = self.namespace_data[self.format_uri("score")]
attribute = rdflib.Literal(self.convert_type(feature.score))
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "score" not in attribute_list:
attribute_list.append("score")
Expand All @@ -283,6 +283,8 @@ def generate_rdf_content(self):
"range": rdflib.XSD.decimal
})

# Triples respecting faldo ontology

location = BNode()
begin = BNode()
end = BNode()
Expand All @@ -306,6 +308,17 @@ def generate_rdf_content(self):
self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand))
self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand))

# Shortcut triple for faldo queries
self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))

if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, None)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

# blocks
block_base = self.settings.getint("triplestore", "block_size")
block_start = int(self.convert_type(feature.start + 1)) // block_base
Expand All @@ -316,9 +329,11 @@ def generate_rdf_content(self):
block_reference = self.rdfize(self.format_uri("{}_{}".format(feature.chrom, slice_block)))
self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference))
if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

yield
32 changes: 27 additions & 5 deletions askomics/libaskomics/CsvFile.py
Expand Up @@ -416,8 +416,9 @@ def set_rdf_abstraction(self):

blank = BNode()
# Relation
if self.columns_type[index] in ('general_relation', 'symetric_relation'):
if self.columns_type[index] in ('general_relation', 'symetric_relation', 'indirect_relation'):
symetric_relation = True if self.columns_type[index] == 'symetric_relation' else False
indirect_relation = True if self.columns_type[index] == 'indirect_relation' else False
splitted = attribute_name.split('@')

attribute = self.rdfize(splitted[0])
Expand All @@ -439,6 +440,8 @@ def set_rdf_abstraction(self):
if symetric_relation:
self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, rdf_range))
self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, entity))
if indirect_relation:
self.graph_abstraction_dk.add((blank, self.namespace_internal["isIndirectRelation"], rdflib.Literal("true", datatype=rdflib.XSD.boolean)))

continue

Expand Down Expand Up @@ -597,6 +600,10 @@ def generate_rdf_content(self):
if current_type == "label" and column_number == 1:
continue

# We ignore all data for indirect relations
if current_type == "indirect_relation":
continue

# Skip entity and blank cells
if column_number == 0 or (not cell and not current_type == "strand"):
continue
Expand Down Expand Up @@ -675,6 +682,9 @@ def generate_rdf_content(self):
self.graph_chunk.add((attribute, relation, entity))

if self.faldo_entity and faldo_start and faldo_end:

# Triples respecting faldo ontology

location = BNode()
begin_node = BNode()
end_node = BNode()
Expand All @@ -699,6 +709,19 @@ def generate_rdf_content(self):
self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand))
self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand))

# Shortcut triple for faldo queries
self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
if faldo_reference:
self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))
if faldo_strand:
strand_ref = self.get_reference_strand_uri(reference, faldo_strand, None)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))

# blocks
block_base = self.settings.getint("triplestore", "block_size")
block_start = int(start) // block_base
Expand All @@ -714,9 +737,8 @@ def generate_rdf_content(self):
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand))
if faldo_strand == self.faldo.BothStrandPosition:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], self.faldo.ForwardStrandPosition))
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], self.faldo.ReverseStrandPosition))
strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

yield
13 changes: 12 additions & 1 deletion askomics/libaskomics/File.py
Expand Up @@ -458,12 +458,23 @@ def get_faldo_strand_label(self, raw_strand):

return "."

def get_reference_strand_uri(self, reference, strand, block):
def get_reference_strand_uri(self, reference, strand, block=None):
faldo_dict = {
self.faldo.ForwardStrandPosition: "ForwardStrand",
self.faldo.ReverseStrandPosition: "ReverseStrand",
self.faldo.BothStrandPosition: "BothStrand"
}
if reference is None:
if strand == self.faldo.BothStrandPosition:
return [self.rdfize(self.format_uri("s{}_{}".format(dstrand, block))) for dstrand in faldo_dict.values()]

return [self.rdfize(self.format_uri("s{}_{}".format(faldo_dict[strand], block)))]

if block is None:
if strand == self.faldo.BothStrandPosition:
return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()]
return [self.rdfize(self.format_uri("{}_s{}".format(reference, faldo_dict[strand])))]

if strand == self.faldo.BothStrandPosition:
return [self.rdfize(self.format_uri("{}_s{}_{}".format(reference, dstrand, block))) for dstrand in faldo_dict.values()]

Expand Down
17 changes: 16 additions & 1 deletion askomics/libaskomics/GffFile.py
Expand Up @@ -388,6 +388,8 @@ def generate_rdf_content(self):

self.graph_chunk.add((entity, relation, attribute))

# Triples respecting faldo ontology

location = BNode()
begin = BNode()
end = BNode()
Expand All @@ -411,6 +413,17 @@ def generate_rdf_content(self):
self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand))
self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand))

# Shortcut triple for faldo queries
self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))

if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, None)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

# blocks
block_base = self.settings.getint("triplestore", "block_size")
block_start = int(self.convert_type(feature.location.start)) // block_base
Expand All @@ -421,10 +434,12 @@ def generate_rdf_content(self):
block_reference = self.rdfize(self.format_uri("{}_{}".format(rec.id, slice_block)))
self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference))
if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

yield

Expand Down

0 comments on commit 90a9e40

Please sign in to comment.