Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 407 #412

Merged
merged 17 commits into from Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -25,6 +25,7 @@ This changelog was started for release 4.2.0.
- Added 'custom distance' option for faldo relation (instead of just 'included_in' and 'overlap_with')
- Store 'version' value when storing results. Not used for now, but might be used in deprecation warnings later
- Removed some lines from coverage computation
- Added 'indirect relations': two entities will be linked on the graph without a direct relation between them (same as faldo relations). This is intended to be used with 'linked attributes' (e.g., get all entities B where B.color is the same as A.color, without a direct relation between A and B)

### Fixed

Expand All @@ -51,6 +52,7 @@ This changelog was started for release 4.2.0.
- 'Same strand' queries will now match 'BothStrand' with a forward or reverse strand
- Use '+', '-' and '.' for strand values in CSV instead of raw value (for homogenization)
- Now allows 'infinite' recursive blocks (i.e., a Minus block inside a Union block, or the opposite)
- Overhaul faldo relation: add 'direct shortcut' triples between entities and values, to avoid using slow property paths when using faldo queries

### Security

Expand Down
2 changes: 1 addition & 1 deletion askomics/api/file.py
Expand Up @@ -410,7 +410,7 @@ def get_column_types():
types: list of available column types
"""

data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "label"]
data = ["numeric", "text", "category", "boolean", "date", "reference", "strand", "start", "end", "general_relation", "symetric_relation", "indirect_relation", "label"]

return jsonify({
"types": data
Expand Down
31 changes: 23 additions & 8 deletions askomics/libaskomics/BedFile.py
Expand Up @@ -172,7 +172,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri(feature.chrom)]
faldo_reference = attribute
self.faldo_abstraction["reference"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "reference" not in attribute_list:
attribute_list.append("reference")
Expand All @@ -195,7 +195,7 @@ def generate_rdf_content(self):
attribute = rdflib.Literal(self.convert_type(feature.start + 1)) # +1 because bed is 0 based
faldo_start = attribute
self.faldo_abstraction["start"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "start" not in attribute_list:
attribute_list.append("start")
Expand All @@ -212,7 +212,7 @@ def generate_rdf_content(self):
attribute = rdflib.Literal(self.convert_type(feature.end))
faldo_end = attribute
self.faldo_abstraction["end"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "end" not in attribute_list:
attribute_list.append("end")
Expand All @@ -233,7 +233,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri("+")]
faldo_strand = self.get_faldo_strand("+")
self.faldo_abstraction["strand"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))
strand = True
strand_type = "+"
elif feature.strand == "-":
Expand All @@ -242,7 +242,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri("-")]
faldo_strand = self.get_faldo_strand("-")
self.faldo_abstraction["strand"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))
strand = True
strand_type = "-"
else:
Expand All @@ -251,7 +251,7 @@ def generate_rdf_content(self):
attribute = self.namespace_data[self.format_uri(".")]
faldo_strand = self.get_faldo_strand(".")
self.faldo_abstraction["strand"] = relation
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))
strand = True
strand_type = "."

Expand All @@ -271,7 +271,7 @@ def generate_rdf_content(self):
if feature.score != '.':
relation = self.namespace_data[self.format_uri("score")]
attribute = rdflib.Literal(self.convert_type(feature.score))
self.graph_chunk.add((entity, relation, attribute))
# self.graph_chunk.add((entity, relation, attribute))

if "score" not in attribute_list:
attribute_list.append("score")
Expand All @@ -283,6 +283,8 @@ def generate_rdf_content(self):
"range": rdflib.XSD.decimal
})

# Triples respecting faldo ontology

location = BNode()
begin = BNode()
end = BNode()
Expand All @@ -306,6 +308,17 @@ def generate_rdf_content(self):
self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand))
self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand))

# Shortcut triple for faldo queries
self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))

if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, None)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

# blocks
block_base = self.settings.getint("triplestore", "block_size")
block_start = int(self.convert_type(feature.start + 1)) // block_base
Expand All @@ -316,9 +329,11 @@ def generate_rdf_content(self):
block_reference = self.rdfize(self.format_uri("{}_{}".format(feature.chrom, slice_block)))
self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference))
if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(feature.chrom, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

yield
32 changes: 27 additions & 5 deletions askomics/libaskomics/CsvFile.py
Expand Up @@ -416,8 +416,9 @@ def set_rdf_abstraction(self):

blank = BNode()
# Relation
if self.columns_type[index] in ('general_relation', 'symetric_relation'):
if self.columns_type[index] in ('general_relation', 'symetric_relation', 'indirect_relation'):
symetric_relation = True if self.columns_type[index] == 'symetric_relation' else False
indirect_relation = True if self.columns_type[index] == 'indirect_relation' else False
splitted = attribute_name.split('@')

attribute = self.rdfize(splitted[0])
Expand All @@ -439,6 +440,8 @@ def set_rdf_abstraction(self):
if symetric_relation:
self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, rdf_range))
self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, entity))
if indirect_relation:
self.graph_abstraction_dk.add((blank, self.namespace_internal["isIndirectRelation"], rdflib.Literal("true", datatype=rdflib.XSD.boolean)))

continue

Expand Down Expand Up @@ -597,6 +600,10 @@ def generate_rdf_content(self):
if current_type == "label" and column_number == 1:
continue

# We ignore all data for indirect relations
if current_type == "indirect_relation":
continue

# Skip entity and blank cells
if column_number == 0 or (not cell and not current_type == "strand"):
continue
Expand Down Expand Up @@ -675,6 +682,9 @@ def generate_rdf_content(self):
self.graph_chunk.add((attribute, relation, entity))

if self.faldo_entity and faldo_start and faldo_end:

# Triples respecting faldo ontology

location = BNode()
begin_node = BNode()
end_node = BNode()
Expand All @@ -699,6 +709,19 @@ def generate_rdf_content(self):
self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand))
self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand))

# Shortcut triple for faldo queries
self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
if faldo_reference:
self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))
if faldo_strand:
strand_ref = self.get_reference_strand_uri(reference, faldo_strand, None)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))

# blocks
block_base = self.settings.getint("triplestore", "block_size")
block_start = int(start) // block_base
Expand All @@ -714,9 +737,8 @@ def generate_rdf_content(self):
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand))
if faldo_strand == self.faldo.BothStrandPosition:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], self.faldo.ForwardStrandPosition))
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], self.faldo.ReverseStrandPosition))
strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

yield
13 changes: 12 additions & 1 deletion askomics/libaskomics/File.py
Expand Up @@ -458,12 +458,23 @@ def get_faldo_strand_label(self, raw_strand):

return "."

def get_reference_strand_uri(self, reference, strand, block):
def get_reference_strand_uri(self, reference, strand, block=None):
faldo_dict = {
self.faldo.ForwardStrandPosition: "ForwardStrand",
self.faldo.ReverseStrandPosition: "ReverseStrand",
self.faldo.BothStrandPosition: "BothStrand"
}
if reference is None:
if strand == self.faldo.BothStrandPosition:
return [self.rdfize(self.format_uri("s{}_{}".format(dstrand, block))) for dstrand in faldo_dict.values()]

return [self.rdfize(self.format_uri("s{}_{}".format(faldo_dict[strand], block)))]

if block is None:
if strand == self.faldo.BothStrandPosition:
return [self.rdfize(self.format_uri("{}_s{}s".format(reference, dstrand))) for dstrand in faldo_dict.values()]
return [self.rdfize(self.format_uri("{}_s{}".format(reference, faldo_dict[strand])))]

if strand == self.faldo.BothStrandPosition:
return [self.rdfize(self.format_uri("{}_s{}_{}".format(reference, dstrand, block))) for dstrand in faldo_dict.values()]

Expand Down
17 changes: 16 additions & 1 deletion askomics/libaskomics/GffFile.py
Expand Up @@ -388,6 +388,8 @@ def generate_rdf_content(self):

self.graph_chunk.add((entity, relation, attribute))

# Triples respecting faldo ontology

location = BNode()
begin = BNode()
end = BNode()
Expand All @@ -411,6 +413,17 @@ def generate_rdf_content(self):
self.graph_chunk.add((begin, rdflib.RDF.type, faldo_strand))
self.graph_chunk.add((end, rdflib.RDF.type, faldo_strand))

# Shortcut triple for faldo queries
self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))

if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, None)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

# blocks
block_base = self.settings.getint("triplestore", "block_size")
block_start = int(self.convert_type(feature.location.start)) // block_base
Expand All @@ -421,10 +434,12 @@ def generate_rdf_content(self):
block_reference = self.rdfize(self.format_uri("{}_{}".format(rec.id, slice_block)))
self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference))
if faldo_strand:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], faldo_strand))
strand_ref = self.get_reference_strand_uri(rec.id, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
for sref in strand_ref:
self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

yield

Expand Down