Skip to content

Commit

Permalink
further cross-validations and errmsg
Browse files Browse the repository at this point in the history
  • Loading branch information
Giorgio Gonnella committed Apr 5, 2017
1 parent cf6688f commit 6d25eb7
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 4 deletions.
11 changes: 9 additions & 2 deletions gfapy/gfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def validate(self):
"""
self.__validate_segment_references()
self.__validate_path_links()
self.__validate_group_references()
self.__validate_group_items()
self.__validate_gfa2_positions()

def __str__(self):
    """Return the string form of every line in self.lines, newline-separated."""
    return "\n".join(map(str, self.lines))
Expand Down Expand Up @@ -262,7 +263,7 @@ def __validate_path_links(self):
"does not exist, but is required by the following paths:\n"+
l.refstr())

def __validate_group_references(self):
def __validate_group_items(self):
if self.version == "gfa1":
return
for group in self.sets + self.paths:
Expand All @@ -275,6 +276,12 @@ def __validate_group_references(self):
"does not exist, but is required by the following groups:\n"+
item.refstr())

def __validate_gfa2_positions(self):
    """Call validate_positions() on every edge line, then every fragment line.

    Nothing is done when self.version is "gfa1".
    """
    if self.version != "gfa1":
        for record in self.edges + self.fragments:
            record.validate_positions()

def _validate_version(self):
if (self._version != None) and (self._version not in gfapy.VERSIONS):
raise gfapy.VersionError("GFA specification version {} not supported".
Expand Down
3 changes: 2 additions & 1 deletion gfapy/line/edge/gfa2/gfa2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from ..gfa2.alignment_type import AlignmentType as GFA2_AlignmentType
from ..gfa2.references import References
from ..gfa2.other import Other
from ..gfa2.validation import Validation
from ..edge import Edge

class GFA2(Other, References, GFA2_AlignmentType, AlignmentType, FromTo,
ToGFA1, Edge):
ToGFA1, Validation, Edge):
"""An edge line of a GFA2 file."""

RECORD_TYPE = "E"
Expand Down
22 changes: 22 additions & 0 deletions gfapy/line/edge/gfa2/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import gfapy

class Validation:
    """Mixin adding position cross-validation to GFA2 edge lines."""

    def validate_positions(self):
        """Check that positions suffixed by $ are the last position of segments.

        For each of the two segments referenced by the edge (fields sid1 and
        sid2), the beg/end positions flagged as last positions are compared
        with the length of the segment sequence. The check is skipped when
        the line is not connected and for any segment whose sequence is a
        placeholder.

        Raises:
            gfapy.InconsistencyError: if a position marked with $ differs
                from the length of the segment sequence.
        """
        if not self.is_connected():
            return
        for n in ["1", "2"]:
            seg = self.get("sid" + n).line
            seq = seg.sequence
            if gfapy.is_placeholder(seq):
                # No sequence available: nothing to compare against.
                continue
            seqlen = len(seq)
            for pfx in ["beg", "end"]:
                fn = pfx + n
                pos = self.get(fn)
                if gfapy.islastpos(pos) and pos != seqlen:
                    # The "({})" placeholder was missing in the original
                    # message, so the offending position was never shown.
                    raise gfapy.InconsistencyError(
                        "Edge: {}\n".format(str(self)) +
                        "Field {}: $ after ".format(fn) +
                        "non-last position ({})\n".format(str(pos)) +
                        "Segment: {}".format(str(seg)))
3 changes: 2 additions & 1 deletion gfapy/line/fragment/fragment.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .references import References
from .validation import Validation
from ..line import Line

class Fragment(References, Line):
class Fragment(References, Validation, Line):
"""
A fragment line of a GFA2 file
"""
Expand Down
21 changes: 21 additions & 0 deletions gfapy/line/fragment/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import gfapy

class Validation:
    """Mixin adding position cross-validation to GFA2 fragment lines."""

    def validate_positions(self):
        """Check that positions suffixed by $ are the last position of segments.

        The s_beg/s_end positions flagged as last positions are compared with
        the length of the sequence of the segment in the sid field. The check
        is skipped when the line is not connected or when the segment
        sequence is a placeholder.

        Raises:
            gfapy.InconsistencyError: if a position marked with $ differs
                from the length of the segment sequence.
        """
        if not self.is_connected():
            return
        seg = self.get("sid")
        seq = seg.sequence
        if gfapy.is_placeholder(seq):
            # No sequence available: nothing to compare against.
            return
        seqlen = len(seq)
        for sfx in ["beg", "end"]:
            fn = "s_" + sfx
            pos = self.get(fn)
            if gfapy.islastpos(pos) and pos != seqlen:
                # The line being reported is a fragment; the original
                # message mislabelled it as "Edge".
                raise gfapy.InconsistencyError(
                    "Fragment: {}\n".format(str(self)) +
                    "Field {}: $ after ".format(str(fn)) +
                    "non-last position ({})\n".format(str(pos)) +
                    "Segment: {}".format(str(seg)))
1 change: 1 addition & 0 deletions gfapy/line/segment/length_gfa1.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def validate_length(self):
if not gfapy.is_placeholder(self.sequence) and "LN" in self.tagnames:
if self.LN != len(self.sequence):
raise gfapy.InconsistencyError(
"Segment: {}\n".format(str(self))+
"Length in LN tag ({}) ".format(self.LN)+
"is different from length of sequence field ({})"
.format(len(self.sequence)))
Expand Down
12 changes: 12 additions & 0 deletions tests/testdata/invalid/edge_wrong_lastpos.gfa2
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
H VN:Z:2.0
H ul:Z:https://github.com/sjackman/assembly-graph/blob/master/sample.gfa
S 1 8 CGATGCAA
S 2 10 TGCAAAGTAC
S 3 21 TGCAACGTATAGACTTGTCAC RC:i:4
S 4 7 GCATATA
S 5 8 CGATGATA
S 6 4 ATGA
E * 1+ 2+ 3 9$ 0 5 5M
E * 3+ 2+ 21$ 21$ 0 0 0M
E * 3+ 4- 17 21$ 3 7$ 1M1D2M
E * 4- 5+ 0 0 0 0 0M
33 changes: 33 additions & 0 deletions tests/testdata/invalid/fragment_wrong_lastpos.gfa2
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# File used for the collections test
# similar but NOT equivalent to the gfa1 file!
S 1 122 *
S 3 29 TGCTAGCTGACTGTCGATGCTGTGTG
E 1_to_2 1+ 2+ 110 122$ 0 12 12M
S 5 130 *
S 13 150 *
E 2_to_6 2+ 6+ 0 122$ 10 132 122M
O 14 11+ 12+
S 11 140 * xx:i:11
F 3 read1+ 0 42$ 12 55 * id:Z:read1_in_3
F 2 read2+ 45 62 0 18 * id:Z:read2_in_2
U 16 1 3 15 2_to_6 16sub
H ac:Z:test2
# another comment
S 12 150 *
S 4 120 *
H VN:Z:2.0
E 1_to_3 1+ 3+ 112 122$ 0 12 10M
G 1_to_11 1+ 11- 120 *
E 11_to_12 11+ 12+ 18 140$ 0 122 122M
S 6 150 *
X custom_record xx:Z:testtag
X custom_record X2
E 11_to_13 11+ 13+ 20 140$ 0 120 120M
G 2_to_12 2- 12+ 500 50
O 15 11+ 11_to_13+ 13+ xx:i:-1
Y another_custom_record
U 16sub 2 3
S 2 120 * xx:Z:sometag
H aa:i:12 ab:Z:test1
H aa:i:15
E 1_to_5 1+ 5+ 0 122$ 2 124 * zz:Z:tag
12 changes: 12 additions & 0 deletions tests/testdata/invalid/inconsistent_length.gfa1
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
H VN:Z:1.0
H ul:Z:https://github.com/sjackman/assembly-graph/blob/master/sample.gfa
S 1 CGATGCAA LN:i:12
S 2 TGCAAAGTAC
S 3 TGCAACGTATAGACTTGTCAC RC:i:4
S 4 GCATATA
S 5 CGATGATA
S 6 ATGA
L 1 + 2 + 5M
L 3 + 2 + 0M
L 3 + 4 - 1M1D2M1S
L 4 - 5 + 0M

0 comments on commit 6d25eb7

Please sign in to comment.