Skip to content
Browse files

Location/Split: Added its own "length" subroutine to

handle more complicated cases like cut by origin locations,
which were returning the whole sequence length
(e.g. "join(161..200,1..5))" returned 200 instead of 45).
For transpliced locations (e.g. "join(16..20,complement(6..10))"),
since exons can even be in different chromosomes, it returns
the sum of the lengths of the individual segments. Added tests.
  • Loading branch information...
1 parent 3059d70 commit dfb457ab5cb1ec1021749117cac347fb64bdf53d @fjossandon fjossandon committed Jun 27, 2014
Showing with 98 additions and 1 deletion.
  1. +74 −0 Bio/Location/Split.pm
  2. +24 −1 t/Seq/PrimarySeq.t
View
74 Bio/Location/Split.pm
@@ -603,6 +603,80 @@ sub end_pos_type {
return ( @locs ) ? $locs[0]->end_pos_type() : undef;
}
+=head2 length
+
+ Title : length
+ Usage : $len = $loc->length();
+ Function: get the length in the coordinate space this location spans
+ Example :
+ Returns : an integer
+ Args : none
+
+=cut
+
+sub length {
+ my ($self) = @_;
+ my $length = 0;
+ # Mixed strand values means transplicing (where exons can even
+ # be in different chromosomes), so in that case only give the sum
+ # of the lengths of the individual segments
+ if ($self->guide_strand == undef) {
+ for my $loc ( $self->sub_Location(0) ) {
+ $length += abs($loc->end - $loc->start) + 1
+ }
+ }
+ else {
+ my @sublocs = $self->sub_Location(0);
+ my $start = $sublocs[0]->start;
+ my $end = $sublocs[-1]->end;
+
+ # If Start > ·End, its a possible case of cut by origin
+ # location in circular sequences (e.g "join(16..20,1..2)")
+ if ($start > $end) {
+ # Figure out which segments are located before
+ # and which are located after coordinate 1
+ # (END_SEQ - 1 - START_SEQ)
+ my @end_seq_segments;
+ my @start_seq_segments;
+ my $switch = 0;
+ foreach my $subloc (@sublocs) {
+ if ($switch == 0) {
+ if ($subloc->start == 1) {
+ $switch = 1;
+ push @start_seq_segments, $subloc;
+ }
+ else {
+ push @end_seq_segments, $subloc;
+ }
+ }
+ else {
+ push @start_seq_segments, $subloc;
+ }
+ }
+
+ # If its a cut by origin location, sum the whole length of each group
+ if (scalar @end_seq_segments > 0 and @start_seq_segments > 0) {
+ my $end_segments_length = abs( $end_seq_segments[0]->start
+ - $end_seq_segments[-1]->end)
+ + 1;
+ my $start_segments_length = abs( $start_seq_segments[0]->start
+ - $start_seq_segments[-1]->end)
+ + 1;
+ $length = $end_segments_length + $start_segments_length;
+ }
+ }
+ else {
+ $length = $end - $start + 1;
+ }
+ }
+
+ # If for some reason nothing worked, fall back to previous behaviour
+ if ($length == 0) {
+ $length = abs($self->end - $self->start) + 1
+ }
+
+ return $length;
+}
=head2 seq_id
View
25 t/Seq/PrimarySeq.t
@@ -7,7 +7,7 @@ use Data::Dumper;
BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin( -tests => 287 );
+ test_begin( -tests => 310 );
use_ok('Bio::PrimarySeq');
use_ok('Bio::Location::Simple');
@@ -495,6 +495,8 @@ is $seq->subseq($loc1_strand), 'TTTTT';
is $seq->subseq($loc1_no_strand), 'TTTTT';
is $loc1_strand->to_FTstring, 'complement(1..5)';
is $loc1_no_strand->to_FTstring, 'complement(1..5)';
+is $loc1_strand->length, 5;
+is $loc1_no_strand->length, 5;
# Basic split, both locations in positive strand
# Coords: join(6..10,16..20) => CCCCCTTTTT
@@ -515,6 +517,8 @@ is $seq->subseq($loc2_strand), 'AAAAAGGGGG';
is $seq->subseq($loc2_no_strand), 'AAAAAGGGGG';
is $loc2_strand->to_FTstring, 'complement(join(6..10,16..20))';
is $loc2_no_strand->to_FTstring, 'complement(join(6..10,16..20))';
+is $loc2_strand->length, 15;
+is $loc2_no_strand->length, 15;
# Basic split, both locations in negative strand
# Coords: complement(join(6..10,16..20)) => AAAAAGGGGG
@@ -527,6 +531,7 @@ is $loc3_strand->to_FTstring, 'complement(join(6..10,16..20))';
$loc3_strand->flip_strand;
is $seq->subseq($loc3_strand), 'CCCCCTTTTT';
is $loc3_strand->to_FTstring, 'join(6..10,16..20)';
+is $loc3_strand->length, 15;
## Cut by origin-split, same strand, single sequence that pass through origin
#Coords: join(16..20,1..2) => TTTTTAA
@@ -547,6 +552,8 @@ is $seq->subseq($loc4_strand), 'TTAAAAA';
is $seq->subseq($loc4_no_strand), 'TTAAAAA';
is $loc4_strand->to_FTstring, 'complement(join(16..20,1..2))';
is $loc4_no_strand->to_FTstring, 'complement(join(16..20,1..2))';
+is $loc4_strand->length, 7;
+is $loc4_no_strand->length, 7;
## Cut by origin-combo split, same strand, 2 sequences with 1st passing through origin
#Coords: join(19..20,1..2,11..13) => TTAAGGG
@@ -569,6 +576,8 @@ is $seq->subseq($loc5_strand), 'CCCTTAA';
is $seq->subseq($loc5_no_strand), 'CCCTTAA';
is $loc5_strand->to_FTstring, 'complement(join(19..20,1..2,11..13))';
is $loc5_no_strand->to_FTstring, 'complement(join(19..20,1..2,11..13))';
+is $loc5_strand->length, 15;
+is $loc5_no_strand->length, 15;
## Cut by origin-combo split, same strand, 2 sequences with 2nd passing through origin
#Coords: join(6..10,19..20,1..4) => CCCCCTTAAAA
@@ -591,6 +600,8 @@ is $seq->subseq($loc6_strand), 'TTTTAAGGGGG';
is $seq->subseq($loc6_no_strand), 'TTTTAAGGGGG';
is $loc6_strand->to_FTstring, 'complement(join(6..10,19..20,1..4))';
is $loc6_no_strand->to_FTstring, 'complement(join(6..10,19..20,1..4))';
+is $loc6_strand->length, 19;
+is $loc6_no_strand->length, 19;
## Trans-splicing, 2 sequences in different strands, 2nd in complement
#Coords: join(6..10,complement(16..20)) => CCCCCAAAAA
@@ -611,6 +622,8 @@ is $seq->subseq($loc7_strand), 'TTTTTGGGGG';
is $seq->subseq($loc7_no_strand), 'TTTTTGGGGG';
is $loc7_strand->to_FTstring, 'join(16..20,complement(6..10))';
is $loc7_no_strand->to_FTstring, 'join(16..20,complement(6..10))';
+is $loc7_strand->length, 10;
+is $loc7_no_strand->length, 10;
## Trans-splicing, 2 sequences in different strands, 1st in complement
#Coords: join(complement(16..20),6..10) => AAAAACCCCC
@@ -631,6 +644,8 @@ is $seq->subseq($loc8_strand), 'GGGGGTTTTT';
is $seq->subseq($loc8_no_strand), 'GGGGGTTTTT';
is $loc8_strand->to_FTstring, 'join(complement(6..10),16..20)';
is $loc8_no_strand->to_FTstring, 'join(complement(6..10),16..20)';
+is $loc8_strand->length, 10;
+is $loc8_no_strand->length, 10;
## Trans-splicing w/cut by origin, 2 sequences with 1st passing through origin, 2nd in complement
#Coords: join(19..20,1..3,complement(11..13)) => TTAAACCC
@@ -653,6 +668,8 @@ is $seq->subseq($loc9_strand), 'GGGTTTAA';
is $seq->subseq($loc9_no_strand), 'GGGTTTAA';
is $loc9_strand->to_FTstring, 'join(11..13,complement(1..3),complement(19..20))';
is $loc9_no_strand->to_FTstring, 'join(11..13,complement(1..3),complement(19..20))';
+is $loc9_strand->length, 8;
+is $loc9_no_strand->length, 8;
## Trans-splicing w/cut by origin, 2 sequences with 1st passing through origin, 1st in complement
#Coords: join(complement(1..3),complement(19..20),11..13) => TTTAAGGG
@@ -675,6 +692,8 @@ is $seq->subseq($loc10_strand), 'CCCTTAAA';
is $seq->subseq($loc10_no_strand), 'CCCTTAAA';
is $loc10_strand->to_FTstring, 'join(complement(11..13),19..20,1..3)';
is $loc10_no_strand->to_FTstring, 'join(complement(11..13),19..20,1..3)';
+is $loc10_strand->length, 8;
+is $loc10_no_strand->length, 8;
## Trans-splicing w/cut by origin, 2 sequences with 2nd passing through origin, 2nd in complement
#Coords: join(6..10,complement(1..2),complement(18..20)) => CCCCCTTAAA
@@ -697,6 +716,8 @@ is $seq->subseq($loc11_strand), 'TTTAAGGGGG';
is $seq->subseq($loc11_no_strand), 'TTTAAGGGGG';
is $loc11_strand->to_FTstring, 'join(18..20,1..2,complement(6..10))';
is $loc11_no_strand->to_FTstring, 'join(18..20,1..2,complement(6..10))';
+is $loc11_strand->length, 10;
+is $loc11_no_strand->length, 10;
## Trans-splicing w/cut by origin, 2 sequences with 2nd passing through origin, 1st in complement
#Coords: join(complement(6..10),18..20,1..2) => GGGGGTTTAA
@@ -719,3 +740,5 @@ is $seq->subseq($loc12_strand), 'TTAAACCCCC';
is $seq->subseq($loc12_no_strand), 'TTAAACCCCC';
is $loc12_strand->to_FTstring, 'join(complement(1..2),complement(18..20),6..10)';
is $loc12_no_strand->to_FTstring, 'join(complement(1..2),complement(18..20),6..10)';
+is $loc12_strand->length, 10;
+is $loc12_no_strand->length, 10;

0 comments on commit dfb457a

Please sign in to comment.
Something went wrong with that request. Please try again.