Skip to content

Commit

Permalink
PrimarySeq validation speedup
Browse files Browse the repository at this point in the history
Small optimization to make validation of sequences faster:
  ~9% faster for large sequences (10 Mbp)
  ~27% faster for small sequences (100 bp)
  • Loading branch information
fangly committed Nov 17, 2012
1 parent 27f9a62 commit cbcf173
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
12 changes: 5 additions & 7 deletions Bio/PrimarySeq.pm
Expand Up @@ -324,20 +324,18 @@ sub _set_seq_by_ref {
sequences are considered valid. sequences are considered valid.
Returns : 1 if the supplied sequence string is valid, 0 otherwise. Returns : 1 if the supplied sequence string is valid, 0 otherwise.
Args : - Sequence string to be validated Args : - Sequence string to be validated
- Boolean to throw an error if the sequence is invalid - Boolean to optionally throw an error if the sequence is invalid
=cut =cut


sub validate_seq { sub validate_seq {
my ($self, $seqstr, $throw) = @_; my ($self, $seqstr, $throw) = @_;
$seqstr = '' if not defined $seqstr; if ( (defined $seqstr ) &&
$throw = 0 if not defined $throw ; # 0 for backward compatiblity ($seqstr !~ /^[$MATCHPATTERN]*$/) ) {
if ( (CORE::length $seqstr > 0 ) &&
($seqstr !~ /^([$MATCHPATTERN]+)$/) ) {
if ($throw) { if ($throw) {
$self->throw("Failed validation of sequence '".(defined($self->id) || $self->throw("Failed validation of sequence '".(defined($self->id) ||
'[unidentified sequence]')."'. Invalid characters were: " . '[unidentified sequence]')."'. Invalid characters were: " .
join('',($seqstr =~ /([^$MATCHPATTERN]+)/g))); join('',($seqstr =~ /[^$MATCHPATTERN]/g)));
} }
return 0; return 0;
} }
Expand Down
7 changes: 4 additions & 3 deletions t/Seq/PrimarySeq.t
Expand Up @@ -7,7 +7,7 @@ use Data::Dumper;
BEGIN { BEGIN {
use lib '.'; use lib '.';
use Bio::Root::Test; use Bio::Root::Test;
test_begin( -tests => 177 ); test_begin( -tests => 178 );


use_ok('Bio::PrimarySeq'); use_ok('Bio::PrimarySeq');
use_ok('Bio::Location::Simple'); use_ok('Bio::Location::Simple');
Expand Down Expand Up @@ -366,10 +366,11 @@ is $seq->validate_seq( 'ACGT' ), 1;
is $seq->validate_seq( 'XFRH' ), 1; is $seq->validate_seq( 'XFRH' ), 1;
is $seq->validate_seq( '-~' ), 1; # gap symbols is $seq->validate_seq( '-~' ), 1; # gap symbols
is $seq->validate_seq( '-.*?=~' ), 1; # other valid symbols is $seq->validate_seq( '-.*?=~' ), 1; # other valid symbols
is $seq->validate_seq( '0' ), 0;
is $seq->validate_seq( 'AAAA$' ), 0; is $seq->validate_seq( 'AAAA$' ), 0;
is $seq->validate_seq( 'tt&tt' ), 0; is $seq->validate_seq( 'tt&t!' ), 0;


throws_ok { $seq->validate_seq('tt&tt', 1); } qr/.+/; throws_ok { $seq->validate_seq('tt&t!', 1); } qr/.+/;




# Test direct option (no sequence validation) # Test direct option (no sequence validation)
Expand Down

0 comments on commit cbcf173

Please sign in to comment.