Permalink
Browse files

Tightened and optimized quality string validation

Optimized speed of string validation (as in PrimarySeq a few commits back)
Used a more thorough validation regexp (but not 100% accurate - it still takes shortcuts)
  • Loading branch information...
1 parent 6a9aac2 commit 09f5ae8034f80ca766597bd62d8628eb6331807a @fangly fangly committed Nov 17, 2012
Showing with 27 additions and 24 deletions.
  1. +12 −14 Bio/Seq/PrimaryQual.pm
  2. +2 −0 Changes
  3. +13 −10 t/Seq/PrimaryQual.t
View
@@ -98,12 +98,13 @@ The rest of the documentation details each of the object methods. Internal metho
package Bio::Seq::PrimaryQual;
-use vars qw(%valid_type);
-use strict;
+use strict;
use base qw(Bio::Root::Root Bio::Seq::QualI);
+our $MATCHPATTERN = '0-9eE\.\s+-';
+
=head2 new()
@@ -203,25 +204,22 @@ sub seq {
=head2 validate_qual($qualstring)
Title : validate_qual($qualstring)
- Usage : print("Valid.") if { &validate_qual($self,$qualities); }
- Function: Make sure that the quality, if it has length > 0, contains at
- least one digit. The individual elements of the quality array are
- not validated and can be any numeric value. Note that quality strings
- are parsed into arrays using split/\d+/,$quality_string, so make sure
- that your quality scalar looks like this if you want it to be parsed
- properly. Note that empty quality strings are considered valid.
+ Usage : print("Valid.") if $self->validate_qual($quality_string);
+ Function: Test that the given quality string is valid. It is expected to
+ contain space-delimited numbers that can be parsed using split /\s+/.
+ However, this validation takes shortcuts and only tests that the
+ string contains characters valid in numbers: 0-9 . eE +-
+ Note that empty quality strings are valid too.
Returns : 1 for a valid quality string, 0 otherwise
Args : - Scalar containing the quality string to validate.
- - Boolean to throw an error if validation failed
+ - Boolean to optionally throw an error if validation failed
=cut
sub validate_qual {
my ($self, $qualstr, $throw) = @_;
- $qualstr = '' if not defined $qualstr;
- $throw = 0 if not defined $throw ; # 0 for backward compatiblity
- if ( (CORE::length $qualstr > 0) &&
- ($qualstr !~ /\d/ ) ) {
+ if ( (defined $qualstr ) &&
+ ($qualstr !~ /^[$MATCHPATTERN]*$/) ) {
if ($throw) {
$self->throw("Failed validation of quality score from '".
(defined($self->id)||'[unidentified sequence]')."'. No numeric ".
View
@@ -55,6 +55,8 @@ CPAN releases are branched from 'master'.
- now handles IUPAC degenerate residues [fangly]
* Bio::PrimarySeq and Bio::PrimarySeqI
- speed improvements for large sequences [Ben Woodcroft, fangly]
+ * Bio::PrimaryQual
+ - tightened and optimized quality string validation [fangly]
[Bug fixes]
View
@@ -7,7 +7,7 @@ BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin(-tests => 67);
+ test_begin(-tests => 70);
use_ok('Bio::SeqIO');
use_ok('Bio::Seq::Quality');
@@ -52,17 +52,20 @@ my @newqualarray = split / /,$newqualstring;
ok $qualobj->qual(\@newqualarray);
is join(' ', @{$qualobj->qual()}), $newqualstring;
-is $qualobj->validate_qual($string_quals), 1;
-is $qualobj->validate_qual("" ), 1;
-is $qualobj->validate_qual("0" ), 1;
-is $qualobj->validate_qual(undef ), 1;
-is $qualobj->validate_qual("10 20 30 30"), 1;
-is $qualobj->validate_qual(" 20 9.3 5 "), 1;
-is $qualobj->validate_qual(" 4" ), 1;
-is $qualobj->validate_qual("chad" ), 0;
-is $qualobj->validate_qual(" " ), 0;
+is $qualobj->validate_qual($string_quals ), 1;
+is $qualobj->validate_qual("" ), 1;
+is $qualobj->validate_qual("0" ), 1;
+is $qualobj->validate_qual(undef ), 1;
+is $qualobj->validate_qual(" " ), 1;
+is $qualobj->validate_qual("10 20 30 30" ), 1;
+is $qualobj->validate_qual(" 20 9 5 " ), 1;
+is $qualobj->validate_qual("+1 9.3 50e-1"), 1;
+is $qualobj->validate_qual(" 4" ), 1;
+is $qualobj->validate_qual("chad" ), 0;
+is $qualobj->validate_qual("10 one" ), 0;
ok $qualobj->qual("10 20 30 30");
+ok $qualobj->qual("+1 9.3 50e-1");
throws_ok { $qualobj->qual("chad"); } qr/.+/;
throws_ok { $qualobj->validate_qual("chad", 1) } qr/.+/;

0 comments on commit 09f5ae8

Please sign in to comment.