Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Tightened and optimized quality string validation

Optimized speed of string validation (as in PrimarySeq a few commits back)
Used a more thorough validation regexp (but not 100% accurate - it still takes shortcuts)
  • Loading branch information...
commit 09f5ae8034f80ca766597bd62d8628eb6331807a 1 parent 6a9aac2
Florent Angly authored November 17, 2012
26  Bio/Seq/PrimaryQual.pm
@@ -98,12 +98,13 @@ The rest of the documentation details each of the object methods. Internal metho
98 98
 
99 99
 
100 100
 package Bio::Seq::PrimaryQual;
101  
-use vars qw(%valid_type);
102  
-use strict;
103 101
 
  102
+use strict;
104 103
 
105 104
 use base qw(Bio::Root::Root Bio::Seq::QualI);
106 105
 
  106
+our $MATCHPATTERN = '0-9eE\.\s+-';
  107
+
107 108
 
108 109
 =head2 new()
109 110
 
@@ -203,25 +204,22 @@ sub seq {
203 204
 =head2 validate_qual($qualstring)
204 205
 
205 206
  Title   : validate_qual($qualstring)
206  
- Usage   : print("Valid.") if { &validate_qual($self,$qualities); }
207  
- Function: Make sure that the quality, if it has length > 0, contains at
208  
-           least one digit. The individual elements of the quality array are
209  
-           not validated and can be any numeric value. Note that quality strings
210  
-           are parsed into arrays using split/\d+/,$quality_string, so make sure
211  
-           that your quality scalar looks like this if you want it to be parsed
212  
-           properly. Note that empty quality strings are considered valid.
  207
+ Usage   : print("Valid.") if $self->validate_qual($quality_string);
  208
+ Function: Test that the given quality string is valid. It is expected to
  209
+           contain space-delimited numbers that can be parsed using split /\s+/.
  210
+           However, this validation takes shortcuts and only tests that the
  211
+           string contains characters valid in numbers: 0-9 . eE +-
  212
+           Note that empty quality strings are valid too.
213 213
 Returns : 1 for a valid quality string, 0 otherwise
214 214
  Args    : - Scalar containing the quality string to validate.
215  
-           - Boolean to throw an error if validation failed
  215
+           - Boolean to optionally throw an error if validation failed
216 216
 
217 217
 =cut
218 218
 
219 219
 sub validate_qual {
220 220
     my ($self, $qualstr, $throw) = @_;
221  
-    $qualstr = '' if not defined $qualstr;
222  
-    $throw   = 0  if not defined $throw ; # 0 for backward compatiblity
223  
-    if ( (CORE::length $qualstr > 0) &&
224  
-         ($qualstr !~ /\d/         ) ) {
  221
+    if ( (defined $qualstr                ) &&
  222
+         ($qualstr !~ /^[$MATCHPATTERN]*$/) ) {
225 223
         if ($throw) {
226 224
             $self->throw("Failed validation of quality score from  '".
227 225
                (defined($self->id)||'[unidentified sequence]')."'. No numeric ".
2  Changes
@@ -55,6 +55,8 @@ CPAN releases are branched from 'master'.
55 55
         - now handles IUPAC degenerate residues [fangly]
56 56
     * Bio::PrimarySeq and Bio::PrimarySeqI
57 57
         - speed improvements for large sequences [Ben Woodcroft, fangly]
  58
+    * Bio::PrimaryQual
  59
+        - tightened and optimized quality string validation [fangly]
58 60
 
59 61
     [Bug fixes]
60 62
 
23  t/Seq/PrimaryQual.t
@@ -7,7 +7,7 @@ BEGIN {
7 7
     use lib '.';
8 8
     use Bio::Root::Test;
9 9
     
10  
-    test_begin(-tests => 67);
  10
+    test_begin(-tests => 70);
11 11
 
12 12
     use_ok('Bio::SeqIO');
13 13
     use_ok('Bio::Seq::Quality');
@@ -52,17 +52,20 @@ my @newqualarray = split / /,$newqualstring;
52 52
 ok $qualobj->qual(\@newqualarray);
53 53
 is join(' ', @{$qualobj->qual()}), $newqualstring;
54 54
 
55  
-is $qualobj->validate_qual($string_quals), 1;
56  
-is $qualobj->validate_qual(""           ), 1;
57  
-is $qualobj->validate_qual("0"          ), 1;
58  
-is $qualobj->validate_qual(undef        ), 1;
59  
-is $qualobj->validate_qual("10 20 30 30"), 1;
60  
-is $qualobj->validate_qual(" 20  9.3 5 "), 1;
61  
-is $qualobj->validate_qual(" 4"         ), 1;
62  
-is $qualobj->validate_qual("chad"       ), 0;
63  
-is $qualobj->validate_qual("   "        ), 0;
  55
+is $qualobj->validate_qual($string_quals ), 1;
  56
+is $qualobj->validate_qual(""            ), 1;
  57
+is $qualobj->validate_qual("0"           ), 1;
  58
+is $qualobj->validate_qual(undef         ), 1;
  59
+is $qualobj->validate_qual("   "         ), 1;
  60
+is $qualobj->validate_qual("10 20 30 30" ), 1;
  61
+is $qualobj->validate_qual(" 20  9 5   " ), 1;
  62
+is $qualobj->validate_qual("+1 9.3 50e-1"), 1;
  63
+is $qualobj->validate_qual(" 4"          ), 1;
  64
+is $qualobj->validate_qual("chad"        ), 0;
  65
+is $qualobj->validate_qual("10 one"      ), 0;
64 66
 
65 67
 ok $qualobj->qual("10 20 30 30");
  68
+ok $qualobj->qual("+1 9.3 50e-1");
66 69
 throws_ok { $qualobj->qual("chad"); } qr/.+/;
67 70
 throws_ok { $qualobj->validate_qual("chad", 1) } qr/.+/;
68 71
 

0 notes on commit 09f5ae8

Please sign in to comment.
Something went wrong with that request. Please try again.