diff --git a/Bio/SearchIO/blast.pm b/Bio/SearchIO/blast.pm index 03e8693dd1..f490118fc5 100644 --- a/Bio/SearchIO/blast.pm +++ b/Bio/SearchIO/blast.pm @@ -447,7 +447,7 @@ sub next_result { my $data = ''; my $flavor = ''; $self->{'_seentop'} = 0; # start next report at top - $self->{'_seentop'} = 0; + my ( $reporttype, $seenquery, $reportline, $reportversion ); my ( $seeniteration, $found_again ); my $incl_threshold = $self->inclusion_threshold; @@ -462,7 +462,7 @@ sub next_result { next if (/^\s+$/); # skip empty lines next if (/CPU time:/); next if (/^>\s*$/); - next if (/\Q[*]+\s+No hits found\s+[*]+\E/); + next if (/[*]+\s+No hits found\s+[*]+/); if ( /^((?:\S+?)?BLAST[NPX]?)\s+(.+)$/i # NCBI BLAST, PSIBLAST # RPSBLAST, MEGABLAST @@ -630,6 +630,27 @@ sub next_result { 'Data' => "$acc$version" } ) if $acc; + + # these elements are dropped with some multiquery reports; add + # back here + $self->element( + { + 'Name' => 'BlastOutput_db-len', + 'Data' => $self->{'_blsdb_length'} + } + ) if $self->{'_blsdb_length'}; + $self->element( + { + 'Name' => 'BlastOutput_db-let', + 'Data' => $self->{'_blsdb_letters'} + } + ) if $self->{'_blsdb_letters'}; + $self->element( + { + 'Name' => 'BlastOutput_db', + 'Data' => $self->{'_blsdb'} + } + ) if $self->{'_blsdb_letters'}; } # added check for WU-BLAST -echofilter option (bug 2388) elsif (/^>Unfiltered[+-]1$/) { @@ -655,26 +676,6 @@ sub next_result { if ( !$self->in_element('iteration') ) { $self->start_element( { 'Name' => 'Iteration' } ); } - # these elements are dropped with some multiquery reports; add - # back here - $self->element( - { - 'Name' => 'BlastOutput_db-len', - 'Data' => $self->{'_blsdb_length'} - } - ) if $self->{'_blsdb_length'}; - $self->element( - { - 'Name' => 'BlastOutput_db-let', - 'Data' => $self->{'_blsdb_letters'} - } - ) if $self->{'_blsdb_letters'}; - $self->element( - { - 'Name' => 'BlastOutput_db', - 'Data' => $self->{'_blsdb'} - } - ) if $self->{'_blsdb_letters'}; # changed 8/28/2008 to exit hit table if blank line is found after an # appropriate line @@ -757,7 +758,7 @@ sub next_result { } } - elsif (/^Database:\s*(.+)$/) { + elsif (/^Database:\s*(.+?)\s*$/) { $self->debug("blast.pm: Database: $1\n"); my $db = $1; diff --git a/t/SearchIO/blast.t b/t/SearchIO/blast.t index 861f08ed64..23f62ed5ed 100644 --- a/t/SearchIO/blast.t +++ b/t/SearchIO/blast.t @@ -8,7 +8,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 1357); + test_begin(-tests => 1360); use_ok('Bio::SearchIO'); } @@ -1171,7 +1171,7 @@ is( $r->algorithm, 'MEGABLAST' ); is( $r->query_name, '503384' ); is( $r->query_description, '11337 bp 2 contigs' ); is( $r->query_length, 11337 ); -is( $r->database_name, 'cneoA.nt ' ); +is( $r->database_name, 'cneoA.nt' ); is( $r->database_letters, 17206226 ); is( $r->database_entries, 4935 ); is( $r->get_statistic('querylength'), 11318 ); @@ -2211,3 +2211,13 @@ $searchio = Bio::SearchIO->new( is ($searchio->next_result->algorithm_version, '2.2.25+', "testing Bug 3298"); is ($searchio->next_result->algorithm_version, '2.2.25+', "testing Bug 3298"); is ($searchio->next_result->algorithm_version, '2.2.25+', "testing Bug 3298"); + +# testing for Bug #3251 +$searchio = Bio::SearchIO->new( + '-format' => 'blast', + '-file' => test_input_file('rpsblast_no_hits.bls') +); + +is ($searchio->next_result->database_name, 'CDD.v.2.13', "testing Bug 3251"); +is ($searchio->next_result->database_name, 'CDD.v.2.13', "testing Bug 3251"); +is ($searchio->next_result->database_name, 'CDD.v.2.13', "testing Bug 3251"); diff --git a/t/data/rpsblast_no_hits.bls b/t/data/rpsblast_no_hits.bls new file mode 100644 index 0000000000..f750dec58b --- /dev/null +++ b/t/data/rpsblast_no_hits.bls @@ -0,0 +1,58 @@ +RPS-BLAST 2.2.18 [Mar-02-2008] + +Database: CDD.v.2.13 + 24,083 sequences; 5,982,884 total letters + +Searching..................................................done + +Query= lcl|YP_134044.1|Plus1 F 4437 4742 NC_006391 hypothetical +protein {Haloarcula marismortui ATCC 43049} + (101 letters) + + ***** No hits found ****** + + +Query= lcl|YP_134045.1|Plus1 F 4742 5065 NC_006391 transcription +regulator {Haloarcula marismortui ATCC 43049} + (107 letters) + + ***** No hits found ****** + + +Query= lcl|YP_134046.1|Plus1 F 5275 5808 NC_006391 hypothetical +protein {Haloarcula marismortui ATCC 43049} + (177 letters) + + ***** No hits found ****** + + + Database: CDD.v.2.13 + Posted date: Nov 8, 2007 6:30 PM + Number of letters in database: 5,982,884 + Number of sequences in database: 24,083 + +Lambda K H + 0.308 0.126 0.352 + +Gapped +Lambda K H + 0.267 0.0606 0.140 + + +Matrix: BLOSUM62 +Gap Penalties: Existence: 11, Extension: 1 +Number of Sequences: 24083 +Number of Hits to DB: 200,397,591 +Number of extensions: 14170162 +Number of successful extensions: 17150 +Number of sequences better than 1.0e-005: 6 +Number of HSP's gapped: 17124 +Number of HSP's successfully gapped: 29 +Length of database: 5,982,884 +Neighboring words threshold: 11 +Window for multiple hits: 40 +X1: 15 ( 6.7 bits) +X2: 38 (14.6 bits) +X3: 64 (24.7 bits) +S1: 40 (20.8 bits) +S2: 102 (43.3 bits)