Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

SearchResultEventBuilder & IteratedSearchResultEventBuilder:

Fixed Feature #2615. Moved "_init_parse_params", "max_significance,
"signif", "min_score", "min_bits, and "hit_filter" methods from
'IteratedSearchResultEventBuilder' to parent 'SearchResultEventBuilder',
also merged the "end_result" code from both modules since
it was very similar. This means that 1) IteratedSearch now only
have the code that is different and specific for Blast iterations,
and 2) that the moved methods will now work with other Bio::SearchIO
formats besides Blast, closing the inconsistent effect in different
formats of the related Bio::SearchIO->new() parameters.
Added tests for all moved methods using HMMER outputs and run
the full test suite and everything pass.
  • Loading branch information...
commit 3165a97e3de6fd7cec34e238196f9e8581255e53 1 parent 120fd91
Francisco J. Ossandon authored January 14, 2014
429  Bio/SearchIO/IteratedSearchResultEventBuilder.pm
@@ -2,7 +2,7 @@
2 2
 #
3 3
 # BioPerl module for Bio::SearchIO::IteratedSearchResultEventBuilder
4 4
 #
5  
-# Please direct questions and support issues to <bioperl-l@bioperl.org> 
  5
+# Please direct questions and support issues to <bioperl-l@bioperl.org>
6 6
 #
7 7
 # Cared for by Steve Chervitz <sac@bioperl.org> and Jason Stajich <jason@bioperl.org>
8 8
 #
@@ -39,15 +39,15 @@ the Bioperl mailing list.  Your participation is much appreciated.
39 39
   bioperl-l@bioperl.org                  - General discussion
40 40
   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
41 41
 
42  
-=head2 Support 
  42
+=head2 Support
43 43
 
44 44
 Please direct usage questions or support issues to the mailing list:
45 45
 
46 46
 I<bioperl-l@bioperl.org>
47 47
 
48  
-rather than to the module maintainer directly. Many experienced and 
49  
-reponsive experts will be able look at the problem and quickly 
50  
-address it. Please include a thorough description of the problem 
  48
+rather than to the module maintainer directly. Many experienced and
  49
+reponsive experts will be able look at the problem and quickly
  50
+address it. Please include a thorough description of the problem
51 51
 with code and data examples if at all possible.
52 52
 
53 53
 =head2 Reporting Bugs
@@ -81,9 +81,6 @@ Internal methods are usually preceded with a _
81 81
 
82 82
 
83 83
 package Bio::SearchIO::IteratedSearchResultEventBuilder;
84  
-use vars qw(%KNOWNEVENTS             $DEFAULT_INCLUSION_THRESHOLD
85  
-            $MAX_HSP_OVERLAP
86  
-);
87 84
 
88 85
 use strict;
89 86
 
@@ -91,19 +88,11 @@ use Bio::Factory::ObjectFactory;
91 88
 
92 89
 use base qw(Bio::SearchIO::SearchResultEventBuilder);
93 90
 
94  
-# e-value threshold for inclusion in the PSI-BLAST score matrix model (blastpgp)
95  
-# NOTE: Executing `blastpgp -` incorrectly reports that the default is 0.005.
96  
-#       (version 2.2.2 [Jan-08-2002])
97  
-$DEFAULT_INCLUSION_THRESHOLD = 0.001;
98  
-
99  
-
100  
-$MAX_HSP_OVERLAP  = 2;  # Used when tiling multiple HSPs.
101  
-
102 91
 =head2 new
103 92
 
104 93
  Title   : new
105 94
  Usage   : my $obj = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
106  
- Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object 
  95
+ Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object
107 96
  Returns : Bio::SearchIO::IteratedSearchResultEventBuilder
108 97
  Args    : -hsp_factory    => Bio::Factory::ObjectFactoryI
109 98
            -hit_factory    => Bio::Factory::ObjectFactoryI
@@ -120,67 +109,44 @@ $MAX_HSP_OVERLAP  = 2;  # Used when tiling multiple HSPs.
120 109
            -hit_filter  => reference to a function to be used for
121 110
                            filtering hits based on arbitrary criteria.
122 111
 
123  
-
124 112
 See L<Bio::SearchIO::SearchResultEventBuilder> for more information
125 113
 
126 114
 =cut
127 115
 
128  
-sub new { 
  116
+sub new {
129 117
     my ($class,@args) = @_;
130 118
     my $self = $class->SUPER::new(@args);
131  
-    my ($hitF, $resultF, $hspF, $iterationF) =
132  
-        $self->_rearrange([qw(
133  
-                              HIT_FACTORY
134  
-                              RESULT_FACTORY
135  
-			      HSP_FACTORY
  119
+    my ($resultF, $iterationF, $hitF,  $hspF) =
  120
+        $self->_rearrange([qw(RESULT_FACTORY
136 121
                               ITERATION_FACTORY
137  
-                             )],@args);
138  
-
  122
+                              HIT_FACTORY
  123
+                              HSP_FACTORY)],@args);
139 124
     $self->_init_parse_params(@args);
140 125
 
141 126
     # Note that we need to override the setting of result and factories here
142 127
     # so that we can set different default factories than are set by the super class.
143  
-    $self->register_factory('result', $resultF || 
  128
+    $self->register_factory('result', $resultF ||
144 129
                             Bio::Factory::ObjectFactory->new(
145  
-                                 -type      => 'Bio::Search::Result::BlastResult',
146  
-                                 -interface => 'Bio::Search::Result::ResultI'));
  130
+                                -type      => 'Bio::Search::Result::BlastResult',
  131
+                                -interface => 'Bio::Search::Result::ResultI'));
147 132
 
148  
-    $self->register_factory('hit', $hitF || 
  133
+    $self->register_factory('hit', $hitF ||
149 134
                             Bio::Factory::ObjectFactory->new(
150  
-                                 -type      => 'Bio::Search::Hit::BlastHit',
151  
-                                 -interface => 'Bio::Search::Hit::HitI'));
  135
+                                -type      => 'Bio::Search::Hit::BlastHit',
  136
+                                -interface => 'Bio::Search::Hit::HitI'));
152 137
 
153  
-    $self->register_factory('hsp', $hspF || 
  138
+    $self->register_factory('hsp', $hspF ||
154 139
                             Bio::Factory::ObjectFactory->new(
155  
-                                 -type      => 'Bio::Search::HSP::GenericHSP',
156  
-                                 -interface => 'Bio::Search::HSP::HSPI'));
  140
+                                -type      => 'Bio::Search::HSP::GenericHSP',
  141
+                                -interface => 'Bio::Search::HSP::HSPI'));
157 142
 
158 143
     # TODO: Change this to BlastIteration (maybe)
159  
-    $self->register_factory('iteration', $iterationF || 
  144
+    $self->register_factory('iteration', $iterationF ||
160 145
                             Bio::Factory::ObjectFactory->new(
161  
-                                 -type      => 'Bio::Search::Iteration::GenericIteration',
162  
-                                 -interface => 'Bio::Search::Iteration::IterationI'));
163  
-    return $self;
164  
-}
165  
-
166  
-
167  
-#Initializes parameters used during parsing of Blast reports.
168  
-sub _init_parse_params {
169  
-
170  
-    my ($self, @args) = @_;
171  
-    # -FILT_FUNC has been replaced by -HIT_FILTER.
172  
-    # Leaving -FILT_FUNC in place for backward compatibility
173  
-    my($ithresh, $signif, $score, $bits, $hit_filter, $filt_func) =
174  
-           $self->_rearrange([qw(INCLUSION_THRESHOLD
175  
-                                 SIGNIF SCORE BITS HIT_FILTER FILT_FUNC
176  
-                                )], @args);
  146
+                                -type      => 'Bio::Search::Iteration::GenericIteration',
  147
+                                -interface => 'Bio::Search::Iteration::IterationI'));
177 148
 
178  
-    $self->inclusion_threshold( defined($ithresh) ? $ithresh : $DEFAULT_INCLUSION_THRESHOLD);
179  
-    my $hit_filt = $hit_filter || $filt_func;
180  
-    defined $hit_filter && $self->hit_filter($hit_filt);
181  
-    defined $signif     && $self->max_significance($signif);
182  
-    defined $score      && $self->min_score($score);
183  
-    defined $bits       && $self->min_bits($bits);
  149
+    return $self;
184 150
 }
185 151
 
186 152
 =head2 will_handle
@@ -191,14 +157,13 @@ sub _init_parse_params {
191 157
  Returns : boolean
192 158
  Args    : event type name
193 159
 
194  
-
195 160
 =cut
196 161
 
197 162
 sub will_handle{
198 163
    my ($self,$type) = @_;
199 164
    # these are the events we recognize
200  
-   return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' || $type eq 'iteration' ||
201  
-            $type eq 'newhits' || $type eq 'oldhits' );
  165
+   return (   $type eq 'hsp' || $type eq 'hit' || $type eq 'result'
  166
+           || $type eq 'iteration' || $type eq 'newhits' || $type eq 'oldhits' );
202 167
 }
203 168
 
204 169
 =head2 SAX methods
@@ -210,7 +175,7 @@ sub will_handle{
210 175
  Title   : start_result
211 176
  Usage   : $handler->start_result($resulttype)
212 177
  Function: Begins a result event cycle
213  
- Returns : none 
  178
+ Returns : none
214 179
  Args    : Type of Report
215 180
 
216 181
 =cut
@@ -226,75 +191,88 @@ sub start_result {
226 191
    return;
227 192
 }
228 193
 
229  
-=head2 end_result
  194
+=head2 start_iteration
230 195
 
231  
- Title   : end_result
232  
- Usage   : my @results = $parser->end_result
233  
- Function: Finishes a result handler cycle 
234  
- Returns : A Bio::Search::Result::ResultI
235  
- Args    : none
  196
+ Title   : start_iteration
  197
+ Usage   : $handler->start_iteration()
  198
+ Function: Starts an Iteration event cycle
  199
+ Returns : none
  200
+ Args    : type of event and associated hashref
236 201
 
237 202
 =cut
238 203
 
239  
-sub end_result {
240  
-    my ($self,$type,$data) = @_;
241  
-    #print STDERR "ISREB: end_result\n";
242  
-    ## How is runid getting set? Purpose?
243  
-    if( defined $data->{'runid'} &&
244  
-        $data->{'runid'} !~ /^\s+$/ ) {        
245  
-
246  
-        if( $data->{'runid'} !~ /^lcl\|/) { 
247  
-            $data->{"RESULT-query_name"}= $data->{'runid'};
248  
-        } else { 
249  
-            ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) = 
250  
-                split(/\s+/,$data->{"RESULT-query_description"},2);
251  
-        }
252  
-        
253  
-        if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
254  
-            my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
255  
-            # this is for |123|gb|ABC1.1|
256  
-            $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
257  
-            $data->{"RESULT-query_accession"}= $acc;
258  
-        }
259  
-        delete $data->{'runid'};
260  
-    }
261  
-    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); } 
262  
-               grep { /^RESULT/ } keys %{$data};
263  
-    
264  
-    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
265  
-                               $data->{'RESULT-algorithm_name'} || $type);
266  
-
267  
-    $args{'-iterations'} = $self->{'_iterations'};
268  
-
269  
-    my $result = $self->factory('result')->create_object(%args);
270  
-    $result->hit_factory($self->factory('hit'));
271  
-    $self->{'_iterations'} = [];
272  
-    return $result;
  204
+sub start_iteration {
  205
+    my ($self,$type) = @_;
  206
+
  207
+    #print STDERR "ISREB: start_iteration()\n";
  208
+    $self->{'_iteration_count'}++;
  209
+
  210
+    # Reset arrays for the various classes of hits.
  211
+#    $self->{'_newhits_unclassified'}     = [];
  212
+    $self->{'_newhits_below'}        = [];
  213
+    $self->{'_newhits_not_below'}    = [];
  214
+    $self->{'_oldhits_below'}        = [];
  215
+    $self->{'_oldhits_newly_below'}  = [];
  216
+    $self->{'_oldhits_not_below'}    = [];
  217
+    $self->{'_hitcount'} = 0;
  218
+    return;
273 219
 }
274 220
 
275 221
 
  222
+=head2 end_iteration
  223
+
  224
+ Title   : end_iteration
  225
+ Usage   : $handler->end_iteration()
  226
+ Function: Ends an Iteration event cycle
  227
+ Returns : Bio::Search::Iteration object
  228
+ Args    : type of event and associated hashref
  229
+
  230
+=cut
  231
+
  232
+sub end_iteration {
  233
+    my ($self,$type,$data) = @_;
  234
+
  235
+    # print STDERR "ISREB: end_iteration()\n";
  236
+
  237
+    my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
  238
+    grep { /^ITERATION/ } keys %{$data};
  239
+
  240
+    $args{'-number'} = $self->{'_iteration_count'};
  241
+    $args{'-oldhits_below'} = $self->{'_oldhits_below'};
  242
+    $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
  243
+    $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
  244
+    $args{'-newhits_below'} = $self->{'_newhits_below'};
  245
+    $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
  246
+    $args{'-hit_factory'} = $self->factory('hit');
  247
+
  248
+    my $it = $self->factory('iteration')->create_object(%args);
  249
+    push @{$self->{'_iterations'}}, $it;
  250
+    return $it;
  251
+}
  252
+
276 253
 # Title   : _add_hit (private function for internal use only)
277 254
 # Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
278  
-# Argument: Bio::Search::Hit::HitI object 
  255
+# Argument: Bio::Search::Hit::HitI object
279 256
 
280 257
 sub _add_hit {
281 258
     my ($self, $hit) = @_;
282  
-	
283  
-    my $hit_name = uc($hit->{-name});
  259
+
  260
+    my $hit_name   = uc($hit->{-name});
284 261
     my $hit_signif = $hit->{-significance};
285  
-    my $ithresh = $self->{'_inclusion_threshold'};
286  
-	
  262
+    my $ithresh    = $self->{'_inclusion_threshold'};
  263
+
287 264
     # Test significance using custom function (if supplied)
288 265
     my $add_hit = 1;
289  
-	
  266
+
290 267
     my $hit_filter = $self->{'_hit_filter'};
291  
-	
  268
+
292 269
     if($hit_filter) {
293 270
         # since &hit_filter is out of our control and would expect a HitI object,
294 271
         # we're forced to make one for it
295 272
         $hit = $self->factory('hit')->create_object(%{$hit});
296 273
         $add_hit = 0 unless &$hit_filter($hit);
297  
-    } else {
  274
+    }
  275
+    else {
298 276
         if($self->{'_confirm_significance'}) {
299 277
             $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
300 278
         }
@@ -307,16 +285,16 @@ sub _add_hit {
307 285
             $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
308 286
         }
309 287
     }
310  
-	
  288
+
311 289
     $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
312  
-    # Building hit lookup hashes for determining if the hit is old/new and 
  290
+    # Building hit lookup hashes for determining if the hit is old/new and
313 291
     # above/below threshold.
314 292
     $self->{'_old_hit_names'}->{$hit_name}++;
315 293
     $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
316 294
 }
317 295
 
318 296
 # Title   : _store_hit (private function for internal use only)
319  
-# Purpose : Collects hit objects into defined sets that are useful for 
  297
+# Purpose : Collects hit objects into defined sets that are useful for
320 298
 #           analyzing PSI-blast results.
321 299
 #           These are ultimately added to the iteration object in end_iteration().
322 300
 #
@@ -325,15 +303,15 @@ sub _add_hit {
325 303
 #   Secondary split = below vs. above threshold
326 304
 #   1. Has this hit occurred in a previous iteration?
327 305
 #   1.1. If yes, was it below threshold?
328  
-#   1.1.1. If yes, ---> [oldhits_below] 
  306
+#   1.1.1. If yes, ---> [oldhits_below]
329 307
 #   1.1.2. If no, is it now below threshold?
330  
-#   1.1.2.1. If yes, ---> [oldhits_newly_below] 
331  
-#   1.1.2.2. If no, ---> [oldhits_not_below] 
  308
+#   1.1.2.1. If yes, ---> [oldhits_newly_below]
  309
+#   1.1.2.2. If no, ---> [oldhits_not_below]
332 310
 #   1.2. If no, is it below threshold?
333  
-#   1.2.1. If yes, ---> [newhits_below] 
334  
-#   1.2.2. If no, ---> [newhits_not_below] 
335  
-#   1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified] 
336  
-#   Note: As long as there's a default inclusion threshold, 
  311
+#   1.2.1. If yes, ---> [newhits_below]
  312
+#   1.2.2. If no, ---> [newhits_not_below]
  313
+#   1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
  314
+#   Note: As long as there's a default inclusion threshold,
337 315
 #         there won't be an unclassified set.
338 316
 #
339 317
 # For the first iteration, it might be nice to detect non-PSI blast reports
@@ -342,18 +320,17 @@ sub _add_hit {
342 320
 # for non-PSI blast reports since they'll get flattened out in the
343 321
 # result and iteration search objects.
344 322
 
345  
-
346 323
 sub _store_hit {
347 324
     my ($self, $hit, $hit_name, $hit_signif) = @_;
348 325
 
349 326
     my $ithresh = $self->{'_inclusion_threshold'};
350  
-    
  327
+
351 328
     # This is the assumption leading to Bug 1986. The assumption here is that
352 329
     # the hit name is unique (and thus new), therefore any subsequent encounters
353 330
     # with a hit containing the same name are filed as old hits. This isn't
354 331
     # always true (see the bug report for a few examples). Adding an explicit
355 332
     # check for the presence of iterations, adding to new hits otherwise.
356  
-    
  333
+
357 334
     if (exists $self->{'_old_hit_names'}->{$hit_name}
358 335
         && scalar @{$self->{_iterations}}) {
359 336
         if (exists $self->{'_hit_names_below'}->{$hit_name}) {
@@ -373,210 +350,4 @@ sub _store_hit {
373 350
     $self->{'_hitcount'}++;
374 351
 }
375 352
 
376  
-=head2 start_iteration
377  
-
378  
- Title   : start_iteration
379  
- Usage   : $handler->start_iteration()
380  
- Function: Starts an Iteration event cycle
381  
- Returns : none
382  
- Args    : type of event and associated hashref
383  
-
384  
-=cut
385  
-
386  
-sub start_iteration {
387  
-    my ($self,$type) = @_;
388  
-
389  
-    #print STDERR "ISREB: start_iteration()\n";
390  
-    $self->{'_iteration_count'}++;
391  
-
392  
-    # Reset arrays for the various classes of hits.
393  
-#    $self->{'_newhits_unclassified'}     = [];
394  
-    $self->{'_newhits_below'}        = [];
395  
-    $self->{'_newhits_not_below'}    = [];
396  
-    $self->{'_oldhits_below'}        = [];
397  
-    $self->{'_oldhits_newly_below'}  = [];
398  
-    $self->{'_oldhits_not_below'}    = [];
399  
-    $self->{'_hitcount'} = 0;
400  
-    return;
401  
-}
402  
-
403  
-
404  
-=head2 end_iteration
405  
-
406  
- Title   : end_iteration
407  
- Usage   : $handler->end_iteration()
408  
- Function: Ends an Iteration event cycle
409  
- Returns : Bio::Search::Iteration object
410  
- Args    : type of event and associated hashref
411  
-
412  
-
413  
-=cut
414  
-
415  
-sub end_iteration {
416  
-    my ($self,$type,$data) = @_;   
417  
-
418  
-    # print STDERR "ISREB: end_iteration()\n";
419  
-
420  
-    my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); } 
421  
-    grep { /^ITERATION/ } keys %{$data};
422  
-
423  
-    $args{'-number'} = $self->{'_iteration_count'};
424  
-    $args{'-oldhits_below'} = $self->{'_oldhits_below'};
425  
-    $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
426  
-    $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
427  
-    $args{'-newhits_below'} = $self->{'_newhits_below'};
428  
-    $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
429  
-    $args{'-hit_factory'} = $self->factory('hit');
430  
-
431  
-    my $it = $self->factory('iteration')->create_object(%args);
432  
-    push @{$self->{'_iterations'}}, $it;
433  
-    return $it;
434  
-}
435  
-
436  
-=head2 max_significance
437  
-
438  
- Usage     : $obj->max_significance();
439  
- Purpose   : Set/Get the P or Expect value used as significance screening cutoff.
440  
-             This is the value of the -signif parameter supplied to new().
441  
-             Hits with P or E-value above this are skipped.
442  
- Returns   : Scientific notation number with this format: 1.0e-05.
443  
- Argument  : Number (sci notation, float, integer) (when setting)
444  
- Throws    : Bio::Root::BadParameter exception if the supplied argument is
445  
-           : not a valid number.
446  
- Comments  : Screening of significant hits uses the data provided on the
447  
-           : description line. For NCBI BLAST1 and WU-BLAST, this data 
448  
-           : is P-value. for NCBI BLAST2 it is an Expect value.
449  
-
450  
-=cut
451  
-
452  
-sub max_significance {
453  
-    my $self = shift;
454  
-    if (@_) {
455  
-        my $sig = shift;
456  
-        if( $sig =~ /[^\d.e-]/ or $sig <= 0) {
457  
-            $self->throw(-class => 'Bio::Root::BadParameter',
458  
-                         -text => "Invalid significance value: $sig\n".
459  
-                         "Must be a number greater than zero.",
460  
-                         -value=>$sig);
461  
-        }
462  
-        $self->{'_confirm_significance'} = 1;
463  
-        $self->{'_max_significance'} = $sig;
464  
-    }
465  
-    sprintf "%.1e", $self->{'_max_significance'};
466  
-}
467  
-
468  
-
469  
-=head2 signif
470  
-
471  
-Synonym for L<max_significance()|max_significance>
472  
-
473  
-=cut
474  
-
475  
-sub signif { shift->max_significance }
476  
-
477  
-=head2 min_score
478  
-
479  
- Usage     : $obj->min_score();
480  
- Purpose   : Gets the Blast score used as screening cutoff.
481  
-             This is the value of the -score parameter supplied to new().
482  
-             Hits with scores below this are skipped.
483  
- Returns   : Integer (or undef if not set)
484  
- Argument  : Integer (when setting)
485  
- Throws    : Bio::Root::BadParameter exception if the supplied argument is
486  
-           : not a valid number.
487  
- Comments  : Screening of significant hits uses the data provided on the
488  
-           : description line. 
489  
-
490  
-=cut
491  
-
492  
-sub min_score {
493  
-    my $self = shift;
494  
-    if (@_) {
495  
-        my $score = shift;
496  
-        if( $score =~ /[^\de+]/ or $score <= 0) {
497  
-            $self->throw(-class => 'Bio::Root::BadParameter',
498  
-                         -text => "Invalid score value: $score\n".
499  
-                                  "Must be an integer greater than zero.",
500  
-                        -value => $score);
501  
-        }
502  
-        $self->{'_confirm_score'} = 1;
503  
-        $self->{'_min_score'} = $score;
504  
-    }
505  
-    return $self->{'_min_score'};
506  
-}
507  
-
508  
-
509  
-=head2 min_bits
510  
-
511  
- Usage     : $obj->min_bits();
512  
- Purpose   : Gets the Blast bit score used as screening cutoff.
513  
-             This is the value of the -bits parameter supplied to new().
514  
-             Hits with bits score below this are skipped.
515  
- Returns   : Integer (or undef if not set)
516  
- Argument  : Integer (when setting)
517  
- Throws    : Bio::Root::BadParameter exception if the supplied argument is
518  
-           : not a valid number.
519  
- Comments  : Screening of significant hits uses the data provided on the
520  
-           : description line. 
521  
-
522  
-=cut
523  
-
524  
-sub min_bits {
525  
-    my $self = shift;
526  
-    if (@_) {
527  
-        my $bits = shift;
528  
-        if( $bits =~ /[^\de+]/ or $bits <= 0) {
529  
-            $self->throw(-class => 'Bio::Root::BadParameter',
530  
-                         -text => "Invalid bits value: $bits\n".
531  
-                                  "Must be an integer greater than zero.",
532  
-                        -value => $bits);
533  
-        }
534  
-        $self->{'_confirm_bits'} = 1;
535  
-        $self->{'_min_bits'} = $bits;
536  
-    }
537  
-    return $self->{'_min_bits'};
538  
-}
539  
-
540  
-
541  
-=head2 hit_filter
542  
-
543  
- Usage     : $obj->hit_filter();
544  
- Purpose   : Set/Get a function reference used for filtering out hits.
545  
-             This is the value of the -hit_filter parameter supplied to new().
546  
-             Hits that fail to pass the filter are skipped.
547  
- Returns   : Function ref (or undef if not set)
548  
- Argument  : Function ref (when setting)
549  
- Throws    : Bio::Root::BadParameter exception if the supplied argument is
550  
-           : not a function reference.
551  
-
552  
-=cut
553  
-
554  
-sub hit_filter {
555  
-    my $self = shift;
556  
-    if (@_) {
557  
-        my $func = shift;
558  
-        if(not ref $func eq 'CODE') {
559  
-            $self->throw(-class=>'Bio::Root::BadParameter',
560  
-                         -text=>"Not a function reference: $func\n".
561  
-                                "The -hit_filter parameter must be function reference.",
562  
-                         -value=> $func);
563  
-        }
564  
-        $self->{'_hit_filter'} = $func;
565  
-    }
566  
-    return $self->{'_hit_filter'};
567  
-}
568  
-
569  
-=head2 inclusion_threshold
570  
-
571  
-See L<Bio::SearchIO::blast::inclusion_threshold>.
572  
-
573  
-=cut
574  
-
575  
-sub inclusion_threshold {
576  
-    my $self = shift;
577  
-    return $self->{'_inclusion_threshold'} = shift if @_;
578  
-    return $self->{'_inclusion_threshold'};
579  
-}
580  
-
581  
-
582 353
 1;
389  Bio/SearchIO/SearchResultEventBuilder.pm
... ...
@@ -1,7 +1,7 @@
1 1
 #
2 2
 # BioPerl module for Bio::SearchIO::SearchResultEventBuilder
3 3
 #
4  
-# Please direct questions and support issues to <bioperl-l@bioperl.org> 
  4
+# Please direct questions and support issues to <bioperl-l@bioperl.org>
5 5
 #
6 6
 # Cared for by Jason Stajich <jason@bioperl.org>
7 7
 #
@@ -36,15 +36,15 @@ the Bioperl mailing list.  Your participation is much appreciated.
36 36
   bioperl-l@bioperl.org                  - General discussion
37 37
   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
38 38
 
39  
-=head2 Support 
  39
+=head2 Support
40 40
 
41 41
 Please direct usage questions or support issues to the mailing list:
42 42
 
43 43
 I<bioperl-l@bioperl.org>
44 44
 
45  
-rather than to the module maintainer directly. Many experienced and 
46  
-reponsive experts will be able look at the problem and quickly 
47  
-address it. Please include a thorough description of the problem 
  45
+rather than to the module maintainer directly. Many experienced and
  46
+reponsive experts will be able look at the problem and quickly
  47
+address it. Please include a thorough description of the problem
48 48
 with code and data examples if at all possible.
49 49
 
50 50
 =head2 Reporting Bugs
@@ -75,52 +75,93 @@ Internal methods are usually preceded with a _
75 75
 
76 76
 
77 77
 package Bio::SearchIO::SearchResultEventBuilder;
78  
-use vars qw(%KNOWNEVENTS);
  78
+
79 79
 use strict;
80 80
 
81 81
 use Bio::Factory::ObjectFactory;
82 82
 
83 83
 use base qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
84 84
 
  85
+use vars qw($DEFAULT_INCLUSION_THRESHOLD
  86
+            $MAX_HSP_OVERLAP
  87
+);
  88
+
  89
+# e-value threshold for inclusion in the PSI-BLAST score matrix model (blastpgp)
  90
+# NOTE: Executing `blastpgp -` incorrectly reports that the default is 0.005.
  91
+#       (version 2.2.2 [Jan-08-2002])
  92
+$DEFAULT_INCLUSION_THRESHOLD = 0.001;
  93
+
  94
+$MAX_HSP_OVERLAP  = 2;  # Used when tiling multiple HSPs.
  95
+
85 96
 =head2 new
86 97
 
87 98
  Title   : new
88 99
  Usage   : my $obj = Bio::SearchIO::SearchResultEventBuilder->new();
89  
- Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object 
  100
+ Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object
90 101
  Returns : Bio::SearchIO::SearchResultEventBuilder
91 102
  Args    : -hsp_factory    => Bio::Factory::ObjectFactoryI
92 103
            -hit_factory    => Bio::Factory::ObjectFactoryI
93 104
            -result_factory => Bio::Factory::ObjectFactoryI
  105
+           -inclusion_threshold => e-value threshold for inclusion in the
  106
+                                   PSI-BLAST score matrix model (blastpgp)
  107
+           -signif      => float or scientific notation number to be used
  108
+                           as a P- or Expect value cutoff
  109
+           -score       => integer or scientific notation number to be used
  110
+                           as a blast score value cutoff
  111
+           -bits        => integer or scientific notation number to be used
  112
+                           as a bit score value cutoff
  113
+           -hit_filter  => reference to a function to be used for
  114
+                           filtering hits based on arbitrary criteria.
94 115
 
95 116
 See L<Bio::Factory::ObjectFactoryI> for more information
96 117
 
97 118
 =cut
98 119
 
99  
-sub new { 
  120
+sub new {
100 121
     my ($class,@args) = @_;
101 122
     my $self = $class->SUPER::new(@args);
102  
-    my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY
103  
-                                                      HIT_FACTORY
104  
-                                                      RESULT_FACTORY)],@args);
105  
-    $self->register_factory('hsp', $hspF || 
  123
+    my ($resultF, $hitF, $hspF) =
  124
+        $self->_rearrange([qw(RESULT_FACTORY
  125
+                              HIT_FACTORY
  126
+                              HSP_FACTORY)],@args);
  127
+    $self->_init_parse_params(@args);
  128
+
  129
+    $self->register_factory('result', $resultF ||
106 130
                             Bio::Factory::ObjectFactory->new(
107  
-                                     -type      => 'Bio::Search::HSP::GenericHSP',
108  
-                                     -interface => 'Bio::Search::HSP::HSPI'));
  131
+                                -type      => 'Bio::Search::Result::GenericResult',
  132
+                                -interface => 'Bio::Search::Result::ResultI'));
109 133
 
110 134
     $self->register_factory('hit', $hitF ||
111 135
                             Bio::Factory::ObjectFactory->new(
112  
-                                      -type      => 'Bio::Search::Hit::GenericHit',
113  
-                                      -interface => 'Bio::Search::Hit::HitI'));
  136
+                                -type      => 'Bio::Search::Hit::GenericHit',
  137
+                                -interface => 'Bio::Search::Hit::HitI'));
114 138
 
115  
-    $self->register_factory('result', $resultF ||
  139
+    $self->register_factory('hsp', $hspF ||
116 140
                             Bio::Factory::ObjectFactory->new(
117  
-                                      -type      => 'Bio::Search::Result::GenericResult',
118  
-                                      -interface => 'Bio::Search::Result::ResultI'));
  141
+                                -type      => 'Bio::Search::HSP::GenericHSP',
  142
+                                -interface => 'Bio::Search::HSP::HSPI'));
119 143
 
120 144
     return $self;
121 145
 }
122 146
 
123  
-# new comes from the superclass
  147
+# Initializes parameters used during parsing of reports.
  148
+sub _init_parse_params {
  149
+
  150
+    my ($self, @args) = @_;
  151
+    # -FILT_FUNC has been replaced by -HIT_FILTER.
  152
+    # Leaving -FILT_FUNC in place for backward compatibility
  153
+    my($ithresh, $signif, $score, $bits, $hit_filter, $filt_func) =
  154
+           $self->_rearrange([qw(INCLUSION_THRESHOLD SIGNIF SCORE BITS
  155
+                                 HIT_FILTER FILT_FUNC
  156
+                                )], @args);
  157
+
  158
+    $self->inclusion_threshold( defined($ithresh) ? $ithresh : $DEFAULT_INCLUSION_THRESHOLD);
  159
+    my $hit_filt = $hit_filter || $filt_func;
  160
+    defined $hit_filter && $self->hit_filter($hit_filt);
  161
+    defined $signif     && $self->max_significance($signif);
  162
+    defined $score      && $self->min_score($score);
  163
+    defined $bits       && $self->min_bits($bits);
  164
+}
124 165
 
125 166
 =head2 will_handle
126 167
 
@@ -130,7 +171,6 @@ sub new {
130 171
  Returns : boolean
131 172
  Args    : event type name
132 173
 
133  
-
134 174
 =cut
135 175
 
136 176
 sub will_handle{
@@ -148,7 +188,7 @@ sub will_handle{
148 188
  Title   : start_result
149 189
  Usage   : $handler->start_result($resulttype)
150 190
  Function: Begins a result event cycle
151  
- Returns : none 
  191
+ Returns : none
152 192
  Args    : Type of Report
153 193
 
154 194
 =cut
@@ -156,7 +196,7 @@ sub will_handle{
156 196
 sub start_result {
157 197
    my ($self,$type) = @_;
158 198
    $self->{'_resulttype'} = $type;
159  
-   $self->{'_hits'} = [];   
  199
+   $self->{'_hits'} = [];
160 200
    $self->{'_hsps'} = [];
161 201
    $self->{'_hitcount'} = 0;
162 202
    return;
@@ -166,7 +206,7 @@ sub start_result {
166 206
 
167 207
  Title   : end_result
168 208
  Usage   : my @results = $parser->end_result
169  
- Function: Finishes a result handler cycle 
  209
+ Function: Finishes a result handler cycle
170 210
  Returns : A Bio::Search::Result::ResultI
171 211
  Args    : none
172 212
 
@@ -176,19 +216,19 @@ sub start_result {
176 216
 # so keep that in mind when debugging
177 217
 
178 218
 sub end_result {
179  
-    my ($self,$type,$data) = @_;    
  219
+    my ($self,$type,$data) = @_;
180 220
 
181 221
     if( defined $data->{'runid'} &&
182  
-        $data->{'runid'} !~ /^\s+$/ ) {        
  222
+        $data->{'runid'} !~ /^\s+$/ ) {
183 223
 
184  
-        if( $data->{'runid'} !~ /^lcl\|/) { 
185  
-            $data->{"RESULT-query_name"}= $data->{'runid'};
186  
-        } else { 
  224
+        if( $data->{'runid'} !~ /^lcl\|/) {
  225
+            $data->{"RESULT-query_name"} = $data->{'runid'};
  226
+        } else {
187 227
             ($data->{"RESULT-query_name"},
188  
-	     $data->{"RESULT-query_description"}) = 
189  
-		 split(/\s+/,$data->{"RESULT-query_description"},2);
  228
+             $data->{"RESULT-query_description"}) =
  229
+                split(/\s+/,$data->{"RESULT-query_description"},2);
190 230
         }
191  
-        
  231
+
192 232
         if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
193 233
             my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
194 234
             # this is for |123|gb|ABC1.1|
@@ -197,15 +237,23 @@ sub end_result {
197 237
         }
198 238
         delete $data->{'runid'};
199 239
     }
200  
-    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); } 
  240
+    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
201 241
                grep { /^RESULT/ } keys %{$data};
202  
-    
203  
-    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
204  
-                               $data->{'RESULT-algorithm_name'} || $type);
205  
-    $args{'-hits'}      =  $self->{'_hits'};
  242
+
  243
+    $args{'-algorithm'} =  uc(   $args{'-algorithm_name'}
  244
+                              || $data->{'RESULT-algorithm_name'}
  245
+                              || $type);
  246
+    ($self->isa('Bio::SearchIO::IteratedSearchResultEventBuilder')) ?
  247
+          ( $args{'-iterations'} = $self->{'_iterations'} )
  248
+        : ( $args{'-hits'}       = $self->{'_hits'} );
  249
+
206 250
     my $result = $self->factory('result')->create_object(%args);
207 251
     $result->hit_factory($self->factory('hit'));
208  
-    $self->{'_hits'} = [];
  252
+
  253
+    ($self->isa('Bio::SearchIO::IteratedSearchResultEventBuilder')) ?
  254
+          ( $self->{'_iterations'} = [] )
  255
+        : ( $self->{'_hits'}       = [] );
  256
+
209 257
     return $result;
210 258
 }
211 259
 
@@ -215,7 +263,7 @@ sub end_result {
215 263
  Usage   : $handler->start_hsp($name,$data)
216 264
  Function: Begins processing a HSP event
217 265
  Returns : none
218  
- Args    : type of element 
  266
+ Args    : type of element
219 267
            associated data (hashref)
220 268
 
221 269
 =cut
@@ -240,16 +288,16 @@ sub end_hsp {
240 288
     my ($self,$type,$data) = @_;
241 289
 
242 290
     if( defined $data->{'runid'} &&
243  
-        $data->{'runid'} !~ /^\s+$/ ) {        
  291
+        $data->{'runid'} !~ /^\s+$/ ) {
244 292
 
245  
-        if( $data->{'runid'} !~ /^lcl\|/) { 
  293
+        if( $data->{'runid'} !~ /^lcl\|/) {
246 294
             $data->{"RESULT-query_name"}= $data->{'runid'};
247  
-        } else { 
  295
+        } else {
248 296
             ($data->{"RESULT-query_name"},
249  
-	     $data->{"RESULT-query_description"}) = 
250  
-		 split(/\s+/,$data->{"RESULT-query_description"},2);
  297
+             $data->{"RESULT-query_description"}) =
  298
+                 split(/\s+/,$data->{"RESULT-query_description"},2);
251 299
         }
252  
-        
  300
+
253 301
         if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
254 302
             my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
255 303
             # this is for |123|gb|ABC1.1|
@@ -262,27 +310,27 @@ sub end_hsp {
262 310
     # this code is to deal with the fact that Blast XML data
263 311
     # always has start < end and one has to infer strandedness
264 312
     # from the frame which is a problem for the Search::HSP object
265  
-    # which expect to be able to infer strand from the order of 
  313
+    # which expect to be able to infer strand from the order of
266 314
     # of the begin/end of the query and hit coordinates
267 315
     if( defined $data->{'HSP-query_frame'} && # this is here to protect from undefs
268  
-        (( $data->{'HSP-query_frame'} < 0 && 
269  
-           $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) ||       
270  
-         $data->{'HSP-query_frame'} > 0 && 
271  
-         ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) ) 
  316
+        (( $data->{'HSP-query_frame'} < 0 &&
  317
+           $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) ||
  318
+         $data->{'HSP-query_frame'} > 0 &&
  319
+         ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) )
272 320
         )
273  
-    { 
  321
+    {
274 322
         # swap
275 323
         ($data->{'HSP-query_start'},
276 324
          $data->{'HSP-query_end'}) = ($data->{'HSP-query_end'},
277 325
                                       $data->{'HSP-query_start'});
278  
-    } 
  326
+    }
279 327
     if( defined $data->{'HSP-hit_frame'} && # this is here to protect from undefs
280  
-        ((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 && 
281  
-          $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) ||       
282  
-         defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 && 
  328
+        ((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 &&
  329
+          $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) ||
  330
+         defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 &&
283 331
          ( $data->{'HSP-hit_start'} > $data->{'HSP-hit_end'} ) )
284  
-        ) 
285  
-    { 
  332
+        )
  333
+    {
286 334
         # swap
287 335
         ($data->{'HSP-hit_start'},
288 336
          $data->{'HSP-hit_end'}) = ($data->{'HSP-hit_end'},
@@ -295,49 +343,48 @@ sub end_hsp {
295 343
     $data->{'HSP-hit_length'}   ||= $data->{'HIT-length'};
296 344
     # If undefined lengths, use alignment length deducting the gaps
297 345
     if (not defined $data->{'HSP-query_length'}) {
298  
-	if (defined $data->{'HSP-query_seq'}) {
299  
-	    my $hsp_qry_gaps = ($data->{'RESULT-algorithm'} eq 'ERPIN') ? scalar( $data->{'HSP-query_seq'} =~ tr/\-//)
300  
-			     :  scalar( $data->{'HSP-query_seq'} =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
301  
-	    $data->{'HSP-query_length'} = length ($data->{'HSP-query_seq'}) - $hsp_qry_gaps;
302  
-	}
303  
-	else {
304  
-	    $data->{'HSP-query_length'} = 0;
305  
-	}
  346
+        if (defined $data->{'HSP-query_seq'}) {
  347
+            my $hsp_qry_gaps = ($data->{'RESULT-algorithm'} eq 'ERPIN') ? scalar( $data->{'HSP-query_seq'} =~ tr/\-//)
  348
+                             :  scalar( $data->{'HSP-query_seq'} =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
  349
+            $data->{'HSP-query_length'} = length ($data->{'HSP-query_seq'}) - $hsp_qry_gaps;
  350
+        }
  351
+        else {
  352
+            $data->{'HSP-query_length'} = 0;
  353
+        }
306 354
     }
307 355
     if (not defined $data->{'HSP-hit_length'}) {
308  
-	if (defined $data->{'HSP-hit_seq'}) {
309  
-	    my $hsp_hit_gaps = ($data->{'RESULT-algorithm'} eq 'ERPIN') ? scalar( $data->{'HSP-hit_seq'} =~ tr/\-//)
310  
-			     :  scalar( $data->{'HSP-hit_seq'} =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
311  
-	    $data->{'HSP-hit_length'} = length ($data->{'HSP-query_seq'}) - $hsp_hit_gaps;
312  
-	}
313  
-	else {
314  
-	    $data->{'HSP-hit_length'} = 0;
315  
-	}
  356
+        if (defined $data->{'HSP-hit_seq'}) {
  357
+            my $hsp_hit_gaps = ($data->{'RESULT-algorithm'} eq 'ERPIN') ? scalar( $data->{'HSP-hit_seq'} =~ tr/\-//)
  358
+                             :  scalar( $data->{'HSP-hit_seq'} =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
  359
+            $data->{'HSP-hit_length'} = length ($data->{'HSP-query_seq'}) - $hsp_hit_gaps;
  360
+        }
  361
+        else {
  362
+            $data->{'HSP-hit_length'} = 0;
  363
+        }
316 364
     }
317 365
     $data->{'HSP-hsp_length'}   ||= length ($data->{'HSP-homology_seq'} || '');
318  
-    
319  
-    my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) } 
  366
+
  367
+    my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) }
320 368
                grep { /^HSP/ } keys %{$data};
321  
-    
322  
-    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
  369
+
  370
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} ||
323 371
                                $data->{'RESULT-algorithm_name'} || $type);
324 372
     # copy this over from result
325 373
     $args{'-query_name'} = $data->{'RESULT-query_name'};
326 374
     $args{'-hit_name'} = $data->{'HIT-name'};
327 375
     my ($rank) = scalar @{$self->{'_hsps'} || []} + 1;
328 376
     $args{'-rank'} = $rank;
329  
-    
  377
+
330 378
     $args{'-hit_desc'} = $data->{'HIT-description'};
331 379
     $args{'-query_desc'} = $data->{'RESULT-query_description'};
332  
-    
  380
+
333 381
     my $bits = $args{'-bits'};
334 382
     my $hsp = \%args;
335 383
     push @{$self->{'_hsps'}}, $hsp;
336  
-    
  384
+
337 385
     return $hsp;
338 386
 }
339 387
 
340  
-
341 388
 =head2 start_hit
342 389
 
343 390
  Title   : start_hit
@@ -346,7 +393,6 @@ sub end_hsp {
346 393
  Returns : none
347 394
  Args    : type of event and associated hashref
348 395
 
349  
-
350 396
 =cut
351 397
 
352 398
 sub start_hit{
@@ -355,7 +401,6 @@ sub start_hit{
355 401
     return;
356 402
 }
357 403
 
358  
-
359 404
 =head2 end_hit
360 405
 
361 406
  Title   : end_hit
@@ -364,7 +409,6 @@ sub start_hit{
364 409
  Returns : Bio::Search::Hit::HitI object
365 410
  Args    : type of event and associated hashref
366 411
 
367  
-
368 412
 =cut
369 413
 
370 414
 sub end_hit{
@@ -384,13 +428,13 @@ sub end_hit{
384 428
     if(exists $args{'-name'} && $args{'-name'} =~ /BL_ORD_ID/ ) {
385 429
         ($args{'-name'}, $args{'-description'}) = split(/\s+/,$args{'-description'},2);
386 430
     }
387  
-    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
  431
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} ||
388 432
                                $data->{'RESULT-algorithm_name'} || $type);
389 433
     $args{'-hsps'}      = $self->{'_hsps'};
390 434
     $args{'-query_len'} =  $data->{'RESULT-query_length'};
391 435
     $args{'-rank'}      = $self->{'_hitcount'} + 1;
392 436
     unless( defined $args{'-significance'} ) {
393  
-        if( defined $args{'-hsps'} && 
  437
+        if( defined $args{'-hsps'} &&
394 438
             $args{'-hsps'}->[0] ) {
395 439
             # use pvalue if present (WU-BLAST), otherwise evalue (NCBI BLAST)
396 440
             $args{'-significance'} = $args{'-hsps'}->[0]->{'-pvalue'} || $args{'-hsps'}->[0]->{'-evalue'};
@@ -403,10 +447,39 @@ sub end_hit{
403 447
     return $hit;
404 448
 }
405 449
 
406  
-# TODO: Optionally impose hit filtering here
  450
+# Title   : _add_hit (private function for internal use only)
  451
+# Purpose : Applies hit filtering and store it if it passes filtering.
  452
+# Argument: Bio::Search::Hit::HitI object
  453
+
407 454
 sub _add_hit {
408 455
     my ($self, $hit) = @_;
409  
-    push @{$self->{'_hits'}}, $hit;
  456
+    my $hit_signif   = $hit->{-significance};
  457
+
  458
+    # Test significance using custom function (if supplied)
  459
+    my $add_hit = 1;
  460
+
  461
+    my $hit_filter = $self->{'_hit_filter'};
  462
+    if($hit_filter) {
  463
+        # since &hit_filter is out of our control and would expect a HitI object,
  464
+        # we're forced to make one for it
  465
+        $hit     = $self->factory('hit')->create_object(%{$hit});
  466
+        $add_hit = 0 unless &$hit_filter($hit);
  467
+    }
  468
+    else {
  469
+        if($self->{'_confirm_significance'}) {
  470
+            $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
  471
+        }
  472
+        if($self->{'_confirm_score'}) {
  473
+            my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
  474
+            $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
  475
+        }
  476
+        if($self->{'_confirm_bits'}) {
  477
+            my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
  478
+            $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
  479
+        }
  480
+    }
  481
+
  482
+    $add_hit && push @{$self->{'_hits'}}, $hit;;
410 483
     $self->{'_hitcount'} = scalar @{$self->{'_hits'}};
411 484
 }
412 485
 
@@ -429,20 +502,19 @@ See L<Bio::Factory::ObjectFactoryI> for more information
429 502
 
430 503
 sub register_factory{
431 504
    my ($self, $type,$f) = @_;
432  
-   if( ! defined $f || ! ref($f) || 
433  
-       ! $f->isa('Bio::Factory::ObjectFactoryI') ) { 
  505
+   if( ! defined $f || ! ref($f) ||
  506
+       ! $f->isa('Bio::Factory::ObjectFactoryI') ) {
434 507
        $self->throw("Cannot set factory to value $f".ref($f)."\n");
435 508
    }
436 509
    $self->{'_factories'}->{lc($type)} = $f;
437 510
 }
438 511
 
439  
-
440 512
 =head2 factory
441 513
 
442 514
  Title   : factory
443 515
  Usage   : my $f = $handler->factory('TYPE');
444 516
  Function: Retrieves the associated factory for requested 'TYPE'
445  
- Returns : a Bio::Factory::ObjectFactoryI 
  517
+ Returns : a Bio::Factory::ObjectFactoryI
446 518
  Throws  : Bio::Root::BadParameter if none registered for the supplied type
447 519
  Args    : name of factory class to retrieve
448 520
 
@@ -452,7 +524,7 @@ See L<Bio::Factory::ObjectFactoryI> for more information
452 524
 
453 525
 sub factory{
454 526
    my ($self,$type) = @_;
455  
-   return $self->{'_factories'}->{lc($type)} ||