Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

removing searchio-blastxml files

  • Loading branch information...
commit f56ef893a991d2f5b49af9eb1cb92cee144a85be 1 parent b7d042d
pcantalupo authored February 17, 2013 Chris Fields committed February 20, 2013
315  Bio/SearchIO/XML/BlastHandler.pm
... ...
@@ -1,315 +0,0 @@
1  
-#
2  
-# BioPerl module for Bio::SearchIO::XML::BlastHandler
3  
-#
4  
-# Please direct questions and support issues to <bioperl-l@bioperl.org> 
5  
-#
6  
-# Cared for by Jason Stajich, Chris Fields
7  
-#
8  
-# Copyright Jason Stajich
9  
-#
10  
-# You may distribute this module under the same terms as perl itself
11  
-
12  
-# POD documentation - main docs before the code
13  
-
14  
-=head1 NAME
15  
-
16  
-Bio::SearchIO::XML::BlastHandler - XML Handler for NCBI Blast XML parsing.
17  
-
18  
-=head1 SYNOPSIS
19  
-
20  
-  # This is not to be used directly.
21  
-
22  
-=head1 DESCRIPTION
23  
-
24  
-This is the XML handler for BLAST XML parsing. Currently it passes elements off
25  
-to the event handler, which is ultimately responsible for Bio::Search object
26  
-generation.
27  
-
28  
-This was recently split off from the original code for Bio::SearchIO::blastxml
29  
-primarily for maintenance purposes.
30  
-
31  
-=head1 DEPENDENCIES
32  
-
33  
-In addition to parts of the Bio:: hierarchy, this module uses:
34  
-
35  
- XML::SAX::Base
36  
-
37  
-which comes with the XML::SAX distribution.
38  
-
39  
-=head1 FEEDBACK
40  
-
41  
-=head2 Mailing Lists
42  
-
43  
-User feedback is an integral part of the evolution of this and other
44  
-Bioperl modules. Send your comments and suggestions preferably to
45  
-the Bioperl mailing list.  Your participation is much appreciated.
46  
-
47  
-  bioperl-l@bioperl.org                  - General discussion
48  
-  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
49  
-
50  
-=head2 Support 
51  
-
52  
-Please direct usage questions or support issues to the mailing list:
53  
-
54  
-I<bioperl-l@bioperl.org>
55  
-
56  
-rather than to the module maintainer directly. Many experienced and 
57  
-responsive experts will be able to look at the problem and quickly
58  
-address it. Please include a thorough description of the problem 
59  
-with code and data examples if at all possible.
60  
-
61  
-=head2 Reporting Bugs
62  
-
63  
-Report bugs to the Bioperl bug tracking system to help us keep track
64  
-of the bugs and their resolution. Bug reports can be submitted via the
65  
-web:
66  
-
67  
-  https://redmine.open-bio.org/projects/bioperl/
68  
-
69  
-=head1 AUTHOR - Jason Stajich, Chris Fields
70  
-
71  
-Email jason-at-bioperl.org
72  
-Email cjfields-at-uiuc dot edu
73  
-
74  
-=head1 APPENDIX
75  
-
76  
-The rest of the documentation details each of the object methods.
77  
-Internal methods are usually preceded with a _
78  
-
79  
-=cut
80  
-
81  
-# Let the code begin...
82  
-package Bio::SearchIO::XML::BlastHandler;
83  
-use base qw(Bio::Root::Root XML::SAX::Base);
84  
-
85  
-my %MODEMAP = (
86  
-                'Iteration'   => 'result',
87  
-                'Hit'         => 'hit',
88  
-                'Hsp'         => 'hsp'
89  
-);
90  
-
91  
-# major post 2.2.12 BLAST XML changes
92  
-# 1) moved XML Handler to its own class
93  
-# 2) reconfigure blastxml to deal with old and new BLAST XML output
94  
-
95  
-my %MAPPING = (
96  
-                # Result-specific fields
97  
-                'BlastOutput_program'   => 'RESULT-algorithm_name',
98  
-                'BlastOutput_version'   => 'RESULT-algorithm_version',
99  
-                'BlastOutput_db'        => 'RESULT-database_name',
100  
-                'BlastOutput_reference' => 'RESULT-program_reference',
101  
-                'BlastOutput_query-def' => 'RESULT-query_description',
102  
-                'BlastOutput_query-len' => 'RESULT-query_length',
103  
-                'BlastOutput_query-ID'  => 'runid',                
104  
-                'Parameters_matrix'     => { 'RESULT-parameters' => 'matrix'},
105  
-                'Parameters_expect'     => { 'RESULT-parameters' => 'expect'},
106  
-                'Parameters_include'    => { 'RESULT-parameters' => 'include'},
107  
-                'Parameters_sc-match'   => { 'RESULT-parameters' => 'match'},
108  
-                'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
109  
-                'Parameters_gap-open'   => { 'RESULT-parameters' => 'gapopen'},
110  
-                'Parameters_gap-extend' => { 'RESULT-parameters' => 'gapext'},
111  
-                'Parameters_filter'     => {'RESULT-parameters' => 'filter'},
112  
-                'Statistics_db-num'     => 'RESULT-database_entries',
113  
-                'Statistics_db-len'     => 'RESULT-database_letters',
114  
-                'Statistics_hsp-len'    => { 'RESULT-statistics' => 'hsplength'},
115  
-                'Statistics_eff-space'  => { 'RESULT-statistics' => 'effectivespace'},
116  
-                'Statistics_kappa'      => { 'RESULT-statistics' => 'kappa' },
117  
-                'Statistics_lambda'     => { 'RESULT-statistics' => 'lambda' },
118  
-                'Statistics_entropy'    => { 'RESULT-statistics' => 'entropy'},
119  
-                
120  
-                # HSP specific fields
121  
-                'Hsp_bit-score'  => 'HSP-bits',
122  
-                'Hsp_score'      => 'HSP-score',
123  
-                'Hsp_evalue'     => 'HSP-evalue',
124  
-                'Hsp_query-from' => 'HSP-query_start',
125  
-                'Hsp_query-to'   => 'HSP-query_end',
126  
-                'Hsp_hit-from'   => 'HSP-hit_start',
127  
-                'Hsp_hit-to'     => 'HSP-hit_end',
128  
-                'Hsp_positive'   => 'HSP-conserved',
129  
-                'Hsp_identity'   => 'HSP-identical',
130  
-                'Hsp_gaps'       => 'HSP-gaps',
131  
-                'Hsp_hitgaps'    => 'HSP-hit_gaps',
132  
-                'Hsp_querygaps'  => 'HSP-query_gaps',
133  
-                'Hsp_qseq'       => 'HSP-query_seq',
134  
-                'Hsp_hseq'       => 'HSP-hit_seq',
135  
-                'Hsp_midline'    => 'HSP-homology_seq',
136  
-                'Hsp_align-len'  => 'HSP-hsp_length',
137  
-                'Hsp_query-frame'=> 'HSP-query_frame',
138  
-                'Hsp_hit-frame'  => 'HSP-hit_frame',
139  
-
140  
-                # Hit specific fields
141  
-                'Hit_id'               => 'HIT-name',
142  
-                'Hit_len'              => 'HIT-length',
143  
-                'Hit_accession'        => 'HIT-accession',
144  
-                'Hit_def'              => 'HIT-description',
145  
-                'Hit_num'              => 'HIT-order',
146  
-                'Iteration_iter-num'   => 'HIT-iteration',
147  
-                'Iteration_stat'       => 'HIT-iteration_statistic',
148  
-                
149  
-                # if these tags are present, they will overwrite the
150  
-                # above with more current data (i.e. multiquery hits)
151  
-                'Iteration_query-def'   => 'RESULT-query_description',
152  
-                'Iteration_query-len'   => 'RESULT-query_length',       
153  
-                'Iteration_query-ID'    => 'runid',
154  
-               );
155  
-
156  
-# these XML tags are ignored for now
157  
-my %IGNOREDTAGS = (
158  
-                'Hsp_num'              => 1,#'HSP-order',
159  
-                'Hsp_pattern-from'     => 1,#'patternend',
160  
-                'Hsp_pattern-to'       => 1,#'patternstart',
161  
-                'Hsp_density'          => 1,#'hspdensity',
162  
-                'Iteration_message'    => 1,
163  
-                'Hit_hsps'             => 1,
164  
-                'BlastOutput_param'    => 1,
165  
-                'Iteration_hits'       => 1,
166  
-                'Statistics'           => 1,
167  
-                'Parameters'           => 1,
168  
-                'BlastOutput'          => 1,
169  
-                'BlastOutput_iterations' => 1,     
170  
-                   );
171  
-
172  
-=head2 SAX methods
173  
-
174  
-=cut
175  
-
176  
-=head2 start_document
177  
-
178  
- Title   : start_document
179  
- Usage   : $parser->start_document;
180  
- Function: SAX method to indicate starting to parse a new document
181  
- Returns : none
182  
- Args    : none
183  
-
184  
-=cut
185  
-
186  
-sub start_document{
187  
-    my ($self) = @_;
188  
-    $self->{'_lasttype'} = '';
189  
-    $self->{'_values'} = {};
190  
-    $self->{'_result'}= [];
191  
-}
192  
-
193  
-=head2 end_document
194  
-
195  
- Title   : end_document
196  
- Usage   : $parser->end_document;
197  
- Function: SAX method to indicate finishing parsing a new document
198  
- Returns : Bio::Search::Result::ResultI object
199  
- Args    : none
200  
-
201  
-=cut
202  
-
203  
-sub end_document{
204  
-   my ($self,@args) = @_;
205  
-   
206  
-   # reset data carried throughout parse
207  
-   $self->{'_resultdata'} = undef;
208  
-   
209  
-   # pass back ref to results queue; caller must reset handler results queue
210  
-   return $self->{'_result'};
211  
-}
212  
-
213  
-=head2 start_element
214  
-
215  
- Title   : start_element
216  
- Usage   : $parser->start_element($data)
217  
- Function: SAX method to indicate starting a new element
218  
- Returns : none
219  
- Args    : hash ref for data
220  
-
221  
-=cut
222  
-
223  
-sub start_element{
224  
-    my ($self,$data) = @_;
225  
-    # we currently don't care about attributes
226  
-    my $nm = $data->{'Name'};
227  
-
228  
-    if( my $type = $MODEMAP{$nm} ) {
229  
-        if( $self->eventHandler->will_handle($type) ) {
230  
-            my $func = sprintf("start_%s",lc $type);
231  
-            $self->eventHandler->$func($data->{'Attributes'});
232  
-        }                                                    
233  
-    }
234  
-}
235  
-
236  
-=head2 end_element
237  
-
238  
- Title   : end_element
239  
- Usage   : $parser->end_element($data)
240  
- Function: Signals finishing an element
241  
- Returns : Bio::Search object depending on what type of element
242  
- Args    : hash ref for data
243  
-
244  
-=cut
245  
-
246  
-sub end_element{
247  
-    my ($self,$data) = @_;
248  
-
249  
-    my $nm = $data->{'Name'};
250  
-    my $rc;
251  
-    if($nm eq 'BlastOutput_program' &&
252  
-       $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
253  
-        $self->{'_type'} = uc $1; 
254  
-    }
255  
-    if ($nm eq 'Iteration') {
256  
-        map {
257  
-            $self->{'_values'}->{$_} = $self->{'_resultdata'}->{$_};
258  
-            } keys %{ $self->{'_resultdata'} };
259  
-    }
260  
-    if( my $type = $MODEMAP{$nm} ) {
261  
-        if( $self->eventHandler->will_handle($type) ) {
262  
-            my $func = sprintf("end_%s",lc $type);
263  
-            $rc = $self->eventHandler->$func($self->{'_type'},
264  
-                                              $self->{'_values'});
265  
-        }
266  
-    }
267  
-    elsif( exists $MAPPING{$nm} ) { 
268  
-        if ( ref($MAPPING{$nm}) =~ /hash/i ) {
269  
-            my $key = (keys %{$MAPPING{$nm}})[0];
270  
-            $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
271  
-        } else {
272  
-            $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
273  
-        }
274  
-    }
275  
-    elsif( exists $IGNOREDTAGS{$nm} ){
276  
-        # ignores these elements for now
277  
-    }
278  
-    else {      
279  
-        $self->debug("ignoring unrecognized element type $nm\n");
280  
-    }
281  
-    $self->{'_last_data'} = ''; # remove read data if we are at 
282  
-                                # end of an element
283  
-                                
284  
-    # add to ResultI array
285  
-    $self->{'_result'} = $rc if( $nm eq 'Iteration' );
286  
-    # reset values for each Result round
287  
-    if ($nm eq 'Iteration') {
288  
-        $self->{'_values'} = {};
289  
-    }
290  
-}
291  
-
292  
-=head2 characters
293  
-
294  
- Title   : characters
295  
- Usage   : $parser->characters($data)
296  
- Function: Signals new characters to be processed
297  
- Returns : characters read
298  
- Args    : hash ref with the key 'Data'
299  
-
300  
-
301  
-=cut
302  
-
303  
-sub characters{
304  
-   my ($self,$data) = @_;
305  
-   return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
306  
-   $self->{'_last_data'} .= $data->{'Data'};
307  
-}
308  
-
309  
-sub eventHandler {
310  
-    my $self = shift;
311  
-    return $self->{'_handler'} = shift if @_;
312  
-    return $self->{'_handler'};
313  
-}
314  
-
315  
-1;
312  Bio/SearchIO/XML/PsiBlastHandler.pm
... ...
@@ -1,312 +0,0 @@
1  
-#
2  
-# BioPerl module for Bio::SearchIO::XML::PsiBlastHandler
3  
-#
4  
-# Please direct questions and support issues to <bioperl-l@bioperl.org> 
5  
-#
6  
-# Cared for by Jason Stajich, Chris Fields
7  
-#
8  
-# Copyright Chris Fields
9  
-#
10  
-# You may distribute this module under the same terms as perl itself
11  
-
12  
-# POD documentation - main docs before the code
13  
-
14  
-=head1 NAME
15  
-
16  
-Bio::SearchIO::XML::PsiBlastHandler - XML Handler for NCBI Blast PSIBLAST XML parsing.
17  
-
18  
-=head1 SYNOPSIS
19  
-
20  
-  # This is not to be used directly.
21  
-
22  
-=head1 DESCRIPTION
23  
-
24  
-This is the XML handler for BLAST PSIBLAST XML parsing. Currently it passes
25  
-elements off to the event handler, which is ultimately responsible for
26  
-Bio::Search object generation.
27  
-
28  
-This was recently split off from the original code for Bio::SearchIO::blastxml
29  
-primarily for maintenance purposes.
30  
-
31  
-=head1 DEPENDENCIES
32  
-
33  
-In addition to parts of the Bio:: hierarchy, this module uses:
34  
-
35  
- XML::SAX::Base
36  
-
37  
-which comes with the XML::SAX distribution.
38  
-
39  
-=head1 FEEDBACK
40  
-
41  
-=head2 Mailing Lists
42  
-
43  
-User feedback is an integral part of the evolution of this and other
44  
-Bioperl modules. Send your comments and suggestions preferably to
45  
-the Bioperl mailing list.  Your participation is much appreciated.
46  
-
47  
-  bioperl-l@bioperl.org                  - General discussion
48  
-  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
49  
-
50  
-=head2 Support 
51  
-
52  
-Please direct usage questions or support issues to the mailing list:
53  
-
54  
-I<bioperl-l@bioperl.org>
55  
-
56  
-rather than to the module maintainer directly. Many experienced and 
57  
-responsive experts will be able to look at the problem and quickly
58  
-address it. Please include a thorough description of the problem 
59  
-with code and data examples if at all possible.
60  
-
61  
-=head2 Reporting Bugs
62  
-
63  
-Report bugs to the Bioperl bug tracking system to help us keep track
64  
-of the bugs and their resolution. Bug reports can be submitted via the
65  
-web:
66  
-
67  
-  https://redmine.open-bio.org/projects/bioperl/
68  
-
69  
-=head1 AUTHOR - Jason Stajich, Chris Fields
70  
-
71  
-Email jason-at-bioperl.org
72  
-Email cjfields-at-uiuc dot edu
73  
-
74  
-=head1 APPENDIX
75  
-
76  
-The rest of the documentation details each of the object methods.
77  
-Internal methods are usually preceded with a _
78  
-
79  
-=cut
80  
-
81  
-# Let the code begin...
82  
-package Bio::SearchIO::XML::PsiBlastHandler;
83  
-use base qw(Bio::Root::Root XML::SAX::Base);
84  
-
85  
-my %MODEMAP = (
86  
-    'BlastOutput'   => 'result',
87  
-    'Iteration'     => 'iteration',
88  
-    'Hit'           => 'hit',
89  
-    'Hsp'           => 'hsp'
90  
-);
91  
-
92  
-# MAPPING is distinct from BlastHandler, can't really mix the two...
93  
-
94  
-my %MAPPING = (
95  
-                # Result-specific fields
96  
-                'BlastOutput_program'   => 'RESULT-algorithm_name',
97  
-                'BlastOutput_version'   => 'RESULT-algorithm_version',
98  
-                'BlastOutput_db'        => 'RESULT-database_name',
99  
-                'BlastOutput_reference' => 'RESULT-program_reference',
100  
-                'BlastOutput_query-def' => 'RESULT-query_description',
101  
-                'BlastOutput_query-len' => 'RESULT-query_length',
102  
-                'BlastOutput_query-ID'  => 'runid',
103  
-                'Parameters_matrix'     => { 'RESULT-parameters' => 'matrix'},
104  
-                'Parameters_expect'     => { 'RESULT-parameters' => 'expect'},
105  
-                'Parameters_include'    => { 'RESULT-parameters' => 'include'},
106  
-                'Parameters_sc-match'   => { 'RESULT-parameters' => 'match'},
107  
-                'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
108  
-                'Parameters_gap-open'   => { 'RESULT-parameters' => 'gapopen'},
109  
-                'Parameters_gap-extend' => { 'RESULT-parameters' => 'gapext'},
110  
-                'Parameters_filter'     => {'RESULT-parameters' => 'filter'},
111  
-                'Statistics_db-num'     => 'RESULT-database_entries',
112  
-                'Statistics_db-len'     => 'RESULT-database_letters',
113  
-                'Statistics_hsp-len'    => { 'RESULT-statistics' => 'hsplength'},
114  
-                'Statistics_eff-space'  => { 'RESULT-statistics' => 'effectivespace'},
115  
-                'Statistics_kappa'      => { 'RESULT-statistics' => 'kappa' },
116  
-                'Statistics_lambda'     => { 'RESULT-statistics' => 'lambda' },
117  
-                'Statistics_entropy'    => { 'RESULT-statistics' => 'entropy'},
118  
-
119  
-                # Iteration-specific parameters
120  
-                'Iteration_iter-num'  => 'ITERATION-number',
121  
-                'Iteration_converged' => 'ITERATION-converged',
122  
-
123  
-                # HSP specific fields
124  
-                'Hsp_bit-score'  => 'HSP-bits',
125  
-                'Hsp_score'      => 'HSP-score',
126  
-                'Hsp_evalue'     => 'HSP-evalue',
127  
-                'Hsp_query-from' => 'HSP-query_start',
128  
-                'Hsp_query-to'   => 'HSP-query_end',
129  
-                'Hsp_hit-from'   => 'HSP-hit_start',
130  
-                'Hsp_hit-to'     => 'HSP-hit_end',
131  
-                'Hsp_positive'   => 'HSP-conserved',
132  
-                'Hsp_identity'   => 'HSP-identical',
133  
-                'Hsp_gaps'       => 'HSP-gaps',
134  
-                'Hsp_hitgaps'    => 'HSP-hit_gaps',
135  
-                'Hsp_querygaps'  => 'HSP-query_gaps',
136  
-                'Hsp_qseq'       => 'HSP-query_seq',
137  
-                'Hsp_hseq'       => 'HSP-hit_seq',
138  
-                'Hsp_midline'    => 'HSP-homology_seq',
139  
-                'Hsp_align-len'  => 'HSP-hsp_length',
140  
-                'Hsp_query-frame'=> 'HSP-query_frame',
141  
-                'Hsp_hit-frame'  => 'HSP-hit_frame',
142  
-
143  
-                # Hit specific fields
144  
-                'Hit_id'               => 'HIT-name',
145  
-                'Hit_len'              => 'HIT-length',
146  
-                'Hit_accession'        => 'HIT-accession',
147  
-                'Hit_def'              => 'HIT-description',
148  
-                'Hit_num'              => 'HIT-order',
149  
-                'Iteration_iter-num'   => 'HIT-iteration',
150  
-                'Iteration_stat'       => 'HIT-iteration_statistic',
151  
-               );
152  
-
153  
-# these XML tags are ignored for now
154  
-my %IGNOREDTAGS = (
155  
-                'Hsp_num'              => 1,#'HSP-order',
156  
-                'Hsp_pattern-from'     => 1,#'patternend',
157  
-                'Hsp_pattern-to'       => 1,#'patternstart',
158  
-                'Hsp_density'          => 1,#'hspdensity',
159  
-                'Iteration_message'    => 1,
160  
-                'Hit_hsps'             => 1,
161  
-                'BlastOutput_param'    => 1,
162  
-                'Iteration_hits'       => 1,
163  
-                'Statistics'           => 1,
164  
-                'Parameters'           => 1,
165  
-                'BlastOutput'          => 1,
166  
-                'BlastOutput_iterations' => 1,
167  
-                   );
168  
-
169  
-=head2 SAX methods
170  
-
171  
-=cut
172  
-
173  
-=head2 start_document
174  
-
175  
- Title   : start_document
176  
- Usage   : $parser->start_document;
177  
- Function: SAX method to indicate starting to parse a new document
178  
- Returns : none
179  
- Args    : none
180  
-
181  
-=cut
182  
-
183  
-sub start_document{
184  
-    my ($self) = @_;
185  
-    $self->{'_lasttype'} = '';
186  
-    $self->{'_values'} = {};
187  
-    $self->{'_result'}= [];
188  
-}
189  
-
190  
-=head2 end_document
191  
-
192  
- Title   : end_document
193  
- Usage   : $parser->end_document;
194  
- Function: SAX method to indicate finishing parsing a new document
195  
- Returns : Bio::Search::Result::ResultI object
196  
- Args    : none
197  
-
198  
-=cut
199  
-
200  
-sub end_document{
201  
-   my ($self,@args) = @_;
202  
-   
203  
-   # reset data carried throughout parse
204  
-   $self->{'_resultdata'} = undef;
205  
-   
206  
-   # pass back ref to results queue; caller must reset handler results queue
207  
-   return $self->{'_result'};
208  
-}
209  
-
210  
-=head2 start_element
211  
-
212  
- Title   : start_element
213  
- Usage   : $parser->start_element($data)
214  
- Function: SAX method to indicate starting a new element
215  
- Returns : none
216  
- Args    : hash ref for data
217  
-
218  
-=cut
219  
-
220  
-sub start_element{
221  
-    my ($self,$data) = @_;
222  
-    # we currently don't care about attributes
223  
-    my $nm = $data->{'Name'};
224  
-
225  
-    if( my $type = $MODEMAP{$nm} ) {
226  
-        if( $self->eventHandler->will_handle($type) ) {
227  
-            my $func = sprintf("start_%s",lc $type);
228  
-            $self->eventHandler->$func($data->{'Attributes'});
229  
-        }                                                    
230  
-    }
231  
-}
232  
-
233  
-=head2 end_element
234  
-
235  
- Title   : end_element
236  
- Usage   : $parser->end_element($data)
237  
- Function: Signals finishing an element
238  
- Returns : Bio::Search object depending on what type of element
239  
- Args    : hash ref for data
240  
-
241  
-=cut
242  
-
243  
-sub end_element{
244  
-    my ($self,$data) = @_;
245  
-
246  
-    my $nm = $data->{'Name'};
247  
-    my $rc;
248  
-    if($nm eq 'BlastOutput_program' &&
249  
-       $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
250  
-        $self->{'_type'} = uc $1; 
251  
-    }
252  
-    if ($nm eq 'Iteration') {
253  
-        map {
254  
-            $self->{'_values'}->{$_} = $self->{'_resultdata'}->{$_};
255  
-            } keys %{ $self->{'_resultdata'} };
256  
-    }
257  
-    if( my $type = $MODEMAP{$nm} ) {
258  
-        if( $self->eventHandler->will_handle($type) ) {
259  
-            my $func = sprintf("end_%s",lc $type);
260  
-            $rc = $self->eventHandler->$func($self->{'_type'},
261  
-                                              $self->{'_values'});
262  
-        }
263  
-    }
264  
-    elsif( exists $MAPPING{$nm} ) {
265  
-        if ( ref($MAPPING{$nm}) =~ /hash/i ) {
266  
-            my $key = (keys %{$MAPPING{$nm}})[0];
267  
-            $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
268  
-        } else {
269  
-            $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
270  
-        }
271  
-    }
272  
-    elsif( exists $IGNOREDTAGS{$nm} ){
273  
-        # ignores these elements for now
274  
-    }
275  
-    else {      
276  
-        $self->debug("ignoring unrecognized element type $nm\n");
277  
-    }
278  
-    $self->{'_last_data'} = ''; # remove read data if we are at 
279  
-                                # end of an element
280  
-                                
281  
-    # add to ResultI array
282  
-    $self->{'_result'} = $rc if( $nm eq 'BlastOutput' );
283  
-    # reset values for each Result round
284  
-    if ($nm eq 'BlastOutput') {
285  
-        $self->{'_values'} = {};
286  
-    }
287  
-}
288  
-
289  
-=head2 characters
290  
-
291  
- Title   : characters
292  
- Usage   : $parser->characters($data)
293  
- Function: Signals new characters to be processed
294  
- Returns : characters read
295  
- Args    : hash ref with the key 'Data'
296  
-
297  
-
298  
-=cut
299  
-
300  
-sub characters{
301  
-   my ($self,$data) = @_;
302  
-   return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
303  
-   $self->{'_last_data'} .= $data->{'Data'};
304  
-}
305  
-
306  
-sub eventHandler {
307  
-    my $self = shift;
308  
-    return $self->{'_handler'} = shift if @_;
309  
-    return $self->{'_handler'};
310  
-}
311  
-
312  
-1;
474  Bio/SearchIO/blastxml.pm
... ...
@@ -1,474 +0,0 @@
1  
-#
2  
-# BioPerl module for Bio::SearchIO::blastxml
3  
-#
4  
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
5  
-#
6  
-# Cared for by Jason Stajich <jason@bioperl.org>
7  
-#
8  
-# Copyright Jason Stajich
9  
-#
10  
-# You may distribute this module under the same terms as perl itself
11  
-
12  
-# POD documentation - main docs before the code
13  
-
14  
-=head1 NAME
15  
-
16  
-Bio::SearchIO::blastxml - A SearchIO implementation of NCBI Blast XML parsing.
17  
-
18  
-=head1 SYNOPSIS
19  
-
20  
-    use Bio::SearchIO;
21  
-    my $searchin = Bio::SearchIO->new(-format => 'blastxml',
22  
-				     -file   => 't/data/plague_yeast.bls.xml');
23  
-
24  
-    while( my $result = $searchin->next_result ) {
25  
-        ....
26  
-    }
27  
-
28  
-    # one can also request that the parser NOT keep the XML data in memory
29  
-    # by using the tempfile initialization flag.
30  
-
31  
-    $searchin = Bio::SearchIO->new(-tempfile => 1,
32  
-				     -format => 'blastxml',
33  
-				     -file   => 't/data/plague_yeast.bls.xml');
34  
-
35  
-    while( my $result = $searchin->next_result ) {
36  
-       ....
37  
-    }
38  
-
39  
-    # PSI-BLAST parsing (default is normal BLAST)
40  
-    $searchin = Bio::SearchIO->new(
41  
-                     -format => 'blastxml',
42  
-                     -blasttype => 'psiblast',
43  
-				     -file   => 't/data/plague_yeast.bls.xml');
44  
-
45  
-    while( my $result = $searchin->next_result ) {
46  
-       ....
47  
-    }
48  
-
49  
-=head1 DESCRIPTION
50  
-
51  
-This object implements a NCBI Blast XML parser.  It requires XML::SAX; it is
52  
-also recommended (for faster parsing) that XML::SAX::ExpatXS or XML::LibXML
53  
-be installed.  Either 'XML::SAX::ExpatXS' or 'XML::LibXML::SAX::Parser' should
54  
-be set as the default parser in ParserDetails.ini.  This file is located in the
55  
-SAX subdirectory of XML in your local perl library (normally in the 'site'
56  
-directory).
57  
-
58  
-Two different XML handlers currently exist to deal with logical differences
59  
-between how normal BLAST reports and PSI-BLAST reports are logically parsed into
60  
-BioPerl objects; this is explicitly settable using the B<-blasttype> parameter.
61  
-The default is for parsing a normal BLAST report ('blast'), but if one is
62  
-expecting PSI-BLAST report parsing, -blasttype B<must> be set explicitly to
63  
-'psiblast'. This is due to a lack of any information in the XML output which
64  
-tells the parser the report is derived from a PSI-BLAST run vs. a normal BLAST
65  
-run.
66  
-
67  
-There is one additional initialization flag from the SearchIO defaults. That is
68  
-the B<-tempfile> flag. If specified as true, then the parser will write out each
69  
-report to a temporary filehandle rather than holding the entire report as a
70  
-string in memory. The reason this is done in the first place is NCBI reports
71  
-have an unnecessary E<lt>?xml version="1.0"?E<gt> at the beginning of each report
72  
-and RPS-BLAST reports have an additional unnecessary RPS-BLAST tag at the top of
73  
-each report. So we currently have implemented the work around by preparsing the
74  
-file (yes it makes the process slower, but it works). We are open to suggestions
75  
-on how to optimize this in the future.
76  
-
77  
-=head1 DEPENDENCIES
78  
-
79  
-In addition to parts of the Bio:: hierarchy, this module uses:
80  
-
81  
- XML::SAX
82  
-
83  
-It is also recommended that XML::SAX::ExpatXS be installed and made the default
84  
-XML::SAX parser (via ParserDetails.ini), along with the Expat library, for faster parsing.
85  
-XML::SAX::Expat is not recommended; XML::SAX::ExpatXS is considered the current
86  
-replacement for XML::SAX::Expat and is actively being considered to replace
87  
-XML::SAX::Expat. XML::SAX::Expat will work, but only if you have local copies of
88  
-the NCBI BLAST DTDs. This is due to issues with NCBI's BLAST XML format. The
89  
-DTDs and the web address to obtain them are:
90  
-
91  
-  NCBI_BlastOutput.dtd
92  
-  NCBI_BlastOutput.mod.dtd
93  
-
94  
-  http://www.ncbi.nlm.nih.gov/data_specs/dtd/
95  
-
96  
-=head1 FEEDBACK
97  
-
98  
-=head2 Mailing Lists
99  
-
100  
-User feedback is an integral part of the evolution of this and other
101  
-Bioperl modules. Send your comments and suggestions preferably to
102  
-the Bioperl mailing list.  Your participation is much appreciated.
103  
-
104  
-  bioperl-l@bioperl.org                  - General discussion
105  
-  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
106  
-
107  
-=head2 Support
108  
-
109  
-Please direct usage questions or support issues to the mailing list:
110  
-
111  
-I<bioperl-l@bioperl.org>
112  
-
113  
-rather than to the module maintainer directly. Many experienced and
114  
-responsive experts will be able to look at the problem and quickly
115  
-address it. Please include a thorough description of the problem
116  
-with code and data examples if at all possible.
117  
-
118  
-=head2 Reporting Bugs
119  
-
120  
-Report bugs to the Bioperl bug tracking system to help us keep track
121  
-of the bugs and their resolution. Bug reports can be submitted via the
122  
-web:
123  
-
124  
-  https://redmine.open-bio.org/projects/bioperl/
125  
-
126  
-=head1 AUTHOR - Jason Stajich
127  
-
128  
-Email jason-at-bioperl.org
129  
-
130  
-=head1 APPENDIX
131  
-
132  
-The rest of the documentation details each of the object methods.
133  
-Internal methods are usually preceded with a _
134  
-
135  
-=cut
136  
-
137  
-# Let the code begin...
138  
-
139  
-package Bio::SearchIO::blastxml;
140  
-use strict;
141  
-# Object preamble - inherits from Bio::Root::Root
142  
-
143  
-use base qw(Bio::SearchIO);
144  
-use Bio::Root::Root;
145  
-use XML::SAX;
146  
-use IO::File;
147  
-use Bio::SearchIO::XML::BlastHandler;
148  
-use Bio::SearchIO::IteratedSearchResultEventBuilder;
149  
-
150  
-our $DEBUG;
151  
-
152  
-my %VALID_TYPE = (
153  
-    'BLAST'      => 'Bio::SearchIO::XML::BlastHandler',
154  
-    'PSIBLAST'   => 'Bio::SearchIO::XML::PsiBlastHandler',
155  
-    'PSI-BLAST'  => 'Bio::SearchIO::XML::PsiBlastHandler'
156  
-    );
157  
-
158  
-# mapping of NCBI Blast terms to Bioperl hash keys
159  
-
160  
-=head2 new
161  
-
162  
- Title   : new
163  
- Usage   : my $searchio = Bio::SearchIO->new(-format => 'blastxml',
164  
-					    -file   => 'filename',
165  
-					    -tempfile => 1);
166  
- Function: Initializes the object - this is chained through new in SearchIO
167  
- Returns : Bio::SearchIO::blastxml object
168  
- Args    : One additional argument from the format and file/fh parameters.
169  
-           -tempfile    => boolean.  Defaults to false.  Write out XML data
170  
-                           to a temporary filehandle to send to PerlSAX parser.
171  
-
172  
-=cut
173  
-
174  
-=head2 _initialize
175  
-
176  
- Title   : _initialize
177  
- Usage   : private
178  
- Function: Initializes the object - this is chained through new in SearchIO
179  
-
180  
-=cut
181  
-
182  
-sub _initialize{
183  
-    my ($self,@args) = @_;
184  
-    $self->SUPER::_initialize(@args);
185  
-    my ($usetempfile, $blasttype,$xmlcompact) = $self->_rearrange([qw(
186  
-                                            TEMPFILE
187  
-                                            BLASTTYPE
188  
-                                            XMLCOMPACT)],@args);
189  
-    $blasttype ||= 'BLAST';
190  
-    $self->{_xml_compact} = $xmlcompact || 0;
191  
-    $self->blasttype(uc $blasttype);
192  
-    defined $usetempfile && $self->use_tempfile($usetempfile);
193  
-    $self->{_result_count} = 0;
194  
-    eval {  require Time::HiRes };
195  
-    if( $@ ) { $DEBUG = 0; }
196  
-    $DEBUG = 1 if( ! defined $DEBUG && ($self->verbose > 0));
197  
-}
198  
-
199  
-sub attach_EventHandler {
200  
-    my ($self,$handler) = @_;
201  
-
202  
-    $self->SUPER::attach_EventHandler($handler);
203  
-
204  
-	# Make sure if there is an XML parser present already, the internal Handler
205  
-	# is set
206  
-	if (exists $self->{'_xmlparser'}) {
207  
-		$self->{'_xmlparser'}->get_handler->eventHandler($handler);
208  
-	}
209  
-
210  
-    # Optimization: caching the EventHandler since it is used a lot
211  
-    # during the parse.
212  
-
213  
-    $self->{'_handler_cache'} = $handler;
214  
-    return;
215  
-}
216  
-
217  
-=head2 next_result
218  
-
219  
- Title   : next_result
220  
- Usage   : my $result = $searchio->next_result;
221  
- Function: Returns the next Result from a search
222  
- Returns : Bio::Search::Result::ResultI object
223  
- Args    : none
224  
-
225  
-=cut
226  
-
227  
-sub next_result {
228  
-    my ($self) = @_;
229  
-
230  
-    my $result;
231  
-
232  
-    my ($tfh);
233  
-
234  
-    # XMLCOMPACT
235  
-    # WU-BLAST has an XML_COMPACT option which needs to be preprocessed before
236  
-    # passing on to the parser.
237  
-    if ($self->{_xml_compact}) {
238  
-        $self->debug("XMLCOMPACT mode\n");
239  
-        my ($tfh2, $filename) = IO::File->new_tmpfile or $self->throw("Unable to open temp file: $!");
240  
-        $tfh2->autoflush(1);
241  
-        my $fh = $self->_fh;
242  
-        while (my $line = <$fh>) {
243  
-            $line =~ s/></>\n</g;
244  
-            print $tfh2 $line;
245  
-        }
246  
-        seek($tfh2,0,0);
247  
-        close $fh;
248  
-        # redirect self's IO to use new tempfile
249  
-        $self->_fh($tfh2);
250  
-    }
251  
-
252  
-    if( $self->use_tempfile ) {
253  
-        $tfh = IO::File->new_tmpfile or $self->throw("Unable to open temp file: $!");
254  
-        $tfh->autoflush(1);
255  
-    }
256  
-
257  
-    my $okaytoprocess = ($self->blasttype =~ /PSI/) ? $self->_chunk_psiblast($tfh) :
258  
-        $self->_chunk_normalblast($tfh);
259  
-
260  
-    return unless( $okaytoprocess);
261  
-
262  
-    my %parser_args;
263  
-    if( defined $tfh ) {
264  
-	seek($tfh,0,0);
265  
-	%parser_args = ('Source' => { 'ByteStream' => $tfh });
266  
-    } else {
267  
-	%parser_args = ('Source' => { 'String' => $self->{'_blastdata'} });
268  
-    }
269  
-
270  
-    my $starttime;
271  
-    if(  $DEBUG ) {  $starttime = [ Time::HiRes::gettimeofday() ]; }
272  
-
273  
-    eval {
274  
-	$result = $self->{'_xmlparser'}->parse(%parser_args);
275  
-    };
276  
-
277  
-    if( $@ ) {
278  
-	$self->warn("error in parsing a report:\n $@");
279  
-	$result = undef;
280  
-    }
281  
-    if( $DEBUG ) {
282  
-	$self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
283  
-    }
284  
-    # parsing magic here - but we call event handlers rather than
285  
-    # instantiating things
286  
-    if (defined $result) {
287  
-        # result count is handled here, as the BLASTXML reports are
288  
-        # broken up into smaller easier to digest bits
289  
-        $self->{_result_count}++;
290  
-        return $result;
291  
-    } else {
292  
-        return;
293  
-    }
294  
-}
295  
-
296  
-=head2 result_count
297  
-
298  
- Title   : result_count
299  
- Usage   : $num = $stream->result_count;
300  
- Function: Gets the number of Blast results that have been successfully parsed
301  
-           at the point of the method call.  This is not the total # of results
302  
-           in the file.
303  
- Returns : integer
304  
- Args    : none
305  
- Throws  : none
306  
-
307  
-=cut
308  
-
309  
-sub result_count {
310  
-    my $self = shift;
311  
-    return $self->{_result_count};
312  
-}
313  
-
314  
-=head2 use_tempfile
315  
-
316  
- Title   : use_tempfile
317  
- Usage   : $obj->use_tempfile($newval)
318  
- Function: Get/Set boolean flag on whether or not to use a tempfile
319  
- Example :
320  
- Returns : value of use_tempfile
321  
- Args    : newvalue (optional)
322  
-
323  
-=cut
324  
-
325  
-sub use_tempfile{
326  
-   my ($self,$value) = @_;
327  
-   if( defined $value) {
328  
-      $self->{'_use_tempfile'} = $value;
329  
-    }
330  
-    return $self->{'_use_tempfile'};
331  
-}
332  
-
333  
-=head2 blasttype
334  
-
335  
- Title   : blasttype
336  
- Usage   : $obj->blasttype($newtype)
337  
- Function: Get/Set BLAST report type.
338  
- Returns : BLAST report type
339  
- Args    : case-insensitive string of types BLAST or PSIBLAST (default: BLAST)
340  
- Note    : this is used to determine how reports are 'chunked' (in cases
341  
-           where multiple queries are submitted) and which XML handler
342  
-           to use when parsing the report(s)
343  
-
344  
-=cut
345  
-
346  
-sub blasttype{
347  
-    my ($self,$value) = @_;
348  
-    if ($value) {
349  
-        $self->throw("$value is not a supported BLAST type") unless exists $VALID_TYPE{$value};
350  
-        my $ok;
351  
-        eval {
352  
-            $ok = $self->_load_module($VALID_TYPE{$value});
353  
-        };
354  
-        if ($@) {
355  
-            print STDERR <<END;
356  
-$self: data module $VALID_TYPE{$value} cannot be found
357  
-Exception $@
358  
-For more information about the Bio::SearchIO::blastxml system please see the Bio::SearchIO::blastxml.
359  
-END
360  
-            return unless $ok;
361  
-        }
362  
-        # BlastHandler does the heavy lifting
363  
-        my $xmlhandler = $VALID_TYPE{$value}->new(-verbose => $self->verbose);
364  
-
365  
-        # The XML handler does the heavy work, passes data to object handler
366  
-        if ($value =~ /^PSI/) {
367  
-            my $handler = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
368  
-            $self->{'_handler'} = $handler; # cache
369  
-        }
370  
-        $xmlhandler->eventHandler($self->_eventHandler());
371  
-
372  
-        # start up the parser factory
373  
-        my $parserfactory = XML::SAX::ParserFactory->parser(
374  
-            Handler => $xmlhandler);
375  
-        $self->{'_xmlparser'} = $parserfactory;
376  
-        $self->saxparser(ref($parserfactory));
377  
-
378  
-        $self->{'_blasttype'} = $value;
379  
-    }
380  
-    return $self->{'_blasttype'};
381  
-}
382  
-
383  
-sub saxparser {
384  
-    my $self = shift;
385  
-    return ref($self->{'_xmlparser'});
386  
-}
387  
-
388  
-sub _chunk_normalblast {
389  
-    my ($self, $tfh) = @_;
390  
-
391  
-    local $/ = "\n";
392  
-    local $_;
393  
-    $self->{'_blastdata'} = '';
394  
-
395  
-    my ($sawxmlheader, $okaytoprocess);
396  
-
397  
-    my $mode = 'header';
398  
-
399  
-    my $tail = << 'XML_END';
400  
-  </BlastOutput_iterations>
401  
-</BlastOutput>
402  
-XML_END
403  
-
404  
-    # no buffering needed (famous last words...)
405  
-    my $fh = $self->_fh;
406  
-
407  
-    #chop up XML into edible bits for the parser
408  
-    while( defined( my $line = <$fh>) ) {
409  
-        next if $line =~ m{^\s*</BlastOutput_iterations>}xmso || $line =~ m{^</BlastOutput>}xmso;
410  
-        if( $line =~ m{^RPS-BLAST}i ) {
411  
-            $self->{'_type'} = 'RPS-BLAST';
412  
-            next;
413  
-        } elsif ($line =~ m{^<\?xml\sversion="1.0"}xms) {# <?xml version="1.0"?> & <?xml version="1.0" encoding="UTF-8"?>
414  
-            delete $self->{'_header'} if exists $self->{'_header'};
415  
-            $sawxmlheader++;
416  
-            $mode = 'header';
417  
-        } elsif ($line =~ m{^\s*<Iteration>}xmso) {
418  
-            if (!$sawxmlheader) {
419  
-                if (defined $tfh) {
420  
-                    print $tfh $self->{'_header'}
421  
-                } else {
422  
-                    $self->{'_blastdata'} .= $self->{'_header'};
423  
-                }
424  
-            }
425  
-            $mode = 'iteration';
426  
-        } elsif ($line =~ m{^\s*</Iteration>}xmso) {
427  
-            if (defined $tfh) {
428  
-                print $tfh $line.$tail;
429  
-            } else {
430  
-                $self->{'_blastdata'} .= $line.$tail;
431  
-            }
432  
-            $okaytoprocess++;
433  
-            last;
434  
-        }
435  
-        if (defined $tfh) {
436  
-            print $tfh $line;
437  
-        } else {
438  
-            $self->{'_blastdata'} .= $line;
439  
-        }
440  
-        $self->{"_$mode"} .= $line if $mode eq 'header';
441  
-    }
442  
-    return $okaytoprocess;
443  
-}
444  
-
445  
-sub _chunk_psiblast {
446  
-    my ($self, $tfh) = @_;
447  
-
448  
-    local $/ = "\n";
449  
-    local $_;
450  
-    $self->{'_blastdata'} = '';
451  
-
452  
-    my ($sawxmlheader, $okaytoprocess);
453  
-
454  
-    # no buffering needed (famous last words...)
455  
-    my $fh = $self->_fh;
456  
-
457  
-    #chop up XML into edible bits for the parser
458  
-    while( defined( my $line = <$fh>) ) {
459  
-        if (defined $tfh) {
460  
-            print $tfh $line;
461  
-        } else {
462  
-            $self->{'_blastdata'} .= $line;
463  
-        }
464  
-        #$self->{"_$mode"} .= $line;
465  
-        if ($line =~ m{^</BlastOutput>}xmso) {
466  
-            $okaytoprocess++;
467  
-            last;
468  
-        }
469  
-    }
470  
-    #$self->debug($self->{'_blastdata'}."\n");
471  
-    return $okaytoprocess;
472  
-}
473  
-
474  
-1;
531  t/SearchIO/blastxml.t
... ...
@@ -1,531 +0,0 @@
1  
-# -*-Perl-*- Test Harness script for Bioperl
2  
-# $Id: SearchIO.t 14995 2008-11-16 06:20:00Z cjfields $
3  
-
4  
-use strict;
5  
-
6  
-BEGIN {
7  
-	use lib '.';
8  
-    use Bio::Root::Test;
9  
-    
10  
-    test_begin(-tests => 391,
11  
-			   -requires_module => 'XML::SAX');
12  
-	
13  
-	use_ok('Bio::SearchIO');
14  
-}
15  
-
16  
-my ($searchio, $result,$iter,$hit,$hsp);
17  
-
18  
-# XML encoding/decoding done within XML::SAX now, though some parsers
19  
-# do not work properly (XML::SAX::PurePerl, XML::LibXML::SAX)
20  
-
21  
-eval {
22  
-	# test with RPSBLAST data first
23  
-	# this needs to be eval'd b/c the XML::SAX parser object is
24  
-	# instantiated in the constructor
25  
-	$searchio = Bio::SearchIO->new('-tempfile' => 1,
26  
-		   '-format' => 'blastxml',
27  
-		   '-file'   => test_input_file('ecoli_domains.rps.xml'),
28  
-		   '-blasttype' => 'blast',
29  
-		   '-verbose' => -1);
30  
-	# PurePerl works with these BLAST reports, so removed verbose promotion
31  
-	$result = $searchio->next_result;
32  
-	die if !defined $result;
33  
-};
34  
-
35  
-SKIP: {
36  
-	# this should be fixed with newer installations of XML::SAX::Expat, but as we
37  
-	# don't require a certain version (multiple backends can be used) we catch
38  
-	# and skip if needed 
39  
-	if ($@ && $@ =~ m{Handler could not resolve external entity}) {
40  
-		skip("Older versions of XML::SAX::Expat may not work with XML tests; skipping",297);
41  
-	} elsif ($@) {
42  
-		skip("Problem with XML::SAX setup: $@. Check ParserDetails.ini; skipping XML tests",297);
43  
-	}
44 <