Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'topic/split_searchio_blastxml'

* topic/split_searchio_blastxml:
  removing searchio-blastxml files
  • Loading branch information...
commit bc0ebfd157ad5723e6a9cc6d7d6d91671fee21ea 2 parents d999055 + a69193e
@pcantalupo pcantalupo authored
View
315 Bio/SearchIO/XML/BlastHandler.pm
@@ -1,315 +0,0 @@
-#
-# BioPerl module for Bio::SearchIO::XML::BlastHandler
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Jason Stajich, Chris Fields
-#
-# Copyright Jason Stajich
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::SearchIO::XML::BlastHandler - XML Handler for NCBI Blast XML parsing.
-
-=head1 SYNOPSIS
-
- # This is not to be used directly.
-
-=head1 DESCRIPTION
-
-This is the XML handler for BLAST XML parsing. Currently it passes elements off
-to the event handler, which is ultimately responsible for Bio::Search object
-generation.
-
-This was recently split off from the original code for Bio::SearchIO::blastxml
-primarily for maintenance purposes.
-
-=head1 DEPENDENCIES
-
-In addition to parts of the Bio:: hierarchy, this module uses:
-
- XML::SAX::Base
-
-which comes with the XML::SAX distribution.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-responsive experts will be able to look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Jason Stajich, Chris Fields
-
-Email jason-at-bioperl.org
-Email cjfields-at-uiuc dot edu
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-package Bio::SearchIO::XML::BlastHandler;
-use base qw(Bio::Root::Root XML::SAX::Base);
-
-my %MODEMAP = (
- 'Iteration' => 'result',
- 'Hit' => 'hit',
- 'Hsp' => 'hsp'
-);
-
-# major post 2.2.12 BLAST XML changes
-# 1) moved XML Handler to its own class
-# 2) reconfigure blastxml to deal with old and new BLAST XML output
-
-my %MAPPING = (
- # Result-specific fields
- 'BlastOutput_program' => 'RESULT-algorithm_name',
- 'BlastOutput_version' => 'RESULT-algorithm_version',
- 'BlastOutput_db' => 'RESULT-database_name',
- 'BlastOutput_reference' => 'RESULT-program_reference',
- 'BlastOutput_query-def' => 'RESULT-query_description',
- 'BlastOutput_query-len' => 'RESULT-query_length',
- 'BlastOutput_query-ID' => 'runid',
- 'Parameters_matrix' => { 'RESULT-parameters' => 'matrix'},
- 'Parameters_expect' => { 'RESULT-parameters' => 'expect'},
- 'Parameters_include' => { 'RESULT-parameters' => 'include'},
- 'Parameters_sc-match' => { 'RESULT-parameters' => 'match'},
- 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
- 'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen'},
- 'Parameters_gap-extend' => { 'RESULT-parameters' => 'gapext'},
- 'Parameters_filter' => {'RESULT-parameters' => 'filter'},
- 'Statistics_db-num' => 'RESULT-database_entries',
- 'Statistics_db-len' => 'RESULT-database_letters',
- 'Statistics_hsp-len' => { 'RESULT-statistics' => 'hsplength'},
- 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
- 'Statistics_kappa' => { 'RESULT-statistics' => 'kappa' },
- 'Statistics_lambda' => { 'RESULT-statistics' => 'lambda' },
- 'Statistics_entropy' => { 'RESULT-statistics' => 'entropy'},
-
- # HSP specific fields
- 'Hsp_bit-score' => 'HSP-bits',
- 'Hsp_score' => 'HSP-score',
- 'Hsp_evalue' => 'HSP-evalue',
- 'Hsp_query-from' => 'HSP-query_start',
- 'Hsp_query-to' => 'HSP-query_end',
- 'Hsp_hit-from' => 'HSP-hit_start',
- 'Hsp_hit-to' => 'HSP-hit_end',
- 'Hsp_positive' => 'HSP-conserved',
- 'Hsp_identity' => 'HSP-identical',
- 'Hsp_gaps' => 'HSP-gaps',
- 'Hsp_hitgaps' => 'HSP-hit_gaps',
- 'Hsp_querygaps' => 'HSP-query_gaps',
- 'Hsp_qseq' => 'HSP-query_seq',
- 'Hsp_hseq' => 'HSP-hit_seq',
- 'Hsp_midline' => 'HSP-homology_seq',
- 'Hsp_align-len' => 'HSP-hsp_length',
- 'Hsp_query-frame'=> 'HSP-query_frame',
- 'Hsp_hit-frame' => 'HSP-hit_frame',
-
- # Hit specific fields
- 'Hit_id' => 'HIT-name',
- 'Hit_len' => 'HIT-length',
- 'Hit_accession' => 'HIT-accession',
- 'Hit_def' => 'HIT-description',
- 'Hit_num' => 'HIT-order',
- 'Iteration_iter-num' => 'HIT-iteration',
- 'Iteration_stat' => 'HIT-iteration_statistic',
-
- # if these tags are present, they will overwrite the
- # above with more current data (i.e. multiquery hits)
- 'Iteration_query-def' => 'RESULT-query_description',
- 'Iteration_query-len' => 'RESULT-query_length',
- 'Iteration_query-ID' => 'runid',
- );
-
-# these XML tags are ignored for now
-my %IGNOREDTAGS = (
- 'Hsp_num' => 1,#'HSP-order',
- 'Hsp_pattern-from' => 1,#'patternend',
- 'Hsp_pattern-to' => 1,#'patternstart',
- 'Hsp_density' => 1,#'hspdensity',
- 'Iteration_message' => 1,
- 'Hit_hsps' => 1,
- 'BlastOutput_param' => 1,
- 'Iteration_hits' => 1,
- 'Statistics' => 1,
- 'Parameters' => 1,
- 'BlastOutput' => 1,
- 'BlastOutput_iterations' => 1,
- );
-
-=head2 SAX methods
-
-=cut
-
-=head2 start_document
-
- Title : start_document
- Usage : $parser->start_document;
- Function: SAX method to indicate starting to parse a new document
- Returns : none
- Args : none
-
-=cut
-
-sub start_document{
- my ($self) = @_;
- $self->{'_lasttype'} = '';
- $self->{'_values'} = {};
- $self->{'_result'}= [];
-}
-
-=head2 end_document
-
- Title : end_document
- Usage : $parser->end_document;
- Function: SAX method to indicate finishing parsing a new document
- Returns : Bio::Search::Result::ResultI object
- Args : none
-
-=cut
-
-sub end_document{
- my ($self,@args) = @_;
-
- # reset data carried throughout parse
- $self->{'_resultdata'} = undef;
-
- # pass back ref to results queue; caller must reset handler results queue
- return $self->{'_result'};
-}
-
-=head2 start_element
-
- Title : start_element
- Usage : $parser->start_element($data)
- Function: SAX method to indicate starting a new element
- Returns : none
- Args : hash ref for data
-
-=cut
-
-sub start_element{
- my ($self,$data) = @_;
- # we currently don't care about attributes
- my $nm = $data->{'Name'};
-
- if( my $type = $MODEMAP{$nm} ) {
- if( $self->eventHandler->will_handle($type) ) {
- my $func = sprintf("start_%s",lc $type);
- $self->eventHandler->$func($data->{'Attributes'});
- }
- }
-}
-
-=head2 end_element
-
- Title : end_element
- Usage : $parser->end_element($data)
- Function: Signals finishing an element
- Returns : Bio::Search object depending on what type of element
- Args : hash ref for data
-
-=cut
-
-sub end_element{
- my ($self,$data) = @_;
-
- my $nm = $data->{'Name'};
- my $rc;
- if($nm eq 'BlastOutput_program' &&
- $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
- $self->{'_type'} = uc $1;
- }
- if ($nm eq 'Iteration') {
- map {
- $self->{'_values'}->{$_} = $self->{'_resultdata'}->{$_};
- } keys %{ $self->{'_resultdata'} };
- }
- if( my $type = $MODEMAP{$nm} ) {
- if( $self->eventHandler->will_handle($type) ) {
- my $func = sprintf("end_%s",lc $type);
- $rc = $self->eventHandler->$func($self->{'_type'},
- $self->{'_values'});
- }
- }
- elsif( exists $MAPPING{$nm} ) {
- if ( ref($MAPPING{$nm}) =~ /hash/i ) {
- my $key = (keys %{$MAPPING{$nm}})[0];
- $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
- } else {
- $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
- }
- }
- elsif( exists $IGNOREDTAGS{$nm} ){
- # ignores these elements for now
- }
- else {
- $self->debug("ignoring unrecognized element type $nm\n");
- }
- $self->{'_last_data'} = ''; # remove read data if we are at
- # end of an element
-
- # add to ResultI array
- $self->{'_result'} = $rc if( $nm eq 'Iteration' );
- # reset values for each Result round
- if ($nm eq 'Iteration') {
- $self->{'_values'} = {};
- }
-}
-
-=head2 characters
-
- Title : characters
- Usage : $parser->characters($data)
- Function: Signals new characters to be processed
- Returns : characters read
- Args : hash ref with the key 'Data'
-
-
-=cut
-
-sub characters{
- my ($self,$data) = @_;
- return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
- $self->{'_last_data'} .= $data->{'Data'};
-}
-
-sub eventHandler {
- my $self = shift;
- return $self->{'_handler'} = shift if @_;
- return $self->{'_handler'};
-}
-
-1;
View
312 Bio/SearchIO/XML/PsiBlastHandler.pm
@@ -1,312 +0,0 @@
-#
-# BioPerl module for Bio::SearchIO::XML::PsiBlastHandler
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Jason Stajich, Chris Fields
-#
-# Copyright Chris Fields
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::SearchIO::XML::PsiBlastHandler - XML Handler for NCBI Blast PSIBLAST XML parsing.
-
-=head1 SYNOPSIS
-
- # This is not to be used directly.
-
-=head1 DESCRIPTION
-
-This is the XML handler for BLAST PSIBLAST XML parsing. Currently it passes
-elements off to the event handler, which is ultimately responsible for
-Bio::Search object generation.
-
-This was recently split off from the original code for Bio::SearchIO::blastxml
-primarily for maintenance purposes.
-
-=head1 DEPENDENCIES
-
-In addition to parts of the Bio:: hierarchy, this module uses:
-
- XML::SAX::Base
-
-which comes with the XML::SAX distribution.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-responsive experts will be able to look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Jason Stajich, Chris Fields
-
-Email jason-at-bioperl.org
-Email cjfields-at-uiuc dot edu
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-package Bio::SearchIO::XML::PsiBlastHandler;
-use base qw(Bio::Root::Root XML::SAX::Base);
-
-my %MODEMAP = (
- 'BlastOutput' => 'result',
- 'Iteration' => 'iteration',
- 'Hit' => 'hit',
- 'Hsp' => 'hsp'
-);
-
-# MAPPING is distinct from BlastHandler, can't really mix the two...
-
-my %MAPPING = (
- # Result-specific fields
- 'BlastOutput_program' => 'RESULT-algorithm_name',
- 'BlastOutput_version' => 'RESULT-algorithm_version',
- 'BlastOutput_db' => 'RESULT-database_name',
- 'BlastOutput_reference' => 'RESULT-program_reference',
- 'BlastOutput_query-def' => 'RESULT-query_description',
- 'BlastOutput_query-len' => 'RESULT-query_length',
- 'BlastOutput_query-ID' => 'runid',
- 'Parameters_matrix' => { 'RESULT-parameters' => 'matrix'},
- 'Parameters_expect' => { 'RESULT-parameters' => 'expect'},
- 'Parameters_include' => { 'RESULT-parameters' => 'include'},
- 'Parameters_sc-match' => { 'RESULT-parameters' => 'match'},
- 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
- 'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen'},
- 'Parameters_gap-extend' => { 'RESULT-parameters' => 'gapext'},
- 'Parameters_filter' => {'RESULT-parameters' => 'filter'},
- 'Statistics_db-num' => 'RESULT-database_entries',
- 'Statistics_db-len' => 'RESULT-database_letters',
- 'Statistics_hsp-len' => { 'RESULT-statistics' => 'hsplength'},
- 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
- 'Statistics_kappa' => { 'RESULT-statistics' => 'kappa' },
- 'Statistics_lambda' => { 'RESULT-statistics' => 'lambda' },
- 'Statistics_entropy' => { 'RESULT-statistics' => 'entropy'},
-
- # Iteration-specific parameters
- 'Iteration_iter-num' => 'ITERATION-number',
- 'Iteration_converged' => 'ITERATION-converged',
-
- # HSP specific fields
- 'Hsp_bit-score' => 'HSP-bits',
- 'Hsp_score' => 'HSP-score',
- 'Hsp_evalue' => 'HSP-evalue',
- 'Hsp_query-from' => 'HSP-query_start',
- 'Hsp_query-to' => 'HSP-query_end',
- 'Hsp_hit-from' => 'HSP-hit_start',
- 'Hsp_hit-to' => 'HSP-hit_end',
- 'Hsp_positive' => 'HSP-conserved',
- 'Hsp_identity' => 'HSP-identical',
- 'Hsp_gaps' => 'HSP-gaps',
- 'Hsp_hitgaps' => 'HSP-hit_gaps',
- 'Hsp_querygaps' => 'HSP-query_gaps',
- 'Hsp_qseq' => 'HSP-query_seq',
- 'Hsp_hseq' => 'HSP-hit_seq',
- 'Hsp_midline' => 'HSP-homology_seq',
- 'Hsp_align-len' => 'HSP-hsp_length',
- 'Hsp_query-frame'=> 'HSP-query_frame',
- 'Hsp_hit-frame' => 'HSP-hit_frame',
-
- # Hit specific fields
- 'Hit_id' => 'HIT-name',
- 'Hit_len' => 'HIT-length',
- 'Hit_accession' => 'HIT-accession',
- 'Hit_def' => 'HIT-description',
- 'Hit_num' => 'HIT-order',
- 'Iteration_iter-num' => 'HIT-iteration',
- 'Iteration_stat' => 'HIT-iteration_statistic',
- );
-
-# these XML tags are ignored for now
-my %IGNOREDTAGS = (
- 'Hsp_num' => 1,#'HSP-order',
- 'Hsp_pattern-from' => 1,#'patternend',
- 'Hsp_pattern-to' => 1,#'patternstart',
- 'Hsp_density' => 1,#'hspdensity',
- 'Iteration_message' => 1,
- 'Hit_hsps' => 1,
- 'BlastOutput_param' => 1,
- 'Iteration_hits' => 1,
- 'Statistics' => 1,
- 'Parameters' => 1,
- 'BlastOutput' => 1,
- 'BlastOutput_iterations' => 1,
- );
-
-=head2 SAX methods
-
-=cut
-
-=head2 start_document
-
- Title : start_document
- Usage : $parser->start_document;
- Function: SAX method to indicate starting to parse a new document
- Returns : none
- Args : none
-
-=cut
-
-sub start_document{
- my ($self) = @_;
- $self->{'_lasttype'} = '';
- $self->{'_values'} = {};
- $self->{'_result'}= [];
-}
-
-=head2 end_document
-
- Title : end_document
- Usage : $parser->end_document;
- Function: SAX method to indicate finishing parsing a new document
- Returns : Bio::Search::Result::ResultI object
- Args : none
-
-=cut
-
-sub end_document{
- my ($self,@args) = @_;
-
- # reset data carried throughout parse
- $self->{'_resultdata'} = undef;
-
- # pass back ref to results queue; caller must reset handler results queue
- return $self->{'_result'};
-}
-
-=head2 start_element
-
- Title : start_element
- Usage : $parser->start_element($data)
- Function: SAX method to indicate starting a new element
- Returns : none
- Args : hash ref for data
-
-=cut
-
-sub start_element{
- my ($self,$data) = @_;
- # we currently don't care about attributes
- my $nm = $data->{'Name'};
-
- if( my $type = $MODEMAP{$nm} ) {
- if( $self->eventHandler->will_handle($type) ) {
- my $func = sprintf("start_%s",lc $type);
- $self->eventHandler->$func($data->{'Attributes'});
- }
- }
-}
-
-=head2 end_element
-
- Title : end_element
- Usage : $parser->end_element($data)
- Function: Signals finishing an element
- Returns : Bio::Search object depending on what type of element
- Args : hash ref for data
-
-=cut
-
-sub end_element{
- my ($self,$data) = @_;
-
- my $nm = $data->{'Name'};
- my $rc;
- if($nm eq 'BlastOutput_program' &&
- $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
- $self->{'_type'} = uc $1;
- }
- if ($nm eq 'Iteration') {
- map {
- $self->{'_values'}->{$_} = $self->{'_resultdata'}->{$_};
- } keys %{ $self->{'_resultdata'} };
- }
- if( my $type = $MODEMAP{$nm} ) {
- if( $self->eventHandler->will_handle($type) ) {
- my $func = sprintf("end_%s",lc $type);
- $rc = $self->eventHandler->$func($self->{'_type'},
- $self->{'_values'});
- }
- }
- elsif( exists $MAPPING{$nm} ) {
- if ( ref($MAPPING{$nm}) =~ /hash/i ) {
- my $key = (keys %{$MAPPING{$nm}})[0];
- $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
- } else {
- $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
- }
- }
- elsif( exists $IGNOREDTAGS{$nm} ){
- # ignores these elements for now
- }
- else {
- $self->debug("ignoring unrecognized element type $nm\n");
- }
- $self->{'_last_data'} = ''; # remove read data if we are at
- # end of an element
-
- # add to ResultI array
- $self->{'_result'} = $rc if( $nm eq 'BlastOutput' );
- # reset values for each Result round
- if ($nm eq 'BlastOutput') {
- $self->{'_values'} = {};
- }
-}
-
-=head2 characters
-
- Title : characters
- Usage : $parser->characters($data)
- Function: Signals new characters to be processed
- Returns : characters read
- Args : hash ref with the key 'Data'
-
-
-=cut
-
-sub characters{
- my ($self,$data) = @_;
- return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
- $self->{'_last_data'} .= $data->{'Data'};
-}
-
-sub eventHandler {
- my $self = shift;
- return $self->{'_handler'} = shift if @_;
- return $self->{'_handler'};
-}
-
-1;
View
474 Bio/SearchIO/blastxml.pm
@@ -1,474 +0,0 @@
-#
-# BioPerl module for Bio::SearchIO::blastxml
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Jason Stajich <jason@bioperl.org>
-#
-# Copyright Jason Stajich
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::SearchIO::blastxml - A SearchIO implementation of NCBI Blast XML parsing.
-
-=head1 SYNOPSIS
-
- use Bio::SearchIO;
- my $searchin = Bio::SearchIO->new(-format => 'blastxml',
- -file => 't/data/plague_yeast.bls.xml');
-
- while( my $result = $searchin->next_result ) {
- ....
- }
-
- # one can also request that the parser NOT keep the XML data in memory
- # by using the tempfile initialization flag.
-
- $searchin = Bio::SearchIO->new(-tempfile => 1,
- -format => 'blastxml',
- -file => 't/data/plague_yeast.bls.xml');
-
- while( my $result = $searchin->next_result ) {
- ....
- }
-
- # PSI-BLAST parsing (default is normal BLAST)
- $searchin = Bio::SearchIO->new(
- -format => 'blastxml',
- -blasttype => 'psiblast',
- -file => 't/data/plague_yeast.bls.xml');
-
- while( my $result = $searchin->next_result ) {
- ....
- }
-
-=head1 DESCRIPTION
-
-This object implements a NCBI Blast XML parser. It requires XML::SAX; it is
-also recommended (for faster parsing) that XML::SAX::ExpatXS or XML::LibXML
-be installed. Either 'XML::SAX::ExpatXS' or 'XML::LibXML::SAX::Parser' should
-be set as the default parser in ParserDetails.ini. This file is located in the
-SAX subdirectory of XML in your local perl library (normally in the 'site'
-directory).
-
-Two different XML handlers currently exist to deal with logical differences
-between how normal BLAST reports and PSI-BLAST reports are logically parsed into
-BioPerl objects; this is explicitly settable using the B<-blasttype> parameter.
-The default is for parsing a normal BLAST report ('blast'), but if one is
-expecting PSI-BLAST report parsing, -blasttype B<must> be set explicitly to
-'psiblast'. This is due to a lack of any information in the XML output which
-tells the parser the report is derived from a PSI-BLAST run vs. a normal BLAST
-run.
-
-There is one additional initialization flag from the SearchIO defaults. That is
-the B<-tempfile> flag. If specified as true, then the parser will write out each
-report to a temporary filehandle rather than holding the entire report as a
-string in memory. The reason this is done in the first place is NCBI reports
-have an unnecessary E<lt>?xml version="1.0"?E<gt> at the beginning of each report
-and RPS-BLAST reports have an additional unnecessary RPS-BLAST tag at the top of
-each report. So we currently have implemented the work around by preparsing the
-file (yes it makes the process slower, but it works). We are open to suggestions
-on how to optimize this in the future.
-
-=head1 DEPENDENCIES
-
-In addition to parts of the Bio:: hierarchy, this module uses:
-
- XML::SAX
-
-It is also recommended that XML::SAX::ExpatXS be installed and made the default
-XML::SAX parser, along with the Expat library, for faster parsing.
-XML::SAX::Expat is not recommended; XML::SAX::ExpatXS is considered the current
-replacement for XML::SAX::Expat and is actively being considered to replace
-XML::SAX::Expat. XML::SAX::Expat will work, but only if you have local copies of
-the NCBI BLAST DTDs. This is due to issues with NCBI's BLAST XML format. The
-DTDs and the web address to obtain them are:
-
- NCBI_BlastOutput.dtd
- NCBI_BlastOutput.mod.dtd
-
- http://www.ncbi.nlm.nih.gov/data_specs/dtd/
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-responsive experts will be able to look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Jason Stajich
-
-Email jason-at-bioperl.org
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-
-package Bio::SearchIO::blastxml;
-use strict;
-# Object preamble - inherits from Bio::SearchIO
-
-use base qw(Bio::SearchIO);
-use Bio::Root::Root;
-use XML::SAX;
-use IO::File;
-use Bio::SearchIO::XML::BlastHandler;
-use Bio::SearchIO::IteratedSearchResultEventBuilder;
-
-our $DEBUG;
-
-my %VALID_TYPE = (
- 'BLAST' => 'Bio::SearchIO::XML::BlastHandler',
- 'PSIBLAST' => 'Bio::SearchIO::XML::PsiBlastHandler',
- 'PSI-BLAST' => 'Bio::SearchIO::XML::PsiBlastHandler'
- );
-
-# mapping of NCBI Blast terms to Bioperl hash keys
-
-=head2 new
-
- Title : new
- Usage : my $searchio = Bio::SearchIO->new(-format => 'blastxml',
- -file => 'filename',
- -tempfile => 1);
- Function: Initializes the object - this is chained through new in SearchIO
- Returns : Bio::SearchIO::blastxml object
- Args : One additional argument from the format and file/fh parameters.
- -tempfile => boolean. Defaults to false. Write out XML data
- to a temporary filehandle to send to PerlSAX parser.
-
-=cut
-
-=head2 _initialize
-
- Title : _initialize
- Usage : private
- Function: Initializes the object - this is chained through new in SearchIO
-
-=cut
-
-sub _initialize{
- my ($self,@args) = @_;
- $self->SUPER::_initialize(@args);
- my ($usetempfile, $blasttype,$xmlcompact) = $self->_rearrange([qw(
- TEMPFILE
- BLASTTYPE
- XMLCOMPACT)],@args);
- $blasttype ||= 'BLAST';
- $self->{_xml_compact} = $xmlcompact || 0;
- $self->blasttype(uc $blasttype);
- defined $usetempfile && $self->use_tempfile($usetempfile);
- $self->{_result_count} = 0;
- eval { require Time::HiRes };
- if( $@ ) { $DEBUG = 0; }
- $DEBUG = 1 if( ! defined $DEBUG && ($self->verbose > 0));
-}
-
-sub attach_EventHandler {
- my ($self,$handler) = @_;
-
- $self->SUPER::attach_EventHandler($handler);
-
- # Make sure if there is an XML parser present already, the internal Handler
- # is set
- if (exists $self->{'_xmlparser'}) {
- $self->{'_xmlparser'}->get_handler->eventHandler($handler);
- }
-
- # Optimization: caching the EventHandler since it is used a lot
- # during the parse.
-
- $self->{'_handler_cache'} = $handler;
- return;
-}
-
-=head2 next_result
-
- Title : next_result
- Usage : my $result = $searchio->next_result;
- Function: Returns the next Result from a search
- Returns : Bio::Search::Result::ResultI object
- Args : none
-
-=cut
-
-sub next_result {
- my ($self) = @_;
-
- my $result;
-
- my ($tfh);
-
- # XMLCOMPACT
- # WU-BLAST has an XML_COMPACT option which needs to be preprocessed before
- # passing on to the parser.
- if ($self->{_xml_compact}) {
- $self->debug("XMLCOMPACT mode\n");
- my ($tfh2, $filename) = IO::File->new_tmpfile or $self->throw("Unable to open temp file: $!");
- $tfh2->autoflush(1);
- my $fh = $self->_fh;
- while (my $line = <$fh>) {
- $line =~ s/></>\n</g;
- print $tfh2 $line;
- }
- seek($tfh2,0,0);
- close $fh;
- # redirect self's IO to use new tempfile
- $self->_fh($tfh2);
- }
-
- if( $self->use_tempfile ) {
- $tfh = IO::File->new_tmpfile or $self->throw("Unable to open temp file: $!");
- $tfh->autoflush(1);
- }
-
- my $okaytoprocess = ($self->blasttype =~ /PSI/) ? $self->_chunk_psiblast($tfh) :
- $self->_chunk_normalblast($tfh);
-
- return unless( $okaytoprocess);
-
- my %parser_args;
- if( defined $tfh ) {
- seek($tfh,0,0);
- %parser_args = ('Source' => { 'ByteStream' => $tfh });
- } else {
- %parser_args = ('Source' => { 'String' => $self->{'_blastdata'} });
- }
-
- my $starttime;
- if( $DEBUG ) { $starttime = [ Time::HiRes::gettimeofday() ]; }
-
- eval {
- $result = $self->{'_xmlparser'}->parse(%parser_args);
- };
-
- if( $@ ) {
- $self->warn("error in parsing a report:\n $@");
- $result = undef;
- }
- if( $DEBUG ) {
- $self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
- }
- # parsing magic here - but we call event handlers rather than
- # instantiating things
- if (defined $result) {
- # result count is handled here, as the BLASTXML reports are
- # broken up into smaller easier to digest bits
- $self->{_result_count}++;
- return $result;
- } else {
- return;
- }
-}
-
-=head2 result_count
-
- Title : result_count
- Usage : $num = $stream->result_count;
- Function: Gets the number of Blast results that have been successfully parsed
- at the point of the method call. This is not the total # of results
- in the file.
- Returns : integer
- Args : none
- Throws : none
-
-=cut
-
-sub result_count {
- my $self = shift;
- return $self->{_result_count};
-}
-
-=head2 use_tempfile
-
- Title : use_tempfile
- Usage : $obj->use_tempfile($newval)
- Function: Get/Set boolean flag on whether or not to use a tempfile
- Example :
- Returns : value of use_tempfile
- Args : newvalue (optional)
-
-=cut
-
-sub use_tempfile{
- my ($self,$value) = @_;
- if( defined $value) {
- $self->{'_use_tempfile'} = $value;
- }
- return $self->{'_use_tempfile'};
-}
-
-=head2 blasttype
-
- Title : blasttype
- Usage : $obj->blasttype($newtype)
- Function: Get/Set BLAST report type.
- Returns : BLAST report type
- Args : case-insensitive string of types BLAST or PSIBLAST (default: BLAST)
- Note : this is used to determine how reports are 'chunked' (in cases
- where multiple queries are submitted) and which XML handler
- to use when parsing the report(s)
-
-=cut
-
-sub blasttype{
- my ($self,$value) = @_;
- if ($value) {
- $self->throw("$value is not a supported BLAST type") unless exists $VALID_TYPE{$value};
- my $ok;
- eval {
- $ok = $self->_load_module($VALID_TYPE{$value});
- };
- if ($@) {
- print STDERR <<END;
-$self: data module $VALID_TYPE{$value} cannot be found
-Exception $@
-For more information about the Bio::SearchIO::blastxml system please see the Bio::SearchIO::blastxml.
-END
- return unless $ok;
- }
- # BlastHandler does the heavy lifting
- my $xmlhandler = $VALID_TYPE{$value}->new(-verbose => $self->verbose);
-
- # The XML handler does the heavy work, passes data to object handler
- if ($value =~ /^PSI/) {
- my $handler = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
- $self->{'_handler'} = $handler; # cache
- }
- $xmlhandler->eventHandler($self->_eventHandler());
-
- # start up the parser factory
- my $parserfactory = XML::SAX::ParserFactory->parser(
- Handler => $xmlhandler);
- $self->{'_xmlparser'} = $parserfactory;
- $self->saxparser(ref($parserfactory));
-
- $self->{'_blasttype'} = $value;
- }
- return $self->{'_blasttype'};
-}
-
-sub saxparser {
- my $self = shift;
- return ref($self->{'_xmlparser'});
-}
-
-sub _chunk_normalblast {
- my ($self, $tfh) = @_;
-
- local $/ = "\n";
- local $_;
- $self->{'_blastdata'} = '';
-
- my ($sawxmlheader, $okaytoprocess);
-
- my $mode = 'header';
-
- my $tail = << 'XML_END';
- </BlastOutput_iterations>
-</BlastOutput>
-XML_END
-
- # no buffering needed (famous last words...)
- my $fh = $self->_fh;
-
- #chop up XML into edible bits for the parser
- while( defined( my $line = <$fh>) ) {
- next if $line =~ m{^\s*</BlastOutput_iterations>}xmso || $line =~ m{^</BlastOutput>}xmso;
- if( $line =~ m{^RPS-BLAST}i ) {
- $self->{'_type'} = 'RPS-BLAST';
- next;
- } elsif ($line =~ m{^<\?xml\sversion="1.0"}xms) {# <?xml version="1.0"?> & <?xml version="1.0" encoding="UTF-8"?>
- delete $self->{'_header'} if exists $self->{'_header'};
- $sawxmlheader++;
- $mode = 'header';
- } elsif ($line =~ m{^\s*<Iteration>}xmso) {
- if (!$sawxmlheader) {
- if (defined $tfh) {
- print $tfh $self->{'_header'}
- } else {
- $self->{'_blastdata'} .= $self->{'_header'};
- }
- }
- $mode = 'iteration';
- } elsif ($line =~ m{^\s*</Iteration>}xmso) {
- if (defined $tfh) {
- print $tfh $line.$tail;
- } else {
- $self->{'_blastdata'} .= $line.$tail;
- }
- $okaytoprocess++;
- last;
- }
- if (defined $tfh) {
- print $tfh $line;
- } else {
- $self->{'_blastdata'} .= $line;
- }
- $self->{"_$mode"} .= $line if $mode eq 'header';
- }
- return $okaytoprocess;
-}
-
-sub _chunk_psiblast {
- my ($self, $tfh) = @_;
-
- local $/ = "\n";
- local $_;
- $self->{'_blastdata'} = '';
-
- my ($sawxmlheader, $okaytoprocess);
-
- # no buffering needed (famous last words...)
- my $fh = $self->_fh;
-
- #chop up XML into edible bits for the parser
- while( defined( my $line = <$fh>) ) {
- if (defined $tfh) {
- print $tfh $line;
- } else {
- $self->{'_blastdata'} .= $line;
- }
- #$self->{"_$mode"} .= $line;
- if ($line =~ m{^</BlastOutput>}xmso) {
- $okaytoprocess++;
- last;
- }
- }
- #$self->debug($self->{'_blastdata'}."\n");
- return $okaytoprocess;
-}
-
-1;
View
531 t/SearchIO/blastxml.t
@@ -1,531 +0,0 @@
-# -*-Perl-*- Test Harness script for Bioperl
-# $Id: SearchIO.t 14995 2008-11-16 06:20:00Z cjfields $
-#
-# Exercises the 'blastxml' format of Bio::SearchIO against the XML reports
-# named in the test_input_file() calls below (RPS-BLAST, BLAST, PSI-BLAST).
-
-use strict;
-
-BEGIN {
-    use lib '.';
-    use Bio::Root::Test;
-
-    # 391 tests in total: the use_ok() below plus the 390 checks inside the
-    # SKIP block further down.
-    test_begin(-tests => 391,
-               -requires_module => 'XML::SAX');
-
-    use_ok('Bio::SearchIO');
-}
-
-my ($searchio, $result,$iter,$hit,$hsp);
-
-# XML encoding/decoding done within XML::SAX now, though some parsers
-# do not work properly (XML::SAX::PurePerl, XML::LibXML::SAX)
-
-eval {
-    # test with RPSBLAST data first
-    # this needs to be eval'd b/c the XML::SAX parser object is
-    # instantiated in the constructor
-    $searchio = Bio::SearchIO->new('-tempfile'  => 1,
-                                   '-format'    => 'blastxml',
-                                   '-file'      => test_input_file('ecoli_domains.rps.xml'),
-                                   '-blasttype' => 'blast',
-                                   '-verbose'   => -1);
-    # PurePerl works with these BLAST reports, so removed verbose promotion
-    $result = $searchio->next_result;
-    die if !defined $result;
-};
-
-SKIP: {
-    # this should be fixed with newer installations of XML::SAX::Expat, but as we
-    # don't require a certain version (multiple backends can be used) we catch
-    # and skip if needed
-    #
-    # The skip count has to equal the number of checks in this block (390,
-    # i.e. the 391 planned tests minus the use_ok() above); with any other
-    # number the run ends in a plan mismatch instead of a clean skip.
-    if ($@ && $@ =~ m{Handler could not resolve external entity}) {
-        skip("Older versions of XML::SAX::Expat may not work with XML tests; skipping",390);
-    } elsif ($@) {
-        skip("Problem with XML::SAX setup: $@. Check ParserDetails.ini; skipping XML tests",390);
-    }
-    is($searchio->result_count, 1);
-
-    # basic ResultI data
-    isa_ok($result, 'Bio::Search::Result::ResultI');
-    is($result->database_name, '/data_2/jason/db/cdd/cdd/Pfam', 'database_name()');
-    is($result->query_name,'gi|1786182|gb|AAC73112.1|','query_name()');
-    is($result->query_description, '(AE000111) thr operon leader peptide [Escherichia coli]');
-    is($result->query_accession, 'AAC73112.1');
-    is($result->query_gi, 1786182);
-    is($result->query_length, 21);
-    is($result->algorithm, 'BLASTP');
-    is($result->algorithm_version, 'blastp 2.1.3 [Apr-1-2001]');
-
-    # check parameters
-    is($result->available_parameters, 8);
-    is($result->get_parameter('matrix'), 'BLOSUM62');
-    float_is($result->get_parameter('expect'), '1e-05');
-    is($result->get_parameter('include'), 0);
-    is($result->get_parameter('match'), 0);
-    is($result->get_parameter('mismatch'), 0);
-    is($result->get_parameter('gapopen'), 11);
-    is($result->get_parameter('gapext'), 1);
-    is($result->get_parameter('filter'), 'F');
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 0);
-    is($result->database_letters, 0);
-    is($result->get_statistic('hsplength'), 0);
-    float_is($result->get_statistic('effectivespace'), 4.16497e+11);
-    is($result->get_statistic('lambda'), 0.267);
-    is($result->get_statistic('kappa'), 0.041);
-    is($result->get_statistic('entropy'), 0.14);
-
-    # this result actually has a hit
-    $result = $searchio->next_result;
-
-    # does the parser catch everything in the next result?
-    is($result->database_name, '/data_2/jason/db/cdd/cdd/Pfam', 'database_name()');
-    is($result->query_name,'gi|1786183|gb|AAC73113.1|');
-    is($result->query_description, '(AE000111) aspartokinase I, homoserine dehydrogenase I [Escherichia coli]');
-    is($result->query_accession, 'AAC73113.1');
-    is($result->query_gi, 1786183);
-    is($result->query_length, 820);
-    is($result->algorithm, 'BLASTP');
-    is($result->algorithm_version, 'blastp 2.1.3 [Apr-1-2001]');
-
-    is($searchio->result_count, 2);
-
-    # check parameters
-    is($result->available_parameters, 8);
-    is($result->get_parameter('matrix'), 'BLOSUM62');
-    float_is($result->get_parameter('expect'), '1e-05');
-    is($result->get_parameter('include'), 0);
-    is($result->get_parameter('match'), 0);
-    is($result->get_parameter('mismatch'), 0);
-    is($result->get_parameter('gapopen'), 11);
-    is($result->get_parameter('gapext'), 1);
-    is($result->get_parameter('filter'), 'F');
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 0);
-    is($result->database_letters, 0);
-    is($result->get_statistic('hsplength'), 0);
-    float_is($result->get_statistic('effectivespace'), 3.82682e+07);
-    is($result->get_statistic('lambda'), 0.267);
-    is($result->get_statistic('kappa'), 0.041);
-    is($result->get_statistic('entropy'), 0.14);
-
-    $hit = $result->next_hit;
-    is($hit->name, 'gnl|Pfam|pfam00742');
-    is($hit->description(), 'HomoS_dh, HomoS dehydrogenase');
-    is($hit->accession, 'pfam00742');
-    is($hit->ncbi_gi, ''); # not found
-    is($hit->length, 310);
-
-    $hsp = $hit->next_hsp;
-    is($hsp->query->seq_id, $result->query_name,'query name on HSP');
-    is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
-    is($hsp->hit->seq_id, $hit->name,'hitname');
-    is($hsp->hit->seqdesc, $hit->description,'hitdesc');
-    is($hsp->pvalue, undef);
-    float_is($hsp->evalue, 1.46134e-90);
-    is($hsp->score, 838);
-    is($hsp->bits,327.405);
-    is($hsp->query->start, 498);
-    is($hsp->query->end,815);
-    is($hsp->hit->start, 3);
-    is($hsp->hit->end, 310);
-    is($hsp->query->frame,0);
-    is($hsp->hit->frame,0);
-    is(sprintf("%.2f", $hsp->percent_identity), 37.73);
-    is(sprintf("%.4f", $hsp->frac_identical('hit')), 0.3994);
-    is(sprintf("%.4f", $hsp->frac_identical('query')), 0.3868);
-    is(sprintf("%.4f",$hsp->query->frac_identical), 0.3868);
-
-    is(sprintf("%.4f",$hsp->frac_conserved('total')),0.5245);
-    is(sprintf("%.4f",$hsp->frac_conserved('hit')),0.5552);
-    is(sprintf("%.4f",$hsp->frac_conserved('query')),0.5377);
-    # gaps should match calculated sequence indices for gaps and vice versa
-    is($hsp->gaps('total'), $hsp->seq_inds('hit', 'gaps') + $hsp->seq_inds('query', 'gaps'));
-    is($hsp->gaps('hit'), $hsp->seq_inds('hit', 'gaps'));
-    is($hsp->gaps('query'), $hsp->seq_inds('query', 'gaps'));
-    is($hsp->length('total'), 326);
-    is($hsp->query_string, 'LRVCGVANSKALLTNVHGLNLENWQEELAQAKEPF-NLGRLIRLVKEYHLLN----PVIVDCTSSQAVAD-QYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE-GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARET-GRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS');
-    is($hsp->hit_string, 'GVVTGITDSREMLLSRIGLPLEIWKVALRDLEKPRKDLGKLDLTDDAFAVVDDPDIDVVVELTGGIEVARELYLDALEEGKHVVTANKALNASHGDEYLAL---AEKSGVDVLYEAAVAGGIPIIKTLRELLATGDRILKIEGIFNGTTNFILSEMDEKGLPFSDVLAEAQELGYTEADPRDDVEGIDAARKLAILARIAFGIELELDDVYVEGISPITAEDISSADEFGYTLKLLDEAMRQRVEDAESGGEVLRYPTLIPE-------------DHPLASVKGSDNAVAVEGEAYG--PLMFYGPGAGAEPTASAVVADIVRIAR');
-    is($hsp->homology_string, ' V G+ +S+ +L + GL LE W+ L ++P +LG+L + +++ V+V+ T VA Y D L EG HVVT NK N S D Y L AEKS LY+ V G+P+I+ L+ LL GD ++K GI +G+ ++I ++DE G+ FS+ A+E+GYTE DPRDD+ G+D ARKL ILAR G ELEL D+ +E + P F L LD+ RV A G+VLRY I E + PL VK +NA+A Y PL+ G GAG + TA+ V AD++R ');
-    is(join(' ', $hsp->seq_inds('query', 'gap',1)), '532 548 562 649 690');
-    is($hsp->ambiguous_seq_inds, '');
-
-    # one more
-    $hit = $result->next_hit;
-    isa_ok($hit,'Bio::Search::Hit::HitI');
-
-    # drain the remaining results of this report; exactly 8 are expected
-    my $results_left = 8;
-    while( $result = $searchio->next_result ) { ok($result); $results_left--; }
-    is($results_left, 0);
-
-    $searchio = Bio::SearchIO->new(-format  => 'blastxml',
-                                   -verbose => -1,
-                                   -file    => test_input_file('plague_yeast.bls.xml'));
-
-    $result = $searchio->next_result;
-    is($searchio->result_count, 1);
-    is($result->database_name, 'yeast.aa');
-    is($result->query_name, 'gi|5763811|emb|CAB53164.1|');
-    is($result->query_description, 'putative transposase [Yersinia pestis]');
-    is($result->query_accession, 'CAB53164.1');
-    is($result->query_gi, 5763811);
-    is($result->query_length, 340);
-
-    # this report is expected to contain no hits
-    $hit = $result->next_hit;
-    ok(! $hit);
-
-    $searchio = Bio::SearchIO->new(-format  => 'blastxml',
-                                   -verbose => -1,
-                                   -file    => test_input_file('mus.bls.xml'));
-
-    $result = $searchio->next_result;
-    is($searchio->result_count, 1);
-    is($result->database_name,'Hs15_up1000');
-    is($result->query_name,'NM_011441_up_1000_chr1_4505586_r');
-    is($result->query_description,'chr1:4505586-4506585');
-    is($result->query_accession,'NM_011441_up_1000_chr1_4505586_r');
-    is($result->query_gi, '');
-    is($result->query_length,'1000');
-
-    # check parameters
-    is($result->available_parameters, 6);
-    is($result->get_parameter('matrix'), undef); # not set
-    float_is($result->get_parameter('expect'), 10);
-    is($result->get_parameter('include'), undef); # not set
-    is($result->get_parameter('match'), 1);
-    is($result->get_parameter('mismatch'), -3);
-    is($result->get_parameter('gapopen'), 5);
-    is($result->get_parameter('gapext'), 2);
-    is($result->get_parameter('filter'), 'D');
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 17516);
-    is($result->database_letters, 17516000);
-    is($result->get_statistic('hsplength'), 0);
-    float_is($result->get_statistic('effectivespace'), 1.69255e+10);
-    is($result->get_statistic('lambda'), 1.37407);
-    is($result->get_statistic('kappa'), 0.710605);
-    is($result->get_statistic('entropy'), 1.30725);
-
-    $hit = $result->next_hit;
-    is($hit->name,'NM_001938_up_1000_chr1_93161154_f');
-    is($hit->description,'chr1:93161154-93162153');
-    is($hit->ncbi_gi, ''); # none reported
-    is($hit->accession,'3153');
-    is($hit->length,'1000');
-
-    # deal with new BLAST XML changes
-    $searchio = Bio::SearchIO->new(-format  => 'blastxml',
-                                   -verbose => -1,
-                                   -file    => test_input_file('newblast.xml'));
-
-    $result = $searchio->next_result;
-    is($searchio->result_count, 1);
-    is($result->database_name,'nr');
-    is($result->algorithm,'BLASTP');
-    is($result->algorithm_version,'BLASTP 2.2.15 [Oct-15-2006]');
-    # some XML::SAX parsers (PurePerl, XML::SAX::LibXML) don't decode entities
-    # properly, not fixable using decode_entities()
-    like($result->algorithm_reference, qr{Nucleic Acids Res} );
-    is($result->query_name,'gi|15600734|ref|NP_254228.1|');
-    is($result->query_description,'dihydroorotase [Pseudomonas aeruginosa PAO1]');
-    is($result->query_accession,'NP_254228.1');
-    is($result->query_gi, 15600734);
-    is($result->query_length,'445');
-
-    # check parameters
-    is($result->available_parameters, 4);
-    is($result->get_parameter('matrix'), 'BLOSUM62');
-    float_is($result->get_parameter('expect'), 10);
-    is($result->get_parameter('include'), undef); # not set
-    is($result->get_parameter('match'), undef); # not set
-    is($result->get_parameter('mismatch'), undef);# not set
-    is($result->get_parameter('gapopen'), 11);
-    is($result->get_parameter('gapext'), 1);
-    is($result->get_parameter('filter'), undef); # not set
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 4299737);
-    is($result->database_letters, 1479795817);
-    is($result->get_statistic('hsplength'), 0);
-    float_is($result->get_statistic('effectivespace'), 0);
-    is($result->get_statistic('lambda'), 0.267);
-    is($result->get_statistic('kappa'), 0.041);
-    is($result->get_statistic('entropy'), 0.14);
-
-    $hit = $result->next_hit;
-    is($hit->name,'gi|15600734|ref|NP_254228.1|');
-    is($hit->description,'dihydroorotase [Pseudomonas aeruginosa PAO1] '.
-       '>gi|107104643|ref|ZP_01368561.1| hypothetical protein PaerPA_01005722 '.
-       '[Pseudomonas aeruginosa PACS2] >gi|9951880|gb|AAG08926.1|AE004966_8 '.
-       'dihydroorotase [Pseudomonas aeruginosa PAO1]');
-    is($hit->accession,'NP_254228');
-    is($hit->length,'445');
-    $hsp = $hit->next_hsp;
-    is($hsp->query->seq_id, $result->query_name,'query name on HSP');
-    is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
-    is($hsp->hit->seq_id, $hit->name,'hitname');
-    is($hsp->hit->seqdesc, $hit->description,'hitdesc');
-    is($hsp->pvalue, undef);
-    float_is($hsp->evalue, 0);
-    is($hsp->score, 2251);
-    is($hsp->bits,871.692);
-    is($hsp->query->start, 1);
-    is($hsp->query->end,445);
-    is($hsp->hit->start, 1);
-    is($hsp->hit->end, 445);
-    is($hsp->query->frame,0);
-    is($hsp->hit->frame,0);
-
-    $result = $searchio->next_result;
-    is($searchio->result_count, 2);
-    is($result->database_name,'nr');
-    is($result->algorithm,'BLASTP');
-    is($result->algorithm_version,'BLASTP 2.2.15 [Oct-15-2006]');
-    like($result->algorithm_reference, qr{Nucleic Acids Res} );
-    is($result->query_name,'gi|15598723|ref|NP_252217.1|');
-    is($result->query_description,'dihydroorotase [Pseudomonas aeruginosa PAO1]');
-    is($result->query_accession,'NP_252217.1');
-    is($result->query_gi, 15598723);
-    is($result->query_length,'348');
-
-    # check parameters
-    is($result->available_parameters, 4);
-    is($result->get_parameter('matrix'), 'BLOSUM62');
-    float_is($result->get_parameter('expect'), 10);
-    is($result->get_parameter('include'), undef); # not set
-    is($result->get_parameter('match'), undef); # not set
-    is($result->get_parameter('mismatch'), undef);# not set
-    is($result->get_parameter('gapopen'), 11);
-    is($result->get_parameter('gapext'), 1);
-    is($result->get_parameter('filter'), undef); # not set
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 4299737);
-    is($result->database_letters, 1479795817);
-    is($result->get_statistic('hsplength'), 0);
-    float_is($result->get_statistic('effectivespace'), 0);
-    is($result->get_statistic('lambda'), 0.267);
-    is($result->get_statistic('kappa'), 0.041);
-    is($result->get_statistic('entropy'), 0.14);
-
-    $hit = $result->next_hit;
-    is($hit->name,'gi|15598723|ref|NP_252217.1|');
-    is($hit->description,'dihydroorotase [Pseudomonas aeruginosa PAO1] '.
-       '>gi|6226683|sp|P72170|PYRC_PSEAE Dihydroorotase (DHOase) '.
-       '>gi|9949676|gb|AAG06915.1|AE004773_4 dihydroorotase [Pseudomonas aeruginosa PAO1] '.
-       '>gi|3868712|gb|AAC73109.1| dihydroorotase [Pseudomonas aeruginosa]');
-    is($hit->ncbi_gi, 15598723);
-    is($hit->accession,'NP_252217');
-    is($hit->length,'348');
-    $hsp = $hit->next_hsp;
-    is($hsp->query->seq_id, $result->query_name,'query name on HSP');
-    is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
-    is($hsp->hit->seq_id, $hit->name,'hitname');
-    is($hsp->hit->seqdesc, $hit->description,'hitdesc');
-    is($hsp->pvalue, undef);
-    float_is($hsp->evalue, 0);
-    is($hsp->score, 1780);
-    is($hsp->bits,690.263);
-    is($hsp->query->start, 1);
-    is($hsp->query->end,348);
-    is($hsp->hit->start, 1);
-    is($hsp->hit->end, 348);
-    is($hsp->query->frame,0);
-    is($hsp->hit->frame,0);
-
-    # PSIBLAST XML parsing
-
-    $searchio = Bio::SearchIO->new('-tempfile'  => 1,
-                                   '-format'    => 'blastxml',
-                                   '-file'      => test_input_file('psiblast.xml'),
-                                   '-blasttype' => 'psiblast');
-
-    $result = $searchio->next_result;
-    is($searchio->result_count, 1);
-    is($result->database_name, 'AL591824.faa');
-    is($result->algorithm, 'BLASTP');
-    like($result->algorithm_version, qr/2\.2\.16/);
-    is($result->query_name, 'gi|1373160|gb|AAB57770.1|');
-    is($result->query_accession, 'AAB57770.1');
-    is($result->query_gi, '1373160');
-    is($result->query_length, 173);
-
-    # check parameters
-    is($result->available_parameters, 6);
-    is($result->get_parameter('matrix'), 'BLOSUM62');
-    float_is($result->get_parameter('expect'), 10);
-    is($result->get_parameter('include'), 0.002);
-    is($result->get_parameter('match'), undef); # not set
-    is($result->get_parameter('mismatch'), undef);# not set
-    is($result->get_parameter('gapopen'), 11);
-    is($result->get_parameter('gapext'), 1);
-    is($result->get_parameter('filter'), 'F');
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 2846);
-    is($result->database_letters, 870878);
-    is($result->get_statistic('hsplength'), 75);
-    float_is($result->get_statistic('effectivespace'), 6.44279e+07);
-    is($result->get_statistic('lambda'), 0.267);
-    is($result->get_statistic('kappa'), 0.0475563);
-    is($result->get_statistic('entropy'), 0.14);
-
-    my $iter_count = 0;
-    # expected values per hit, consumed in this order:
-    # name, length, accession, ncbi_gi, significance, bits
-    my @valid_hit_data = ( [ 'gi|16411294|emb|CAC99918.1|', 183, 'CAC99918', 16411294, '4.5377e-56', 209.92],
-                           [ 'gi|16409584|emb|CAD00746.1|', 648, 'CAD00746', 16409584, '0.000286309', 37.7354],
-                           [ 'gi|16411285|emb|CAC99909.1|', 209, 'CAC99909', 16411285, '0.107059', 29.261]);
-    # expected values per iteration, consumed in this order:
-    # num_hits, num_hits_new, num_hits_old, then the six new/old hit
-    # classification counts in the order they are checked below
-    my @valid_iter_data = ( [ 16, 16, 0, 2, 14, 0, 0, 0, 0],
-                            [ 16, 8, 8, 0, 8, 0, 2, 0, 6]);
-
-    while (my $iter = $result->next_iteration) {
-        $iter_count++;
-        my $di = shift @valid_iter_data;
-        is($iter->number, $iter_count);
-        is($iter->num_hits, shift @$di);
-        is($iter->num_hits_new, shift @$di);
-        is($iter->num_hits_old, shift @$di);
-        is(scalar($iter->newhits_below_threshold), shift @$di);
-        is(scalar($iter->newhits_not_below_threshold), shift @$di);
-        is(scalar($iter->newhits_unclassified), shift @$di);
-        is(scalar($iter->oldhits_below_threshold), shift @$di);
-        is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
-        is(scalar($iter->oldhits_not_below_threshold), shift @$di);
-        my $hit_count = 0;
-        if ($iter_count == 1) {
-            while( my $hit = $result->next_hit ) {
-                my $d = shift @valid_hit_data;
-                is($hit->name, shift @$d);
-                is($hit->length, shift @$d);
-                is($hit->accession, shift @$d);
-                is($hit->ncbi_gi, shift @$d);
-                float_is($hit->significance, shift @$d);
-                is($hit->bits, shift @$d );
-                # HSP details are only checked for the second hit
-                if( $hit_count == 1 ) {
-                    my $hsps_left = 1;
-                    while( my $hsp = $hit->next_hsp ){
-                        is($hsp->query->start, 4);
-                        is($hsp->query->end, 155);
-                        is($hsp->hit->start, 475);
-                        is($hsp->hit->end, 617);
-                        is($hsp->length('total'), 153);
-                        is($hsp->start('hit'), $hsp->hit->start);
-                        is($hsp->end('query'), $hsp->query->end);
-                        is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
-                        float_is($hsp->evalue, 0.000286309);
-                        is($hsp->score, 86);
-                        is($hsp->bits, 37.7354);
-                        is(sprintf("%.1f",$hsp->percent_identity), 20.9);
-                        is(sprintf("%.4f",$hsp->frac_identical('query')), 0.2105);
-                        is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.224);
-                        is($hsp->gaps('total'), 11);
-                        $hsps_left--;
-                    }
-                    is($hsps_left, 0);
-                }
-                # stop once the expected-hit list has been used up
-                last if( $hit_count++ > @valid_hit_data );
-            }
-        }
-    }
-    is(@valid_hit_data, 0);
-    is(@valid_iter_data, 0);
-    is($iter_count, 2);
-
-    $result = $searchio->next_result;
-    is($searchio->result_count, 2);
-    is($result->database_name, 'AL591824.faa');
-    is($result->algorithm, 'BLASTP');
-    like($result->algorithm_version, qr/2\.2\.16/);
-    is($result->query_name, 'gi|154350371|gb|ABS72450.1|');
-    is($result->query_accession, 'ABS72450.1');
-    is($result->query_gi, '154350371');
-    is($result->query_length, 378);
-
-    # check parameters
-    is($result->available_parameters, 6);
-    is($result->get_parameter('matrix'), 'BLOSUM62');
-    float_is($result->get_parameter('expect'), 10);
-    is($result->get_parameter('include'), 0.002);
-    is($result->get_parameter('match'), undef); # not set
-    is($result->get_parameter('mismatch'), undef);# not set
-    is($result->get_parameter('gapopen'), 11);
-    is($result->get_parameter('gapext'), 1);
-    is($result->get_parameter('filter'), 'F');
-
-    # check statistics
-    is($result->available_statistics, 5);
-    is($result->database_entries, 2846);
-    is($result->database_letters, 870878);
-    is($result->get_statistic('hsplength'), 82);
-    float_is($result->get_statistic('effectivespace'), 1.88702e+08);
-    is($result->get_statistic('lambda'), 0.267);
-    is($result->get_statistic('kappa'), 0.0450367);
-    is($result->get_statistic('entropy'), 0.14);
-
-    $iter_count = 0;
-
-    # same layout as the expected-value tables used for the first result
-    @valid_hit_data = ( [ 'gi|16409361|emb|CAC98217.1|', 381, 'CAC98217', 16409361, '5.57178e-119', 420.239],
-                        [ 'gi|16409959|emb|CAC98662.1|', 776, 'CAC98662', 16409959, '0.0242028', 32.7278],
-                        [ 'gi|16410942|emb|CAC99591.1|', 382, 'CAC99591', 16410942, '0.340848', 28.8758]);
-    @valid_iter_data = ( [ 11, 11, 0, 1, 10, 0, 0, 0, 0],
-                         [ 19, 11, 8, 0, 11, 0, 1, 0, 7]);
-
-    while (my $iter = $result->next_iteration) {
-        $iter_count++;
-        my $di = shift @valid_iter_data;
-        is($iter->number, $iter_count);
-        is($iter->num_hits, shift @$di);
-        is($iter->num_hits_new, shift @$di);
-        is($iter->num_hits_old, shift @$di);
-        is(scalar($iter->newhits_below_threshold), shift @$di);
-        is(scalar($iter->newhits_not_below_threshold), shift @$di);
-        is(scalar($iter->newhits_unclassified), shift @$di);
-        is(scalar($iter->oldhits_below_threshold), shift @$di);
-        is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
-        is(scalar($iter->oldhits_not_below_threshold), shift @$di);
-        my $hit_count = 0;
-        if ($iter_count == 1) {
-            while( my $hit = $result->next_hit ) {
-                my $d = shift @valid_hit_data;
-                is($hit->name, shift @$d);
-                is($hit->length, shift @$d);
-                is($hit->accession, shift @$d);
-                is($hit->ncbi_gi, shift @$d);
-                float_is($hit->significance, shift @$d);
-                is($hit->bits, shift @$d );
-                # HSP details are only checked for the second hit
-                if( $hit_count == 1 ) {
-                    my $hsps_left = 1;
-                    while( my $hsp = $hit->next_hsp ){
-                        is($hsp->query->start, 63);
-                        is($hsp->query->end, 181);
-                        is($hsp->hit->start, 304);
-                        is($hsp->hit->end, 432);
-                        is($hsp->length('total'), 129);
-                        is($hsp->start('hit'), $hsp->hit->start);
-                        is($hsp->end('query'), $hsp->query->end);
-                        is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
-                        float_is($hsp->evalue, 0.0242028);
-                        is($hsp->score, 73);
-                        is($hsp->bits, 32.7278);
-                        is(sprintf("%.1f",$hsp->percent_identity), '24.0');
-                        is(sprintf("%.4f",$hsp->frac_identical('query')), '0.2605');
-                        is(sprintf("%.3f",$hsp->frac_identical('hit')), '0.240');
-                        is($hsp->gaps, 10);
-                        $hsps_left--;
-                    }
-                    is($hsps_left, 0);
-                }
-                # stop once the expected-hit list has been used up
-                last if( $hit_count++ > @valid_hit_data );
-            }
-        }
-    }
-    is(@valid_hit_data, 0);
-    is(@valid_iter_data, 0);
-    is($iter_count, 2);
-}
View
610 t/data/ecoli_domains.rps.xml
@@ -1,610 +0,0 @@
-RPS-BLAST 2.1.3 [Apr-1-2001]
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_0</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786182|gb|AAC73112.1| (AE000111) thr operon leader peptide [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>21</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>4.16497e+11</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_1</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogenase I [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>820</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_hits>
- <Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gnl|Pfam|pfam00742</Hit_id>
- <Hit_def>HomoS_dh, HomoS dehydrogenase</Hit_def>
- <Hit_accession>pfam00742</Hit_accession>
- <Hit_len>310</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>327.405</Hsp_bit-score>
- <Hsp_score>838</Hsp_score>
- <Hsp_evalue>1.46134e-90</Hsp_evalue>
- <Hsp_query-from>498</Hsp_query-from>
- <Hsp_query-to>815</Hsp_query-to>
- <Hsp_hit-from>3</Hsp_hit-from>
- <Hsp_hit-to>310</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>123</Hsp_identity>
- <Hsp_positive>171</Hsp_positive>
- <Hsp_gaps>26</Hsp_gaps>
- <Hsp_align-len>326</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>LRVCGVANSKALLTNVHGLNLENWQEELAQAKEPF-NLGRLIRLVKEYHLLN----PVIVDCTSSQAVAD-QYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE-GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARET-GRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS</Hsp_qseq>
- <Hsp_hseq>GVVTGITDSREMLLSRIGLPLEIWKVALRDLEKPRKDLGKLDLTDDAFAVVDDPDIDVVVELTGGIEVARELYLDALEEGKHVVTANKALNASHGDEYLAL---AEKSGVDVLYEAAVAGGIPIIKTLRELLATGDRILKIEGIFNGTTNFILSEMDEKGLPFSDVLAEAQELGYTEADPRDDVEGIDAARKLAILARIAFGIELELDDVYVEGISPITAEDISSADEFGYTLKLLDEAMRQRVEDAESGGEVLRYPTLIPE-------------DHPLASVKGSDNAVAVEGEAYG--PLMFYGPGAGAEPTASAVVADIVRIAR</Hsp_hseq>
- <Hsp_midline> V G+ +S+ +L + GL LE W+ L ++P +LG+L + +++ V+V+ T VA Y D L EG HVVT NK N S D Y L AEKS LY+ V G+P+I+ L+ LL GD ++K GI +G+ ++I ++DE G+ FS+ A+E+GYTE DPRDD+ G+D ARKL ILAR G ELEL D+ +E + P F L LD+ RV A G+VLRY I E + PL VK +NA+A Y PL+ G GAG + TA+ V AD++R </Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- <Hit>
- <Hit_num>2</Hit_num>
- <Hit_id>gnl|Pfam|pfam00696</Hit_id>
- <Hit_def>aakinase, Amino acid kinase family. This family includes kinases that phosphorylate a variety of amino acid substrates, as well as uridylate kinase and carbamate kinase. This family includes: Aspartokinase EC:2.7.2.4, AcetylE kinase EC:2.7.2.8, E 5-kinase EC:2.7.2.11, Uridylate kinase EC:2.7.4.-, Carbamate kinase EC:2.7.2.2</Hit_def>
- <Hit_accession>pfam00696</Hit_accession>
- <Hit_len>236</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>123.25</Hsp_bit-score>
- <Hsp_score>308</Hsp_score>
- <Hsp_evalue>4.18565e-29</Hsp_evalue>
- <Hsp_query-from>1</Hsp_query-from>
- <Hsp_query-to>284</Hsp_query-to>
- <Hsp_hit-from>2</Hsp_hit-from>
- <Hsp_hit-to>236</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>75</Hsp_identity>
- <Hsp_positive>115</Hsp_positive>
- <Hsp_gaps>71</Hsp_gaps>
- <Hsp_align-len>295</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>MRVLKFGGTSVANA--ERFLRVADILESNARQG-QVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAV--GHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSY------FGAKVLHPRTITPIAQFQIPCLIKN</Hsp_qseq>
- <Hsp_hseq>RIVIKLGGSALSDEDDERLERVAEEIAKLREEGREVVVVHGGGPQVGRLLLKL-----------------------------AKKPGSRVTDAATL--------------------------DALGAVGEGLSGALLSAALEAPGIPAGQLSGTEDFGIDAEGRGGNAVVDSVGVEKEAIEELLEKGKIPIVAGGGGVPV-----TLGRGDSDTAAALLAALLKADLLIILTDVDGVYTADPKKVPDAKLIDELSYEEALELAGGESGFGTGGMVPKVRAAILAVRSGIPVIITN</Hsp_hseq>
- <Hsp_midline> V+K GG+++++ ER RVA+ + +G +V V ++ L+ + A +PG + T AL GE +S A+++ LEA G + E G + VD + + + ++AG LGR SD +AA+LAA L+AD I TDVDGVYT DP++VPDA+L+ +SY+EA+EL+ G V R + IP +I N</Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- </Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>3.82682e+07</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_2</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786184|gb|AAC73114.1| (AE000111) homoserine kinase [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>310</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_hits>
- <Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gnl|Pfam|pfam00288</Hit_id>
- <Hit_def>GHMP_kinases, GHMP kinases putative ATP-binding protein</Hit_def>
- <Hit_accession>pfam00288</Hit_accession>
- <Hit_len>73</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>55.4546</Hsp_bit-score>
- <Hsp_score>132</Hsp_score>
- <Hsp_evalue>4.05207e-09</Hsp_evalue>
- <Hsp_query-from>69</Hsp_query-from>
- <Hsp_query-to>132</Hsp_query-to>
- <Hsp_hit-from>1</Hsp_hit-from>
- <Hsp_hit-to>73</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>22</Hsp_identity>
- <Hsp_positive>34</Hsp_positive>
- <Hsp_gaps>9</Hsp_gaps>
- <Hsp_align-len>73</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>WERFCQELGK----QIPVAMTLEKNMPIGSGLGSSAC-SVVAALMAMNE----HCGKPLNDTRLLALMGELEG</Hsp_qseq>
- <Hsp_hseq>WANYLKGGLKVIQPLPGLDVVISSNIPLGSGLGSSAAIAVVAGAVLANEFVAGLNGLKLSLANIQHLENQFEG</Hsp_hseq>
- <Hsp_midline>W + + K + + + N+P+GSGLGSSA +VVA + NE G L+ + L + EG</Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- </Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>3.85928e+07</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_3</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786185|gb|AAC73115.1| (AE000111) threonine synthase [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>428</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_hits>
- <Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gnl|Pfam|pfam00291</Hit_id>
- <Hit_def>PALP, Pyridoxal-phosphate dependent enzyme. Members of this family are all pyridoxal-phosphate dependent enzymes. This family includes: S dehydratase EC:4.2.1.13 P20132, threonine dehydratase EC:4.2.1.16, tryptophan synthase beta chain EC:4.2.1.20, threonine synthase EC:4.2.99.2, cysteine synthase EC:4.2.99.8 P11096, cystathionine beta-synthase EC:4.2.1.22, 1-aminocyclopropane-1-carboxylate deaminase EC:4.1.99.4</Hit_def>
- <Hit_accession>pfam00291</Hit_accession>
- <Hit_len>298</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>129.028</Hsp_bit-score>
- <Hsp_score>323</Hsp_score>
- <Hsp_evalue>3.98147e-31</Hsp_evalue>
- <Hsp_query-from>96</Hsp_query-from>
- <Hsp_query-to>363</Hsp_query-to>
- <Hsp_hit-from>28</Hsp_hit-from>
- <Hsp_hit-to>280</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>69</Hsp_identity>
- <Hsp_positive>101</Hsp_positive>
- <Hsp_gaps>19</Hsp_gaps>
- <Hsp_align-len>270</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>LELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQL-VVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMREL-KELGYTSEPHAAVAYRALRD</Hsp_qseq>
- <Hsp_hseq>LEDLN-PTGSFKDRGA-LNMILLAEKLGKKGGIVPGATSGNTGIALAYA-AALLGLKCTIVMPAT-TSREKVAQLRALGAENIVVPVVGGFDDLADAVKKALELAEENPK-NAYLVNQ-FDNPANVEA-GQKTIGLEIWEQLGGKPDAVVVPVGGGGTIAGIARYLKELLPVKVIGVEPEGSAVLSGFLKPG--SPVTLPETLSIAIGLGVPFVFPILDELL--------DDEVVTVTDEEALEAARLLAREEGIFVEPSSGAAVAAALK</Hsp_hseq>
- <Hsp_midline>LE + PT +FKD G +L G K + ATSG+TG A+A+A L +K I+ P S + LG V + G FD VK+A + E N + + ++ ++ + V VP G G + LPVK + + FL G SP TLS A+ + P +P ++EL + V DE + R L +E G EP + A A</Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- </Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>1.52967e+08</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_4</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786186|gb|AAC73116.1| (AE000111) orf, hypothetical protein [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>98</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>3.59021e+11</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_5</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786187|gb|AAC73117.1| (AE000111) orf, hypothetical protein [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>258</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>3.3722e+11</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_6</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786188|gb|AAC73118.1| (AE000111) inner membrane transport protein [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>476</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_hits>
- <Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gnl|Pfam|pfam01235</Hit_id>
- <Hit_def>Na_Ala_symp, Sodium:alanine symporter family</Hit_def>
- <Hit_accession>pfam01235</Hit_accession>
- <Hit_len>415</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>558.525</Hsp_bit-score>
- <Hsp_score>1438</Hsp_score>
- <Hsp_evalue>2.26239e-160</Hsp_evalue>
- <Hsp_query-from>48</Hsp_query-from>
- <Hsp_query-to>458</Hsp_query-to>
- <Hsp_hit-from>7</Hsp_hit-from>
- <Hsp_hit-to>415</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>233</Hsp_identity>
- <Hsp_positive>300</Hsp_positive>
- <Hsp_gaps>2</Hsp_gaps>
- <Hsp_align-len>411</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>KNSIHPQPGGLTSFQSLCTSLAARVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMRWMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPLMAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPNAAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAEFVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMAITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRY</Hsp_qseq>
- <Hsp_hseq>KFGKKDEGGDVSSFQALTTSLAGRVGTGNIAGVATAIAAGGPGAVFWMWVTAFIGMATAFVESTLAQLYKERDKDGNFRGGPAYYIKKGLGMRWLAILFAVAVIVSFGVIFSGVQANSIADAMSNAFGIPPLVTGIVLAILTALIIFGGVKRIAAISSIVVPFMAIIYLITALAIIAMNIEKVPDVIGLIFKSAFGFDAAAGGALGATVSKAIMWGVKRGLFSNEAGMGSAPNAAAAAHVS--HPAKQGLVQMLGIFLDTFIVCTATALVILLTGNYTNVETLKGAQLTQKAFDTLIGGFGATFVAIALLLFAFSTIIANYYYAETNLAYLVRSGPRGVALYRLAYLAAVFYGTVLSLTLVWALADIVMGIMALPNLIAILLLSKVAYEALKDYERQLKQGKDPEFDADEY</Hsp_hseq>
- <Hsp_midline>K + G ++SFQ+L TSLA RVG+GN+AGVA AI AGGPGAVFWMWV AFIGMAT+F E +LAQLYKERD +G FRGGPA+Y+ +GLGMRW+ +LFAV +++++G+IFSGVQAN++A A+S +F PPLVTGI+LA+ T L I G+ +A + VP MAII+++T+L I MNI ++P VI IF+SAFG+ AAGGA G T+S+AI G +R +FSNEAGMGS PNAAAAA HPA QG+VQM+GIF+DT ++CTA+A++ILL GN T L+G QL QKA L+G +GA FV + ++LFAFS+I+ANY YAE NL +L + P+ + R+ A V GT+LSL L+W LADI+M MA+ NL AILLLS V + DY RQ K G P FD Y</Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- </Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>1.95607e+08</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_7</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786189|gb|AAC73119.1| (AE000111) transaldolase B [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>317</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_hits>
- <Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gnl|Pfam|pfam00923</Hit_id>
- <Hit_def>Transaldolase, Transaldolase</Hit_def>
- <Hit_accession>pfam00923</Hit_accession>
- <Hit_len>282</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>306.99</Hsp_bit-score>
- <Hsp_score>785</Hsp_score>
- <Hsp_evalue>7.90128e-85</Hsp_evalue>
- <Hsp_query-from>13</Hsp_query-from>
- <Hsp_query-to>313</Hsp_query-to>
- <Hsp_hit-from>1</Hsp_hit-from>
- <Hsp_hit-to>282</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>161</Hsp_identity>
- <Hsp_positive>191</Hsp_positive>
- <Hsp_gaps>29</Hsp_gaps>
- <Hsp_align-len>306</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>TVVADTGDIAAMKLYQP----QDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQIVDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGIRAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIYQYYKEHGYETVVMGASFRNIGEILE-LAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESEFLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMI</Hsp_qseq>
- <Hsp_hseq>KVFLDTGDIEEIKKLAPIGIIQGVTTNPSLIAKAAKKSAYEKL----DAVGKKKGKTIKEQVENACDKLAVEF-PEILKIVPGRVSTEVDARLSFDAEAMIKEAKRLIKL-----ISKPNIVIKIPVTWEGLKAVKALEAEGIPVNVTLLFSAAQALAAAEAGVTYISPFVGRIDDWIDALTDK---NYEGDPGVQSVKDIYQYYKKHGYKTEVLAASFRNPGYILELLAGCDSLTIPPALLDQLLDHEPL---------NRKETGEKISEKKF--NIDEDAMAVELLDEGIRKFKKDFEKLLKST</Hsp_hseq>
- <Hsp_midline> V DTGDI +K P Q TTNPSLI AA+ Y KL A K++ +Q+ +A DKLAV EILK+VPGR+STEVDARLS+D EA I +AKRLIKL IS I+IK+ TW+G++A + LE EGI N+TLLFS AQA A AEAGV ISPFVGRI DW A TDK DPGV SV +IYQYYK+HGY+T V+ ASFRN G ILE LAGCD LTI PALL +L + E K +I+E +F ++D MAV+ L EGIRKF D EKL K </Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- </Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>1.4562e+08</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_8</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786190|gb|AAC73120.1| (AE000111) required for the efficient incorporation of molybdate into molybdoproteins [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>195</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>3.35238e+11</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
- <BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY_9</BlastOutput_query-ID>
- <BlastOutput_query-def>gi|1786191|gb|AAC73121.1| (AE000111) orf, hypothetical protein [Escherichia coli]</BlastOutput_query-def>
- <BlastOutput_query-len>188</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_matrix>BLOSUM62</Parameters_matrix>
- <Parameters_expect>1e-05</Parameters_expect>
- <Parameters_include>0</Parameters_include>
- <Parameters_sc-match>0</Parameters_sc-match>
- <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
- <Parameters_gap-open>11</Parameters_gap-open>
- <Parameters_gap-extend>1</Parameters_gap-extend>
- <Parameters_filter>F</Parameters_filter>
- </Parameters>
- </BlastOutput_param>
- <BlastOutput_iterations>
- <Iteration>
- <Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_hits>
- <Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gnl|Pfam|pfam01184</Hit_id>
- <Hit_def>Grp1_Fun34_YaaH, GPR1/FUN34/yaaH family</Hit_def>
- <Hit_accession>pfam01184</Hit_accession>
- <Hit_len>210</Hit_len>
- <Hit_hsps>
- <Hsp>
- <Hsp_num>1</Hsp_num>
- <Hsp_bit-score>218.009</Hsp_bit-score>
- <Hsp_score>554</Hsp_score>
- <Hsp_evalue>2.86271e-58</Hsp_evalue>
- <Hsp_query-from>5</Hsp_query-from>
- <Hsp_query-to>188</Hsp_query-to>
- <Hsp_hit-from>16</Hsp_hit-from>
- <Hsp_hit-to>210</Hsp_hit-to>
- <Hsp_pattern-from>0</Hsp_pattern-from>
- <Hsp_pattern-to>0</Hsp_pattern-to>
- <Hsp_query-frame>1</Hsp_query-frame>
- <Hsp_hit-frame>1</Hsp_hit-frame>
- <Hsp_identity>137</Hsp_identity>
- <Hsp_positive>146</Hsp_positive>
- <Hsp_gaps>13</Hsp_gaps>
- <Hsp_align-len>196</Hsp_align-len>
- <Hsp_density>0</Hsp_density>
- <Hsp_qseq>KLANPAPLGLMGFGMTTILLNLHNVGYFALD--GIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFTSYGSFWLTLVAILLMPKLGLTDAPNAQ-----FLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAIGNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTV-----LPIGESH</Hsp_qseq>
- <Hsp_hseq>KFANPAPLGLSGFALTTIVLSLHNVGAFGLDNPGIIVGMAIFYGGIAQIFAGLWEYKKENTFGLTALTSYGGFWLSLVAIL-MPKFGITDAYNDQIEVQNALGVYLGGWGVFTLFLFFCTLKSTRVFFFLFFSLTVTFLLLAIANITGNAAIIHFGGWLGLICAFSAIYLAYAGVANEQNSYIVPVPLDLPIGEKH</Hsp_hseq>
- <Hsp_midline>K ANPAPLGL GF +TTI+L+LHNVG F LD GII+ M IFYGGIAQIFAGL EYKK NTFGLTA TSYG FWL+LVAIL MPK G+TDA N Q LGVYLG WGVFTLF+FF TLK RV F+FFSLTV F LLAI NI GNAAIIHF GW+GLIC SAIYLA V NEQ V LPIGE H</Hsp_midline>
- </Hsp>
- </Hit_hsps>
- </Hit>
- </Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>0</Statistics_hsp-len>
- <Statistics_eff-space>8.17579e+07</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- </Iteration>
- </BlastOutput_iterations>
-</BlastOutput>
View
660 t/data/mus.bls.xml
@@ -1,660 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">
-<BlastOutput>
- <BlastOutput_program>blastn</BlastOutput_program>
- <BlastOutput_version>blastn 2.2.6 [Apr-09-2003]</BlastOutput_version>
- <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
- <BlastOutput_db>Hs15_up1000</BlastOutput_db>
- <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
- <BlastOutput_query-def>NM_011441_up_1000_chr1_4505586_r chr1:4505586-4506585</BlastOutput_query-def>
- <BlastOutput_query-len>1000</BlastOutput_query-len>
- <BlastOutput_param>
- <Parameters>
- <Parameters_expect>10</Parameters_expect>
- <Parameters_sc-match>1</Parameters_sc-match>
- <Parameters_sc-mismatch>-3</Parameters_sc-mismatch>
- <Parameters_gap-open>5</Parameters_gap-open>
- <Parameters_gap-extend>2</Parameters_gap-extend>