Permalink
Browse files

Merge branch 'master' of github.com:bioperl/bioperl-live

  • Loading branch information...
2 parents b7b20e4 + fccda77 commit 0e70e88a76d638d9f0406643c37691bb20d60ceb @fangly fangly committed Jun 8, 2010
Showing with 15,992 additions and 20 deletions.
  1. +35 −19 Bio/SearchIO/blast.pm
  2. +134 −1 t/SearchIO/blast.t
  3. +15,823 −0 t/data/ZABJ4EA7014.CH878695.1.blast.txt
View
@@ -806,21 +806,6 @@ sub next_result {
}
);
}
- # bypasses this NCBI blast 2.2.13 extra output for now...
- # Features in/flanking this part of subject sequence:
- elsif (/^\sFeatures\s\w+\sthis\spart/xmso) {
- my $featline;
- $_ = $self->_readline;
- while($_ !~ /^\s*$/) {
- chomp;
- $featline .= $_;
- $_ = $self->_readline;
- }
- $self->_pushback($_);
- $featline =~ s{(?:^\s+|\s+^)}{}g;
- $featline =~ s{\n}{;}g;
- $self->{'_last_hspdata'}->{'Hsp_features'} = $featline;
- }
# move inside of a hit
elsif (/^>\s*(\S+)\s*(.*)?/) {
@@ -1041,19 +1026,49 @@ sub next_result {
elsif (
( $self->in_element('hit') || $self->in_element('hsp') )
&& # ncbi blast, works with 2.2.17
+ m/^\sFeatures\s\w+\sthis\spart/xmso
+ # If the line begins with "Features in/flanking this part of subject sequence:"
+ )
+ {
+ $self->in_element('hsp')
+ && $self->end_element( { 'Name' => 'Hsp' } );
+ my $featline;
+ $_ = $self->_readline;
+ while($_ !~ /^\s*$/) {
+ chomp;
+ $featline .= $_;
+ $_ = $self->_readline;
+ }
+ $self->_pushback($_);
+ $featline =~ s{(?:^\s+|\s+^)}{}g;
+ $featline =~ s{\n}{;}g;
+ $self->start_element( { 'Name' => 'Hsp' } );
+ $self->element(
+ {
+ 'Name' => 'Hsp_features',
+ 'Data' => $featline
+ }
+ );
+ $self->{'_seen_hsp_features'} = 1;
+ }
+ elsif (
+ ( $self->in_element('hit') || $self->in_element('hsp') )
+ && # ncbi blast, works with 2.2.17
m/Score\s*=\s*(\S+)\s*bits\s* # Bit score
- (?:\((\d+)\))?, # Missing for BLAT pseudo-BLAST fmt
+ (?:\((\d+)\))?, # Missing for BLAT pseudo-BLAST fmt
\s*Expect(?:\((\d+\+?)\))?\s*=\s*([^,\s]+) # E-value
/ox
)
{ # parse NCBI blast HSP
- $self->in_element('hsp')
- && $self->end_element( { 'Name' => 'Hsp' } );
+ if( !$self->{'_seen_hsp_features'} ) {
+ $self->in_element('hsp')
+ && $self->end_element( { 'Name' => 'Hsp' } );
+ $self->start_element( { 'Name' => 'Hsp' } );
+ }
# Some data clean-up so e-value will appear numeric to perl
my ( $bits, $score, $n, $evalue ) = ( $1, $2, $3, $4 );
$evalue =~ s/^e/1e/i;
- $self->start_element( { 'Name' => 'Hsp' } );
$self->element(
{
'Name' => 'Hsp_score',
@@ -2128,6 +2143,7 @@ sub end_element {
$self->{'_last_data'} = ''; # remove read data if we are at
# end of an element
$self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
+ $self->{'_seen_hsp_features'} = 0;
return $rc;
}
View
@@ -7,7 +7,7 @@ BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin(-tests => 1242);
+ test_begin(-tests => 1348);
use_ok('Bio::SearchIO');
}
@@ -1813,3 +1813,136 @@ while(my $query = $searchio->next_result) {
is($total_hsps, 2);
+# BLAST 2.2.20+ output file ZABJ4EA7014.CH878695.1.blast.txt
+# Tests SearchIO blast parsing of 'Features in/flanking this part of a subject sequence'
+$searchio = Bio::SearchIO->new(-format => 'blast',
+ -file => test_input_file('ZABJ4EA7014.CH878695.1.blast.txt'));
+
+$result = $searchio->next_result;
+# Parse BLAST header details
+is($result->algorithm, 'BLASTN');
+is($result->algorithm_version, '2.2.20+');
+is($result->algorithm_reference, 'Zheng Zhang, Scott Schwartz, Lukas Wagner, and
+Webb Miller (2000), "A greedy algorithm for aligning DNA
+sequences", J Comput Biol 2000; 7(1-2):203-14.
+');
+is($result->database_name, 'human build 35 genome database (reference assembly only)');
+is($result->database_entries, 378);
+is($result->database_letters, 2866055344);
+is($result->query_name, 'gi|95131563|gb|CH878695.1|');
+is($result->query_description, 'Homo sapiens 211000035829648 genomic scaffold');
+is($result->query_length, 29324);
+# Parse BLAST footer details
+is($result->get_statistic('posted_date'), 'Jul 26, 2007 3:20 PM');
+is($result->get_statistic('dbletters'), -1428911948);
+is($result->get_statistic('lambda'), '1.33');
+is($result->get_statistic('kappa'), '0.621');
+is($result->get_statistic('entropy'), '1.12');
+is($result->get_statistic('lambda_gapped'), '1.28');
+is($result->get_statistic('kappa_gapped'), '0.460');
+is($result->get_statistic('entropy_gapped'), '0.850');
+is($result->get_parameter('matrix'), 'blastn matrix:1 -2');
+is($result->get_parameter('gapopen'), 0);
+is($result->get_parameter('gapext'), 0);
+is($result->get_statistic('num_extensions'), 216);
+is($result->get_statistic('num_successful_extensions'), 216);
+is($result->get_parameter('expect'), '0.01');
+is($result->get_statistic('seqs_better_than_cutoff'), 10);
+is($result->get_statistic('number_of_hsps_better_than_expect_value_cutoff_without_gapping'), 0);
+is($result->get_statistic('number_of_hsps_gapped'), 216);
+is($result->get_statistic('number_of_hsps_successfully_gapped'), 212);
+is($result->get_statistic('length_adjustment'), 34);
+is($result->get_statistic('querylength'), 29290);
+is($result->get_statistic('effectivedblength'), 2866042492);
+is($result->get_statistic('effectivespace'), 83946384590680);
+is($result->get_statistic('effectivespaceused'), 83946384590680);
+is($result->get_statistic('A'), 0);
+is($result->get_statistic('X1'), 23);
+is($result->get_statistic('X1_bits'), '44.2');
+is($result->get_statistic('X2'), 32);
+is($result->get_statistic('X2_bits'), '59.1');
+is($result->get_statistic('X3'), 54);
+is($result->get_statistic('X3_bits'), '99.7');
+is($result->get_statistic('S1'), 23);
+is($result->get_statistic('S1_bits'), '43.6');
+is($result->get_statistic('S2'), 29);
+is($result->get_statistic('S2_bits'), '54.7');
+# Skip the 1st hit. It doesn't have any 'Features in/flanking this part of subject sequence:'
+$hit = $result->next_hit;
+# The 2nd hit has hsps with 'Features flanking this part of subject sequence:'
+$hit = $result->next_hit;
+is($hit->name, 'gi|51459264|ref|NT_077382.3|Hs1_77431');
+is($hit->description, 'Homo sapiens chromosome 1 genomic contig');
+is($hit->length, 237250);
+# In the 2nd hit, look at the 1st hsp
+$hsp = $hit->next_hsp;
+is($hsp->hit_features, "16338 bp at 5' side: PRAME family member 8 11926 bp at 3' side: PRAME family member 9");
+is($hsp->bits, 7286);
+is($hsp->score, 3945);
+is($hsp->expect, '0.0');
+is($hsp->hsp_length, 6145);
+is($hsp->num_identical, 5437);
+is(int sprintf("%.2f",$hsp->percent_identity), 88);
+is($hsp->gaps, 152);
+is($hsp->start('query'), 23225);
+is($hsp->start('sbjct'), 86128);
+is($hsp->end('query'), 29324);
+is($hsp->end('sbjct'), 92165);
+# In the 2nd hit, look at the 2nd hsp
+$hsp = $hit->next_hsp;
+is($hsp->hit_features, "25773 bp at 5' side: PRAME family member 3 3198 bp at 3' side: PRAME family member 5");
+is($hsp->bits, 4732);
+is($hsp->score, 2562);
+is($hsp->expect, '0.0');
+is($hsp->hsp_length, 4367);
+is($hsp->num_identical, 3795);
+is(int sprintf("%.2f",$hsp->percent_identity), 86);
+is($hsp->gaps, 178);
+is($hsp->start('query'), 23894);
+is($hsp->start('sbjct'), 37526);
+is($hsp->end('query'), 28193);
+is($hsp->end('sbjct'), 41781);
+# In the 2nd hit, look at the 3rd hsp
+$hsp = $hit->next_hsp;
+is($hsp->hit_features, "16338 bp at 5' side: PRAME family member 8 14600 bp at 3' side: PRAME family member 9");
+is($hsp->bits, 3825);
+is($hsp->score, 2071);
+is($hsp->expect, '0.0');
+is($hsp->hsp_length, 3406);
+is($hsp->num_identical, 2976);
+is(int sprintf("%.2f",$hsp->percent_identity), 87);
+is($hsp->gaps, 89);
+is($hsp->start('query'), 14528);
+is($hsp->start('sbjct'), 86128);
+is($hsp->end('query'), 17886);
+is($hsp->end('sbjct'), 89491);
+# In the 2nd hit, look at the 4th hsp
+$hsp = $hit->next_hsp;
+is($hsp->hit_features, "29101 bp at 5' side: PRAME family member 8 2120 bp at 3' side: PRAME family member 9");
+is($hsp->bits, 3241);
+is($hsp->score, 1755);
+is($hsp->expect, '0.0');
+is($hsp->hsp_length, 3158);
+is($hsp->num_identical, 2711);
+is(int sprintf("%.2f",$hsp->percent_identity), 85);
+is($hsp->gaps, 123);
+is($hsp->start('query'), 23894);
+is($hsp->start('sbjct'), 98891);
+is($hsp->end('query'), 27005);
+is($hsp->end('sbjct'), 101971);
+# In the 2nd hit, look at the 5th hsp
+$hsp = $hit->next_hsp;
+is($hsp->hit_features, "PRAME family member 13");
+is($hsp->bits, 3142);
+is($hsp->score, 1701);
+is($hsp->expect, '0.0');
+is($hsp->hsp_length, 2507);
+is($hsp->num_identical, 2249);
+is(int sprintf("%.2f",$hsp->percent_identity), 89);
+is($hsp->gaps, 63);
+is($hsp->start('query'), 3255);
+is($hsp->start('sbjct'), 128516);
+is($hsp->end('query'), 5720);
+is($hsp->end('sbjct'), 131000);
+
+
Oops, something went wrong.

0 comments on commit 0e70e88

Please sign in to comment.