Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

This commit was manufactured by cvs2svn to create branch

'release-0-04-bug'.

svn path=/bioperl-live/branches/release-0-04-bug/; revision=781
  • Loading branch information...
commit af1f2cb10012bfcbb20f04267555d8806edf663d 1 parent 147a8dd
authored December 16, 1998
2  Bio/Root/Utilities.pm
@@ -768,8 +768,6 @@ sub delete {
768 768
     } 
769 769
     my $ulval = unlink($fileName) > 0 or
770 770
 	$self->throw("Failed to delete file $fileName: $!"); 
771  
-
772  
-    print "\nUNLINK RETURN VAL = $ulval\n";
773 771
 }
774 772
 
775 773
 ######################################
4  Bio/Tools/Blast.pm
@@ -34,7 +34,7 @@ use strict;
34 34
 use vars qw($ID $VERSION $Blast @Blast_programs $Revision);
35 35
 
36 36
 $ID = 'Bio::Tools::Blast';
37  
-$VERSION  = 0.071; 
  37
+$VERSION  = 0.072; 
38 38
 $Revision = '$Id$';  #'
39 39
 
40 40
 ## Static Blast object. 
@@ -877,7 +877,7 @@ for use with "make test" during installation.
877 877
 
878 878
 =head1 VERSION
879 879
 
880  
-Bio::Tools::Blast.pm, 0.071
  880
+Bio::Tools::Blast.pm, 0.072
881 881
 
882 882
 
883 883
 =head1 FEEDBACK
15  Bio/Tools/Blast/CHANGES
... ...
@@ -1,5 +1,20 @@
1 1
 Revision history for Perl extension Bio::Tools::Blast.pm and related modules.
2 2
 
  3
+0.072  Wed Dec 16 05:05:21 1998
  4
+        - Fixed out of range exception in HSP::matches() as suggested by
  5
+          Michael Lonetto in bio.perl.org bug report #11.
  6
+        - Made changes in Sbjct.pm and HSP.pm to deal with the reporting of
  7
+          TBLAST[NX] data in amino acid coordinate space. This affects 
  8
+          frac_identical() and frac_conserved() in HSP.pm, and _tile_hsps(),
  9
+          length_aln(), frac_aligned_query(), frac_aligned_hit(), 
  10
+          frac_unaligned_query(), frac_unaligned_hit() in Sbjct.pm.
  11
+	  These also in response to bug report #11.
  12
+        - Fixed behavior of frac_identical() and frac_aligned() in 
  13
+          both Sbjct.pm and HSP.pm to correspond to the data reported
  14
+          by Blast. Default behavior now includes gaps instead of ignoring
  15
+          them as before. This was in response to a report by Eli Venter.
  16
+        - Cleaned up a few "uninitialized value" warnings.
  17
+
3 18
 0.071  Thu Dec 10 18:41:51 1998
4 19
         - Bio::Tools::Blast::Sbjct::_set_id() to no longer uppercases
5 20
           sequence ids for sbjct sequences. (Note however that the Blast
104  Bio/Tools/Blast/HSP.pm
@@ -27,7 +27,7 @@ use Bio::Root::Object ();
27 27
 use strict;
28 28
 use vars qw($ID $VERSION $GAP_SYMBOL @SCORE_CUTOFFS $Revision);
29 29
 $ID       = 'Bio::Tools::Blast::HSP';
30  
-$VERSION  = 0.071;
  30
+$VERSION  = 0.072;
31 31
 $Revision = '$Id$';  #'
32 32
 
33 33
 $GAP_SYMBOL    = '-';          # Need a more general way to handle gap symbols.
@@ -138,7 +138,7 @@ Steve A. Chervitz, sac@genome.stanford.edu
138 138
 
139 139
 =head1 VERSION
140 140
 
141  
-Bio::Tools::Blast::HSP.pm, 0.071
  141
+Bio::Tools::Blast::HSP.pm, 0.072
142 142
 
143 143
 =head1 SEE ALSO
144 144
 
@@ -296,6 +296,7 @@ sub _set_data {
296 296
 
297 297
     # Storing the match list in case it is needed later.
298 298
     $self->{'_matchList'} = \@matchList;
  299
+
299 300
 }
300 301
 
301 302
 
@@ -443,7 +444,8 @@ sub _set_seq_data {
443 444
 
444 445
     # Liberate some memory.
445 446
     @{$self->{'_queryList'}} = @{$self->{'_sbjctList'}} = ();
446  
-    $self->{'_queryList'} = $self->{'_sbjctList'} = undef;
  447
+    undef $self->{'_queryList'};
  448
+    undef $self->{'_sbjctList'};
447 449
 
448 450
     $self->{'_set_seq_data'} = 1;
449 451
 }
@@ -926,22 +928,45 @@ sub matches {
926 928
 	my($start,$stop) = $self->range($seqType);
927 929
 	if($beg == 0) { $beg = $start; $end = $beg+$end; }
928 930
 	elsif($end == 0) { $end = $stop; $beg = $end-$beg; }
929  
-	if($end > $stop) { $end = $stop; }
  931
+
  932
+	if($end >= $stop) { $end = $stop; } ##ML changed from if (end >stop)
  933
+	else { $end += 1;}   ##ML moved from commented position below, makes
  934
+                             ##more sense here
  935
+#	if($end > $stop) { $end = $stop; }
930 936
 	if($beg < $start) { $beg = $start; }
931  
-	else { $end += 1;}
  937
+#	else { $end += 1;}
932 938
 
933  
-	my $seq = substr($self->seq_str('match'), $beg-$start, ($end-$beg));
  939
+#	my $seq = substr($self->seq_str('match'), $beg-$start, ($end-$beg));
934 940
 
935  
-## DEBUGGING CODE:
936  
-#	my $seq = $self->seq_str('match');
937  
-#	if($seq =~ /FIG\+LDI/) {
938  
-#	    print "\n\nFIG+LDI FOUND IN: Hit ", $self->parent->name, ", Blast ", $self->parent->parent->name; <STDIN>;
939  
-#	}
940  
-#	if($self->parent->name eq '1AK5_' and $self->parent->parent->name eq 'YAR073W') {
941  
-#	    print "\nGOT MATCH SEQ:\n   $seq"; <STDIN>;
942  
-#	}
943  
-#	$seq = substr($seq, $beg-$start, ($end-$beg));
944  
-## END DEBUGGING CODE
  941
+	## ML: START fix for substr out of range error ------------------
  942
+	my $seq = "";
  943
+	if (($self->{'_prog'} eq 'TBLASTN') and ($seqType eq 'sbjct'))
  944
+	{
  945
+	    $seq = substr($self->seq_str('match'),
  946
+			  int(($beg-$start)/3), int(($end-$beg+1)/3));
  947
+
  948
+	} elsif (($self->{'_prog'} eq 'BLASTX') and ($seqType eq 'query'))
  949
+	{
  950
+	    ## ML: does BLASTX also need special handling?
  951
+	} else {
  952
+	    $seq = substr($self->seq_str('match'), 
  953
+			  $beg-$start, ($end-$beg));
  954
+	}
  955
+	## ML: End of fix for  substr out of range error -----------------
  956
+
  957
+	
  958
+	## ML: debugging code
  959
+	## This is where we get our exception.  Try printing out the values going
  960
+	## into this:
  961
+	##
  962
+#	 print STDERR 
  963
+#	     qq(*------------MY EXCEPTION --------------------\nSeq: ") , 
  964
+#	     $self->seq_str("$seqType"), qq("\n),$self->name,",(  index:";
  965
+#	 print STDERR  $beg-$start, ", len: ", $end-$beg," ), (HSPRealLen:", 
  966
+#	     CORE::length $self->seq_str("$seqType");
  967
+#	 print STDERR ", HSPCalcLen: ", $stop - $start +1 ," ), 
  968
+#	     ( beg: $beg, end: $end ), ( start: $start, stop: stop )\n";
  969
+	 ## ML: END DEBUGGING CODE----------
945 970
 
946 971
 	if(!CORE::length $seq) {
947 972
 	    $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
@@ -967,8 +992,16 @@ sub matches {
967 992
  Purpose   : Get the fraction of identical positions within the given HSP.
968 993
  Example   : $frac_iden = $hsp_object->frac_identical('query');
969 994
  Returns   : Float (2-decimal precision, e.g., 0.75).
970  
- Argument  : seq_type: 'query' | 'sbjct' (default = 'query')
  995
+ Argument  : seq_type: 'query' | 'sbjct' 
  996
+           : If no argument is provided, the longest sequence will be used.
971 997
  Throws    : n/a
  998
+ Comments  : The default behavior of using the longest sequence allows this method
  999
+           : to report the value reported by Blast when working with gapped alignments.
  1000
+           : The presence of gaps "inflates" the size of a sequence and Blast 
  1001
+           : reports the fraction identical using this inflated size.
  1002
+           : To get the fraction identical among only the aligned residues,
  1003
+           : ignoring the gaps, call this method with an argument of 'query'
  1004
+           : or 'sbjct'.
972 1005
 
973 1006
 See Also   : L<frac_conserved>(), L<matches>()
974 1007
 
@@ -981,14 +1014,18 @@ sub frac_identical {
981 1014
 # This saves storage and also permits flexibility in determining for which
982 1015
 # sequence (query or sbjct) the figure is to be calculated.
983 1016
 
984  
-    my( $self, $type ) = @_;
985  
-    $type  ||= 'query';
986  
-    ## Sensitive to member name format.
987  
-    $type = "_\L$type\E";
  1017
+    my( $self, $seqType ) = @_;
988 1018
 
989 1019
     $self->_set_seq_data() unless $self->{'_set_seq_data'};
990 1020
 
991  
-    sprintf( "%.2f", $self->{'_numIdentical'}/$self->{$type.'Length'});
  1021
+    if(!$seqType) {
  1022
+	$seqType = ($self->{'_queryLength'} > $self->{'_sbjctLength'}) 
  1023
+	           ? 'query' : 'sbjct';
  1024
+    }
  1025
+    ## Sensitive to member name format.
  1026
+    $seqType = "_\L$seqType\E";
  1027
+
  1028
+    sprintf( "%.2f", $self->{'_numIdentical'}/$self->{$seqType.'Length'});
992 1029
 }
993 1030
 
994 1031
 
@@ -1000,8 +1037,16 @@ sub frac_identical {
1000 1037
 	   : Blast report.)
1001 1038
  Example   : $frac_cons = $hsp_object->frac_conserved('query');
1002 1039
  Returns   : Float (2-decimal precision, e.g., 0.75).
1003  
- Argument  : seq_type: 'query' | 'sbjct' (default = 'query')
  1040
+ Argument  : seq_type: 'query' | 'sbjct' 
  1041
+           : If no argument is provided, the longest sequence will be used.
1004 1042
  Throws    : n/a
  1043
+ Comments  : The default behavior of using the longest sequence allows this method
  1044
+           : to report the value reported by Blast when working with gapped alignments.
  1045
+           : The presence of gaps "inflates" the size of a sequence and Blast 
  1046
+           : reports the fraction conserved using this inflated size.
  1047
+           : To get the fraction conserved among only the aligned residues,
  1048
+           : ignoring the gaps, call this method with an argument of 'query'
  1049
+           : or 'sbjct'.
1005 1050
 
1006 1051
 See Also   : L<frac_conserved>(), L<matches>()
1007 1052
 
@@ -1014,15 +1059,18 @@ sub frac_conserved {
1014 1059
 # This saves storage and also permits flexibility in determining for which
1015 1060
 # sequence (query or sbjct) the figure is to be calculated.
1016 1061
  
1017  
-    my( $self, $type ) = @_;
  1062
+    my( $self, $seqType ) = @_;
1018 1063
 
1019 1064
     $self->_set_seq_data() unless $self->{'_set_seq_data'};
1020 1065
 
1021  
-    $type  ||= 'query';
  1066
+    if(!$seqType) {
  1067
+	$seqType = ($self->{'_queryLength'} > $self->{'_sbjctLength'}) 
  1068
+	           ? 'query' : 'sbjct';
  1069
+    }
1022 1070
     ## Sensitive to member name format.
1023  
-    $type = "_\L$type\E";
  1071
+    $seqType = "_\L$seqType\E";
1024 1072
 
1025  
-    sprintf( "%.2f", $self->{'_numConserved'}/$self->{$type.'Length'});
  1073
+    sprintf( "%.2f", $self->{'_numConserved'}/$self->{$seqType.'Length'});
1026 1074
 }
1027 1075
 
1028 1076
 
@@ -1160,7 +1208,7 @@ sub strand {
1160 1208
     my( $self, $seqType ) = @_;
1161 1209
     $seqType  ||= (wantarray ? 'list' : 'query');
1162 1210
 
1163  
-    return undef if $seqType eq 'query' and $self->{'_prog'} eq 'TBLASTN';
  1211
+    return '' if $seqType eq 'query' and $self->{'_prog'} eq 'TBLASTN';
1164 1212
 
1165 1213
     ## Sensitive to member name format.
1166 1214
     $seqType = "_\L$seqType\E";
161  Bio/Tools/Blast/Sbjct.pm
@@ -28,7 +28,7 @@ use Exporter           ();
28 28
 use strict;
29 29
 use vars qw($ID $VERSION %SUMMARY_OFFSET $Revision);
30 30
 $ID = 'Bio::Tools::Blast::Sbjct';
31  
-$VERSION = 0.071;
  31
+$VERSION = 0.072;
32 32
 $Revision = '$Id$';  #'
33 33
 
34 34
 my $_layout     = '';
@@ -227,7 +227,7 @@ See the L<FEEDBACK> section for where to send bug reports and comments.
227 227
 
228 228
 =head1 VERSION
229 229
 
230  
-Bio::Tools::Blast::Sbjct.pm, 0.071
  230
+Bio::Tools::Blast::Sbjct.pm, 0.072
231 231
 
232 232
 =head1 COPYRIGHT
233 233
 
@@ -594,6 +594,11 @@ sub _set_hsps {
594 594
     
595 595
     $self->{'_length'} or $self->throw( "Can't determine hit sequence length.");
596 596
 
  597
+    # Adjust logical length based on BLAST flavor.
  598
+    if($_prog =~ /TBLAST[NX]/) {
  599
+	$self->{'_logical_length'} = $self->{'_length'} / 3;
  600
+    }
  601
+    
597 602
     if( not scalar @hspList) {
598 603
 	$self->throw( "Failed to build HSP list");
599 604
     } else {
@@ -778,7 +783,17 @@ sub _tile_hsps {
778 783
 	$self->ambiguous_aln('s'); 
779 784
 #	print "\n*** AMBIGUOUS ALIGNMENT: Sbjct\n\n";
780 785
     }
781  
-   
  786
+
  787
+    # Adjust length based on BLAST flavor.
  788
+    my $prog = $self->parent->program;
  789
+    if($prog eq 'TBLASTN') {
  790
+	$self->{'_length_aln_sbjct'} /= 3;
  791
+    } elsif($prog eq 'BLASTX' ) {
  792
+	$self->{'_length_aln_query'} /= 3;
  793
+    } elsif($prog eq 'TBLASTX') {
  794
+	$self->{'_length_aln_query'} /= 3;
  795
+	$self->{'_length_aln_sbjct'} /= 3;
  796
+    }
782 797
 }
783 798
 
784 799
 
@@ -1403,13 +1418,13 @@ sub num_hsps {
1403 1418
  Usage     : $sbjct_object->length();
1404 1419
  Purpose   : Get the total length of the hit sequence.
1405 1420
  Example   : $len    = $sbjct_object->length();
1406  
- Returns   : Integer 
  1421
+ Returns   : Integer 
1407 1422
  Argument  : n/a
1408 1423
  Throws    : n/a
1409 1424
  Comments  : Developer note: when using the built-in length function within
1410 1425
            : this module, call it as CORE::length().
1411 1426
 
1412  
-See Also   : L<length_aln>()
  1427
+See Also   : L<logical_length>(),  L<length_aln>()
1413 1428
 
1414 1429
 =cut
1415 1430
 
@@ -1421,21 +1436,69 @@ sub length {
1421 1436
 }    
1422 1437
 
1423 1438
 
  1439
+=head2 logical_length
  1440
+
  1441
+ Usage     : $sbjct_object->logical_length( [seq_type] );
  1442
+           : (mostly intended for internal use).
  1443
+ Purpose   : Get the logical length of the hit sequence.
  1444
+           : If the Blast is a TBLASTN or TBLASTX, the returned length 
  1445
+           : is the length of the would-be amino acid sequence (length/3).
  1446
+           : For all other BLAST flavors, this function is the same as length().
  1447
+ Example   : $len    = $sbjct_object->logical_length();
  1448
+ Returns   : Integer 
  1449
+ Argument  : seq_type = 'query' or 'sbjct' (default = 'query')
  1450
+ Throws    : n/a
  1451
+ Comments  : This is important for functions like frac_aligned_query()
  1452
+           : which need to operate in amino acid coordinate space when dealing
  1453
+           : with [T]BLAST[NX] type reports.
  1454
+
  1455
+See Also   : L<length>(), L<frac_aligned_query>(), L<frac_aligned_hit>()
  1456
+
  1457
+=cut
  1458
+
  1459
+#--------------------
  1460
+sub logical_length {
  1461
+#--------------------
  1462
+    my $self = shift;
  1463
+    my $seqType = shift || 'query';
  1464
+
  1465
+    # Return logical sbjct length
  1466
+    $seqType eq 'sbjct' and return 
  1467
+	$self->{'_logical_length'} || $self->{'_length'}; 
  1468
+
  1469
+    # Otherwise, return logical query length
  1470
+    my $qlen = $self->parent->length;    
  1471
+
  1472
+    # Adjust length based on BLAST flavor.
  1473
+    my $prog = $self->parent->program;
  1474
+    if($prog =~ /T?BLASTX/ ) {
  1475
+	$qlen /= 3;
  1476
+    }
  1477
+    return $qlen;
  1478
+}    
  1479
+
  1480
+
1424 1481
 
1425 1482
 =head2 length_aln
1426 1483
 
1427 1484
  Usage     : $sbjct_object->length_aln( [seq_type] );
1428 1485
  Purpose   : Get the total length of the aligned region for query or sbjct seq.
1429 1486
            : This number will include all HSPs
1430  
- Example   : $len    = $sbjct_object->length_aln(); #default = total
  1487
+ Example   : $len    = $sbjct_object->length_aln(); # default = query
1431 1488
            : $lenAln = $sbjct_object->length_aln('query');
1432 1489
  Returns   : Integer 
1433 1490
  Argument  : seq_Type = 'query' | 'sbjct'  (Default = 'query')
1434 1491
  Throws    : Exception if the argument is not recognized.
1435  
- Comments  : This method requires that all HSPs be tiled. If they have not
  1492
+ Comments  : This method will report the logical length of the alignment,
  1493
+           : meaning that for TBLAST[NX] reports, the length is reported
  1494
+           : using amino acid coordinate space (i.e., nucleotides / 3).
  1495
+           : 
  1496
+           : This method requires that all HSPs be tiled. If they have not
1436 1497
            : already been tiled, they will be tiled first.
  1498
+           : If you don't want the tiled data, iterate through each HSP
  1499
+           : calling length() on each (use hsps() to get the HSPs).
1437 1500
 
1438  
-See Also   : L<length>(), L<frac_aligned_query>(), L<frac_aligned_hit>(), L<gaps>(), L<_tile_hsps>()
  1501
+See Also   : L<length>(), L<frac_aligned_query>(), L<frac_aligned_hit>(), L<gaps>(), L<_tile_hsps>(), B<Bio::Tools::Blast::HSP::length()>
1439 1502
 
1440 1503
 =cut
1441 1504
 
@@ -1685,15 +1748,24 @@ sub range {
1685 1748
 
1686 1749
 =head2 frac_identical
1687 1750
 
1688  
- Usage     : $sbjct_object->frac_identical( seq_type );
  1751
+ Usage     : $sbjct_object->frac_identical( [seq_type] );
1689 1752
  Purpose   : Get the overall fraction of identical positions across all HSPs.
1690 1753
            : The number refers to only the aligned regions and does not
1691 1754
            : account for unaligned regions in between the HSPs, if any.
1692 1755
  Example   : $frac_iden = $sbjct_object->frac_identical('query');
1693 1756
  Returns   : Float (2-decimal precision, e.g., 0.75).
1694  
- Argument  : seq_type: 'query' | 'sbjct' (default = 'query')
  1757
+ Argument  : seq_type: 'query' | 'sbjct' 
  1758
+           : If no argument is provided, the longest sequence will be used.
1695 1759
  Throws    : n/a
1696  
- Comments  : If you need data for each HSP, use hsps() and then interate
  1760
+ Comments  : The default behavior of using the longest sequence allows this method
  1761
+           : to report the value reported by Blast when working with gapped alignments.
  1762
+           : The presence of gaps "inflates" the size of a sequence and Blast 
  1763
+           : reports the fraction identical using this inflated size.
  1764
+           : To get the fraction identical among only the aligned residues,
  1765
+           : ignoring the gaps, call this method with an argument of 'query'
  1766
+           : or 'sbjct'.
  1767
+           :
  1768
+           : If you need data for each HSP, use hsps() and then iterate
1697 1769
            : through the HSP objects.
1698 1770
            : This method requires that all HSPs be tiled. If they have not
1699 1771
            : already been tiled, they will be tiled first.
@@ -1706,10 +1778,15 @@ See Also   : L<frac_conserved>(), L<frac_aligned_query>(), L<matches>(), L<_tile
1706 1778
 sub frac_identical {
1707 1779
 #------------------
1708 1780
     my $self = shift;
  1781
+    my $seqType = shift;
1709 1782
 
1710 1783
     $self->_tile_hsps($self->parent->gapped) if not $self->{'_tile_hsps'};
1711 1784
 
1712  
-    my $seqType = (shift || 'query');
  1785
+    if(!$seqType) {
  1786
+	$seqType = ($self->{'_length_aln_query'} > $self->{'_length_aln_sbjct'}) 
  1787
+	           ? 'query' : 'sbjct';
  1788
+    }
  1789
+
1713 1790
     sprintf( "%.2f", $self->{'_totalIdentical'}/$self->{'_length_aln_'.$seqType});
1714 1791
 }
1715 1792
 
@@ -1717,15 +1794,24 @@ sub frac_identical {
1717 1794
 
1718 1795
 =head2 frac_conserved
1719 1796
 
1720  
- Usage     : $sbjct_object->frac_conserved( seq_type );
  1797
+ Usage     : $sbjct_object->frac_conserved( [seq_type] );
1721 1798
  Purpose   : Get the overall fraction of conserved positions across all HSPs.
1722 1799
            : The number refers to only the aligned regions and does not
1723 1800
            : account for unaligned regions in between the HSPs, if any.
1724 1801
  Example   : $frac_cons = $sbjct_object->frac_conserved('sbjct');
1725 1802
  Returns   : Float (2-decimal precision, e.g., 0.75).
1726  
- Argument  : seq_type: 'query' | 'sbjct' (default = 'query')
  1803
+ Argument  : seq_type: 'query' | 'sbjct' 
  1804
+           : If no argument is provided, the longest sequence will be used.
1727 1805
  Throws    : n/a
1728  
- Comments  : If you need data for each HSP, use hsps() and then interate
  1806
+ Comments  : The default behavior of using the longest sequence allows this method
  1807
+           : to report the value reported by Blast when working with gapped alignments.
  1808
+           : The presence of gaps "inflates" the size of a sequence and Blast 
  1809
+           : reports the fraction conserved using this inflated size.
  1810
+           : To get the fraction conserved among only the aligned residues,
  1811
+           : ignoring the gaps, call this method with an argument of 'query'
  1812
+           : or 'sbjct'.
  1813
+           :
  1814
+           : If you need data for each HSP, use hsps() and then interate
1729 1815
            : through the HSP objects.
1730 1816
            : This method requires that all HSPs be tiled. If they have not
1731 1817
            : already been tiled, they will be tiled first.
@@ -1738,10 +1824,15 @@ See Also   : L<frac_identical>(), L<matches>(), L<_tile_hsps>()
1738 1824
 sub frac_conserved {
1739 1825
 #--------------------
1740 1826
     my $self = shift;
  1827
+    my $seqType = shift;
1741 1828
 
1742 1829
     $self->_tile_hsps($self->parent->gapped) if not $self->{'_tile_hsps'};
1743 1830
 
1744  
-    my $seqType = (shift || 'query');
  1831
+    if(!$seqType) {
  1832
+	$seqType = ($self->{'_length_aln_query'} > $self->{'_length_aln_sbjct'}) 
  1833
+	           ? 'query' : 'sbjct';
  1834
+    }
  1835
+
1745 1836
     sprintf( "%.2f", $self->{'_totalConserved'}/$self->{'_length_aln_'.$seqType});
1746 1837
 }
1747 1838
 
@@ -1760,10 +1851,15 @@ sub frac_conserved {
1760 1851
  Throws    : n/a
1761 1852
  Comments  : If you need data for each HSP, use hsps() and then interate
1762 1853
            : through the HSP objects.
  1854
+           : To compute the fraction aligned, the logical length of the query
  1855
+           : sequence is used, meaning that for [T]BLASTX reports, the 
  1856
+           : full length of the query sequence is converted into amino acids
  1857
+           : by dividing by 3. This is necessary because of the way 
  1858
+           : the lengths of aligned sequences are computed.
1763 1859
            : This method requires that all HSPs be tiled. If they have not
1764 1860
            : already been tiled, they will be tiled first.
1765 1861
 
1766  
-See Also   : L<frac_aligned_hit>(), L<_tile_hsps>()
  1862
+See Also   : L<frac_aligned_hit>(), L<_tile_hsps>(), L<logical_length>(), L<length_aln>()
1767 1863
 
1768 1864
 =cut
1769 1865
 
@@ -1774,7 +1870,7 @@ sub frac_aligned_query {
1774 1870
 
1775 1871
     $self->_tile_hsps($self->parent->gapped) if not $self->{'_tile_hsps'};
1776 1872
 
1777  
-    sprintf( "%.2f", $self->{'_length_aln_query'}/$self->parent->length);
  1873
+    sprintf( "%.2f", $self->{'_length_aln_query'}/$self->logical_length('query'));
1778 1874
 }
1779 1875
 
1780 1876
 
@@ -1791,10 +1887,15 @@ sub frac_aligned_query {
1791 1887
  Throws    : n/a
1792 1888
  Comments  : If you need data for each HSP, use hsps() and then interate
1793 1889
            : through the HSP objects.
  1890
+           : To compute the fraction aligned, the logical length of the sbjct
  1891
+           : sequence is used, meaning that for TBLAST[NX] reports, the 
  1892
+           : full length of the sbjct sequence is converted into amino acids
  1893
+           : by dividing by 3. This is necessary because of the way 
  1894
+           : the lengths of aligned sequences are computed.
1794 1895
            : This method requires that all HSPs be tiled. If they have not
1795 1896
            : already been tiled, they will be tiled first.
1796 1897
 
1797  
-See Also   : L<frac_aligned_query>(), L<matches>(), L<_tile_hsps>()
  1898
+See Also   : L<frac_aligned_query>(), L<matches>(), L<_tile_hsps>(), L<logical_length>(), L<length_aln>()
1798 1899
 
1799 1900
 =cut
1800 1901
 
@@ -1805,7 +1906,7 @@ sub frac_aligned_hit {
1805 1906
 
1806 1907
     $self->_tile_hsps($self->parent->gapped) if not $self->{'_tile_hsps'};
1807 1908
 
1808  
-    sprintf( "%.2f", $self->{'_length_aln_sbjct'}/$self->length('seq'));
  1909
+    sprintf( "%.2f", $self->{'_length_aln_sbjct'}/$self->logical_length('sbjct'));
1809 1910
 }
1810 1911
 
1811 1912
 # Safety-net methods for those who try don't read or remember the API.
@@ -1830,12 +1931,14 @@ sub num_unaligned_sbjct {  my $self=shift; $self->num_unaligned_hit(@_); }
1830 1931
  Returns   : Integer
1831 1932
  Argument  : n/a
1832 1933
  Throws    : n/a
1833  
- Comments  : If you need data for each HSP, use hsps() and then interate
  1934
+ Comments  : See notes regarding logical lengths in the comments for frac_aligned_hit().
  1935
+           : They apply here as well.
  1936
+           : If you need data for each HSP, use hsps() and then interate
1834 1937
            : through the HSP objects.
1835 1938
            : This method requires that all HSPs be tiled. If they have not
1836 1939
            : already been tiled, they will be tiled first.
1837 1940
 
1838  
-See Also   : L<num_unaligned_query>(), L<_tile_hsps>()
  1941
+See Also   : L<num_unaligned_query>(), L<_tile_hsps>(), L<frac_aligned_hit>()
1839 1942
 
1840 1943
 =cut
1841 1944
 
@@ -1846,7 +1949,7 @@ sub num_unaligned_hit {
1846 1949
 
1847 1950
     $self->_tile_hsps($self->parent->gapped) if not $self->{'_tile_hsps'};
1848 1951
 
1849  
-    my $num = $self->length('seq') - $self->{'_length_aln_sbjct'};
  1952
+    my $num = $self->logical_length('sbjct') - $self->{'_length_aln_sbjct'};
1850 1953
     ($num < 0 ? 0 : $num );
1851 1954
 }
1852 1955
 
@@ -1860,12 +1963,14 @@ sub num_unaligned_hit {
1860 1963
  Returns   : Integer
1861 1964
  Argument  : n/a
1862 1965
  Throws    : n/a
1863  
- Comments  : If you need data for each HSP, use hsps() and then interate
  1966
+ Comments  : See notes regarding logical lengths in the comments for frac_aligned_query().
  1967
+           : They apply here as well.
  1968
+           : If you need data for each HSP, use hsps() and then interate
1864 1969
            : through the HSP objects.
1865 1970
            : This method requires that all HSPs be tiled. If they have not
1866 1971
            : already been tiled, they will be tiled first.
1867 1972
 
1868  
-See Also   : L<num_unaligned_hit>(), L<_tile_hsps>()
  1973
+See Also   : L<num_unaligned_hit>(), L<_tile_hsps>(), L<frac_aligned_query>()
1869 1974
 
1870 1975
 =cut
1871 1976
 
@@ -1876,7 +1981,7 @@ sub num_unaligned_query {
1876 1981
 
1877 1982
     $self->_tile_hsps($self->parent->gapped) if not $self->{'_tile_hsps'};
1878 1983
 
1879  
-    my $num = $self->parent->length - $self->{'_length_aln_query'};
  1984
+    my $num = $self->logical_length('query') - $self->{'_length_aln_query'};
1880 1985
     ($num < 0 ? 0 : $num );
1881 1986
 }
1882 1987
 
@@ -1993,7 +2098,7 @@ sub _display_stats {
1993 2098
 		$self->rank(), $self->name(),
1994 2099
 		$self->database() || 'UNKNOWN DB' ,$self->score(),$self->bits(),$self->p(),$self->expect(),
1995 2100
 		$self->gaps(), $self->n(), 
1996  
-		$self->length('seq'), $self->length_aln('query'),
  2101
+		$self->length(), $self->length_aln('query'),
1997 2102
 		$self->ambiguous_aln(),
1998 2103
 		$self->frac_aligned_query, $self->frac_aligned_hit, 
1999 2104
 		$self->matches('iden'), $self->frac_identical('query'), 
@@ -2003,7 +2108,7 @@ sub _display_stats {
2003 2108
 		$self->rank(), $self->name(),
2004 2109
 		$self->database()  || 'UNKNOWN DB' ,$self->score(),$self->bits(),$self->expect(),
2005 2110
 		$self->gaps(), $self->num_hsps, 
2006  
-		$self->length('seq'), $self->length_aln('query'),
  2111
+		$self->length(), $self->length_aln('query'),
2007 2112
 		$self->ambiguous_aln(),
2008 2113
 		$self->frac_aligned_query, $self->frac_aligned_hit, 
2009 2114
 		$self->matches('iden'), $self->frac_identical('query'), 
6  Changes
... ...
@@ -1,5 +1,11 @@
1 1
 Revision history for Bioperl core modules
2 2
 
  3
+0.04.1  Wed Dec 16 05:39:15 1998
  4
+        - Bug fixes in Bio::Tools::Blast modules, version 0.072
  5
+          (see Bio::Tools::Blast::CHANGES).
  6
+        - Compile/SW/Makefile.PL now removes *.o and *.a files 
  7
+          with make clean.
  8
+
3 9
 0.04  Tue Dec  8 07:49:19 1998
4 10
 	- Lots of new modules added including:
5 11
            * Ewan Birney's Bio::SimpleAlign.pm, Bio::Tools::AlignFactory.pm,
BIN  examples/blast/out/tblastn_206.out.gz
Binary file not shown

0 notes on commit af1f2cb

Please sign in to comment.
Something went wrong with that request. Please try again.