Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

HMMER: Added support of CS lines in Hmmer2 outputs instead

of ignoring them, which can be retrieved using the method
developed for Hmmer3. This method has changed its name from
"consensus_structure "to "consensus_string" to be consistent
with "query_string", "homology_string", etc.
  • Loading branch information...
commit a8dd0800eb5954e94ed3a473ccfff475d57b191e 1 parent 76822a4
@fjossandon fjossandon authored
View
10 Bio/Search/HSP/GenericHSP.pm
@@ -473,11 +473,11 @@ sub homology_string{
return $previous;
}
-=head2 consensus_structure
+=head2 consensus_string
- Title : consensus_structure
- Usage : my $cs_string = $hsp->consensus_structure;
- Function: Retrieves the consensus structure line for this HSP as a string (HMMer3).
+ Title : consensus_string
+ Usage : my $cs_string = $hsp->consensus_string;
+ Function: Retrieves the consensus structure line for this HSP as a string (HMMER).
: If the model had any consensus structure or reference line annotation
: that it inherited from a multiple alignment (#=GC SS cons,
: #=GC RF annotation in Stockholm files), that information is shown
@@ -487,7 +487,7 @@ sub homology_string{
=cut
-sub consensus_structure {
+sub consensus_string {
my ($self,$value) = @_;
my $previous = $self->{CS_SEQ};
if( defined $value || ! defined $previous ) {
View
43 Bio/SearchIO/hmmer2.pm
@@ -113,6 +113,7 @@ BEGIN {
'Hsp_hitgaps' => 'HSP-hit_gaps',
'Hsp_querygaps' => 'HSP-query_gaps',
'Hsp_qseq' => 'HSP-query_seq',
+ 'Hsp_csline' => 'HSP-cs_seq',
'Hsp_hseq' => 'HSP-hit_seq',
'Hsp_midline' => 'HSP-homology_seq',
'Hsp_align-len' => 'HSP-hsp_length',
@@ -381,14 +382,10 @@ sub next_result {
$count = 0;
my %domaincounter;
my $second_tier = 0;
+ my $csline = '';
while ( defined( $_ = $self->_readline ) ) {
- next if ( /^Align/o
- || ( $count != 1 && /^\s+RF\s+[x\s]+$/o )
- );
-
- # fix for bug 2632
- next if ($_ =~ m/^\s+CS\s+/o && $count == 0);
+ next if ( /^Align/o );
if ( m/^Histogram/o
|| m!^//!o
@@ -560,7 +557,13 @@ sub next_result {
# accumulates all the of the alignment lines into
# three array slots and then tests for the
# end of the line
- if (/^(\s+ \*->) (\S+)/ox) {
+ if ($_ =~ m/^\s+(?:CS|RF)\s+/o && $count == 0) {
+ # Buffer the CS line now and process it later at
+ # midline point, where $prelength and width will be known
+ $csline = $_;
+ next;
+ }
+ elsif (/^(\s+ \*->) (\S+)/ox) {
# start of domain
$prelength = CORE::length($1);
$width = 0;
@@ -652,10 +655,19 @@ sub next_result {
$self->element(
{
'Name' => 'Hsp_midline',
- 'Data' =>
- substr( $_, $prelength, $width )
+ 'Data' => substr( $_, $prelength, $width )
}
);
+ if ($csline ne '') {
+ $self->element(
+ {
+ 'Name' => 'Hsp_csline',
+ 'Data' => substr( $csline, $prelength, $width )
+
+ }
+ );
+ $csline = '';
+ }
}
else {
$self->element(
@@ -664,6 +676,15 @@ sub next_result {
'Data' => substr( $_, $prelength )
}
);
+ if ($csline ne '') {
+ $self->element(
+ {
+ 'Name' => 'Hsp_csline',
+ 'Data' => substr( $csline, $prelength )
+ }
+ );
+ $csline = '';
+ }
}
}
elsif ( $count == 2 ) {
@@ -857,7 +878,7 @@ sub end_element {
# Hsp are sort of weird, in that they end when another
# object begins so have to detect this in end_element for now
if ( $nm eq 'Hsp' ) {
- foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
+ foreach (qw(Hsp_csline Hsp_qseq Hsp_midline Hsp_hseq)) {
my $data = $self->{'_last_hspdata'}->{$_};
if ($data && $_ eq 'Hsp_hseq') {
# replace hmm '.' gap symbol by '-'
@@ -975,7 +996,7 @@ sub characters {
my ( $self, $data ) = @_;
if ( $self->in_element('hsp')
- && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
+ && $data->{'Name'} =~ /Hsp\_(?:qseq|hseq|csline|midline)/o
&& defined $data->{'Data'} )
{
$self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
View
4 Bio/SearchIO/hmmer3.pm
@@ -1019,7 +1019,7 @@ sub end_element {
# Hsp are sort of weird, in that they end when another
# object begins so have to detect this in end_element for now
if ( $nm eq 'Hsp' ) {
- foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
+ foreach (qw(Hsp_csline Hsp_qseq Hsp_midline Hsp_hseq Hsp_pline)) {
my $data = $self->{'_last_hspdata'}->{$_};
if ( $data && $_ eq 'Hsp_hseq' ) {
@@ -1116,7 +1116,7 @@ sub characters {
my ( $self, $data ) = @_;
if ( $self->in_element('hsp')
- && $data->{'Name'} =~ /Hsp\_(qseq|hseq|csline|pline|midline)/o
+ && $data->{'Name'} =~ /Hsp\_(?:qseq|hseq|csline|pline|midline)/o
&& defined $data->{'Data'} )
{
$self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
View
59 t/SearchIO/hmmer.t
@@ -7,7 +7,7 @@ BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin( -tests => 740 );
+ test_begin( -tests => 748 );
use_ok('Bio::SearchIO');
}
@@ -121,9 +121,9 @@ while ( $result = $searchio->next_result ) {
length( $hsp->homology_string ),
'Check if query string and homology string have an equal length'
);
- # Hmmpfam don't have PP or CS strings, these are tests to check for side effects
- is( $hsp->posterior_string, '' );
- is( $hsp->consensus_structure, '' );
+ # This Hmmpfam don't have PP or CS strings, these are tests to check for side effects
+ is( $hsp->posterior_string, '' );
+ is( $hsp->consensus_string, '' );
}
}
if ( defined( $hit = $result->next_model ) ) {
@@ -552,8 +552,8 @@ while ( $result = $searchio->next_result ) {
'Check for query string'
);
# Hmmsearch2 don't have PP or CS strings, these are tests to check for side effects
- is( $hsp->posterior_string, '' );
- is( $hsp->consensus_structure, '' );
+ is( $hsp->posterior_string, '' );
+ is( $hsp->consensus_string, '' );
$hit = $result->next_hit;
is( $hit->name, 'CATL_HUMAN', 'Check hit name' );
@@ -562,18 +562,41 @@ while ( $result = $searchio->next_result ) {
float_is( $hit->significance, 6.1e-134, 'Check hit significance' );
}
-# test for bug 2632 - CS lines should get ignored without breaking the parser
+# test for bug 2632 - CS lines are captured without breaking the parser
$searchio = Bio::SearchIO->new(
-format => 'hmmer',
-file => test_input_file('hmmpfam_cs.out')
);
-$result = $searchio->next_result;
-my $hit = $result->next_hit;
-my $hsp = $hit->next_hsp;
-is( $hsp->seq_str,
- 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES',
- 'Check for hsp seq_str'
-);
+if (defined ($result = $searchio->next_result) ) {
+ my $hit = $result->next_hit;
+ my $hsp = $hit->next_hsp;
+
+ is ($hsp->seq_str, $hsp->query_string);
+ is (length($hsp->seq_str), length($hsp->query_string));
+ is (length($hsp->homology_string), length($hsp->query_string));
+ is (length($hsp->consensus_string), length($hsp->query_string));
+
+ is( $hsp->consensus_string,
+ 'EEEEEEEEETSSHSBHHHHHHHHHHHHHGGGGSSCSTTSSCECEEEEEEECTCCCHHHHHHHCT----S GC-EEEEEEE-SSHHHHHHHHHHHHHHHHHTT-EEEEEEE--B-GGGS-HHHHHC--EEEEEEEE-TT--HHHHHHCEEEEECHSCHHHHTHHH. BEEEEEESSEEEEEECC-GGGHHHHBHGGGSTTEEBSEEEEEECESSSSSCTGGGSSCEEECCCTTCEEEEEEEEETTTHHHHHHHHHHTSCCCSSTTCGHHHHCC-SSS-TTSCHHHHHHHHHHHHHHTT--HHHHHHHHS----TT-GGGTST-HHHHHHHHHHHHHHCCHCCEEEEEEETSSEEEEEEETTTSCESEEEEEEEEEE.TTEEEEEESSC',
+ 'Check for consensus structure string'
+ );
+ is( $hsp->seq_str,
+ 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES',
+ 'Check for hsp seq_str'
+ );
+ is( $hsp->query_string,
+ 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES',
+ 'Check for query string'
+ );
+ is( $hsp->hit_string,
+ 'CGvlGfiAhikgkpshkivedaleaLerLeHRGavgADgktGDGAGIltqiPdgFFrevakelGieLpe-gqYAVGmvFLPqdelaraearkifEkiaeeeGLeVLGWReVPvnnsvLGetAlatePvIeQvFvgapsgdgedfErrLyviRkrieksivaenvn----fYiCSLSsrTIVYKGMLtseQLgqFYpDLqderfeSalAivHsRFSTNTfPsWplAQPfRVnslwgggivlAHNGEINTlrgNrnwMraRegvlksplFgddldkLkPIvneggSDSaalDnvlEllvraGRslpeAlMMlIPEAWqnnpdmdkdrpekraFYeylsglmEPWDGPAalvftDGryavgAtLDRNGLTRPaRygiTrdldkDglvvvaSEa',
+ 'Check for hit string'
+ );
+ is( $hsp->homology_string,
+ 'CGv GfiA+ ++ ++hkiv +aleaL+++eHRGa++AD ++GDGAGI t+iP+++F++ ++++i++ ++ +VGm+FLP l+ + i+E +++ee+Le++GWR VP+ +vLG++A + P++eQvF+ +++ +++ +E++L+++Rk+iek+i+ + + ++fYiCSLS++TIVYKGM++s++LgqFY+DL++++++S++Ai+H+RFSTNT+P+WplAQP+R ++ HNGEINTl gN nwM++Re +l+s++++d++++LkPI n+++SDSa+lD ++Ell+++GRs++eAlM+l+PEA+qn+pd +++e+ +FYey+sgl+EPWDGPA++vft+G++ +gAtLDRNGL RPaRy+iT kD+lv+v+SE+',
+ 'Check for homology string'
+ );
+}
# Tests for hmmer3 output here
$searchio = Bio::SearchIO->new(
@@ -876,7 +899,7 @@ while ( $result = $searchio->next_result ) {
is (length($hsp->homology_string), length($hsp->query_string));
- is( $hsp->consensus_structure,
+ is( $hsp->consensus_string,
'',
'Check for consensus structure string'
);
@@ -975,7 +998,7 @@ while ( $result = $searchio->next_result ) {
is (length($hsp->homology_string), length($hsp->query_string));
- is( $hsp->consensus_structure,
+ is( $hsp->consensus_string,
'S-TTEEEEEEEETTSEEEEEEEESTTS-HHHHHHHHHHHHHHHGGGS-HHHHHH-EEEEEEECCECEEEEEEESSSTS-HHHHHHHHHHCTHHHHHTSTTEEEEEESS.--EEEEEEE-HHHHHCTT--HHHHHHHHHHHSSB-EEEECTT-SB-EEEE-SB---SCCHHCT-EEEETTSEEEEHHHCEEEEEEESSSS-EEEETTCEEEEEEEEEETTSBHHHHHHHHHHHHHCCGGGSSTTEEEEEEEESHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSSHCCCHHHHHHHHHHHHHHHHHHHHTT--EEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCSS-HHHHHHHHHHHHCCHHHHHHHHHHHHCCGGGGSBHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHCS----TT-CC..............................CHHHHHHHHHHHHHHHHHHHHHHHHHSCHHHHHHHHHHHHH.HHHHHCCS-BESS----TSEEEEEEE-STTC-HHHHHHHHHHHHHHHH...TTTTEEEEEEEESESSSS..E........CTTEEEEEEEE--CTTS-SCCCSHHHHHHHHHHHC.CTSTSSEEEEEE-SSSCCCSSSSSEEEEEEE.TSSSCHHHHHHHHHHHHHHHCCSTTEECEEESS-S-EEEEEEEE-HHHHHHCTB-HHHHHHHHHHHHT-..EEEEEEEETTE...EEEEEEEE-GGGSSSGGGGCC-EEEETTSE.EEECGGCEEEEEEEE-SEEEEETTCEEEEEEEEESTTS...-HHHHHHHHHHCCTT..SSTTEEEEEECHHHHHHHHCCCHHHHHHHHHHHHHHHHHHHCTSSSTCHHHHTTHHHHHHHHHHHHHHTT--BSHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCTTTBHHHHHHHHHHHHCHHHHHHHHHHHHHCCHHHHTT-STTHHHHHHHHHHHHHHHHHHHHCHHHHHHHHHHHHH',
'Check for consensus structure string'
);
@@ -1395,7 +1418,7 @@ is( $result->num_hits(), 2, 'Check num_hits' );
is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), '1.000' );
is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.981 );
- is( $hsp->consensus_structure,
+ is( $hsp->consensus_string,
'',
'Check for consensus structure string'
);
@@ -1460,7 +1483,7 @@ is( $result->num_hits(), 2, 'Check num_hits' );
is (length($hsp->homology_string), length($hsp->query_string));
- is( $hsp->consensus_structure,
+ is( $hsp->consensus_string,
'',
'Check for consensus structure string'
);
Please sign in to comment.
Something went wrong with that request. Please try again.