Permalink
Browse files

Add support for version 4, all tests pass

  • Loading branch information...
1 parent d5c7e75 commit 251eb6a96669f9078ce94759b5ebeaf17d895eed @bosborne bosborne committed Mar 26, 2013
View
@@ -35,9 +35,9 @@ the central portion of the "site" field in the meme file. The start
and end coordinates are derived from the "Start" field. See
L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for more information.
-This module can only parse MEME version 3.0 and greater. Previous
+This module can only parse MEME version 3 and 4. Previous
versions have output formats that are more difficult to parse
-correctly. If the meme output file is not version 3.0 or greater,
+correctly. If the meme output file is not version 3.0 or greater
we signal an error.
=head1 FEEDBACK
@@ -84,11 +84,10 @@ use base qw(Bio::AlignIO);
# Constants
my $MEME_VERS_ERR =
-"MEME output file must be generated by version 3.0 or higher";
+ "MEME output file must be generated by version 3.0 or higher";
my $MEME_NO_HEADER_ERR =
-"MEME output file contains no header line (ex: MEME version 3.0)";
-my $HTML_VERS_ERR =
-"MEME output file must be generated with the -text option";
+ "MEME output file contains no header line (ex: MEME version 3.0)";
+my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
=head2 next_aln
@@ -102,87 +101,107 @@ my $HTML_VERS_ERR =
=cut
sub next_aln {
- my ($self) = @_;
- my $aln = Bio::SimpleAlign->new(-source => 'meme');
- my $line;
- my $good_align_sec = 0;
- my $in_align_sec = 0;
- my $evalue;
- while (!$good_align_sec && defined($line = $self->_readline())) {
- if (!$in_align_sec) {
- # Check for the meme header
- if ($line =~ /^\s*MEME\s+version\s+(\S+)/ ){
- $self->{'meme_vers'} = $1;
+ my ($self) = @_;
+ my $aln = Bio::SimpleAlign->new( -source => 'meme' );
+ my $line;
+ my $good_align_sec = 0;
+ my $in_align_sec = 0;
+ my $evalue;
+ while ( !$good_align_sec && defined( $line = $self->_readline() ) ) {
+ if ( !$in_align_sec ) {
+
+ # Check for the meme header
+ if ( $line =~ /^\s*MEME\s+version\s+(\S+)/ ) {
+ $self->{'meme_vers'} = $1;
my ($vers) = $self->{'meme_vers'} =~ /^(\d)/;
- $self->throw($MEME_VERS_ERR) unless ($vers >= 3);
- $self->{'seen_header'} = 1;
- }
- # Check if they've output the HTML version
- if ($line =~ /\<TITLE\>/i){
- $self->throw($HTML_VERS_ERR);
- }
- # Grab the evalue
- if ($line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/) {
- $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
- $evalue = $1;
- }
- # Check if we're going into an alignment section
- if ($line =~ /sites sorted by position/) {
- $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
- $in_align_sec = 1;
- }
- } elsif ($line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
- (\S+)\s+([.ACTGNX\-]*)\s+([ACTGNX\-]+)\s+
- ([.ACTGNX\-]*)/xi ) {
- # Got a sequence line
- my $seq_name = $1;
- my $strand = ($2 eq '-') ? -1 : 1;
- my $start_pos = $3;
- my $central = uc($6);
-
- # my $p_val = $4;
- # my $left_flank = uc($5);
- # my $right_flank = uc($7);
-
- # Info about the flanking sequence
- # my $start_len = ($strand > 0) ? length($left_flank) :
- # length($right_flank);
- # my $end_len = ($strand > 0) ? length($right_flank) :
- # length($left_flank);
-
- # Make the sequence. Meme gives the start coordinate at the left
- # hand side of the motif relative to the INPUT sequence.
- my $end_pos = $start_pos + length($central) - 1;
- my $seq = Bio::LocatableSeq->new
- ('-seq' => $central,
- '-display_id' => $seq_name,
- '-start' => $start_pos,
- '-end' => $end_pos,
- '-strand' => $strand,
- '-alphabet' => $self->alphabet,
- );
- # Add the sequence motif to the alignment
- $aln->add_seq($seq);
- } elsif (($line =~ /^\-/) || ($line =~ /Sequence name/)){
- # These are acceptable things to be in the site section
- } elsif ($line =~ /^\s*$/){
- # This ends the site section
- $in_align_sec = 0;
- $good_align_sec = 1;
- } else{
- $self->warn("Unrecognized format:\n$line");
- return 0;
- }
- }
- # Signal an error if we didn't find a header section
- $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
-
- if ($good_align_sec) {
- $aln->score($evalue);
- return $aln;
- }
-
- return;
+ $self->throw($MEME_VERS_ERR) unless ( $vers >= 3 );
+ $self->{'seen_header'} = 1;
+ }
+
+ # Check if they've output the HTML version
+ if ( $line =~ /\<TITLE\>/i ) {
+ $self->throw($HTML_VERS_ERR);
+ }
+
+ # Grab the evalue
+ if ( $line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/ ) {
+ $self->throw($MEME_NO_HEADER_ERR)
+ unless ( $self->{'seen_header'} );
+ $evalue = $1;
+ }
+
+ # Check if we're going into an alignment section
+ if ( $line =~ /sites sorted by position/ ) {
+ $self->throw($MEME_NO_HEADER_ERR)
+ unless ( $self->{'seen_header'} );
+ $in_align_sec = 1;
+ }
+ }
+ # The first regexp is for version 3, the second is for version 4
+ elsif ( $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
+ \S+\s+[.ACTGNX\-]*\s+([ACTGNX\-]+)\s+
+ ([.ACTGNX\-]*)/xi
+ ||
+ $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
+ \S+\s+\.\s+([ACTGNX\-]+)/xi
+ )
+ {
+ # Got a sequence line
+ my $seq_name = $1;
+ my $strand = ( $2 eq '-' ) ? -1 : 1;
+ my $start_pos = $3;
+ my $central = uc($4);
+
+ # my $p_val = $4;
+ # my $left_flank = uc($5);
+ # my $right_flank = uc($7);
+
+ # Info about the flanking sequence
+ # my $start_len = ($strand > 0) ? length($left_flank) :
+ # length($right_flank);
+ # my $end_len = ($strand > 0) ? length($right_flank) :
+ # length($left_flank);
+
+ # Make the sequence. Meme gives the start coordinate at the left
+ # hand side of the motif relative to the INPUT sequence.
+ my $end_pos = $start_pos + length($central) - 1;
+ my $seq = Bio::LocatableSeq->new(
+ -seq => $central,
+ -display_id => $seq_name,
+ -start => $start_pos,
+ -end => $end_pos,
+ -strand => $strand,
+ -alphabet => $self->alphabet,
+ );
+
+ # Add the sequence motif to the alignment
+ $aln->add_seq($seq);
+ }
+ elsif ( ( $line =~ /^\-/ ) || ( $line =~ /Sequence name/ ) ) {
+
+ # These are acceptable things to be in the site section
+ }
+ elsif ( $line =~ /^\s*$/ ) {
+
+ # This ends the site section
+ $in_align_sec = 0;
+ $good_align_sec = 1;
+ }
+ else {
+ $self->warn("Unrecognized format:\n$line");
+ return 0;
+ }
+ }
+
+ # Signal an error if we didn't find a header section
+ $self->throw($MEME_NO_HEADER_ERR) unless ( $self->{'seen_header'} );
+
+ if ($good_align_sec) {
+ $aln->score($evalue);
+ return $aln;
+ }
+
+ return;
}
=head2 write_aln
@@ -196,22 +215,22 @@ sub next_aln {
=cut
sub write_aln {
- my ($self,@aln) = @_;
- $self->throw_not_implemented();
+ my ( $self, @aln ) = @_;
+ $self->throw_not_implemented();
}
# ----------------------------------------
# - Private methods
# ----------------------------------------
sub _initialize {
- my($self,@args) = @_;
+ my ( $self, @args ) = @_;
- # Call into our base version
- $self->SUPER::_initialize(@args);
+ # Call into our base version
+ $self->SUPER::_initialize(@args);
- # Then initialize our data variables
- $self->{'seen_header'} = 0;
+ # Then initialize our data variables
+ $self->{'seen_header'} = 0;
}
1;
View
@@ -1,36 +1,33 @@
# -*-Perl-*- Test Harness script for Bioperl
-# $Id: meme.t 14971 2008-10-28 16:08:52Z cjfields $
use strict;
BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin(-tests => 14);
+ test_begin(-tests => 20);
use_ok('Bio::AlignIO::meme');
}
my $DEBUG = test_debug();
-my ($str,$aln,$strout,$status);
-
# MEME
-# this file has no Strand column
-$str = Bio::AlignIO->new(
- -file => test_input_file('test.meme'),
+# this file has no Strand column, and it's version 3.0
+my $str = Bio::AlignIO->new(
+ -file => test_input_file('test-3.0-1.meme'),
-format => 'meme');
isa_ok($str,'Bio::AlignIO');
-$aln = $str->next_aln();
+my $aln = $str->next_aln();
isa_ok($aln,'Bio::Align::AlignI');is $aln->length,25;
is $aln->num_sequences,4;
is $aln->get_seq_by_pos(3)->seq(),"CCTTAAAATAAAATCCCCACCACCA";
is $aln->get_seq_by_pos(3)->strand,"1";
-# this file has a Strand column
+# this file has a Strand column, also version 3.0
$str = Bio::AlignIO->new(
- -file => test_input_file('test.meme2'),
+ -file => test_input_file('test-3.0-2.meme'),
-format => 'meme');
isa_ok($str,'Bio::AlignIO');
$aln = $str->next_aln();
@@ -39,3 +36,14 @@ is $aln->num_sequences,8;
is $aln->get_seq_by_pos(8)->seq(),"CCAGTCTCCCCTGAATACCC";
is $aln->get_seq_by_pos(7)->strand,"-1";
is $aln->get_seq_by_pos(6)->strand,"1";
+
+# version 4.9
+$str = Bio::AlignIO->new(
+ -file => test_input_file('test-4.9.meme'),
+ -format => 'meme');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');is $aln->length,21;
+is $aln->num_sequences,47;
+is $aln->get_seq_by_pos(3)->seq(),"AGAGAAACAAGAGGCCTCTTT";
+is $aln->get_seq_by_pos(3)->strand,"1";
File renamed without changes.
File renamed without changes.
Oops, something went wrong.

0 comments on commit 251eb6a

Please sign in to comment.