Permalink
Browse files

Added autodetection of file format in Bio::SeqIO::MultiFile

  • Loading branch information...
1 parent b77eb5d commit 6f4888b002e3e9c13049231e94da2d47d1413bb7 @fangly fangly committed Feb 19, 2014
Showing with 101 additions and 107 deletions.
  1. +75 −92 Bio/SeqIO/MultiFile.pm
  2. +2 −0 Changes
  3. +24 −15 t/SeqIO/MultiFile.t
View
167 Bio/SeqIO/MultiFile.pm
@@ -27,6 +27,7 @@ Bio::SeqIO::MultiFile - Treating a set of files as a single input stream
Bio::SeqIO::MultiFile provides a simple way of bundling a whole
set of identically formatted sequence input files as a single stream.
+The file format is determined automatically by C<Bio::SeqIO> when no
+C<-format> argument is given.
=head1 FEEDBACK
@@ -81,33 +82,26 @@ use base qw(Bio::SeqIO);
# _initialize is where the heavy stuff will happen when new is called
sub _initialize {
# Reads the FILES and FORMAT named arguments, queues the files and loads
# the first one. Throws when FILES is missing or is not a reference, and
# when no first file could be loaded (_load_file() returned 0).
- my($self,@args) = @_;
-
- $self->SUPER::_initialize(@args);
-
- my ($file_array,$format) = $self->_rearrange([qw(
- FILES
- FORMAT
- )],
- @args,
- );
- if( !defined $file_array || ! ref $file_array ) {
- $self->throw("Must have an array files for MultiFile");
- }
-
- if( !defined $format ) {
- $self->throw("Must have a format for MultiFile");
- }
-
- $self->{'_file_array'} = [];
-
- $self->_set_file(@$file_array);
- $self->_format($format);
- if( $self->_load_file() == 0 ) {
- $self->throw("Unable even to initialise the first file");
- }
+ my($self, @args) = @_;
+
+ $self->SUPER::_initialize(@args);
+
+ my ($file_array, $format) = $self->_rearrange([qw(FILES FORMAT)], @args);
+ if( !defined $file_array || ! ref $file_array ) {
+ $self->throw("Must have an array files for MultiFile");
+ }
+
+ $self->{'_file_array'} = [];
+ $self->_set_file(@$file_array);
+
# FORMAT is optional on the new side: when it is omitted, _load_file()
# records the format reported by the SeqIO object built for the first file.
+ $self->format($format) if defined $format;
+
+ if( $self->_load_file() == 0 ) {
+ $self->throw("Unable to initialise the first file");
+ }
}
+
=head2 next_seq
Title : next_seq
@@ -117,25 +111,23 @@ sub _initialize {
Returns :
Args :
-
=cut
sub next_seq{
- my ($self,@args) = @_;
-
- my $seq = $self->_current_seqio->next_seq();
- if( !defined $seq ) {
- if( $self->_load_file() == 0) {
- return;
- } else {
- return $self->next_seq();
- }
- } else {
- return $seq;
- }
-
+ # Returns the next sequence of the combined stream, moving on to the
+ # next queued file whenever the current one is exhausted. Returns
+ # nothing (empty list / undef) once every file has been consumed.
+ my ($self) = @_;
+ # Loop rather than recurse: one stack frame per exhausted file would
+ # trigger "Deep recursion" warnings on long runs of empty files.
+ while (1) {
+ my $seq = $self->_current_seqio->next_seq();
+ return $seq if defined $seq;
+ # Current file is exhausted: stop if no further file can be loaded
+ return if $self->_load_file() == 0;
+ }
}
+
=head2 next_primary_seq
Title : next_primary_seq
@@ -145,25 +137,23 @@ sub next_seq{
Returns :
Args :
-
=cut
sub next_primary_seq{
- my ($self,@args) = @_;
-
- my $seq = $self->_current_seqio->next_primary_seq();
- if( !defined $seq ) {
- if( $self->_load_file() == 0) {
- return;
- } else {
- return $self->next_primary_seq();
- }
- } else {
- return $seq;
- }
-
+ # Returns the next primary sequence of the combined stream, moving on
+ # to the next queued file whenever the current one is exhausted.
+ # Returns nothing (empty list / undef) once every file has been consumed.
+ my ($self) = @_;
+ # Loop rather than recurse: one stack frame per exhausted file would
+ # trigger "Deep recursion" warnings on long runs of empty files.
+ while (1) {
+ my $seq = $self->_current_seqio->next_primary_seq();
+ return $seq if defined $seq;
+ # Current file is exhausted: stop if no further file can be loaded
+ return if $self->_load_file() == 0;
+ }
}
+
=head2 _load_file
Title : _load_file
@@ -173,26 +163,32 @@ sub next_primary_seq{
Returns :
Args :
-
=cut
sub _load_file{
- my ($self,@args) = @_;
-
- my $file = shift(@{$self->{'_file_array'}});
- if( !defined $file ) {
- return 0;
- }
- my $seqio = Bio::SeqIO->new( '-format' => $self->_format(), -file => $file);
- # should throw an exception - but if not...
- if( !defined $seqio) {
- $self->throw("no seqio built for $file!");
- }
+ # Takes the next file off the queue and makes a Bio::SeqIO object for
+ # it the current stream. Returns 0 when the queue is empty, 1 on
+ # success; throws if no SeqIO object could be built.
+ my ($self, @args) = @_;
+ my $file = shift @{$self->{'_file_array'}};
+ if( !defined $file ) {
+ return 0;
+ }
+ my $seqio;
+ my $format = $self->format;
+ if ($format) {
+ $seqio = Bio::SeqIO->new( -file => $file, -format => $format );
+ } else {
+ # No format known yet: let Bio::SeqIO work it out for this file
+ $seqio = Bio::SeqIO->new( -file => $file );
+ }
- $self->_current_seqio($seqio);
- return 1;
+ # should throw an exception - but if not...
+ if( !defined $seqio) {
+ $self->throw("Could not build SeqIO object for $file!");
+ }
+ # Remember the detected format so the remaining files reuse it. This
+ # must come after the defined check above, otherwise an undefined
+ # $seqio would die on the method call before the check could fire.
+ $self->format($seqio->format) if not $format;
+ $self->_current_seqio($seqio);
+ return 1;
}
+
=head2 _set_file
Title : _set_file
@@ -202,16 +198,14 @@ sub _load_file{
Returns :
Args :
-
=cut
sub _set_file{
# Appends the given files to the '_file_array' queue, from which
# _load_file() shifts one entry at a time.
- my ($self,@files) = @_;
-
- push(@{$self->{'_file_array'}},@files);
-
+ my ($self, @files) = @_;
+ push @{$self->{'_file_array'}}, @files;
}
+
=head2 _current_seqio
Title : _current_seqio
@@ -221,37 +215,26 @@ sub _set_file{
Returns : value of _current_seqio
Args : newvalue (optional)
-
=cut
sub _current_seqio{
# Get/set accessor for the '_current_seqio' slot: stores $value when it
# is defined and always returns the stored value. Because of the defined
# check, passing undef reads the slot and cannot clear it.
- my ($obj,$value) = @_;
- if( defined $value) {
- $obj->{'_current_seqio'} = $value;
+ my ($obj, $value) = @_;
+ if( defined $value) {
+ $obj->{'_current_seqio'} = $value;
}
return $obj->{'_current_seqio'};
-
}
-=head2 _format
-
- Title : _format
- Usage : $obj->_format($newval)
- Function:
- Example :
- Returns : value of _format
- Args : newvalue (optional)
-
-=cut
+# We override the format() method of Bio::Root::IO with a simple get/set
-sub _format{
- my ($obj,$value) = @_;
- if( defined $value) {
- $obj->{'_format'} = $value;
+sub format{
# Stores $value in the '_format' slot when it is defined and always
# returns the stored value; passing undef reads the slot without clearing it.
+ my ($obj, $value) = @_;
+ if( defined $value) {
+ $obj->{'_format'} = $value;
}
return $obj->{'_format'};
-
}
+
1;
View
2 Changes
@@ -48,6 +48,8 @@ CPAN releases are branched from 'master'.
'-bits' and '-hit_filter' will now work with other Bio::SearchIO formats
besides Blast, instead of being ignored. Added tests for all moved methods
using HMMER outputs and ran the full test suite; everything passes [fjossandon]
+ * Bio::SeqIO::MultiFile
+ - Autodetection of file format [fangly]
* Bio::Tools::GuessSeqFormat:
- Format detection from non-seekable filehandles such as STDIN [fangly]
View
39 t/SeqIO/MultiFile.t
@@ -4,26 +4,35 @@
use strict;
BEGIN {
- use lib '.';
+ use lib '.';
use Bio::Root::Test;
- test_begin(-tests => 3);
+ # Planned tests: use_ok, two constructor checks, the sequence count
+ # and the autodetected format.
+ test_begin(-tests => 5);
-
- use_ok('Bio::SeqIO::MultiFile');
+
+ use_ok 'Bio::SeqIO::MultiFile';
}
my $verbose = test_debug();
-my $mf = Bio::SeqIO::MultiFile->new(-format => 'Fasta' ,
- -verbose => $verbose,
- -files =>
- [ test_input_file('multi_1.fa'),
- test_input_file('multi_2.fa')]);
-ok defined $mf;
+
+# Test multiple files, with a specified format
+ok my $mf = Bio::SeqIO::MultiFile->new(
+ -format => 'Fasta' ,
+ -verbose => $verbose,
+ -files => [ test_input_file('multi_1.fa'), test_input_file('multi_2.fa')],
+);
+
my $count = 0;
-eval {
- while (my $seq = $mf->next_seq() ) {
- $count++;
- }
-};
-is( $count,12 );
+while (my $seq = $mf->next_seq() ) {
+ $count++;
+}
+is $count, 12;
+
+
+# Automatically determine format
+ok $mf = Bio::SeqIO::MultiFile->new(
+ -verbose => $verbose,
+ -files => [ test_input_file('multi_1.fa'), test_input_file('multi_2.fa')],
+);
+
+is $mf->format, 'fasta';

0 comments on commit 6f4888b

Please sign in to comment.