Merge pull request #19 from carandraug/script/extract_feature_seq

Script/extract feature seq, fixes bug#3270
bioperl · Jul 26, 2011 · 9309042 · 9309042
2 parents 39cedcd + 87ec9d5
commit 9309042
Showing 1 changed file with 52 additions and 34 deletions.
diff --git a/scripts/seq/bp_extract_feature_seq.PLS b/scripts/seq/bp_extract_feature_seq.PLS
@@ -1,6 +1,8 @@
 #!perl
 use strict;
 use warnings;
+use Bio::SeqIO;
+use Getopt::Long;
 # Author Jason Stajich <jason@bioperl.org>
 
 =head1 NAME
@@ -9,76 +11,92 @@ bp_extract_feature_seq - extract the corresponding sequence for a specified feat
 
 =head1 SYNOPSIS
 
-bp_extract_feature_seq.PLS -i file --format genbank --feature=CDS -o output.fa
+bp_extract_feature_seq [--format FORMAT] [--feature CDS] [--output FILE] [--input] FILE
 
-=head1 DESCRIPTION 
+=head1 DESCRIPTION
 
 This script will extract the sequence for all the features you specify.
 
+=head1 OPTIONS
+
+=over
+
+=item B<-i>, B<--input>
+
+Specifies the sequence file to be read.
+
+=item B<--format>
+
+Format of the file specifed by B<--input>. If not given, it will try to guess the
+correct format from the file extension.
+
+=item B<--feature>
+
+Feature to be extracted. By default, it extracts the CDS feature.
+
+=item B<-o>, B<--output>
+
+File where the extracted features will be saved. If not specified, STDOUT is used.
+
 =head1 FEEDBACK
 
 =head2 Mailing Lists
 
 User feedback is an integral part of the evolution of this and other
 Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list.  Your participation is much appreciated.
+the Bioperl mailing list. Your participation is much appreciated.
 
-  bioperl-l@bioperl.org                  - General discussion
-  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+  L<bioperl-l@bioperl.org>                  - General discussion
+  L<http://bioperl.org/wiki/Mailing_lists>  - About the mailing lists
 
 =head2 Reporting Bugs
 
 Report bugs to the Bioperl bug tracking system to help us keep track
 of the bugs and their resolution. Bug reports can be submitted via
 email or the web:
 
-  https://redmine.open-bio.org/projects/bioperl/
+  L<https://redmine.open-bio.org/projects/bioperl/>
 
 =head1 AUTHOR
 
- Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
+ Jason Stajich <jason-at-bioperl-dot-org>
 
 =cut
 
-use Bio::SeqIO;
-use Getopt::Long;
-
 my ($input,$format,$featuretype,$output);
 $featuretype ='CDS';
 GetOptions(
-	   'i|input:s' => \$input,
-	   'format:s'  => \$format,
-	   'feature:s' => \$featuretype,
-	   'o|output:s'=> \$output);
+           'i|input:s' => \$input,
+           'format:s'  => \$format,
+           'feature:s' => \$featuretype,
+           'o|output:s'=> \$output);
 
 $input || shift if @ARGV;
 
 my $in = new Bio::SeqIO(-file => $input,
-			-format => $format);
+                        -format => $format);
 my $out;
 if ($output ) {
-    $out = new Bio::SeqIO(-file => ">$output");
+    $out = new Bio::SeqIO(-file => ">$output", -format => 'fasta');
 } else { 
-    $out = new Bio::SeqIO(); # use STDOUT for output
+    $out = new Bio::SeqIO(-format => 'fasta'); # use STDOUT for output
 }
 
 my $count = 1;
-while( my $seq = $in->next_seq ) {    
-    foreach my $f ( grep { $_->primary_tag =~ /$featuretype/i } 
-		    $seq->get_SeqFeatures ) {
-	my $s = $f->spliced_seq;
-	if( $featuretype =~ /gene|CDS/ ) {
-	    $s->display_id($f->has_tag('gene') ? join(',',sort $f->each_tag_value('gene')) :
-			   $f->has_tag('label') ? join(',',$f->each_tag_value('label')): 
-			   $s->display_id);
-	} else {
-	    $s->display_id(sprintf("%s_%s_%d",
-				   $seq->display_id, 
-				   $f->primary_tag,
-				   $count++));
-	}
-	$out->write_seq($s);
+while( my $seq = $in->next_seq ) {
+    foreach my $f ( grep { $_->primary_tag =~ /$featuretype/i }
+                    $seq->get_SeqFeatures ) {
+        my $s = $f->spliced_seq;
+        if( $featuretype =~ /gene|CDS/ ) {
+            $s->display_id($f->has_tag('gene') ? join(',',sort $f->each_tag_value('gene')) :
+                           $f->has_tag('label') ? join(',',$f->each_tag_value('label')): 
+                           $s->display_id);
+        } else {
+            $s->display_id(sprintf("%s_%s_%d",
+                                   $seq->display_id,
+                                   $f->primary_tag,
+                                   $count++));
+        }
+        $out->write_seq($s);
     }
 }
-
-__END__