Permalink
Switch branches/tags
tag-ensembl-stable-061 start snapshot-at-head-of-07-branch release-ensembl-06 release-06 release-06-2 release-1_01 release-1-7-1 release-1-7-0 release-1-7-0-RC6 release-1-7-0-RC5 release-1-7-0-RC4 release-1-6-zenodo release-1-6-924 release-1-6-923 release-1-6-922 release-1-6-921 release-1-6-920 release-1-6-910 release-0-9-3 release-0-9-2 release-0-9-0 release-0-7-2 release-0-7-1 release-0-7-0 release-0-05 release-0-05-1 release-0-04-4 release-0-04-3 release-0-04-2 release-0-04-1 prerelease-06 ontology-overhaul-start ontology-overhaul-end ontology-fix1 lightweight_feature join-0-04-to-0-05 gbrowse_1_65 for_gmod_0_003 bioperl-run-release-1-2-0 bioperl-release-1-6 bioperl-release-1-6-901 bioperl-release-1-6-9 bioperl-release-1-6-1 bioperl-release-1-5-2 bioperl-release-1-5-2-patch2 bioperl-release-1-5-2-patch1 bioperl-release-1-5-1 bioperl-release-1-5-1-rc4 bioperl-release-1-5-0 bioperl-release-1-5-0-rc2 bioperl-release-1-5-0-rc1 bioperl-release-1-4-0 bioperl-release-1-2-3 bioperl-release-1-2-2 bioperl-release-1-2-1 bioperl-release-1-2-0 bioperl-release-1-1-0 bioperl-release-1-0-2 bioperl-release-1-0-1 bioperl-release-1-0-0 bioperl-devel-1-3-04 bioperl-devel-1-3-03 bioperl-devel-1-3-02 bioperl-devel-1-3-01 bioperl-devel-1-1-1 bioperl-061-pre1 bioperl-06-1 bioperl-1-6-RC4 bioperl-1-6-RC3_15392 bioperl-1-6-RC3 bioperl-1-6-RC2_15306 bioperl-1-6-RC2 bioperl-1-6-RC1 bioperl-1-6-0_006 bioperl-1-6-0_005 bioperl-1-6-0_004 bioperl-1-6-0_003 bioperl-1-6-0_002 bioperl-1-6-0_001 bioperl-1-2-1-rc1 bioperl-1-0-alpha2-rc bioperl-1-0-alpha bioperl-1-0-0 before-05-to-06-trunk before-05-to-06-merge after004 after-05-06-merge after-05-06-merge-2
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
149 lines (107 sloc) 3.24 KB
#!perl
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Tools::SeqStats;
use Getopt::Long;
my $format = 'fasta';
my $file;
my $help =0;
my $aggregate;
GetOptions(
'f|format:s' => \$format,
'i|in:s' => \$file,
'h|help|?' => \$help,
'a|aggregate!'=> \$aggregate,
);
my $USAGE = "usage: gccalc.pl -f format -i filename\n";
if( $help ) {
die $USAGE;
}
$file = shift unless $file;
my $seqin;
if( defined $file ) {
print "Could not open file [$file]\n$USAGE" and exit unless -e $file;
$seqin = new Bio::SeqIO(-format => $format,
-file => $file);
} else {
$seqin = new Bio::SeqIO(-format => $format,
-fh => \*STDIN);
}
my ($total_base, $total_gc);
while( my $seq = $seqin->next_seq ) {
next if( $seq->length == 0 );
if( $seq->alphabet eq 'protein' ) {
warn("gccalc does not work on amino acid sequences ...skipping this seq");
next;
}
my $seq_stats = Bio::Tools::SeqStats->new('-seq'=>$seq);
my $hash_ref = $seq_stats->count_monomers(); # for DNA sequence
print "Seq: ", $seq->display_id, " ";
print $seq->desc if $seq->desc;
print " Len:", $seq->length, "\n";
$total_base += $seq->length;
$total_gc += $hash_ref->{'G'} + $hash_ref->{'C'};
printf "GC content is %.4f\n", ($hash_ref->{'G'} + $hash_ref->{'C'}) /
$seq->length();
foreach my $base (sort keys %{$hash_ref}) {
print "Number of bases of type ", $base, "= ", $hash_ref->{$base},"\n";
}
print "--\n";
}
if( $aggregate ) {
printf "Total GC content is %.4f out of %d bases\n", $total_gc / $total_base, $total_base;
}
# alternatively one could use code submitted by
# cckim@stanford.edu
sub calcgc {
my $seq = $_[0];
my @seqarray = split('',$seq);
my $count = 0;
foreach my $base (@seqarray) {
$count++ if $base =~ /[G|C]/i;
}
my $len = $#seqarray+1;
return $count / $len;
}
__END__
=head1 NAME
bp_gccalc - GC content of nucleotide sequences
=head1 SYNOPSIS
bp_gccalc [-f/--format FORMAT] [-h/--help] filename
or
bp_gccalc [-f/--format FORMAT] < filename
or
bp_gccalc [-f/--format FORMAT] -i filename
=head1 DESCRIPTION
This scripts prints out the GC content for every nucleotide sequence
from the input file.
=head1 OPTIONS
The default sequence format is fasta.
The sequence input can be provided using any of the three methods:
=over 3
=item unnamed argument
bp_gccalc filename
=item named argument
bp_gccalc -i filename
=item standard input
bp_gccalc < filename
=back
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
https://github.com/bioperl/bioperl-live/issues
=head1 AUTHOR - Jason Stajich
Email jason@bioperl.org
=head1 HISTORY
Based on script code (see bottom) submitted by cckim@stanford.edu
Submitted as part of bioperl script project 2001/08/06
=cut