Permalink
Browse files

summarize feature type lengths from a GFF3 file

  • Loading branch information...
1 parent c8017af commit d3343978ae5e598f71c5516c8a30609d6756df4d @hyphaltip committed Oct 28, 2012
Showing with 29 additions and 0 deletions.
  1. +29 −0 seqfeature/summarize_features_len.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/perl -w
+use strict;
+use List::Util qw(sum);
+use Statistics::Descriptive;
+
+# Summarize per-type feature lengths from a GFF3 file.
+#
+# Usage: summarize_features_len.pl features.gff3
+#
+# The input is expected to be GFF3 with Parent and ID tags.  Feature
+# lengths are accumulated per feature type and per group, where the group
+# is the feature's Parent (each one, if several are listed) or, failing
+# that, the feature's own ID.  For every type one line is printed with the
+# mean, median, number of groups and summed length of those groups.
+
+my $file = shift;
+die "cannot run without input file" unless defined $file && length $file;
+
+# Three-argument open: the file name is never interpreted as a mode or pipe.
+open(my $fh, '<', $file) || die "cannot open $file: $!\n";
+
+my %types;    # $types{$type}{$group} = summed length of that group's features
+while (my $line = <$fh>) {
+    # An embedded FASTA section ends the annotation part of a GFF3 file.
+    last if $line =~ /^##FASTA/ || $line =~ /^>/;
+    next if $line =~ /^\#/ || $line =~ /^\s*$/;
+    $line =~ s/[\r\n]+$//;
+
+    # GFF3 columns are tab-separated; splitting on any whitespace would cut
+    # the attribute column at the first space inside a value.
+    my ($chrom,$src,$type,$start,$end,$score,$strand,$frame,$info) = split(/\t/,$line,9);
+    next unless defined $info;    # not a complete 9-column feature line
+    next unless $start =~ /^\d+$/ && $end =~ /^\d+$/;
+
+    # Limit 2 keeps any '=' inside a value; tags without '=' are dropped so
+    # the key/value list handed to the hash stays balanced.
+    my %info = map { split(/=/,$_,2) } grep { /=/ } split(/\s*;\s*/,$info);
+
+    my @groups;
+    if ( defined $info{'Parent'} && length $info{'Parent'} ) {
+        # A feature may list several parents; it contributes to each of them.
+        @groups = split(/,/,$info{'Parent'});
+    }
+    elsif ( defined $info{'ID'} && length $info{'ID'} ) {
+        @groups = ($info{'ID'});
+    }
+    else {
+        # No Parent and no ID: count the feature on its own, keyed by its
+        # line number, rather than merging all such features into one group.
+        @groups = ('__line_' . $.);
+    }
+
+    # GFF3 coordinates are 1-based and inclusive, hence the +1.
+    my $len = abs($end - $start) + 1;
+    $types{$type}->{$_} += $len for @groups;
+}
+close($fh);
+
+# Sorted so the report order is reproducible from run to run.
+for my $type ( sort keys %types ) {
+    my $stats = Statistics::Descriptive::Full->new();
+    $stats->add_data(values %{$types{$type}});
+    printf "%s mean=%.2f median=%d total_count=%d total_length=%d\n",$type, $stats->mean, $stats->median, $stats->count,$stats->sum;
+}

0 comments on commit d334397

Please sign in to comment.