Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
  • 3 commits
  • 1 file changed
  • 0 commit comments
  • 1 contributor
Commits on Mar 20, 2013
@jaredbischof Fixed bug where first cluster (if it was a singlet) was being printed…
… twice. Also, fixed bug where the last cluster was never being printed. Finally, reconfigured the code so the logic was sane.
4a6c2f8
@jaredbischof Added comments. 558dbf8
@jaredbischof Added back line that's needed to ignore first empty line. adff4a5
Showing with 32 additions and 20 deletions.
  1. +32 −20 bin/process_clusters
View
52 bin/process_clusters
@@ -24,13 +24,18 @@ if ( $help or !($file or $mapping or $prefix or $fasta) or !(-e $file) ){
exit(1);
}
+# This script reads the cluster output from qiime-uclust and prints out 2 files.
+# The first output file is a fasta file containing only the seed sequences.
+# The second output file is a summary of which sequences are in each cluster.
+# Note: the second output file contains one cluster summary per line.
+
open IN, "<".$file or die $!;
open MAP, ">".$mapping or die $!;
open FNA, ">".$fasta or die $!;
-my $cur_cluster = "";
-my $cur_seq_id = "";
-my $cur_seq = "";
+my $prev_cluster = "";
+my $prev_seq_id = "";
+my $prev_seq = "";
my @ids = ();
my @pids = ();
@@ -44,30 +49,37 @@ while (my $l = <IN>) {
if ($id_line =~ /^(\d+)\|(\d+\.\d\%|\*)\|(\S+)/) {
my ($cid,$pid,$seq_id) = ($1,$2,$3);
- if ($pid ne "*") {
+ # If this is the start of a new cluster, print the previous cluster.
+ if ($pid eq "*") {
+ # Checking if the previous cluster was a singlet or not. Singlets do not need to be in the mapping file.
+ if (scalar @ids) {
+ print MAP $prefix.$prev_cluster."\t".$prev_seq_id."\t".join(",",@ids)."\t".join(",",@pids)."\n";
+ print FNA ">".$prefix.$prev_cluster."\n$prev_seq\n";
+ } elsif ($prev_seq_id) {
+ print FNA ">$prev_seq_id\n$prev_seq\n";
+ }
+ $prev_cluster = $cid;
+ $prev_seq_id = $seq_id;
+ $prev_seq = $seq;
+ @ids = ();
+ @pids = ();
+ # If this is a cluster member, just add it to the list of ids.
+ } else {
push @ids, $seq_id;
push @pids, $pid;
next;
- } else {
- unless (scalar @ids) {
- if ($cur_seq_id) {
- print FNA ">$cur_seq_id\n$cur_seq\n";
- } else {
- print FNA ">$seq_id\n$seq\n";
- }
- } else {
- print MAP $prefix.$cur_cluster."\t".$cur_seq_id."\t".join(",",@ids)."\t".join(",",@pids)."\n";
- print FNA ">".$prefix.$cur_cluster."\n$cur_seq\n";
- }
}
- $cur_cluster = $cid;
- $cur_seq_id = $seq_id;
- $cur_seq = $seq;
- @ids = ();
- @pids = ();
}
}
+# Since the loop above prints the previous cluster, we need to print the last cluster here.
+if (scalar @ids) {
+ print MAP $prefix.$prev_cluster."\t".$prev_seq_id."\t".join(",",@ids)."\t".join(",",@pids)."\n";
+ print FNA ">".$prefix.$prev_cluster."\n$prev_seq\n";
+} elsif ($prev_seq_id) {
+ print FNA ">$prev_seq_id\n$prev_seq\n";
+}
+
close(IN);
close(MAP);
close(FNA);

No commit comments for this range

Something went wrong with that request. Please try again.