Skip to content
This repository
Browse code

handle pairwise and parameterize the gap size cutoff

  • Loading branch information...
commit 57fd3900fb4ae5c086237031d46a44d8d01f441a 1 parent 4004d72
Jason Stajich authored
12  conservation_profile/MSA_scan_for_large_indels.pl
@@ -3,9 +3,11 @@
3 3
 use Bio::AlignIO;
4 4
 use Getopt::Long;
5 5
 
  6
+my $Gapchar = '-';
6 7
 my $Min_gapsize = 40;
7 8
 my $Max_gapsize = 10000;
8 9
 my $Max_Npercent = 0.50;
  10
+my $ignoremono = 1;
9 11
 
10 12
 my $alndir = 'alignments';
11 13
 my $ref_genome;
@@ -19,6 +21,10 @@
19 21
 	   'n|names:s' => \$namesfile,
20 22
 	   'd|dir:s' => \$alndir,
21 23
 	   'v|verbose!' => \$debug,
  24
+	   'min:i'      => \$Min_gapsize,
  25
+           'max:i'      => \$Max_gapsize,
  26
+	   'p|percent:s'=> \$Max_Npercent,
  27
+	   'pairwise!' => sub { $ignoremono = 0 },
22 28
 	   );
23 29
 die"must have refgenome with -r or --ref\n" unless $ref_genome;
24 30
 
@@ -96,16 +102,16 @@
96 102
 		my $ch = substr($s,0,1,'');
97 103
 		$alleles{$ch}++;
98 104
 		if( $si != $order ) {
99  
-		    $allgaps{$i}++ if $ch eq '-';
  105
+		    $allgaps{$i}++ if $ch eq $Gapchar;
100 106
 		}
101 107
 		$si++;
102 108
 	    }
103  
-	    next if keys %alleles == 1;	# ignore where monomorphic
  109
+	    next if $ignoremono && keys %alleles == 1;	# ignore where monomorphic
104 110
 	    if( ! exists $alleles{'-'} ) {
105 111
 		$snv{$i} = join(",",keys %alleles); # store the SNP alleles
106 112
 	    }
107 113
 	}
108  
-	my @gaps = sort { $a <=> $b } grep { $allgaps{$_} > 1 } keys %allgaps;
  114
+	my @gaps = sort { $a <=> $b } grep { ! $ignoremono || $allgaps{$_} > 1 } keys %allgaps;
109 115
 	my @snps = sort { $a <=> $b } keys %snv;
110 116
 	my @collapse_gaps = &collapse_nums(@gaps); # run the collapse algorithm to get runs of GAPs
111 117
 	warn("collapse_gaps = @collapse_gaps\n") if $debug;

0 notes on commit 57fd390

Please sign in to comment.
Something went wrong with that request. Please try again.