-
Notifications
You must be signed in to change notification settings - Fork 0
/
removePrecursor.pl
64 lines (49 loc) · 1.49 KB
/
removePrecursor.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/perl
use strict;
use PerlIO::gzip;
my $file1 = shift; #pre-tRNAs bed12
my $file2 = shift; #sam
my $file3 = shift; #fq
my $flank = shift || 50; #length of added flanking region
open FILE1, "<:gzip(autopop)", "$file1" or die "can t open $file1\n";
open FILE2, "<:gzip(autopop)", "$file2" or die "can t open $file2\n";
open(FILE3, "gunzip -c $file3|") or die "Cannot open $file3\n";
my %tRNA = ();
my %read = ();
my @entry = ();
my $flankEnd = $flank - 6; # consider CCACCA tail
while(<FILE1>){
chomp $_;
my @id = split/\t/,$_;
my $idfull = "$id[3]($id[5])";
$tRNA{$idfull} = eval join '+', split/,/,$id[10]; #length
}
while(<FILE2>){
chomp $_;
my @id = split/\t/,$_;
my $start = $id[3];
my $len = length($id[9]);
my @cigar = split(/(I|M|X|D)/, $id[5]);
my $I = 0;
my $D = 0;
for(my $i=0; $i < scalar(@cigar); $i++){
if($cigar[$i] eq "I"){ $I += $cigar[$i-1];}
if($cigar[$i] eq "D"){ $D += $cigar[$i-1];}
}
my $end = $start + $len -1 + $D - $I;
if($start > $flank && $end <= $tRNA{$id[2]}-$flankEnd){
$read{$id[0]} = 0;
}
}
while(<FILE3>){
chomp;
push @entry, $_;
if (scalar(@entry) == 4) {
my ($id, $seq, $plusLine, $qual) = @entry;
@entry = ();
my @ID = split/\@/,$id;
if(exists $read{$ID[1]}){
print "$id\n$seq\n$plusLine\n$qual\n";
}
}
}