-
Notifications
You must be signed in to change notification settings - Fork 0
/
genebody.pl
55 lines (43 loc) · 1.69 KB
/
genebody.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/perl
use strict;
use warnings;
my $genebody_dir = "$ENV{HOME}/automation/coordinate_sorted_bams/rseqc/genebodycoverage";
my $bam_path = "$ENV{HOME}/automation/coordinate_sorted_bams";
chdir $bam_path or die "Cannot change to directory $bam_path: $!";
print "Successfully changed to directory: $bam_path\n";
# Getting all the files matching the pattern *Aligned.sortedByCoord.out.bam
my @files = glob("*Aligned.sortedByCoord.out.bam");
my $total_count = 0;
my $file_count = 0;
my $average_count;
foreach my $file (@files) {
my $count = `samtools view -c $file`;
chomp $count;
print "Count for file $file is: $count\n";
if ($count =~ /^\d+$/) {
$total_count += int($count);
$file_count++;
} else {
print "Non-numeric count for file $file: $count\n";
}
}
if ($file_count > 0) {
$average_count = $total_count / $file_count;
print "Average count of alignments: $average_count\n";
# Calculate subsampling fraction here, inside the conditional block
my $s = 200000 / $average_count;
foreach my $file (@files) {
(my $base = $file) =~ s/Aligned\.sortedByCoord\.out\.bam//; # substitute the pattern found in between "/ / with nothing"
system("samtools view -s $s -o ${base}Aligned.sortedByCoord.out_subset.bam $file");
print "Successfully subsampled a proportion of aligned reads for $file\n";
}
} else {
print "No files processed, unable to calculate average.\n";
exit;
}
my @subsetfiles = glob("*Aligned.sortedByCoord.out_subset.bam");
foreach my $file (@subsetfiles) {
my $subset_count = `samtools view -c $file`;
chomp $subset_count;
print "Number of alignments in $file is: $subset_count\n";
}