Permalink
Browse files

Rename bkgdstats and related variables for cleaner code

  • Loading branch information...
1 parent 9368afc commit 0db56502419bc1e32572aa1e8a676ce757d2094d @jherrero jherrero committed May 11, 2016
Showing with 55 additions and 42 deletions.
  1. +38 −27 eForge/eForge.pm
  2. +17 −15 eforge.pl
View
@@ -49,7 +49,7 @@ our $VERSION = '0.01';
our (@ISA, @EXPORT);
use Exporter;
@ISA = qw(Exporter);
-@EXPORT = qw(get_all_datasets get_all_arrays get_all_proximity_filters process_file get_random_matching_picks process_overlaps get_probe_annotations_and_overlap_for_dataset get_samples_from_dataset assign bkgrdstat proximity_filter);
+@EXPORT = qw(get_all_datasets get_all_arrays get_all_proximity_filters process_file get_random_matching_picks process_overlaps get_probe_annotations_and_overlap_for_dataset get_samples_from_dataset assign save_probe_annotation_stats proximity_filter);
=head1 SYNOPSIS
@@ -66,36 +66,47 @@ process_overlaps
get_probe_annotations_and_overlap_for_dataset
get_samples_from_dataset
assign
-bkgrdstat
+save_probe_annotation_stats
proximity_filter
=head1 SUBROUTINES/METHODS
-=head2 bkgrdstat
-
-Gives the background statistics.
-
-=cut
-
-sub bkgrdstat{
- my ($test, $lab, $flag) = @_;
- my $bfh;
- my $file = "$lab.bkgrd.stats";
- if ($flag eq "test") {
- open $bfh, ">", $file or die "cannot open $file";
- }
- else{
- open $bfh, ">>", $file or die "cannot open $file";
- }
- my (@feature, @cpg_island_relationship);
- foreach my $probeid (keys %{$$test{'MVPS'}}){
- my ($feature, $cpg_island_relationship) = split "\t", $$test{'MVPS'}{$probeid}{'PARAMS'};
- push @feature, $feature;
- push @cpg_island_relationship, $cpg_island_relationship;
- }
- say $bfh join("\t", $flag, "feature", @feature);
- say $bfh join("\t", $flag, "cpg_island_relationship", @cpg_island_relationship);
- }
+
+=head2 save_probe_annotation_stats
+
+ Arg[1] : hashref $overlaps (see get_overlaps)
+ Arg[2] : string $outdir
+ Arg[3] : string $label
+ Arg[4] : string $set_id
+ Returns :
+ Example : save_probe_annotation_stats($overlaps, ".", "Unnamed", "test");
+ Example : save_probe_annotation_stats($overlaps, ".", "Unnamed", 23);
+ Description : Save stats about the probe annotations on a text file. The $set_id is either
+ "test" (which relates to the input probe_ids, the ones to be tested for
+ enrichment) or the random pick number.
+ The file will contain for each set the whole list of gene features and CpG islands
+ relationships for the probes in that set.
+ Exceptions : Dies if file cannot be opened
+
+=cut
+
+sub save_probe_annotation_stats {
+ my ($overlaps, $out_dir, $lab, $flag) = @_;
+
+ my $fh;
+ my $file = "$out_dir/$lab.overlaps.stats.txt";
+ open(STATS, ">>$file") or die "cannot open $file";
+ my (@gene_features, @cpg_island_relationships);
+ foreach my $probeid (keys %{$overlaps->{'MVPS'}}){
+ my ($this_gene_feature, $this_cpg_island_relationship) =
+ split("\t", $overlaps->{'MVPS'}->{$probeid}->{'PARAMS'});
+ push @gene_features, $this_gene_feature;
+ push @cpg_island_relationships, $this_cpg_island_relationship;
+ }
+ say STATS join("\t", $flag, "gene_features", @gene_features);
+ say STATS join("\t", $flag, "cpg_island_relationships", @cpg_island_relationships);
+ close(STATS);
+}
=head2 process_file
View
@@ -94,9 +94,9 @@ =head1 OPTIONS
Give a value as the lower threshold and only MVPs with -log10 pvalues >= to the threshold will be
analysed. Default is no filtering.
-=item B<--bkgrd>
+=item B<--save_stats>
-Output background stats for investigation.
+Output annotation stats for the original and the random picks.
=item B<--reps INT>
@@ -108,7 +108,9 @@ =head1 OPTIONS
eForge will report MVPs removed due to proximity with another MVP in the list and will randomly pick
one of the probes among the set of probes that are in proximity (within 1 kb of each other).
-To turn off proximity filtering specify -noproxy
+At the moment, this is a dummy flag as only one proximity filter is available for each array. It
+will become useful if the database and code support more than one. At the moment to turn off
+proximity filtering, simply specify -noproxy
=item B<--noproxy>
@@ -202,13 +204,13 @@ =head1 ACKNOWLEDGEMENTS
my $min_num_probes = 5; # the minimum number of probes allowed for test. Set to 5 as we have binomial p
-my ($dataset, $filename, $bkgrdstat, $noplot,
+my ($dataset, $filename, $save_probe_annotation_stats, $noplot,
$help, $man, $proxy, $noproxy, $depletion, $filter, $out_dir, $probe_list,
$web, $autoopen);
GetOptions (
'dataset=s' => \$dataset,
- 'bkgrd' => \$bkgrdstat,
+ 'save_stats|bkgrd' => \$save_probe_annotation_stats,
'array|bkgd=s' => \$array,
'label=s' => \$label,
'f=s' => \$filename,
@@ -238,6 +240,8 @@ =head1 ACKNOWLEDGEMENTS
my $ug = new Data::UUID;
$out_dir = $ug->to_hexstring($ug->create());
}
+mkdir $out_dir;
+
# Define the thresholds to use.
if ($thresh) {
@@ -390,8 +394,8 @@ =head1 ACKNOWLEDGEMENTS
my $overlaps = process_overlaps($annotated_probes, $cells, $dataset);
# generate stats on the background selection
-if (defined $bkgrdstat) {
- bkgrdstat($overlaps, $lab, "test");
+if (defined $save_probe_annotation_stats) {
+ save_probe_annotation_stats($overlaps, $out_dir, $lab, "test");
}
@@ -409,21 +413,21 @@ =head1 ACKNOWLEDGEMENTS
# for bkgrd set need to get distribution of counts instead
# make a hash of data -> cell -> bkgrd-Set -> overlap counts
-my %overlaps_per_cell; #this hash is going to store the bkgrd overlaps
+my %overlaps_per_cell; #this hash is going to store the overlaps for the random picks, per cell
# Get the bits for the background sets and process
my $total_num_probes_in_random_picks;
warn "[".scalar(localtime())."] Running the analysis with $num_of_valid_probes MVPs...\n";
-my $num = 0;
+my $count = 0;
foreach my $this_random_pick (@$random_picks) {
- warn "[".scalar(localtime())."] Repetition $num out of ".$reps."\n" if (++$num%100 == 0);
+ warn "[".scalar(localtime())."] Repetition $count out of ".$reps."\n" if (++$count%100 == 0);
$annotated_probes = get_probe_annotations_and_overlap_for_dataset($dbh, $dataset, $array, $this_random_pick);
$total_num_probes_in_random_picks += scalar @$annotated_probes;
unless (scalar @$annotated_probes == $num_of_valid_probes) {
- warn "Random pick #$num only has " . scalar @$annotated_probes . " probes compared to $num_of_valid_probes in the input set.\n";
+ warn "Random pick #$count only has " . scalar @$annotated_probes . " probes compared to $num_of_valid_probes in the input set.\n";
}
my $this_pick_overlaps = process_overlaps($annotated_probes, $cells, $dataset);
@@ -433,8 +437,8 @@ =head1 ACKNOWLEDGEMENTS
push @{$overlaps_per_cell{$cell}}, $this_pick_overlaps->{'CELLS'}->{$cell}->{'COUNT'};
}
- if (defined $bkgrdstat) {
- bkgrdstat($this_pick_overlaps, $lab, $this_random_pick);
+ if (defined $save_probe_annotation_stats) {
+ save_probe_annotation_stats($this_pick_overlaps, $out_dir, $lab, $count);
}
}
@@ -446,8 +450,6 @@ =head1 ACKNOWLEDGEMENTS
#the table to be read into R for plotting.
-mkdir $out_dir;
-
if (!$web) {
open(BACKGROUND, "| gzip -9 > $out_dir/background.tsv.gz") or die "Cannot open background.tsv";
}

0 comments on commit 0db5650

Please sign in to comment.