Permalink
Browse files

Use compressed files to save space on the server

  • Loading branch information...
1 parent 65b8af4 commit fa557f9863c3b6aa3fd0bcb5614b6c1b616e0fb3 @jherrero jherrero committed Apr 18, 2016
Showing with 33 additions and 20 deletions.
  1. +22 −9 eforge.pl
  2. +11 −11 webserver/cgi-bin/index.pl
View
@@ -366,7 +366,14 @@ =head1 ACKNOWLEDGEMENTS
warn "You have specified p value filtering, but this isn't implemented for files of format $format. No filtering will happen."
}
}
- open my $fh, "<", $file or die "cannot open file $file : $!";
+ # Open the input, decompressing on the fly when the file name ends in .gz or .bz2.
+ # The list form of pipe open runs the decompressor directly (no shell), so spaces or
+ # shell metacharacters in $file cannot break or alter the command.
+ my $fh;
+ if ($file =~ /\.gz$/) {
+ open($fh, "-|", "gunzip", "-c", $file) or die "cannot open file $file : $!";
+ } elsif ($file =~ /\.bz2$/) {
+ open($fh, "-|", "bunzip2", "-c", $file) or die "cannot open file $file : $!";
+ } else {
+ # 3-arg open: the file name is never interpreted as a mode or a command
+ open($fh, "<", $file) or die "cannot open file $file : $!";
+ }
$mvps = process_file($fh, $format, $dbh, $bkgd, $filter);
} elsif (@mvplist) {
@@ -508,7 +515,9 @@ =head1 ACKNOWLEDGEMENTS
mkdir $out_dir;
-open my $bfh, ">", "$out_dir/background.tsv" or die "Cannot open background.tsv";
+# The gzipped background table is only written when $web is false.
+if (!$web) {
+ open(BACKGROUND, "| gzip -9 > $out_dir/background.tsv.gz") or die "Cannot open $out_dir/background.tsv.gz: $!";
+}
@@ -519,7 +528,9 @@ =head1 ACKNOWLEDGEMENTS
# above line sorts by the tissues alphabetically (from $tissues hash values)
# ultimately want a data frame of names(results)<-c("Zscore", "Cell", "Tissue", "File", "MVPs")
- say $bfh join("\t", @{$bkgrd{$cell}});
+ if (!$web) {
+ print BACKGROUND join("\t", @{$bkgrd{$cell}}), "\n";
+ }
my $teststat = ($$test{'CELLS'}{$cell}{'COUNT'} or 0); #number of overlaps for the test MVPs
# binomial pvalue, probability of success is derived from the background overlaps over the tests for this cell
@@ -560,20 +571,22 @@ =head1 ACKNOWLEDGEMENTS
push(@results, [$zscore, $pbinom, $shortcell, $$tissues{$cell}{'tissue'}, $$tissues{$cell}{'datatype'}, $$tissues{$cell}{'file'}, $mvp_string, $$tissues{$cell}{'acc'}]);
}
-close($bfh);
+if (!$web) {
+ # close() on a pipe waits for gzip; this is the only place a failed write or a
+ # failed gzip shows up. $! is 0 when gzip merely exited with a non-zero status.
+ close(BACKGROUND)
+ or die "Cannot write $out_dir/background.tsv.gz: " . ($! || "gzip exited with status " . ($? >> 8));
+}
# Correct the p-values for multiple testing using the Benjamini-Yekutieli FDR control method
my $qvalues = BY(\@pvalues);
$qvalues = [map {sprintf("%.2e", $_)} @$qvalues];
# Write the results to a tab-separated file
-my $filename = "$lab.chart.tsv";
-open my $ofh, ">", "$out_dir/$filename" or die "Cannot open $out_dir/$filename: $!";
-print $ofh join("\t", "Zscore", "Pvalue", "Cell", "Tissue", "Datatype", "File", "Probe", "Accession", "Qvalue"), "\n";
+my $filename = "$lab.chart.tsv.gz";
+open(TSV, "| gzip -9 > $out_dir/$filename") or die "Cannot open $out_dir/$filename: $!";
+print TSV join("\t", "Zscore", "Pvalue", "Cell", "Tissue", "Datatype", "File", "Probe", "Accession", "Qvalue"), "\n";
for (my $i = 0; $i < @results; $i++) {
- print $ofh join("\t", @{$results[$i]}, $qvalues->[$i]), "\n";
+ print TSV join("\t", @{$results[$i]}, $qvalues->[$i]), "\n";
}
-close($ofh);
+# close() on a pipe waits for gzip; check it so a truncated results file is not silent.
+# $! is 0 when gzip merely exited with a non-zero status.
+close(TSV) or die "Cannot write $out_dir/$filename: " . ($! || "gzip exited with status " . ($? >> 8));
warn "[".scalar(localtime())."] Generating plots...\n";
View
@@ -40,7 +40,7 @@
# The location of the bin dir w.r.t. the cgi-bin dir (DO NOT CHANGE)
my $BIN_DIR = "../bin";
# The name of the input data file
-my $INPUT_DATAFILE = "input.txt";
+my $INPUT_DATAFILE = "input.txt.gz";
# The name of the output data file
my $STDOUT_FILE = "output.txt";
@@ -432,7 +432,7 @@ sub validate_form {
## It seems like all the options are valid, so we can now store the input data in the output
## directory
my $absolute_outdir = get_absolute_outdir();
- open(INPUT, ">$absolute_outdir/$INPUT_DATAFILE") or
+ open(INPUT, "| gzip -9 > $absolute_outdir/$INPUT_DATAFILE") or
die "Cannot open $absolute_outdir/$INPUT_DATAFILE: $!";
foreach my $this_line (@lines) {
print INPUT $this_line, "\n";
@@ -562,19 +562,19 @@ sub print_result {
my $web_outdir = get_web_outdir();
opendir(DIR, $absolute_outdir);
- my @files = grep {/(.pdf|.html|.tsv|.R)$/} readdir(DIR);
+ # Keep only result files; the dot is escaped so it matches a literal "." before the extension
+ my @files = grep {/\.(?:pdf|html|tsv|R|gz|bz2)$/} readdir(DIR);
closedir(DIR);
- my $table_file = (grep {/.table.html$/} @files)[0];
- my $table_R = (grep {/.table.R$/i} @files)[0];
- my $dchart_file = (grep {/.dchart.html$/} @files)[0];
- my $dchart_R = (grep {/.dchart.R$/i} @files)[0];
- my $tsv_file = (grep {/.chart.tsv$/} @files)[0];
- my $pdf_file = (grep {/.chart.pdf$/} @files)[0];
+ my $table_file = (grep {/\.table\.html$/} @files)[0];
+ my $table_R = (grep {/\.table\.R$/i} @files)[0];
+ my $dchart_file = (grep {/\.dchart\.html$/} @files)[0];
+ my $dchart_R = (grep {/\.dchart\.R$/i} @files)[0];
+ my $tsv_file = (grep {/\.chart\.tsv(\.gz|\.bz2)?$/} @files)[0];
+ my $pdf_file = (grep {/\.chart\.pdf$/} @files)[0];
# Dots escaped so that "*.dchart.R" is not mistaken for the static chart's R script
my $pdf_R = (grep {/\.chart\.R$/i} @files)[0];
print $fh Template::content_box_1("Results",
- "<a href=\"$web_outdir/$INPUT_DATAFILE\">Input data (txt)</a>",
- "<a href=\"$web_outdir/$tsv_file\">Raw data (tsv)</a>",
+ "<a href=\"$web_outdir/$INPUT_DATAFILE\">Input data (txt.gz)</a>",
+ "<a href=\"$web_outdir/$tsv_file\">Raw data (tsv.gz)</a>",
"<a href=\"$web_outdir/$pdf_file\">Static chart (PDF)</a>",
"<a href=\"$web_outdir/$dchart_file\">Interactive chart (HTML)</a>",
"<a href=\"$web_outdir/$table_file\">Interactive table (HTML)</a>",

0 comments on commit fa557f9

Please sign in to comment.