diff --git a/eForge/eForge.pm b/eForge/eForge.pm index b96273c..8fded20 100644 --- a/eForge/eForge.pm +++ b/eForge/eForge.pm @@ -245,7 +245,7 @@ Processes bitstrings to get a count of overlaps for each cell type. =cut sub process_bits{ - my ($rows, $cells, $data) = @_; + my ($rows, $cells, $data, $weights, $step) = @_; my %test; my @test_cells; my @indexes = 0..(@$cells-1); @@ -263,10 +263,16 @@ sub process_bits{ foreach my $index (@indexes) { ## $bit_string is a string made of 0s and 1s. If it is a 1 for this position, count and push if (substr($bit_string, $index, 1)) { + my $value = 0; $test_cells[$index][0]++; + $value = $test_cells[$index][0] if ($test_cells[$index][0]); + $value = $value*$weights; + $test_cells[$index][0]=$value; push @{$test_cells[$index][1]}, $probeid; } } + $weights=$weights-$step; + #we change weights now as we are going for another probe } my $index = 0; foreach my $cell (@$cells){ diff --git a/eForge/ePlot.pm b/eForge/ePlot.pm index e66c01a..d720412 100644 --- a/eForge/ePlot.pm +++ b/eForge/ePlot.pm @@ -234,12 +234,13 @@ d1\$save('$chart', cdn = F)\n"; system("R", "--no-save", "--quiet", "--slave", "--file=$rfile"); if ($web) { + $web =~ s/\/$//; open(FILE, "$resultsdir/$chart") or die; my @lines = ; close(FILE); open(FILE, ">", "$resultsdir/$chart") or die; foreach my $line (@lines) { - $line =~ s/src='.*\/js/src='\/libraries\/dimple\/js/; + $line =~ s/src='.*\/js/src='$web\/libraries\/dimple\/js/; print FILE $line; } close(FILE); @@ -277,13 +278,14 @@ dt\$save('$chart', cdn = F)\n"; system("R", "--no-save", "--quiet", "--slave", "--file=$rfile"); if ($web) { + $web =~ s/\/$//; open(FILE, "$resultsdir/$chart") or die; my @lines = ; close(FILE); open(FILE, ">", "$resultsdir/$chart") or die; foreach my $line (@lines) { - $line =~ s/href='.*\/css/href='\/libraries\/datatables\/css/; - $line =~ s/src='.*\/js/src='\/libraries\/datatables\/js/; + $line =~ s/href='.*\/css/href='$web\/libraries\/datatables\/css/; + $line =~ 
s/src='.*\/js/src='$web\/libraries\/datatables\/js/; print FILE $line; } close(FILE); diff --git a/eforge.pl b/eforge.pl index 65bcf15..34d287c 100644 --- a/eforge.pl +++ b/eforge.pl @@ -63,6 +63,14 @@ =head1 OPTIONS Can provide the mvps as probeids in a comma separated list. +=item B<weights> + +Initial weight with which the mvps will be analysed; the weight decreases by $step for each successive probe (e.g. if $weights=1.15 and $step=0.01 the actual probe weights will be 1.15 for the first probe, 1.14 for the second probe, 1.13 for the third probe, etc.). Default is 1. + +=item B<step> + +Amount by which the weight decreases from one probe to the next (e.g. if $weights=1.15 and $step=0.01 the actual probe weights will be 1.15 for the first probe, 1.14 for the second probe, 1.13 for the third probe, etc.). Default is 0. + =item B Specify the minimum number of MVPs to be allowed. Default is 5 now we are using binomial test. @@ -179,7 +187,7 @@ =head1 CONTACT my $bkgd = '450k'; # Default value my ($data, $peaks, $label, $file, $format, $min_mvps, $bkgrdstat, $noplot, $reps, - $help, $man, $thresh, $proxy, $noproxy, $depletion, $filter, $out_dir, @mvplist, + $help, $man, $thresh, $proxy, $noproxy, $weights, $step, $depletion, $filter, $out_dir, @mvplist, $web, $autoopen); GetOptions ( @@ -197,10 +205,12 @@ =head1 CONTACT 'thresh=s' => \$thresh, 'proxy=s' => \$proxy, 'noproxy' => \$noproxy, + 'weights=s' => \$weights, + 'step=s' => \$step, 'depletion' => \$depletion, 'filter=f' => \$filter, 'out_dir=s' => \$out_dir, - 'web' => \$web, + 'web=s' => \$web, 'autoopen' => \$autoopen, 'help|h|?' 
=> \$help, 'man|m' => \$man, @@ -239,6 +249,16 @@ =head1 CONTACT $label = "$label.depletion"; } +#sets weights to 1 and step to 0 if undefined + +unless (defined $weights) { + $weights = 1; +} + +unless (defined $step) { + $step = 0; +} + #regexp puts underscores where labels before (my $lab = $label) =~ s/\s/_/g; $lab = "$lab.$bkgd.$data"; @@ -360,7 +380,7 @@ =head1 CONTACT my $rows = get_bits(\@mvps, $dbh); # unpack the bitstrings and store the overlaps by cell. -my $test = process_bits($rows, $cells, $data); +my $test = process_bits($rows, $cells, $data, $weights, $step); # generate stats on the background selection if (defined $bkgrdstat) { @@ -425,7 +445,7 @@ =head1 CONTACT unless (scalar @$rows == scalar @foundmvps) { warn "Background " . $bkgrd . " only " . scalar @$rows . " probes out of " . scalar @foundmvps . "\n"; } - my $result = process_bits($rows, $cells, $data); + my $result = process_bits($rows, $cells, $data, $weights, $step); foreach my $cell (keys %{$$result{'CELLS'}}) { push @{$bkgrd{$cell}}, $$result{'CELLS'}{$cell}{'COUNT'}; # accumulate the overlap counts by cell } diff --git a/webserver/INSTALL b/webserver/INSTALL index 82c9651..3ee8e7b 100644 --- a/webserver/INSTALL +++ b/webserver/INSTALL @@ -81,21 +81,27 @@ server by the server itself. 3.c. 
Copy the webserver files to the right directories - sudo rsync -avPL ~/eFORGE/webserver/cgi-bin/ /var/www/eFORGE/cgi-bin/ - sudo rsync -avPL ~/eFORGE/webserver/bin/ /var/www/eFORGE/bin/ - sudo rsync -avPL ~/eFORGE/webserver/html/ /var/www/eFORGE/html/ - sudo rsync -avPL ~/eFORGE/eforge.db /var/www/eFORGE/bin/ - sudo rsync -avPL ~/eFORGE/mvp_450k_bins /var/www/eFORGE/bin/ - sudo rsync -avPL ~/eFORGE/mvp_27k_bins /var/www/eFORGE/bin/ + sudo rsync -avPL ~/eFORGE/webserver/cgi-bin/ cgi-bin/ + sudo rsync -avPL ~/eFORGE/webserver/bin/ bin/ + sudo rsync -avPL ~/eFORGE/webserver/html/ html/ + sudo rsync -avPL ~/eFORGE/eforge.db bin/ + sudo rsync -avPL ~/eFORGE/mvp_450k_bins bin/ + sudo rsync -avPL ~/eFORGE/mvp_27k_bins bin/ 3.e. Add links to the database files for the Downloads page: - cd /var/www/eFORGE/html/files + cd html/files sudo ln -s ../../bin/eforge.db . sudo ln -s ../../bin/mvp_450k_bins . sudo ln -s ../../bin/mvp_27k_bins . + cd ../.. -3.f. Configure an Apache VirtualHost for eFORGE: +3.f. Configure the Apache server + +You can choose to run eFORGE either on a VirtualHost (like http://eforge.cs.ucl.ac.uk/) or in a +folder (like http://blic.cs.ucl.ac.uk/eFORGE). + +OPTION 1 -- Configure an Apache VirtualHost for eFORGE: cat > /etc/http/conf.d/eforge.conf @@ -121,9 +127,30 @@ server by the server itself. +OPTION 2 -- Configure eFORGE in a folder (Apache Alias): + + cat > /etc/http/conf.d/eforge.conf +ScriptAlias /eFORGE/cgi-bin/ /var/www/eFORGE/cgi-bin/ +Alias "/eFORGE" "/var/www/eFORGE/html" + + + Order allow,deny + Allow from all + + DirectoryIndex index.html cgi-bin/index.pl + + + + AllowOverride None + Options None + Order allow,deny + Allow from all + + + 3.g. 
Copy the rCharts libraries to the html path: - sudo rsync -avP /usr/lib64/R/library/rCharts/libraries /var/www/eFORGE/html/ + sudo rsync -avP /usr/lib64/R/library/rCharts/libraries html 3.h Re-start Apache diff --git a/webserver/cgi-bin/index.pl b/webserver/cgi-bin/index.pl index 9dbf6ae..2c4e09d 100755 --- a/webserver/cgi-bin/index.pl +++ b/webserver/cgi-bin/index.pl @@ -24,10 +24,24 @@ ################################################################################################## ## -my $WEB_ROOT_OUTDIR = "/files"; +# The location of the HTML pages for this server on the filesystem +my $DOCUMENT_ROOT = "/var/www/eFORGE/html"; + +# The base URL (without the server name) for this server. For instance: +# Running on http://server.org/ -> $WEB_ROOT = "" +# Running on http://server.org/tool/ -> $WEB_ROOT = "/tool" +# IMPORTANT - DO NOT INCLUDE A TRAILING '/' +my $WEB_ROOT = ""; + +# The location of the files w.r.t. the base URL (DO NOT CHANGE) +my $WEB_OUTDIR = "/files"; +# The location of the log DB w.r.t. the cgi-bin dir (DO NOT CHANGE) my $LOG_FILE = "../log/server_log.db"; +# The location of the bin dir w.r.t. 
the cgi-bin dir (DO NOT CHANGE) my $BIN_DIR = "../bin"; +# The name of the input data file my $INPUT_DATAFILE = "input.txt"; +# The name of the output data file my $STDOUT_FILE = "output.txt"; my $colour="bright-blue"; @@ -42,13 +56,13 @@ ]; my $left_menu = [ - {"__logo__" => "/img/logo.jpg"}, + {"__logo__" => "$WEB_ROOT/img/logo.jpg"}, {"__title__" => "eFORGE"}, - {"Start" => "/"}, - {"Help" => "/?help"}, - {"Documentation" => "/?documentation"}, - {"Download" => "/?download"}, - {"About" => "/?about"}, + {"Start" => "$WEB_ROOT/"}, + {"Help" => "$WEB_ROOT/?help"}, + {"Documentation" => "$WEB_ROOT/?documentation"}, + {"Download" => "$WEB_ROOT/?download"}, + {"About" => "$WEB_ROOT/?about"}, {"__title__" => "UCL Cancer Institute"}, {"Home" => "http://www.ucl.ac.uk/cancer/"}, {"Medical Genomics" => "http://www.ucl.ac.uk/cancer/medical-genomics/medgenhome"}, @@ -90,45 +104,65 @@ exit(0); -=head2 get_web_outdir +=head2 get_outdir Arg[1] : -none- - Example : my $web_outdir = get_web_outdir(); - Description : Gets the location of the output directory w.r.t. web server document root, i.e. - the WEB_OUTDIR portion in http://mytool.cs.ucl.ac.uk/WEB_OUTDIR. + Example : my $outdir = get_outdir(); + Description : Gets the name of the output directory. If the directory does not exist yet, it will create one using a UUID. - Returns : string $web_outdir + Returns : string $outdir Exceptions : dies if it fails to create the new directory when needed. 
=cut -sub get_web_outdir { +sub get_outdir { if (!$_OUT_DIR) { my $ug = new Data::UUID; - $_OUT_DIR = $WEB_ROOT_OUTDIR."/".$ug->to_hexstring($ug->create()); - mkdir($ENV{'DOCUMENT_ROOT'}.$_OUT_DIR) or die "Cannot create output directory for the run"; + $_OUT_DIR = $WEB_OUTDIR."/".$ug->to_hexstring($ug->create()); + mkdir($DOCUMENT_ROOT.$_OUT_DIR) or die "Cannot create output directory ($DOCUMENT_ROOT$_OUT_DIR) for the run"; } return $_OUT_DIR; } +=head2 get_web_outdir + + Arg[1] : -none- + Example : my $web_outdir = get_web_outdir(); + Description : Gets the location of the output directory w.r.t. web server document root, i.e. + the WEB_OUTDIR portion in http://mytool.cs.ucl.ac.uk/WEB_OUTDIR. + This method calls get_outdir() which creates the directory if needed. + Returns : string $web_outdir + Exceptions : None + +=cut + +sub get_web_outdir { + my $outdir = get_outdir(); + + return $WEB_ROOT.$outdir; +} + + =head2 get_absolute_outdir Arg[1] : -none- Example : my $absolute_outdir = get_absolute_outdir(); Description : Gets the location of the output directory in the file system. For instance, it will be something like /var/www/htdocs/files/0xG4214242AD2EEC1CC56354/ + This method calls get_outdir() which creates the directory if needed. 
Returns : string $absolute_outdir Exceptions : None =cut sub get_absolute_outdir { - my $web_outdir = get_web_outdir(); + my $outdir = get_outdir(); - return $ENV{'DOCUMENT_ROOT'}.$web_outdir; + return $DOCUMENT_ROOT.$outdir; } + =head2 get_absolute_root_outdir Arg[1] : -none- @@ -141,9 +175,7 @@ =head2 get_absolute_root_outdir =cut sub get_absolute_root_outdir { - my $web_outdir = get_web_outdir(); - - return $ENV{'DOCUMENT_ROOT'}.$WEB_ROOT_OUTDIR; + return $DOCUMENT_ROOT.$WEB_OUTDIR; } =head2 print_form @@ -252,7 +284,7 @@ =head2 validate_form sub validate_form { my $data; - my $validated_args = ["--web"]; + my $validated_args = ["--web", ($WEB_ROOT or "/")]; my @error_messages; my $input_data; @@ -914,7 +946,7 @@ sub print_documentation_page { "In the initial implementation, the functional elements considered are DNase I hotspots from either the ENCODE or Roadmap Epigenomics projects generated by the Hotspot method. The hotspots are regions of general DNase I sensitivity (rather than peaks which are more similar to DNase hypersensitive sites).
", "For each set of test DMPs, an overlap analysis is performed against the functional elements from either data source for each cell sample separately (125 samples for ENCODE, 299 for Roadmap), and the number of overlaps is counted. A background distribution of the expected overlap counts for this DMP set is obtained by picking sets of the same number of DMPs as the test DMP set, matched for gene relationship and CpG island relationship annotation. The matched background sets are then overlapped with the functional elements and the background distribution of overlaps determined. By default 1000 matched sets are used. The enrichment value for the test DMP set is expressed as the -log10 (binomial p value). Enrichments outside the nominal 99th and 99.9th percentile of the binomial distribution (i.e. -log10 (binomial p values) of >=2.68 and >= 3.38, respectively) are considered significant. A schematic of the analysis is shown below.
", "eFORGE Analysis Strategy
", - "", + "", "The results are presented by cell sample in either graphic (interactive Dimple chart or static pdf) or tabular (interactive DataTables table or tab separated file) forms. Typical results may show an enrichment of overlap (red or pink points) for the EWAS DMP set in a tissue of mechanistic relevance to the phenotype under analysis, for instance blood cell subtypes for Rheumatoid Arthritis DMPs.
", "Alternatively there may be no enrichment and all points will be blue below the -log10 (binomial p value) thresholds. This could be because there is no regulatory component underlying the EWAS association, or because the relevant tissue is not present in the available functional element datasets, or for other technical reasons (e.g. too few overlaps).
", "A list of probes from a tDMR study by Jaffe and Irizarry (Genome Biology, 2014) is available as default data in the web tool and more example datasets are being considered for a catalogue of eFORGE analysis results.
", @@ -938,7 +970,7 @@ sub print_documentation_page { "Estimating False Positive Rates by DMP count in the Test DMP Set

To estimate false positive rates, 1000 randomly chosen DMP sets for each of a series of DMP counts between 10 and 300 DMPs were analysed using eFORGE on the Roadmap and ENCODE data. The false positive rate was calculated as the number of cell enrichments greater than the two standard thresholds used by eFORGE expressed as the proportion of the total number of cell overlap tests performed.
", - "False Positive Rate by DMP set Count

This plot suggests that for a DMP set of >= 20, a threshold of -log10 (binomial p value) >= 3.38 (equivalent to 0.001 in corrected p value) maintains the false positive rate below around 0.0025 (0.25%)." + "False Positive Rate by DMP set Count

This plot suggests that for a DMP set of >= 20, a threshold of -log10 (binomial p value) >= 3.38 (equivalent to 0.001 in corrected p value) maintains the false positive rate below around 0.0025 (0.25%)." ); print Template::end; @@ -963,9 +995,10 @@ sub print_download_page { print Template::content_box("Download", "The code is available on GitHub: https://github.com/charlesbreeze/eFORGE", - "You also need to download the eforge.db, - mvp_450k_bins and - mvp_27k_bins files."); + "Additional files you will require: + "); print Template::content_box("License", "eforge.pl Functional analysis of EWAS DMPs

@@ -1030,9 +1063,9 @@ sub print_about_page { my $absolute_root_outdir = get_absolute_root_outdir(); refresh_usage_stats($absolute_root_outdir); print Template::content_box("Usage", - "", - "", - "", + "", + "", + "", "Disk usage: ".qx"cat $absolute_root_outdir/du.txt"." in ". qx"cat $absolute_root_outdir/num.txt"." folders", "You are accessing this server from ".$q->remote_addr(),