Permalink
Browse files

Fixed R environment setup. Added plot_fdist.pl to support_files. Gene…

…ral bug fixes/improvements
  • Loading branch information...
1 parent 42c652a commit b8188a961676198dee5684b32ea32b0aabad62b3 @gawbul committed Mar 30, 2012
View
@@ -1,4 +1,4 @@
-data = read.csv('/Users/stevemoss/Dropbox/PhD/Research/Development/gcat/data/ciona_intestinalis/grepeats.csv')
+data = read.csv('/Users/stevemoss/Dropbox/PhD/Research/Development/gcat/data/grepeats_raw_all.csv')
attach(data)
names(data)
rownames(data) <- data[,1]
@@ -7,23 +7,11 @@ data[,1] <- NULL
data
t(data)
-cols <- as.character(sample(colours(), 1, replace=F))
-barplot(as.matrix(data), beside=F, names.arg=, col=heat.colors(9), space=0.1, cex=0.8, cex.axis=0.8, las=1, ylab="Total")
-legend("topleft", legend=saccharomyces_cerevisiae.desc, inset=0.1, cex=0.8, fill=heat.colors(9))
+cols <- as.character(sample(colors()[grep("dark|deep|medium",colors())], 1, replace=F))
-freqs <- read.csv('/Users/stevemoss/Dropbox/PhD/Research/Development/gcat/data/grepeats_freqs_all.csv', header=T)
-freqs
-freqs(is.na(freqs)) <- NULL
-attach(freqs)
-names(freqs)
+colors()[grep("dark|deep|medium",colors())]
-max(na.omit(ciona_intestinalis.size))
-test <- as.character(sample(topo.colors(512), 5, replace=F))
+barplot(as.matrix(data), beside=F, names.arg=, col=colors()[grep("dark|deep|medium",colors())], space=0.1, cex=0.8, cex.axis=0.8, las=1, ylab="Total")
+
+legend("topleft", legend=ciona_intestinalis.desc, inset=0.1, cex=0.8, fill=colors()[grep("dark|deep|medium",colors())])
-?topo.colors
-cols <- c("red", "green", "blue")
-cols
-summary(cols)
-test
-class(test)
-summary(test)
View
@@ -7,17 +7,23 @@
test one two three four five
test six seven eight nine ten
-# This is a list of all (56) species names from the core database (release 65)
-# ailuropoda_melanoleuca anolis_carolinensis bos_taurus caenorhabditis_elegans callithrix_jacchus canis_familiaris cavia_porcellus choloepus_hoffmanni ciona_intestinalis ciona_savignyi danio_rerio dasypus_novemcinctus dipodomys_ordii drosophila_melanogaster echinops_telfairi equus_caballus erinaceus_europaeus felis_catus gadus_morhua gallus_gallus gasterosteus_aculeatus gorilla_gorilla homo_sapiens loxodonta_africana macaca_mulatta macropus_eugenii meleagris_gallopavo microcebus_murinus monodelphis_domestica mus_musculus myotis_lucifugus nomascus_leucogenys ochotona_princeps ornithorhynchus_anatinus oryctolagus_cuniculus oryzias_latipes otolemur_garnettii pan_troglodytes petromyzon_marinus pongo_abelii procavia_capensis pteropus_vampyrus rattus_norvegicus saccharomyces_cerevisiae sarcophilus_harrisii sorex_araneus spermophilus_tridecemlineatus sus_scrofa taeniopygia_guttata takifugu_rubripes tarsius_syrichta tetraodon_nigroviridis tupaia_belangeri tursiops_truncatus vicugna_pacos xenopus_tropicalis
+# This is a list of all (57) species names from the core database (release 66)
+# ailuropoda_melanoleuca anolis_carolinensis bos_taurus caenorhabditis_elegans callithrix_jacchus canis_familiaris cavia_porcellus choloepus_hoffmanni ciona_intestinalis ciona_savignyi danio_rerio dasypus_novemcinctus dipodomys_ordii drosophila_melanogaster echinops_telfairi equus_caballus erinaceus_europaeus felis_catus gadus_morhua gallus_gallus gasterosteus_aculeatus gorilla_gorilla homo_sapiens latimeria_chalumnae loxodonta_africana macaca_mulatta macropus_eugenii meleagris_gallopavo microcebus_murinus monodelphis_domestica mus_musculus myotis_lucifugus nomascus_leucogenys ochotona_princeps ornithorhynchus_anatinus oryctolagus_cuniculus oryzias_latipes otolemur_garnettii pan_troglodytes petromyzon_marinus pongo_abelii procavia_capensis pteropus_vampyrus rattus_norvegicus saccharomyces_cerevisiae sarcophilus_harrisii sorex_araneus spermophilus_tridecemlineatus sus_scrofa taeniopygia_guttata takifugu_rubripes tarsius_syrichta tetraodon_nigroviridis tupaia_belangeri tursiops_truncatus vicugna_pacos xenopus_tropicalis
-# get exons from ensembl
+# get teleost exons from ensembl
get_exons danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis
-# get introns from ensembl
+# get teleost introns from ensembl
get_introns danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis #test on line comment
-# do basic statistics for exons
+# do basic statistics for teleost exons
basic_stats exons danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis
-# do basic statistics for introns
-basic_stats introns danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis
+# do basic statistics for teleost introns
+basic_stats introns danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis
+
+# do advanced statistics for teleost exons
+advanced_stats exons danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis
+
+# do advanced statistics for teleost introns
+advanced_stats introns danio_rerio gasterosteus_aculeatus oryzias_latipes takifugu_rubripes tetraodon_nigroviridis
@@ -123,9 +123,6 @@ =head1 DESCRIPTION
$freqs_filename = File::Spec->catfile($path , $freqsfile);
}
-# build raw distribution plot
-&GCAT::Visualisation::R::plot_Raw_Dist($raw_filename, $feature, @{$organisms});
-
# build frequency distribution plot
&GCAT::Visualisation::R::plot_Frequency_Dist($freqs_filename, $feature, @{$organisms});
@@ -22,11 +22,65 @@ use warnings;
use strict;
# some imports
+use GCAT::Interface::Logging qw(logger);
+use Statistics::R;
use File::Spec;
use Cwd;
-use Text::CSV_XS;
-use GCAT::Data::Output;
-use Statistics::R;
+
+# export the subroutines
+require Exporter;
+our @ISA = qw(Exporter);
+our @EXPORT_OK = qw(check_R_Environ map_Chars_to_Tree);
+
+# check the R environment is setup for GCAT
+sub check_R_Environ {
+ # setup variables
+ my @pkg_list = ("ape", "geiger", "gdata");
+ my $mirror = "http://star-www.st-andrews.ac.uk/cran/";
+ my $status = 0;
+
+ # setup new R object
+ my $R = Statistics::R->new();
+
+ # start R session
+ $R->startR;
+
+ # define is.installed function
+ $R->send("is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])");
+
+ # let user know what we're doing
+ print "Checking packages (@pkg_list) are installed...\n";
+
+ # check if packages are installed
+ foreach my $pkg (@pkg_list) {
+ $R->send("is.installed(\"" . $pkg . "\")");
+ my $ret = $R->read();
+ if ($ret eq "[1] TRUE") {
+ $R->send("library(" . $pkg . ")");
+ $status = 1;
+ }
+ elsif ($ret eq "[1] FALSE") {
+ # $R->send("Sys.setenv(http_proxy=\'http://slb-webcache.hull.ac.uk:3128\')"); # change this to your proxy server details and uncomment if using a proxy
+ $R->send("options(repos=structure(c(CRAN=\"" . $mirror . "\")))"); # change this to your preferred mirror
+ $R->send("install.packages(\"" . $pkg . "\", dependencies=T)");
+ $R->send("library(" . $pkg . ")");
+ $status = 1;
+ }
+ else {
+ logger("An unknown error occurred while testing if \"$pkg\" exists!\n", "Error");
+ $status = 0;
+ }
+ }
+
+ # stop R session
+ $R->stopR;
+
+ # let user know we're done
+ print "Finished!\n";
+
+ return $status;
+}
+
# map character matrix to phylogenetic tree
sub map_Chars_to_Tree {
@@ -22,83 +22,35 @@ use warnings;
use strict;
# import some required modules
+use GCAT::Statistics::R qw(check_R_Environ);
use Statistics::R;
use Cwd;
+# export the subroutines
+require Exporter;
+our @ISA = qw(Exporter);
+our @EXPORT_OK = qw(plot_NFs plot_Stacked_Barplot plot_Frequency_Dist);
+
# use this routine to plot nucleotide frequencies
sub plot_NFs {
- return;
-}
-
-
-# plot basic stats output
-sub plot_Raw_Dist {
- # get arguments
- my ($infile, $feature, @organisms) = @_;
-
- # setup out file
- my $outfile = substr($infile, 0, -4) . ".pdf";
-
- # setup new R object
- my $R = Statistics::R->new();
-
- # start R
- $R->startR();
- # define return variable
- my $ret = undef;
-
- #################
- # main R script #
- #################
-
- # load file
- $R->send('raw <- read.csv("' . $infile . '", header=TRUE)');
- $R->send('attach(raw)');
- $R->send('names(raw)');
-
- # work out maximum sizes
- $R->send('y_max <- 0');
-
- # traverse organisms
- foreach my $org (@organisms) {
- $R->send('if (max(na.omit(' . $org . '.sizes' . ')) > y_max) y_max <- max(na.omit(' . $org . '.sizes))');
- }
-
- $R->send('y_max');
-
- # setup PDF graphics device
- $R->send('pdf(file="' . $outfile . '", width=12, height=12)');
-
- # build colours
- $R->send('cols <- as.character(sample(rainbow(128),' . scalar(@organisms) . ', replace=F))');
-
- # plot frequency distribution
- $R->send('plot(0, 0, type="l", xlim=c(0,x_max), xlab="' . substr(ucfirst($feature), 0, -1) . ' Size (bps)", ylim=c(1,y_max), log="y", ylab="Log Frequency", main="Raw ' . substr(ucfirst($feature), 0, -1) . ' Size in ' . scalar(@organisms) .' Organism(s)")');
- my $count = 1;
- foreach my $org (@organisms) {
- $R->send('lines(' . $org . '.size, ' . $org . '.freqs, type = "l", col=cols[' . $count . '])');
- $count++;
- }
-
- # add legend
- $R->send('legend("topright", inset=.05, c("' . join("\",\"", @organisms) . '"), lty=1, col=cols)');
+}
- # turn graphics device off, to allow writing to PDF file
- $R->send('dev.off()');
-
- # stop R
- $R->stopR();
+# plot stacked bar plot for repeats etc
+sub plot_Stacked_Barplot {
- # tell user what we've done
- print "Outputted raw distribution plot to $outfile\n";
}
# plot frequency distribution
sub plot_Frequency_Dist {
# get arguments
my ($infile, $feature, @organisms) = @_;
+ # check our R environment
+ unless (!&check_R_Environ) {
+
+ }
+
# setup out file
my $outfile = substr($infile, 0, -4) . ".pdf";
View
@@ -0,0 +1,41 @@
+# This is a helper script to build a frequency distribution plot for a number of species
+# Coded by Steve Moss
+# gawbul@gmail.com
+# http://stevemoss.ath.cx
+
+# make life simple
+use warnings;
+use strict;
+
+# import modules
+use GCAT::Visualisation::R;
+use GCAT::Data::Output qw(concatenate_CSV);
+
+# setup variables
+my @orgs = ("homo_sapiens", "pan_troglodytes", "gorilla_gorilla", "pongo_abelii", "nomascus_leucogenys");
+my $feature = "grepeats";
+my $organisms = \@orgs;
+my ($raw_filename, $freqs_filename) = undef;
+
+# check if we have more than one organism
+if (scalar(@{$organisms}) > 1) {
+ # concatenate the raw CSVs
+ $raw_filename = &concatenate_CSV($feature, "raw", @{$organisms});
+
+ # concatenate the freqs CSVs
+ $freqs_filename = &concatenate_CSV($feature, "freqs", @{$organisms});
+}
+else {
+ # build path
+ my $dir = getcwd();
+ my $path = File::Spec->catfile($dir , "data", @{$organisms}[0]);
+
+ # set filenames
+ my $rawfile = "$feature\_raw.csv";
+ my $freqsfile = "$feature\_freqs.csv";
+ $raw_filename = File::Spec->catfile($path , $rawfile);
+ $freqs_filename = File::Spec->catfile($path , $freqsfile);
+}
+
+# build frequency distribution plot
+&GCAT::Visualisation::R::plot_Frequency_Dist($freqs_filename, $feature, @{$organisms});
@@ -10,6 +10,10 @@
# import modules
use Statistics::R;
+# setup variables
+my @pkg_list = ("ape", "geiger", "gdata");
+my $mirror = "http://star-www.st-andrews.ac.uk/cran/";
+
# setup new R object
my $R = Statistics::R->new();
@@ -19,21 +23,30 @@
# define is.installed function
$R->send("is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])");
-# check if gdata is installed
-$R->send("is.installed(\'gdata\')");
-my $ret = $R->read();
-if ($ret eq "[1] TRUE") {
- $R->send("library(gdata)");
-}
-elsif ($ret eq "[1] FALSE") {
-# $R->send("Sys.setenv(http_proxy=\'http://slb-webcache.hull.ac.uk:3128\')"); # you should change this to your proxy server details if working behind a proxy
- $R->send("options(repos=structure(c(CRAN=\'http://star-www.st-andrews.ac.uk/cran/\')))"); # you should change this to your preferred mirror
- $R->send("install.packages(\'gdata\', dependencies=T)");
- $R->send("library(gdata)");
-}
-else {
- print "An unknown error occurred while testing if a package exists!\n";
+# let user know what we're doing
+print "Checking packages (@pkg_list) are installed...\n";
+
+# check if packages are installed
+foreach my $pkg (@pkg_list) {
+ $R->send("is.installed(\"" . $pkg . "\")");
+ my $ret = $R->read();
+ if ($ret eq "[1] TRUE") {
+ $R->send("library(" . $pkg . ")");
+ }
+ elsif ($ret eq "[1] FALSE") {
+ # $R->send("Sys.setenv(http_proxy=\'http://slb-webcache.hull.ac.uk:3128\')"); # change this to your proxy server details and uncomment if using a proxy
+ $R->send("options(repos=structure(c(CRAN=\"" . $mirror . "\")))"); # change this to your preferred mirror
+ $R->send("install.packages(\"" . $pkg . "\", dependencies=T)");
+ $R->send("library(" . $pkg . ")");
+ }
+ else {
+ logger("An unknown error occurred while testing if \"$pkg\" exists!\n", "Error");
+ exit;
+ }
}
# end R session
$R->stopR;
+
+# let user know we're done
+print "Finished!\n";

0 comments on commit b8188a9

Please sign in to comment.