adding scMT vignette and subsetSamples

bioFAM · Apr 9, 2018 · 0fdc117 · 0fdc117
1 parent 146c82c
commit 0fdc117
Show file tree

Hide file tree

Showing 34 changed files with 498 additions and 115 deletions.
diff --git a/MOFAtools/DESCRIPTION b/MOFAtools/DESCRIPTION
@@ -14,7 +14,7 @@ License: GPL (>= 2)
 Description: A collection of tools for running and analysing MOFA models.
 Encoding: UTF-8
 Depends: R (>= 3.4)
-Imports: rhdf5, dplyr, tidyr, reshape2, pheatmap, corrplot, ggplot2, ggbeeswarm, methods, scales, GGally, doParallel, RColorBrewer, cowplot, ggrepel, foreach, MultiAssayExperiment
+Imports: rhdf5, dplyr, tidyr, reshape2, pheatmap, corrplot, ggplot2, ggbeeswarm, methods, scales, GGally, RColorBrewer, cowplot, ggrepel, MultiAssayExperiment, Biobase, doParallel, foreach
 Suggests: knitr, pcaMethods
 biocViews:
 VignetteBuilder: knitr

diff --git a/MOFAtools/NAMESPACE b/MOFAtools/NAMESPACE
@@ -58,6 +58,7 @@ export(qualityControl)
 export(runMOFA)
 export(sampleNames)
 export(subsetFactors)
+export(subsetSamples)
 export(trainCurveELBO)
 export(trainCurveFactors)
 export(viewNames)
@@ -79,6 +80,7 @@ exportMethods(featureNames)
 exportMethods(sampleNames)
 exportMethods(viewNames)
 import(MultiAssayExperiment)
+import(RColorBrewer)
 import(doParallel)
 import(dplyr)
 import(foreach)
@@ -90,6 +92,7 @@ import(methods)
 import(pheatmap)
 import(reshape2)
 import(scales)
+importFrom(Biobase,phenoData)
 importFrom(corrplot,corrplot)
 importFrom(cowplot,plot_grid)
 importFrom(grDevices,colorRampPalette)

diff --git a/MOFAtools/R/getMethods.R b/MOFAtools/R/getMethods.R
@@ -179,9 +179,16 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
 
   # Sanity checks
   if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
+  if (length(object@ImputedData)==0) {
+    stop("No imputed data found. Please run imputeMissing(MOFAobject) first")
+  }
 
   # Get views
-  if (paste0(views,collapse="") == "all") { views <- viewNames(object) } else { stopifnot(all(views %in% viewNames(object))) }
+  if (paste0(views,collapse="") == "all") { 
+    views <- viewNames(object) 
+  } else { 
+    stopifnot(all(views %in% viewNames(object))) 
+  }
 
   # Get features
   if (class(features)=="list") {
@@ -194,7 +201,7 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
     }
   }
 
-  # Fetch data
+  # Fetch imputed data
   ImputedData <- object@ImputedData[views]
   ImputedData <- lapply(1:length(ImputedData), function(m) ImputedData[[m]][features[[m]],,drop=F]); names(ImputedData) <- views
 
@@ -203,9 +210,10 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
     tmp <- lapply(views, function(m) { tmp <- reshape2::melt(ImputedData[[m]]); colnames(tmp) <- c("feature","sample","value"); tmp <- cbind(view=m,tmp); return(tmp) })
     ImputedData <- do.call(rbind,tmp)
     ImputedData[,c("view","feature","sample")] <- sapply(ImputedData[,c("view","feature","sample")], as.character)
-  } else if ((length(views)==1) && (as.data.frame==F)) {
-    ImputedData <- ImputedData[[views]]
-  }
+  } 
+  # else if ((length(views)==1) && (as.data.frame==F)) {
+  #   ImputedData <- ImputedData[[views]]
+  # }
 
   return(ImputedData)
 }
@@ -215,20 +223,51 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
 #' @description This function extracts covariates from the \code{colData} in the input \code{MultiAssayExperiment} object. \cr
 #' Note that if you did not use \code{MultiAssayExperiment} to create your \code{\link{createMOFAobject}}, this function will not work.
 #' @param object a \code{\link{MOFAmodel}} object.
-#' @param covariates names of the covariates
+#' @param covariate names of the covariate
+#' @import MultiAssayExperiment
+#' @importFrom Biobase phenoData
 #' @export
-#' 
-getCovariates <- function(object, covariates) {
+getCovariates <- function(object, covariate) {
 
   # Sanity checks
   if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
-  if(class(object@InputData) != "MultiAssayExperiment") stop("To work with covariates, InputData has to be specified in form of a MultiAssayExperiment")  
-  stopifnot(all(covariates %in% colnames(colData(object@InputData))))
+  if(class(object@InputData) != "MultiAssayExperiment") {
+    stop("To work with covariates, InputData has to be specified in form of a MultiAssayExperiment")
+  }
 
-  # Get covariates
-  covariates <- colData(object@InputData)[,covariates]
+  # Check that samples from MOFAobject and MultiAssayExperiment are consistent.
+  samples <- sampleNames(object)
+  if (!all(samples %in% rownames(colData(object@InputData)))) {
+    stop("There are samples in the model which are not detected in the MultiAssayExperiment object")
+  } else {
+    mae <- object@InputData[,samples]
+  }
+
+  # Extract the covariate from colData or in the phenoData of specific assays
+  out <- list()
+  if (covariate %in% colnames(colData(mae))) { 
+    covariate_in_coldata <- TRUE
+    out[[covariate]] <- colData(mae)[,covariate]
+    names(out[[covariate]]) <- sampleNames(object)
+  } else {
+    for (view in viewNames(object)) {
+      phenodata <- phenoData(mae@ExperimentList[[view]])
+      if (covariate %in% colnames(phenodata)) {
+        out[[paste(view,covariate,sep="_")]] <- phenodata[[covariate]]
+        names(out[[paste(view,covariate,sep="_")]]) <- rownames(phenodata)
+      }
+    }
+  }
+
+  # Final sanity check
+  if (length(out) == 0) {
+      stop("Covariate not found in the MultiAssayExperiment object")    
+  } else if (length(out)>1) {
+      stop("Covariate ambiguously found in the phenoData of multiple assays. Please, extract it manually.")    
+  } else {
+    return(out[[1]])
+  }
 
-  return(covariates)
 }
 
 #' @title getExpectations

diff --git a/MOFAtools/R/plotData.R b/MOFAtools/R/plotData.R
@@ -30,7 +30,7 @@
 #' @import pheatmap
 #' @examples
 #' # Load example of MOFA model
-#' model <- loadModel(system.file("extdata", "model15.hdf5", package = "MOFAtools"))
+#' model <- loadModel(system.file("extdata", "CLL_model.hdf5", package = "MOFAtools"))
 #' 
 #' # Plot top 50 features for factor 1 in the mRNA view
 #' plotDataHeatmap(model, "mRNA", 1, 50)
@@ -118,6 +118,8 @@ plotDataHeatmap <- function(object, view, factor, features = 50, includeWeights
 #' a character giving the name of a feature present in the training data, 
 #' a character giving the same of a covariate (only if using MultiAssayExperiment as input), 
 #' or a vector of the same length as the number of samples specifying discrete groups.
+#' @param name_color name for the color legend
+#' @param name_shape name for the shape legend
 #' @details One of the first steps for the annotation of factors is to visualise the loadings using \code{\link{plotWeights}} or \code{\link{plotTopWeights}}, 
 #' which show you which features drive the heterogeneity of each factor. 
 #' However, one might also be interested in visualising the direct relationship between features and factors, rather than looking at "abstract" weights. \cr

diff --git a/MOFAtools/R/plotFactors.R b/MOFAtools/R/plotFactors.R
@@ -115,9 +115,7 @@ plotFactorHist <- function(object, factor, group_by = NULL, group_names = "", al
 #' This function generates a Beeswarm plot of the sample values in a given latent factor. \cr
 #' Similar functions are \code{\link{plotFactorScatter}} for doing scatter plots and \code{\link{plotFactorHist}} for doing histogram plots
 #' @return Returns a \code{ggplot2} object
-#' @import ggplot2
-#' @import ggbeeswarm
-#' @import grDevices
+#' @import ggplot2 ggbeeswarm RColorBrewer grDevices
 #' @export
 plotFactorBeeswarm <- function(object, factors, color_by = NULL, name_color = "", showMissing = FALSE) {
 
@@ -140,38 +138,36 @@ plotFactorBeeswarm <- function(object, factors, color_by = NULL, name_color = ""
       if(color_by %in% Reduce(union,featureNames)) {
         viewidx <- which(sapply(featureNames, function(vnm) color_by %in% vnm))
         color_by <- TrainData[[viewidx]][color_by,]
-      } else if(class(object@InputData) == "MultiAssayExperiment"){
+      } else if(class(object@InputData) == "MultiAssayExperiment") {
         color_by <- getCovariates(object, color_by)
+    } else {
+      stop("'color_by' was specified but it was not recognised, please read the documentation") 
     }
-    else stop("'color_by' was specified but it was not recognised, please read the documentation")
     # It is a vector of length N
     } else if (length(color_by) > 1) {
       stopifnot(length(color_by) == N)
-      # color_by <- as.factor(color_by)
     } else {
       stop("'color_by' was specified but it was not recognised, please read the documentation")
     }
   } else {
     color_by <- rep(TRUE,N)
     colorLegend <- F
   }
-  names(color_by) <- sampleNames(object)
+
   if(length(unique(color_by)) < 5) color_by <- as.factor(color_by)
-
   Z$color_by <- color_by[Z$sample]
 
   # Remove samples with missing values
   if (showMissing==F) {
     Z <- Z[!(is.na(color_by) | is.nan(color_by) | color_by=="NaN"),]
-    # Z <- Z[complete.cases(Z),]
-    # Z <- Z[!is.na(Z$value),]
   }
 
   # Generate plot
   p <- ggplot(Z, aes_string(x=0, y="value")) + 
     ggbeeswarm::geom_quasirandom(aes(color=color_by)) +
     ylab("Factor value") + xlab("") +
     scale_x_continuous(breaks=NULL) +
+    facet_wrap(~factor, scales="free") +
     theme(
       axis.text.y = element_text(size = rel(1.5), color = "black"),
       axis.title.y = element_text(size = rel(1.5), color = "black"),
@@ -187,14 +183,20 @@ plotFactorBeeswarm <- function(object, factors, color_by = NULL, name_color = ""
       legend.position = "right", 
       legend.direction = "vertical",
       legend.key = element_blank()
-      ) + facet_wrap(~factor, scales="free")
+      )
 
   # If color_by is numeric, define the default gradient
   # if (is.numeric(color_by)) { p <- p + scale_color_gradientn(colors=terrain.colors(10)) }
-  if (is.numeric(color_by)) { p <- p + scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(n = 5, name = "RdYlBu")))(10)) }
+  if (is.numeric(color_by)) { 
+    p <- p + scale_color_gradientn(colors = colorRampPalette(rev(brewer.pal(n=5, name="RdYlBu")))(10)) 
+  }
 
   # Add legend
-  if (colorLegend) { p <- p + labs(color=name_color) } else { p <- p + guides(color = FALSE) }
+  if (colorLegend) { 
+    p <- p + labs(color=name_color) 
+  } else { 
+    p <- p + guides(color = FALSE) 
+  }
 
   return(p)
 }
@@ -235,8 +237,9 @@ plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL
 
   # Collect relevant data  
   N <- object@Dimensions[["N"]]
-  Z <- getExpectations(object, "Z", "E")
+  Z <- getFactors(object, factors = factors)
   factors <- as.character(factors)
+  samples <- sampleNames(object)
 
   # Set color
   colorLegend <- T
@@ -249,10 +252,12 @@ plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL
       if(color_by %in% Reduce(union,featureNames)) {
         viewidx <- which(sapply(featureNames, function(vnm) color_by %in% vnm))
         color_by <- TrainData[[viewidx]][color_by,]
-      } else if(class(object@InputData) == "MultiAssayExperiment"){
+      } else if(class(object@InputData) == "MultiAssayExperiment") {
         color_by <- getCovariates(object, color_by)
-    }
-    else stop("'color_by' was specified but it was not recognised, please read the documentation")
+      } else { 
+        stop("'color_by' was specified but it was not recognised, please read the documentation") 
+      }
+
     # It is a vector of length N
     } else if (length(color_by) > 1) {
       stopifnot(length(color_by) == N)
@@ -281,7 +286,6 @@ plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL
     }
     else stop("'shape_by' was specified but it was not recognised, please read the documentation")
     # It is a vector of length N
-    # It is a vector of length N
     } else if (length(shape_by) > 1) {
       stopifnot(length(shape_by) == N)
     } else {

diff --git a/MOFAtools/R/prepareMOFA.R b/MOFAtools/R/prepareMOFA.R
@@ -15,7 +15,9 @@
 #' @export
 #' @examples
 #' # Generate a random data set
-#' data <- list("gaussian_view"=matrix(rnorm(100,mean=0,sd=1),10,10), "poisson_view"=matrix(rpois(100, lambda=1),10,10))
+#' gaussian_data <- matrix(rnorm(100,mean=0,sd=1),10,10)
+#' poisson_data <- matrix(rpois(100, lambda=1),10,10)
+#' data <- list("gaussian_view"=gaussian_data, "poisson_view"=poisson_data)
 #' # Create a MOFA object
 #' MOFAobject <- createMOFA(data)
 #' # Define I/O options
@@ -24,7 +26,7 @@
 #' DataOptions <- getDefaultDataOpts()
 #' # Define Model Options
 #' ModelOptions <- getDefaultModelOpts(MOFAobject)
-#' ModelOptions$likelihood <- ("gaussian","poisson")
+#' ModelOptions$likelihood <- c("gaussian","poisson")
 #' ModelOptions <- getDefaultModelOpts(MOFAobject)
 #' # Define Training Options
 #' TrainOptions <- getDefaultTrainOpts()
@@ -112,16 +114,18 @@ prepareMOFA <- function(object, DirOptions, DataOptions = NULL, ModelOptions = N
 #'  \item{\strong{DropFactorThreshold}:}{ numeric indicating the threshold on fraction of variance explained to consider a factor inactive and drop it from the model.
 #'  For example, a value of 0.01 implies that factors explaining less than 1\% of variance (in each view) will be dropped.}
 #'  \item{\strong{verbose}:}{ logical indicating whether to generate a verbose output.}
+#'  \item{\strong{seed}:}{ random seed for reproducibility (default is NULL).}
 #' }
 #' @return Returns a list with default training options
 #' @export
 getDefaultTrainOpts <- function() {
   TrainOpts <- list(
-    maxiter = 1000,               # Maximum number of iterations
+    maxiter = 5000,               # Maximum number of iterations
     tolerance = 0.1,              # Convergence threshold based on change in the evidence lower bound
     learnFactors = TRUE,          # (bool) learn the number of factors?
-    DropFactorThreshold = 0.03,   # Threshold on fraction of variance explained to drop a factor
-    verbose = F                   # verbosity?
+    DropFactorThreshold = 0.01,   # Threshold on fraction of variance explained to drop a factor
+    verbose = FALSE,              # verbosity?
+    seed = NULL                   # randoom seed
   )
   return(TrainOpts)
 }
@@ -147,10 +151,10 @@ getDefaultTrainOpts <- function() {
 #' @export
 getDefaultDataOpts <- function() {
   DataOpts <- list(
-    delimiter = "\t",            # Delimiter for the data
-    centerFeatures = F,          # Center features to zero mean (does not apply to binary or count views)
-    scaleViews = F,              # Scale views to unit variance (does not apply to binary or count views)
-    removeIncompleteSamples = F  # Remove incomplete samples that are not profiled in all omics?
+    delimiter = "\t",                # Delimiter for the data
+    centerFeatures = FALSE,          # Center features to zero mean (does not apply to binary or count views)
+    scaleViews = FALSE,              # Scale views to unit variance (does not apply to binary or count views)
+    removeIncompleteSamples = FALSE  # Remove incomplete samples that are not profiled in all omics?
   )
   return(DataOpts)
 }

diff --git a/MOFAtools/R/runMOFA.R b/MOFAtools/R/runMOFA.R
@@ -36,7 +36,8 @@ runMOFA <- function(object, DirOptions, ..., mofaPath="mofa") {
     factors = object@ModelOpts$numFactors,
     iter = object@TrainOpts$maxiter,
     dropR2 =  object@TrainOpts$DropFactorThreshold,
-    tolerance = object@TrainOpts$tolerance
+    tolerance = object@TrainOpts$tolerance,
+    seed = object@TrainOpts$seed
   )
 
   # Decide whether to learn factors

diff --git a/MOFAtools/R/subset.R b/MOFAtools/R/subset.R
@@ -24,8 +24,11 @@ subsetFactors <- function(object, factors, keep_intercept=T) {
     }
       else{ stopifnot(all(factors %in% factorNames(object))) }
 
-  if(keep_intercept & object@ModelOpts$learnIntercept == T & !"intercept" %in% factors) factors <- c("intercept", factors)
-  # Subset expectations
+  if (keep_intercept & object@ModelOpts$learnIntercept == T & !"intercept" %in% factors) {
+    factors <- c("intercept", factors)
+  }
+
+  # Subset relevant slots
   object@Expectations$Z <- object@Expectations$Z[,factors, drop=F]
   object@Expectations$AlphaW <- sapply(object@Expectations$AlphaW, function(x) x[factors], simplify = F, USE.NAMES = T)
   object@Expectations$W <- sapply(object@Expectations$W, function(x) x[,factors, drop=F], simplify = F, USE.NAMES = T)
@@ -40,3 +43,41 @@ subsetFactors <- function(object, factors, keep_intercept=T) {
   return(object)
 }
 
+
+
+#' @title Subset samples
+#' @name subsetSamples
+#' @description Method to subset (or sort) samples
+#' @param object a \code{\link{MOFAmodel}} object.
+#' @param samples character vector with the sample names, numeric vector with the sample indices or logical vector with the samples to be kept as TRUE.
+#' @export
+subsetSamples <- function(object, samples) {
+
+  # Sanity checks
+  if (class(object) != "MOFAmodel") stop("'object' has to be an instance of MOFAmodel")
+  stopifnot(length(samples) <= object@Dimensions[["N"]])
+  warning("Removing samples a posteriori is fine for an exploratory analysis, but we recommend removing them before training!")
+
+  # Get samples
+  if (is.character(samples)) {
+    stopifnot(all(samples %in% sampleNames(object)))
+  } else {
+    samples <- sampleNames(object)[samples]
+  }
+
+  # Subset relevant slots
+  object@Expectations$Z <- object@Expectations$Z[samples,, drop=F]
+  object@Expectations$Y <- sapply(object@Expectations$Y, function(x) x[samples,], simplify = F, USE.NAMES = T)
+  object@TrainData <- sapply(object@TrainData, function(x) x[,samples], simplify = F, USE.NAMES = T)
+  object@InputData <- object@InputData[,samples,] 
+  if (length(object@ImputedData)==0) { object@ImputedData <- sapply(object@ImputedData, function(x) x[,samples], simplify = F, USE.NAMES = T)}
+
+  # Modify dimensionality
+  object@Dimensions[["N"]] <- length(samples)
+
+  # Modify sample names in the MOFAobject
+  sampleNames(object) <- samples
+
+  return(object)
+}
+
diff --git a/MOFAtools/data/scMT_data.RData b/MOFAtools/data/scMT_data.RData
diff --git a/MOFAtools/inst/extdata/model15.hdf5 → MOFAtools/inst/extdata/CLL_model.hdf5 b/MOFAtools/inst/extdata/model15.hdf5 → MOFAtools/inst/extdata/CLL_model.hdf5
diff --git a/MOFAtools/inst/extdata/scMT_model.hdf5 b/MOFAtools/inst/extdata/scMT_model.hdf5