Skip to content

Commit

Permalink
adding scMT vignette and subsetSamples
Browse files Browse the repository at this point in the history
  • Loading branch information
rargelaguet committed Apr 9, 2018
1 parent 146c82c commit 0fdc117
Show file tree
Hide file tree
Showing 34 changed files with 498 additions and 115 deletions.
2 changes: 1 addition & 1 deletion MOFAtools/DESCRIPTION
Expand Up @@ -14,7 +14,7 @@ License: GPL (>= 2)
Description: A collection of tools for running and analysing MOFA models.
Encoding: UTF-8
Depends: R (>= 3.4)
Imports: rhdf5, dplyr, tidyr, reshape2, pheatmap, corrplot, ggplot2, ggbeeswarm, methods, scales, GGally, doParallel, RColorBrewer, cowplot, ggrepel, foreach, MultiAssayExperiment
Imports: rhdf5, dplyr, tidyr, reshape2, pheatmap, corrplot, ggplot2, ggbeeswarm, methods, scales, GGally, RColorBrewer, cowplot, ggrepel, MultiAssayExperiment, Biobase, doParallel, foreach
Suggests: knitr, pcaMethods
biocViews:
VignetteBuilder: knitr
Expand Down
3 changes: 3 additions & 0 deletions MOFAtools/NAMESPACE
Expand Up @@ -58,6 +58,7 @@ export(qualityControl)
export(runMOFA)
export(sampleNames)
export(subsetFactors)
export(subsetSamples)
export(trainCurveELBO)
export(trainCurveFactors)
export(viewNames)
Expand All @@ -79,6 +80,7 @@ exportMethods(featureNames)
exportMethods(sampleNames)
exportMethods(viewNames)
import(MultiAssayExperiment)
import(RColorBrewer)
import(doParallel)
import(dplyr)
import(foreach)
Expand All @@ -90,6 +92,7 @@ import(methods)
import(pheatmap)
import(reshape2)
import(scales)
importFrom(Biobase,phenoData)
importFrom(corrplot,corrplot)
importFrom(cowplot,plot_grid)
importFrom(grDevices,colorRampPalette)
Expand Down
65 changes: 52 additions & 13 deletions MOFAtools/R/getMethods.R
Expand Up @@ -179,9 +179,16 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram

# Sanity checks
if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
if (length(object@ImputedData)==0) {
stop("No imputed data found. Please run imputeMissing(MOFAobject) first")
}

# Get views
if (paste0(views,collapse="") == "all") { views <- viewNames(object) } else { stopifnot(all(views %in% viewNames(object))) }
if (paste0(views,collapse="") == "all") {
views <- viewNames(object)
} else {
stopifnot(all(views %in% viewNames(object)))
}

# Get features
if (class(features)=="list") {
Expand All @@ -194,7 +201,7 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
}
}

# Fetch data
# Fetch imputed data
ImputedData <- object@ImputedData[views]
ImputedData <- lapply(1:length(ImputedData), function(m) ImputedData[[m]][features[[m]],,drop=F]); names(ImputedData) <- views

Expand All @@ -203,9 +210,10 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
tmp <- lapply(views, function(m) { tmp <- reshape2::melt(ImputedData[[m]]); colnames(tmp) <- c("feature","sample","value"); tmp <- cbind(view=m,tmp); return(tmp) })
ImputedData <- do.call(rbind,tmp)
ImputedData[,c("view","feature","sample")] <- sapply(ImputedData[,c("view","feature","sample")], as.character)
} else if ((length(views)==1) && (as.data.frame==F)) {
ImputedData <- ImputedData[[views]]
}
}
# else if ((length(views)==1) && (as.data.frame==F)) {
# ImputedData <- ImputedData[[views]]
# }

return(ImputedData)
}
Expand All @@ -215,20 +223,51 @@ getImputedData <- function(object, views = "all", features = "all", as.data.fram
#' @description This function extracts covariates from the \code{colData} in the input \code{MultiAssayExperiment} object. \cr
#' Note that if you did not use \code{MultiAssayExperiment} to create your \code{\link{createMOFAobject}}, this function will not work.
#' @param object a \code{\link{MOFAmodel}} object.
#' @param covariates names of the covariates
#' @param covariate names of the covariate
#' @import MultiAssayExperiment
#' @importFrom Biobase phenoData
#' @export
#'
getCovariates <- function(object, covariates) {
getCovariates <- function(object, covariate) {

# Sanity checks
if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
if(class(object@InputData) != "MultiAssayExperiment") stop("To work with covariates, InputData has to be specified in form of a MultiAssayExperiment")
stopifnot(all(covariates %in% colnames(colData(object@InputData))))
if(class(object@InputData) != "MultiAssayExperiment") {
stop("To work with covariates, InputData has to be specified in form of a MultiAssayExperiment")
}

# Get covariates
covariates <- colData(object@InputData)[,covariates]
# Check that samples from MOFAobject and MultiAssayExperiment are consistent.
samples <- sampleNames(object)
if (!all(samples %in% rownames(colData(object@InputData)))) {
stop("There are samples in the model which are not detected in the MultiAssayExperiment object")
} else {
mae <- object@InputData[,samples]
}

# Extract the covariate from colData or in the phenoData of specific assays
out <- list()
if (covariate %in% colnames(colData(mae))) {
covariate_in_coldata <- TRUE
out[[covariate]] <- colData(mae)[,covariate]
names(out[[covariate]]) <- sampleNames(object)
} else {
for (view in viewNames(object)) {
phenodata <- phenoData(mae@ExperimentList[[view]])
if (covariate %in% colnames(phenodata)) {
out[[paste(view,covariate,sep="_")]] <- phenodata[[covariate]]
names(out[[paste(view,covariate,sep="_")]]) <- rownames(phenodata)
}
}
}

# Final sanity check
if (length(out) == 0) {
stop("Covariate not found in the MultiAssayExperiment object")
} else if (length(out)>1) {
stop("Covariate ambiguously found in the phenoData of multiple assays. Please, extract it manually.")
} else {
return(out[[1]])
}

return(covariates)
}

#' @title getExpectations
Expand Down
4 changes: 3 additions & 1 deletion MOFAtools/R/plotData.R
Expand Up @@ -30,7 +30,7 @@
#' @import pheatmap
#' @examples
#' # Load example of MOFA model
#' model <- loadModel(system.file("extdata", "model15.hdf5", package = "MOFAtools"))
#' model <- loadModel(system.file("extdata", "CLL_model.hdf5", package = "MOFAtools"))
#'
#' # Plot top 50 features for factor 1 in the mRNA view
#' plotDataHeatmap(model, "mRNA", 1, 50)
Expand Down Expand Up @@ -118,6 +118,8 @@ plotDataHeatmap <- function(object, view, factor, features = 50, includeWeights
#' a character giving the name of a feature present in the training data,
#' a character giving the same of a covariate (only if using MultiAssayExperiment as input),
#' or a vector of the same length as the number of samples specifying discrete groups.
#' @param name_color name for the color legend
#' @param name_shape name for the shape legend
#' @details One of the first steps for the annotation of factors is to visualise the loadings using \code{\link{plotWeights}} or \code{\link{plotTopWeights}},
#' which show you which features drive the heterogeneity of each factor.
#' However, one might also be interested in visualising the direct relationship between features and factors, rather than looking at "abstract" weights. \cr
Expand Down
40 changes: 22 additions & 18 deletions MOFAtools/R/plotFactors.R
Expand Up @@ -115,9 +115,7 @@ plotFactorHist <- function(object, factor, group_by = NULL, group_names = "", al
#' This function generates a Beeswarm plot of the sample values in a given latent factor. \cr
#' Similar functions are \code{\link{plotFactorScatter}} for doing scatter plots and \code{\link{plotFactorHist}} for doing histogram plots
#' @return Returns a \code{ggplot2} object
#' @import ggplot2
#' @import ggbeeswarm
#' @import grDevices
#' @import ggplot2 ggbeeswarm RColorBrewer grDevices
#' @export
plotFactorBeeswarm <- function(object, factors, color_by = NULL, name_color = "", showMissing = FALSE) {

Expand All @@ -140,38 +138,36 @@ plotFactorBeeswarm <- function(object, factors, color_by = NULL, name_color = ""
if(color_by %in% Reduce(union,featureNames)) {
viewidx <- which(sapply(featureNames, function(vnm) color_by %in% vnm))
color_by <- TrainData[[viewidx]][color_by,]
} else if(class(object@InputData) == "MultiAssayExperiment"){
} else if(class(object@InputData) == "MultiAssayExperiment") {
color_by <- getCovariates(object, color_by)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
else stop("'color_by' was specified but it was not recognised, please read the documentation")
# It is a vector of length N
} else if (length(color_by) > 1) {
stopifnot(length(color_by) == N)
# color_by <- as.factor(color_by)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
} else {
color_by <- rep(TRUE,N)
colorLegend <- F
}
names(color_by) <- sampleNames(object)

if(length(unique(color_by)) < 5) color_by <- as.factor(color_by)

Z$color_by <- color_by[Z$sample]

# Remove samples with missing values
if (showMissing==F) {
Z <- Z[!(is.na(color_by) | is.nan(color_by) | color_by=="NaN"),]
# Z <- Z[complete.cases(Z),]
# Z <- Z[!is.na(Z$value),]
}

# Generate plot
p <- ggplot(Z, aes_string(x=0, y="value")) +
ggbeeswarm::geom_quasirandom(aes(color=color_by)) +
ylab("Factor value") + xlab("") +
scale_x_continuous(breaks=NULL) +
facet_wrap(~factor, scales="free") +
theme(
axis.text.y = element_text(size = rel(1.5), color = "black"),
axis.title.y = element_text(size = rel(1.5), color = "black"),
Expand All @@ -187,14 +183,20 @@ plotFactorBeeswarm <- function(object, factors, color_by = NULL, name_color = ""
legend.position = "right",
legend.direction = "vertical",
legend.key = element_blank()
) + facet_wrap(~factor, scales="free")
)

# If color_by is numeric, define the default gradient
# if (is.numeric(color_by)) { p <- p + scale_color_gradientn(colors=terrain.colors(10)) }
if (is.numeric(color_by)) { p <- p + scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(n = 5, name = "RdYlBu")))(10)) }
if (is.numeric(color_by)) {
p <- p + scale_color_gradientn(colors = colorRampPalette(rev(brewer.pal(n=5, name="RdYlBu")))(10))
}

# Add legend
if (colorLegend) { p <- p + labs(color=name_color) } else { p <- p + guides(color = FALSE) }
if (colorLegend) {
p <- p + labs(color=name_color)
} else {
p <- p + guides(color = FALSE)
}

return(p)
}
Expand Down Expand Up @@ -235,8 +237,9 @@ plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL

# Collect relevant data
N <- object@Dimensions[["N"]]
Z <- getExpectations(object, "Z", "E")
Z <- getFactors(object, factors = factors)
factors <- as.character(factors)
samples <- sampleNames(object)

# Set color
colorLegend <- T
Expand All @@ -249,10 +252,12 @@ plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL
if(color_by %in% Reduce(union,featureNames)) {
viewidx <- which(sapply(featureNames, function(vnm) color_by %in% vnm))
color_by <- TrainData[[viewidx]][color_by,]
} else if(class(object@InputData) == "MultiAssayExperiment"){
} else if(class(object@InputData) == "MultiAssayExperiment") {
color_by <- getCovariates(object, color_by)
}
else stop("'color_by' was specified but it was not recognised, please read the documentation")
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}

# It is a vector of length N
} else if (length(color_by) > 1) {
stopifnot(length(color_by) == N)
Expand Down Expand Up @@ -281,7 +286,6 @@ plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL
}
else stop("'shape_by' was specified but it was not recognised, please read the documentation")
# It is a vector of length N
# It is a vector of length N
} else if (length(shape_by) > 1) {
stopifnot(length(shape_by) == N)
} else {
Expand Down
22 changes: 13 additions & 9 deletions MOFAtools/R/prepareMOFA.R
Expand Up @@ -15,7 +15,9 @@
#' @export
#' @examples
#' # Generate a random data set
#' data <- list("gaussian_view"=matrix(rnorm(100,mean=0,sd=1),10,10), "poisson_view"=matrix(rpois(100, lambda=1),10,10))
#' gaussian_data <- matrix(rnorm(100,mean=0,sd=1),10,10)
#' poisson_data <- matrix(rpois(100, lambda=1),10,10)
#' data <- list("gaussian_view"=gaussian_data, "poisson_view"=poisson_data)
#' # Create a MOFA object
#' MOFAobject <- createMOFA(data)
#' # Define I/O options
Expand All @@ -24,7 +26,7 @@
#' DataOptions <- getDefaultDataOpts()
#' # Define Model Options
#' ModelOptions <- getDefaultModelOpts(MOFAobject)
#' ModelOptions$likelihood <- ("gaussian","poisson")
#' ModelOptions$likelihood <- c("gaussian","poisson")
#' ModelOptions <- getDefaultModelOpts(MOFAobject)
#' # Define Training Options
#' TrainOptions <- getDefaultTrainOpts()
Expand Down Expand Up @@ -112,16 +114,18 @@ prepareMOFA <- function(object, DirOptions, DataOptions = NULL, ModelOptions = N
#' \item{\strong{DropFactorThreshold}:}{ numeric indicating the threshold on fraction of variance explained to consider a factor inactive and drop it from the model.
#' For example, a value of 0.01 implies that factors explaining less than 1\% of variance (in each view) will be dropped.}
#' \item{\strong{verbose}:}{ logical indicating whether to generate a verbose output.}
#' \item{\strong{seed}:}{ random seed for reproducibility (default is NULL).}
#' }
#' @return Returns a list with default training options
#' @export
getDefaultTrainOpts <- function() {
TrainOpts <- list(
maxiter = 1000, # Maximum number of iterations
maxiter = 5000, # Maximum number of iterations
tolerance = 0.1, # Convergence threshold based on change in the evidence lower bound
learnFactors = TRUE, # (bool) learn the number of factors?
DropFactorThreshold = 0.03, # Threshold on fraction of variance explained to drop a factor
verbose = F # verbosity?
DropFactorThreshold = 0.01, # Threshold on fraction of variance explained to drop a factor
verbose = FALSE, # verbosity?
seed = NULL # randoom seed
)
return(TrainOpts)
}
Expand All @@ -147,10 +151,10 @@ getDefaultTrainOpts <- function() {
#' @export
getDefaultDataOpts <- function() {
DataOpts <- list(
delimiter = "\t", # Delimiter for the data
centerFeatures = F, # Center features to zero mean (does not apply to binary or count views)
scaleViews = F, # Scale views to unit variance (does not apply to binary or count views)
removeIncompleteSamples = F # Remove incomplete samples that are not profiled in all omics?
delimiter = "\t", # Delimiter for the data
centerFeatures = FALSE, # Center features to zero mean (does not apply to binary or count views)
scaleViews = FALSE, # Scale views to unit variance (does not apply to binary or count views)
removeIncompleteSamples = FALSE # Remove incomplete samples that are not profiled in all omics?
)
return(DataOpts)
}
Expand Down
3 changes: 2 additions & 1 deletion MOFAtools/R/runMOFA.R
Expand Up @@ -36,7 +36,8 @@ runMOFA <- function(object, DirOptions, ..., mofaPath="mofa") {
factors = object@ModelOpts$numFactors,
iter = object@TrainOpts$maxiter,
dropR2 = object@TrainOpts$DropFactorThreshold,
tolerance = object@TrainOpts$tolerance
tolerance = object@TrainOpts$tolerance,
seed = object@TrainOpts$seed
)

# Decide whether to learn factors
Expand Down
45 changes: 43 additions & 2 deletions MOFAtools/R/subset.R
Expand Up @@ -24,8 +24,11 @@ subsetFactors <- function(object, factors, keep_intercept=T) {
}
else{ stopifnot(all(factors %in% factorNames(object))) }

if(keep_intercept & object@ModelOpts$learnIntercept == T & !"intercept" %in% factors) factors <- c("intercept", factors)
# Subset expectations
if (keep_intercept & object@ModelOpts$learnIntercept == T & !"intercept" %in% factors) {
factors <- c("intercept", factors)
}

# Subset relevant slots
object@Expectations$Z <- object@Expectations$Z[,factors, drop=F]
object@Expectations$AlphaW <- sapply(object@Expectations$AlphaW, function(x) x[factors], simplify = F, USE.NAMES = T)
object@Expectations$W <- sapply(object@Expectations$W, function(x) x[,factors, drop=F], simplify = F, USE.NAMES = T)
Expand All @@ -40,3 +43,41 @@ subsetFactors <- function(object, factors, keep_intercept=T) {
return(object)
}



#' @title Subset samples
#' @name subsetSamples
#' @description Method to subset (or sort) samples
#' @param object a \code{\link{MOFAmodel}} object.
#' @param samples character vector with the sample names, numeric vector with the sample indices or logical vector with the samples to be kept as TRUE.
#' @export
subsetSamples <- function(object, samples) {

# Sanity checks
if (class(object) != "MOFAmodel") stop("'object' has to be an instance of MOFAmodel")
stopifnot(length(samples) <= object@Dimensions[["N"]])
warning("Removing samples a posteriori is fine for an exploratory analysis, but we recommend removing them before training!")

# Get samples
if (is.character(samples)) {
stopifnot(all(samples %in% sampleNames(object)))
} else {
samples <- sampleNames(object)[samples]
}

# Subset relevant slots
object@Expectations$Z <- object@Expectations$Z[samples,, drop=F]
object@Expectations$Y <- sapply(object@Expectations$Y, function(x) x[samples,], simplify = F, USE.NAMES = T)
object@TrainData <- sapply(object@TrainData, function(x) x[,samples], simplify = F, USE.NAMES = T)
object@InputData <- object@InputData[,samples,]
if (length(object@ImputedData)==0) { object@ImputedData <- sapply(object@ImputedData, function(x) x[,samples], simplify = F, USE.NAMES = T)}

# Modify dimensionality
object@Dimensions[["N"]] <- length(samples)

# Modify sample names in the MOFAobject
sampleNames(object) <- samples

return(object)
}

Binary file added MOFAtools/data/scMT_data.RData
Binary file not shown.
File renamed without changes.
Binary file added MOFAtools/inst/extdata/scMT_model.hdf5
Binary file not shown.

0 comments on commit 0fdc117

Please sign in to comment.