Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3442917
Showing
29 changed files
with
2,126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Package: tcgsaseq | ||
Type: Package | ||
Title: What the Package Does (Title Case) | ||
Version: 0.1 | ||
Date: 2016-03-31 | ||
Author: Who wrote it | ||
Maintainer: Who to complain to <yourfault@somewhere.net> | ||
Description: More about what it does (maybe more than one line) | ||
License: What license is it under? | ||
LazyData: TRUE |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
exportPattern("^[[:alpha:]]+") |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' TcGSAseq: a package to perform Time-course Gene Set Analysis of RNA-seq data | ||
#' | ||
#' Gene set analysis of longitudinal RNA-seq data with varaince component score | ||
#' test acocunting for data heteroscedasticity through precision weights. | ||
#' | ||
#' The main functions of the TcGSAseq package is \code{\link{tcgsa_seq}} | ||
#' | ||
#' @docType package | ||
#' | ||
#' @references Agniel D, Hejblum BP, Variance component score test for | ||
#' time-course gene set analysis of longitudinal RNA-seq data, \emph{submitted}, 2016. | ||
#' | ||
#' @name TcGSAseq | ||
#' @importFrom GSA GSA.read.gmt | ||
#' | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#'Power for matrix elements | ||
#' | ||
#'Set each element of matrix \code{x} to the power \code{n} | ||
#' | ||
#'@param x a matrix | ||
#' | ||
#'@param n a real number | ||
#' | ||
#'@keywords internal | ||
|
||
"%^%" <- function(x, n){ | ||
with(eigen(x), vectors %*% (values^n * t(vectors))) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#' Computing simulations results | ||
#' | ||
#'@examples | ||
#' | ||
#'\dontrun{ | ||
#'data_sims <- data_sim_voomlike(maxGSsize=300, minGSsize=30) | ||
#'res <- compute_sim_voomlike(counts = data_sims$counts, | ||
#' design = data_sims$design, | ||
#' gs_keep = data_sims$gs_keep, | ||
#' indiv = data_sims$indiv, | ||
#' alternative=FALSE, | ||
#' RE_indiv_sd=0.1) | ||
#'res_all <- cbind(res$res_voom, res$res_perso, res$res_noweights) | ||
#'colnames(res_all) <- paste0(rep(c("asym", "perm", "camera"), 3), | ||
#' rep(c("_voom", "_perso", "_noweights"), each=3)) | ||
#'} | ||
#' | ||
#'@keywords internal | ||
#' | ||
#'@export | ||
compute_sim_voomlike <- function(counts, design, gs_keep, indiv, alternative=FALSE, | ||
fixed_eff = 0.5, fixed_eff_sd = 0.2, | ||
rand_eff_sd = 0.25, RE_indiv_sd=NULL, eps_sd=0.05){ | ||
|
||
logcoutspm <- apply(counts, MARGIN=2,function(v){log2((v+0.5)/(sum(v)+1)*10^6)}) | ||
|
||
if(alternative){ | ||
n <- ncol(counts) | ||
nb_g <- nrow(counts) | ||
beta_time <- rnorm(1, mean=fixed_eff, sd=fixed_eff_sd) | ||
gamma_genes_time <- matrix(rnorm(nb_g, mean=0, sd=rand_eff_sd), ncol=1) | ||
logcoutspm_alt <- logcoutspm + (gamma_genes_time+beta_time)%*%design[, "time"] | ||
|
||
# for(gs in gs_keep){ | ||
# nb_g_gs <- length(gs) | ||
# gamma_genes_time <- matrix(rnorm(nb_g_gs, mean = 0, sd = rand_eff_sd), ncol = 1) | ||
# logcoutspm_alt[gs,] <- logcoutspm[gs,] + (gamma_genes_time+beta_time)%*%design[, "time"] | ||
# } | ||
|
||
err <- matrix(rnorm(n*nb_g, mean = 0, sd = eps_sd), nrow = nb_g, ncol = n) | ||
logcoutspm_alt <- logcoutspm_alt + err | ||
|
||
}else{ | ||
logcoutspm_alt <- logcoutspm | ||
} | ||
|
||
#Individual Random effects | ||
if(!is.null(RE_indiv_sd)){ | ||
RE_indiv <- rnorm(n=length(unique(indiv)), mean=0, sd=RE_indiv_sd) | ||
logcoutspm_alt <- logcoutspm_alt + RE_indiv[as.numeric(as.character(indiv))] | ||
} | ||
|
||
|
||
# Weights estimation ---- | ||
######################### | ||
|
||
#png(width=5.5, height=4.5, units="in", file="voom_ex.png", res=300) | ||
#w_voom <- voom_weights(x=design, y=t(counts2), doPlot = TRUE, preprocessed = FALSE) | ||
#dev.off() | ||
w_voom <- voom_weights(x=design, y=t(logcoutspm_alt), doPlot = FALSE, preprocessed = TRUE) | ||
|
||
w_perso <- sp_weights(x=design[,-which(colnames(design)=="time")], | ||
y=t(logcoutspm_alt), | ||
phi = cbind(design[, "time"]), | ||
doPlot = FALSE, | ||
exact=FALSE, | ||
preprocessed = TRUE | ||
) | ||
|
||
# Testing ---- | ||
############## | ||
res_voom <- NULL | ||
res_perso <- NULL | ||
res_noweights <- NULL | ||
gs_ind_list <- limma::ids2indices(gs_keep, identifier=rownames(logcoutspm_alt)) | ||
cor_limma <- limma::duplicateCorrelation(logcoutspm_alt, design, block = indiv) | ||
|
||
for(gs in 1:length(gs_keep)){ | ||
gs_test <- gs_keep[[gs]] | ||
y_test <- logcoutspm_alt[gs_test,] | ||
x_test <- design[,-which(colnames(design)=="time")] | ||
w_test_perso <- t(w_perso[,as.numeric(gs_test)]) | ||
w_test_voom <- t(w_voom[,as.numeric(gs_test)]) | ||
phi_test <- cbind(design[, "time"]) | ||
|
||
## Fitting the null model | ||
n <- ncol(y_test) | ||
g <- length(gs_test) | ||
y_T_vect <- as.vector(y_test) | ||
indiv_vect <- rep(indiv, g) | ||
g_vect <- as.factor(rep(1:g, n)) | ||
x_vect <-do.call(rbind, replicate(g, x_test, simplify=FALSE)) | ||
phi_vect <- do.call(rbind, replicate(g, phi_test, simplify=FALSE)) | ||
|
||
res_voom <- rbind(res_voom, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test, | ||
Sigma_xi = as.matrix(diag(ncol(phi_test))), | ||
w = w_test_voom)[["pval"]], | ||
"permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test, | ||
Sigma_xi = as.matrix(diag(ncol(phi_test))), | ||
w = w_test_voom)[["pval"]], | ||
"camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]], | ||
design=design, contrast=3, | ||
weights = t(w_voom))$PValue, | ||
"roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]], | ||
design=design, contrast=3, | ||
block = indiv, correlation = cor_limma$consensus.correlation, | ||
weights = t(w_voom))$p.value["Mixed", "P.Value"]) | ||
) | ||
res_perso <- rbind(res_perso, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test, | ||
Sigma_xi = as.matrix(diag(ncol(phi_test))), | ||
w = w_test_perso)[["pval"]], | ||
"permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test, | ||
Sigma_xi = as.matrix(diag(ncol(phi_test))), | ||
w = w_test_perso)[["pval"]], | ||
"camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]], | ||
design=design, contrast=3, | ||
weights = t(w_perso))$PValue, | ||
"roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]], | ||
design=design, contrast=3, | ||
block = indiv, correlation = cor_limma$consensus.correlation, | ||
weights = t(w_perso))$p.value["Mixed", "P.Value"]) | ||
) | ||
res_noweights <- rbind(res_noweights, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test, | ||
Sigma_xi = as.matrix(diag(ncol(phi_test))), | ||
w = rep(1, length(gs_test)))[["pval"]], | ||
"permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test, | ||
Sigma_xi = as.matrix(diag(ncol(phi_test))), | ||
w = rep(1, length(gs_test)))[["pval"]], | ||
"camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]], | ||
design=design, contrast=3)$PValue, | ||
"roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]], | ||
block = indiv, correlation = cor_limma$consensus.correlation, | ||
design=design, contrast=3)$p.value["Mixed", "P.Value"]) | ||
) | ||
|
||
} | ||
|
||
return(list("res_voom"=res_voom, "res_perso"=res_perso, "res_noweights"=res_noweights)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#'Data simulation function | ||
#' | ||
#'Adapted from the supplementary material from Law \emph{et al}. | ||
#' | ||
#'@references Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision | ||
#'weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome | ||
#'Biology}, 15(2), R29. | ||
#' | ||
#' | ||
#'@examples | ||
#'\dontrun{ | ||
#'setseed(123) | ||
#'data_sims <- data_sim_voomlike(maxGSsize=300) | ||
#'} | ||
#'@keywords internal | ||
#' | ||
#'@export | ||
data_sim_voomlike <- function(seed=NULL, maxGSsize=400, minGSsize=30){ | ||
|
||
|
||
############################################################################ | ||
# Get distribution function of abundance proportions | ||
# This distribution was generated from a real dataset | ||
data(qAbundanceDist, envir = environment()) | ||
qAbundanceDist <- get(qAbundanceDist) | ||
|
||
# Generate baseline proportions for desired number of genes ----- | ||
ngenes <- 10000 | ||
baselineprop <- qAbundanceDist( (1:ngenes)/(ngenes+1) ) | ||
baselineprop <- baselineprop/sum(baselineprop) | ||
|
||
# Design ---- | ||
n <- 18 | ||
n1 <- n/2 | ||
n2 <- n1 | ||
nindiv <- 6 | ||
ngroup <- 2 | ||
ntime <- 3 | ||
group <- factor(rep(1:2, each=n/ngroup)) | ||
indiv <- factor(rep(1:nindiv, each=n/nindiv)) | ||
time <- rep(1:ntime, nindiv) | ||
design <- model.matrix(~ group + time) | ||
#design <- design[, 1, drop=FALSE] | ||
nlibs <- n | ||
|
||
# Library size ---- | ||
# Use equal or unequal library sizes | ||
equal <- FALSE | ||
if(equal){ | ||
expected.lib.size <- rep(11e6,nlibs) | ||
} else { | ||
expected.lib.size <- 20e6 * rep(c(1,0.1), n1) | ||
} | ||
|
||
# Set seed ---- | ||
if(!is.null(seed)){set.seed(seed*54321)} | ||
u <- runif(100) | ||
|
||
# Expected counts, group basis ---- | ||
i <- sample(1:ngenes,200) | ||
i1 <- i[1:100] | ||
i2 <- i[101:200] | ||
fc <- 2 | ||
baselineprop1 <- baselineprop2 <- baselineprop | ||
baselineprop1[i1] <- baselineprop1[i1]*fc | ||
baselineprop2[i2] <- baselineprop2[i2]*fc | ||
mu0.1 <- matrix(baselineprop1,ngenes,1) %*% matrix(expected.lib.size[1:n1],1,n1) | ||
mu0.2 <- matrix(baselineprop2,ngenes,1) %*% matrix(expected.lib.size[(n1+1):(n1+n2)],1,n2) | ||
mu0 <- cbind(mu0.1,mu0.2) | ||
status <- rep(0,ngenes) | ||
status[i1] <- -1 | ||
status[i2] <- 1 | ||
|
||
# Biological variation ---- | ||
BCV0 <- 0.2+1/sqrt(mu0) | ||
|
||
# Use inverse chi-square or log-normal dispersion | ||
invChisq <- TRUE | ||
|
||
if(invChisq){ | ||
df.BCV <- 40 | ||
BCV <- BCV0*sqrt(df.BCV/rchisq(ngenes,df=df.BCV)) | ||
} else { | ||
BCV <- BCV0*exp( rnorm(ngenes,mean=0,sd=0.25)/2 ) | ||
} | ||
if(NCOL(BCV)==1) BCV <- matrix(BCV, ngenes, nlibs) | ||
shape <- 1/BCV^2 | ||
scale <- mu0/shape | ||
mu <- matrix(rgamma(ngenes*nlibs,shape=shape,scale=scale), ngenes, nlibs) | ||
|
||
# Technical variation | ||
counts <- matrix(rpois(ngenes*nlibs,lambda=mu), ngenes, nlibs) | ||
|
||
# Filter | ||
keep <- rowSums(counts)>=10 | ||
nkeep <- sum(keep) | ||
counts2 <- counts[keep,] | ||
|
||
S_temp <- cor(t(counts2)) | ||
rownames(S_temp) <- as.character(1:nrow(S_temp)) | ||
colnames(S_temp) <- as.character(1:ncol(S_temp)) | ||
rownames(counts2) <- rownames(S_temp) | ||
GS <- list() | ||
for(i in 1:ncol(S_temp)){ | ||
GS[[i]] <-which(abs(S_temp[,1])>0.8) | ||
#if(inherits(GS[[i]], "try-error")){browser()} | ||
S_temp <- S_temp[-GS[[i]], -GS[[i]]] | ||
if(is.null(dim(S_temp)) || nrow(S_temp)==0){ | ||
break() | ||
} | ||
} | ||
gs_keep <- lapply(GS[sapply(GS, length)<maxGSsize & sapply(GS, length)>minGSsize], names) | ||
|
||
return(list("counts"=counts2, "design"=design, "gs_keep"=gs_keep, "indiv"=indiv)) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#' Gene abundance proportion distribution of RNA-seq data. | ||
#' | ||
#' An example of gene abundance proportion distribution function of RNA-seq data, | ||
#' generated from a real dataset. See supplmenetary material of Law \emph{et al}. | ||
#' | ||
#'@name qAbundanceDist | ||
#'@rdname qAbundanceDist | ||
#'@aliases qAbundanceDist | ||
#' | ||
#'@usage data(qAbundanceDist) | ||
#' | ||
#'@references Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision | ||
#'weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome | ||
#'Biology}, 15(2), R29. | ||
#' | ||
#' @format A function: \code{qAbundanceDist}. | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' # Get distribution function of abundance proportions | ||
#' # This distribution was generated from a real dataset | ||
#' #load(url("http://bioinf.wehi.edu.au/voom/qAbundanceDist.RData")) | ||
#' data("qAbundanceDist") | ||
#' | ||
#' # Generate baseline proportions for desired number of genes | ||
#' ngenes <- 10000 | ||
#' baselineprop <- qAbundanceDist( (1:ngenes)/(ngenes+1) ) | ||
#' baselineprop <- baselineprop/sum(baselineprop) | ||
#' } | ||
#' | ||
#' | ||
#' @source \url{http://bioinf.wehi.edu.au/voom/} | ||
#' | ||
#' @keywords datasets | ||
#' @docType data | ||
NULL | ||
|
Oops, something went wrong.