Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
denisagniel committed Mar 31, 2016
0 parents commit 3442917
Show file tree
Hide file tree
Showing 29 changed files with 2,126 additions and 0 deletions.
10 changes: 10 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,10 @@
Package: tcgsaseq
Type: Package
Title: What the Package Does (Title Case)
Version: 0.1
Date: 2016-03-31
Author: Who wrote it
Maintainer: Who to complain to <yourfault@somewhere.net>
Description: More about what it does (maybe more than one line)
License: What license is it under?
LazyData: TRUE
340 changes: 340 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions NAMESPACE
@@ -0,0 +1 @@
exportPattern("^[[:alpha:]]+")
Binary file added R/.DS_Store
Binary file not shown.
16 changes: 16 additions & 0 deletions R/TcGSAseq-package.R
@@ -0,0 +1,16 @@
#' TcGSAseq: a package to perform Time-course Gene Set Analysis of RNA-seq data
#'
#' Gene set analysis of longitudinal RNA-seq data with varaince component score
#' test acocunting for data heteroscedasticity through precision weights.
#'
#' The main functions of the TcGSAseq package is \code{\link{tcgsa_seq}}
#'
#' @docType package
#'
#' @references Agniel D, Hejblum BP, Variance component score test for
#' time-course gene set analysis of longitudinal RNA-seq data, \emph{submitted}, 2016.
#'
#' @name TcGSAseq
#' @importFrom GSA GSA.read.gmt
#'
NULL
13 changes: 13 additions & 0 deletions R/TcGSAseq_internal.R
@@ -0,0 +1,13 @@
#'Power for matrix elements
#'
#'Set each element of matrix \code{x} to the power \code{n}
#'
#'@param x a matrix
#'
#'@param n a real number
#'
#'@keywords internal

"%^%" <- function(x, n){
with(eigen(x), vectors %*% (values^n * t(vectors)))
}
139 changes: 139 additions & 0 deletions R/compute_sim_voomlike.R
@@ -0,0 +1,139 @@
#' Computing simulations results
#'
#'@examples
#'
#'\dontrun{
#'data_sims <- data_sim_voomlike(maxGSsize=300, minGSsize=30)
#'res <- compute_sim_voomlike(counts = data_sims$counts,
#' design = data_sims$design,
#' gs_keep = data_sims$gs_keep,
#' indiv = data_sims$indiv,
#' alternative=FALSE,
#' RE_indiv_sd=0.1)
#'res_all <- cbind(res$res_voom, res$res_perso, res$res_noweights)
#'colnames(res_all) <- paste0(rep(c("asym", "perm", "camera"), 3),
#' rep(c("_voom", "_perso", "_noweights"), each=3))
#'}
#'
#'@keywords internal
#'
#'@export
compute_sim_voomlike <- function(counts, design, gs_keep, indiv, alternative=FALSE,
fixed_eff = 0.5, fixed_eff_sd = 0.2,
rand_eff_sd = 0.25, RE_indiv_sd=NULL, eps_sd=0.05){

logcoutspm <- apply(counts, MARGIN=2,function(v){log2((v+0.5)/(sum(v)+1)*10^6)})

if(alternative){
n <- ncol(counts)
nb_g <- nrow(counts)
beta_time <- rnorm(1, mean=fixed_eff, sd=fixed_eff_sd)
gamma_genes_time <- matrix(rnorm(nb_g, mean=0, sd=rand_eff_sd), ncol=1)
logcoutspm_alt <- logcoutspm + (gamma_genes_time+beta_time)%*%design[, "time"]

# for(gs in gs_keep){
# nb_g_gs <- length(gs)
# gamma_genes_time <- matrix(rnorm(nb_g_gs, mean = 0, sd = rand_eff_sd), ncol = 1)
# logcoutspm_alt[gs,] <- logcoutspm[gs,] + (gamma_genes_time+beta_time)%*%design[, "time"]
# }

err <- matrix(rnorm(n*nb_g, mean = 0, sd = eps_sd), nrow = nb_g, ncol = n)
logcoutspm_alt <- logcoutspm_alt + err

}else{
logcoutspm_alt <- logcoutspm
}

#Individual Random effects
if(!is.null(RE_indiv_sd)){
RE_indiv <- rnorm(n=length(unique(indiv)), mean=0, sd=RE_indiv_sd)
logcoutspm_alt <- logcoutspm_alt + RE_indiv[as.numeric(as.character(indiv))]
}


# Weights estimation ----
#########################

#png(width=5.5, height=4.5, units="in", file="voom_ex.png", res=300)
#w_voom <- voom_weights(x=design, y=t(counts2), doPlot = TRUE, preprocessed = FALSE)
#dev.off()
w_voom <- voom_weights(x=design, y=t(logcoutspm_alt), doPlot = FALSE, preprocessed = TRUE)

w_perso <- sp_weights(x=design[,-which(colnames(design)=="time")],
y=t(logcoutspm_alt),
phi = cbind(design[, "time"]),
doPlot = FALSE,
exact=FALSE,
preprocessed = TRUE
)

# Testing ----
##############
res_voom <- NULL
res_perso <- NULL
res_noweights <- NULL
gs_ind_list <- limma::ids2indices(gs_keep, identifier=rownames(logcoutspm_alt))
cor_limma <- limma::duplicateCorrelation(logcoutspm_alt, design, block = indiv)

for(gs in 1:length(gs_keep)){
gs_test <- gs_keep[[gs]]
y_test <- logcoutspm_alt[gs_test,]
x_test <- design[,-which(colnames(design)=="time")]
w_test_perso <- t(w_perso[,as.numeric(gs_test)])
w_test_voom <- t(w_voom[,as.numeric(gs_test)])
phi_test <- cbind(design[, "time"])

## Fitting the null model
n <- ncol(y_test)
g <- length(gs_test)
y_T_vect <- as.vector(y_test)
indiv_vect <- rep(indiv, g)
g_vect <- as.factor(rep(1:g, n))
x_vect <-do.call(rbind, replicate(g, x_test, simplify=FALSE))
phi_vect <- do.call(rbind, replicate(g, phi_test, simplify=FALSE))

res_voom <- rbind(res_voom, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test,
Sigma_xi = as.matrix(diag(ncol(phi_test))),
w = w_test_voom)[["pval"]],
"permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test,
Sigma_xi = as.matrix(diag(ncol(phi_test))),
w = w_test_voom)[["pval"]],
"camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]],
design=design, contrast=3,
weights = t(w_voom))$PValue,
"roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]],
design=design, contrast=3,
block = indiv, correlation = cor_limma$consensus.correlation,
weights = t(w_voom))$p.value["Mixed", "P.Value"])
)
res_perso <- rbind(res_perso, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test,
Sigma_xi = as.matrix(diag(ncol(phi_test))),
w = w_test_perso)[["pval"]],
"permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test,
Sigma_xi = as.matrix(diag(ncol(phi_test))),
w = w_test_perso)[["pval"]],
"camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]],
design=design, contrast=3,
weights = t(w_perso))$PValue,
"roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]],
design=design, contrast=3,
block = indiv, correlation = cor_limma$consensus.correlation,
weights = t(w_perso))$p.value["Mixed", "P.Value"])
)
res_noweights <- rbind(res_noweights, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test,
Sigma_xi = as.matrix(diag(ncol(phi_test))),
w = rep(1, length(gs_test)))[["pval"]],
"permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test,
Sigma_xi = as.matrix(diag(ncol(phi_test))),
w = rep(1, length(gs_test)))[["pval"]],
"camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]],
design=design, contrast=3)$PValue,
"roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]],
block = indiv, correlation = cor_limma$consensus.correlation,
design=design, contrast=3)$p.value["Mixed", "P.Value"])
)

}

return(list("res_voom"=res_voom, "res_perso"=res_perso, "res_noweights"=res_noweights))
}
116 changes: 116 additions & 0 deletions R/data_sim_voomlike.R
@@ -0,0 +1,116 @@
#'Data simulation function
#'
#'Adapted from the supplementary material from Law \emph{et al}.
#'
#'@references Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision
#'weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome
#'Biology}, 15(2), R29.
#'
#'
#'@examples
#'\dontrun{
#'setseed(123)
#'data_sims <- data_sim_voomlike(maxGSsize=300)
#'}
#'@keywords internal
#'
#'@export
data_sim_voomlike <- function(seed=NULL, maxGSsize=400, minGSsize=30){


############################################################################
# Get distribution function of abundance proportions
# This distribution was generated from a real dataset
data(qAbundanceDist, envir = environment())
qAbundanceDist <- get(qAbundanceDist)

# Generate baseline proportions for desired number of genes -----
ngenes <- 10000
baselineprop <- qAbundanceDist( (1:ngenes)/(ngenes+1) )
baselineprop <- baselineprop/sum(baselineprop)

# Design ----
n <- 18
n1 <- n/2
n2 <- n1
nindiv <- 6
ngroup <- 2
ntime <- 3
group <- factor(rep(1:2, each=n/ngroup))
indiv <- factor(rep(1:nindiv, each=n/nindiv))
time <- rep(1:ntime, nindiv)
design <- model.matrix(~ group + time)
#design <- design[, 1, drop=FALSE]
nlibs <- n

# Library size ----
# Use equal or unequal library sizes
equal <- FALSE
if(equal){
expected.lib.size <- rep(11e6,nlibs)
} else {
expected.lib.size <- 20e6 * rep(c(1,0.1), n1)
}

# Set seed ----
if(!is.null(seed)){set.seed(seed*54321)}
u <- runif(100)

# Expected counts, group basis ----
i <- sample(1:ngenes,200)
i1 <- i[1:100]
i2 <- i[101:200]
fc <- 2
baselineprop1 <- baselineprop2 <- baselineprop
baselineprop1[i1] <- baselineprop1[i1]*fc
baselineprop2[i2] <- baselineprop2[i2]*fc
mu0.1 <- matrix(baselineprop1,ngenes,1) %*% matrix(expected.lib.size[1:n1],1,n1)
mu0.2 <- matrix(baselineprop2,ngenes,1) %*% matrix(expected.lib.size[(n1+1):(n1+n2)],1,n2)
mu0 <- cbind(mu0.1,mu0.2)
status <- rep(0,ngenes)
status[i1] <- -1
status[i2] <- 1

# Biological variation ----
BCV0 <- 0.2+1/sqrt(mu0)

# Use inverse chi-square or log-normal dispersion
invChisq <- TRUE

if(invChisq){
df.BCV <- 40
BCV <- BCV0*sqrt(df.BCV/rchisq(ngenes,df=df.BCV))
} else {
BCV <- BCV0*exp( rnorm(ngenes,mean=0,sd=0.25)/2 )
}
if(NCOL(BCV)==1) BCV <- matrix(BCV, ngenes, nlibs)
shape <- 1/BCV^2
scale <- mu0/shape
mu <- matrix(rgamma(ngenes*nlibs,shape=shape,scale=scale), ngenes, nlibs)

# Technical variation
counts <- matrix(rpois(ngenes*nlibs,lambda=mu), ngenes, nlibs)

# Filter
keep <- rowSums(counts)>=10
nkeep <- sum(keep)
counts2 <- counts[keep,]

S_temp <- cor(t(counts2))
rownames(S_temp) <- as.character(1:nrow(S_temp))
colnames(S_temp) <- as.character(1:ncol(S_temp))
rownames(counts2) <- rownames(S_temp)
GS <- list()
for(i in 1:ncol(S_temp)){
GS[[i]] <-which(abs(S_temp[,1])>0.8)
#if(inherits(GS[[i]], "try-error")){browser()}
S_temp <- S_temp[-GS[[i]], -GS[[i]]]
if(is.null(dim(S_temp)) || nrow(S_temp)==0){
break()
}
}
gs_keep <- lapply(GS[sapply(GS, length)<maxGSsize & sapply(GS, length)>minGSsize], names)

return(list("counts"=counts2, "design"=design, "gs_keep"=gs_keep, "indiv"=indiv))
}

37 changes: 37 additions & 0 deletions R/qAbundanceDist.R
@@ -0,0 +1,37 @@
#' Gene abundance proportion distribution of RNA-seq data.
#'
#' An example of gene abundance proportion distribution function of RNA-seq data,
#' generated from a real dataset. See supplmenetary material of Law \emph{et al}.
#'
#'@name qAbundanceDist
#'@rdname qAbundanceDist
#'@aliases qAbundanceDist
#'
#'@usage data(qAbundanceDist)
#'
#'@references Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision
#'weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome
#'Biology}, 15(2), R29.
#'
#' @format A function: \code{qAbundanceDist}.
#'
#' @examples
#' \dontrun{
#' # Get distribution function of abundance proportions
#' # This distribution was generated from a real dataset
#' #load(url("http://bioinf.wehi.edu.au/voom/qAbundanceDist.RData"))
#' data("qAbundanceDist")
#'
#' # Generate baseline proportions for desired number of genes
#' ngenes <- 10000
#' baselineprop <- qAbundanceDist( (1:ngenes)/(ngenes+1) )
#' baselineprop <- baselineprop/sum(baselineprop)
#' }
#'
#'
#' @source \url{http://bioinf.wehi.edu.au/voom/}
#'
#' @keywords datasets
#' @docType data
NULL

0 comments on commit 3442917

Please sign in to comment.