Initial commit

denisagniel · Mar 31, 2016 · 3442917 · 3442917
commit 3442917
Show file tree

Hide file tree

Showing 29 changed files with 2,126 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,10 @@
+Package: tcgsaseq
+Type: Package
+Title: What the Package Does (Title Case)
+Version: 0.1
+Date: 2016-03-31
+Author: Who wrote it
+Maintainer: Who to complain to <yourfault@somewhere.net>
+Description: More about what it does (maybe more than one line)
+License: What license is it under?
+LazyData: TRUE
diff --git a/LICENSE b/LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/.DS_Store b/R/.DS_Store
diff --git a/R/TcGSAseq-package.R b/R/TcGSAseq-package.R
@@ -0,0 +1,16 @@
+#' TcGSAseq: a package to perform Time-course Gene Set Analysis of RNA-seq data
+#' 
+#' Gene set analysis of longitudinal RNA-seq data with varaince component score 
+#' test acocunting for data heteroscedasticity through precision weights.
+#' 
+#' The main functions of the TcGSAseq package is \code{\link{tcgsa_seq}}
+#' 
+#' @docType package
+#' 
+#' @references Agniel D, Hejblum BP, Variance component score test for 
+#' time-course gene set analysis of longitudinal RNA-seq data, \emph{submitted}, 2016. 
+#' 
+#' @name TcGSAseq
+#' @importFrom GSA GSA.read.gmt
+#' 
+NULL
diff --git a/R/TcGSAseq_internal.R b/R/TcGSAseq_internal.R
@@ -0,0 +1,13 @@
+#'Power for matrix elements
+#'
+#'Set each element of matrix \code{x} to the power \code{n}
+#'
+#'@param x a matrix
+#'
+#'@param n a real number
+#'
+#'@keywords internal
+
+"%^%" <- function(x, n){
+  with(eigen(x), vectors %*% (values^n * t(vectors)))
+}
diff --git a/R/compute_sim_voomlike.R b/R/compute_sim_voomlike.R
@@ -0,0 +1,139 @@
+#' Computing simulations results
+#' 
+#'@examples
+#'
+#'\dontrun{
+#'data_sims <- data_sim_voomlike(maxGSsize=300, minGSsize=30)
+#'res <- compute_sim_voomlike(counts = data_sims$counts, 
+#'                            design = data_sims$design, 
+#'                            gs_keep = data_sims$gs_keep,
+#'                            indiv = data_sims$indiv,
+#'                            alternative=FALSE,
+#'                            RE_indiv_sd=0.1)
+#'res_all <- cbind(res$res_voom, res$res_perso, res$res_noweights)
+#'colnames(res_all) <- paste0(rep(c("asym", "perm", "camera"), 3), 
+#'                            rep(c("_voom", "_perso", "_noweights"), each=3))
+#'}
+#'
+#'@keywords internal
+#'
+#'@export
+compute_sim_voomlike <- function(counts, design, gs_keep, indiv, alternative=FALSE,
+                                 fixed_eff = 0.5, fixed_eff_sd = 0.2,
+                                 rand_eff_sd = 0.25, RE_indiv_sd=NULL, eps_sd=0.05){
+
+  logcoutspm <- apply(counts, MARGIN=2,function(v){log2((v+0.5)/(sum(v)+1)*10^6)})
+
+  if(alternative){
+    n <- ncol(counts)
+    nb_g <- nrow(counts)
+    beta_time <- rnorm(1, mean=fixed_eff, sd=fixed_eff_sd)
+    gamma_genes_time <- matrix(rnorm(nb_g, mean=0, sd=rand_eff_sd), ncol=1)
+    logcoutspm_alt <- logcoutspm + (gamma_genes_time+beta_time)%*%design[, "time"]
+
+    # for(gs in gs_keep){
+    #   nb_g_gs <- length(gs)
+    #   gamma_genes_time <- matrix(rnorm(nb_g_gs, mean = 0, sd = rand_eff_sd), ncol = 1)
+    #   logcoutspm_alt[gs,] <- logcoutspm[gs,] + (gamma_genes_time+beta_time)%*%design[, "time"]
+    # }
+
+    err <- matrix(rnorm(n*nb_g, mean = 0, sd = eps_sd), nrow = nb_g, ncol = n)
+    logcoutspm_alt <- logcoutspm_alt + err
+
+  }else{
+    logcoutspm_alt <- logcoutspm 
+  }
+
+  #Individual Random effects
+  if(!is.null(RE_indiv_sd)){
+    RE_indiv <- rnorm(n=length(unique(indiv)), mean=0, sd=RE_indiv_sd)
+    logcoutspm_alt <- logcoutspm_alt + RE_indiv[as.numeric(as.character(indiv))]
+  }
+
+
+  # Weights estimation ----
+  #########################
+
+  #png(width=5.5, height=4.5, units="in", file="voom_ex.png", res=300)
+  #w_voom <- voom_weights(x=design, y=t(counts2), doPlot = TRUE, preprocessed = FALSE)
+  #dev.off()
+  w_voom <- voom_weights(x=design, y=t(logcoutspm_alt), doPlot = FALSE, preprocessed = TRUE)
+
+  w_perso <- sp_weights(x=design[,-which(colnames(design)=="time")],
+                        y=t(logcoutspm_alt), 
+                        phi = cbind(design[, "time"]),
+                        doPlot = FALSE,
+                        exact=FALSE,
+                        preprocessed = TRUE
+  )
+
+  # Testing ----
+  ##############
+  res_voom <- NULL
+  res_perso <- NULL
+  res_noweights <- NULL
+  gs_ind_list <- limma::ids2indices(gs_keep, identifier=rownames(logcoutspm_alt))
+  cor_limma <- limma::duplicateCorrelation(logcoutspm_alt, design, block = indiv)
+
+  for(gs in 1:length(gs_keep)){
+    gs_test <- gs_keep[[gs]]
+    y_test <- logcoutspm_alt[gs_test,]
+    x_test <- design[,-which(colnames(design)=="time")]
+    w_test_perso <- t(w_perso[,as.numeric(gs_test)])
+    w_test_voom <- t(w_voom[,as.numeric(gs_test)])
+    phi_test <- cbind(design[, "time"])
+
+    ## Fitting the null model
+    n <- ncol(y_test)
+    g <- length(gs_test)
+    y_T_vect <- as.vector(y_test)
+    indiv_vect <- rep(indiv, g)
+    g_vect <- as.factor(rep(1:g, n))
+    x_vect <-do.call(rbind, replicate(g, x_test, simplify=FALSE))
+    phi_vect <- do.call(rbind, replicate(g, phi_test, simplify=FALSE))
+
+    res_voom <- rbind(res_voom, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test, 
+                                                            Sigma_xi = as.matrix(diag(ncol(phi_test))),
+                                                            w = w_test_voom)[["pval"]],
+                                      "permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test, 
+                                                              Sigma_xi = as.matrix(diag(ncol(phi_test))),
+                                                              w = w_test_voom)[["pval"]],
+                                      "camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]],
+                                                               design=design, contrast=3, 
+                                                               weights = t(w_voom))$PValue,
+                                      "roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]],
+                                                             design=design, contrast=3, 
+                                                             block = indiv, correlation = cor_limma$consensus.correlation, 
+                                                             weights = t(w_voom))$p.value["Mixed", "P.Value"])
+    )
+    res_perso <- rbind(res_perso, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test, 
+                                                              Sigma_xi = as.matrix(diag(ncol(phi_test))),
+                                                              w = w_test_perso)[["pval"]],
+                                        "permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test, 
+                                                                Sigma_xi = as.matrix(diag(ncol(phi_test))),
+                                                                w = w_test_perso)[["pval"]],
+                                        "camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]],
+                                                                 design=design, contrast=3, 
+                                                                 weights = t(w_perso))$PValue,
+                                        "roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]],
+                                                               design=design, contrast=3, 
+                                                               block = indiv, correlation = cor_limma$consensus.correlation,  
+                                                               weights = t(w_perso))$p.value["Mixed", "P.Value"])
+    )
+    res_noweights <- rbind(res_noweights, cbind("asym" = vc_test_asym(y = y_test, x=x_test, indiv=indiv, phi=phi_test, 
+                                                                      Sigma_xi = as.matrix(diag(ncol(phi_test))),
+                                                                      w = rep(1, length(gs_test)))[["pval"]],
+                                                "permut" = vc_test_perm(y = y_test, x=x_test, indiv=indiv, phi=phi_test, 
+                                                                        Sigma_xi = as.matrix(diag(ncol(phi_test))),
+                                                                        w = rep(1, length(gs_test)))[["pval"]],
+                                                "camera" = limma::camera(y=logcoutspm_alt, index=gs_ind_list[[gs]],
+                                                                         design=design, contrast=3)$PValue,
+                                                "roast" = limma::roast(y=logcoutspm_alt, index=gs_ind_list[[gs]], 
+                                                                       block = indiv, correlation = cor_limma$consensus.correlation, 
+                                                                       design=design, contrast=3)$p.value["Mixed", "P.Value"])
+    )
+
+  }
+
+  return(list("res_voom"=res_voom, "res_perso"=res_perso, "res_noweights"=res_noweights))
+}
diff --git a/R/data_sim_voomlike.R b/R/data_sim_voomlike.R
@@ -0,0 +1,116 @@
+#'Data simulation function 
+#'
+#'Adapted from the supplementary material from Law \emph{et al}.
+#'
+#'@references Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision 
+#'weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome 
+#'Biology}, 15(2), R29.
+#'
+#'
+#'@examples
+#'\dontrun{
+#'setseed(123)
+#'data_sims <- data_sim_voomlike(maxGSsize=300)
+#'}
+#'@keywords internal
+#'
+#'@export
+data_sim_voomlike <- function(seed=NULL, maxGSsize=400, minGSsize=30){
+
+
+  ############################################################################
+  # Get distribution function of abundance proportions
+  # This distribution was generated from a real dataset
+  data(qAbundanceDist, envir = environment())
+  qAbundanceDist <- get(qAbundanceDist)
+
+  # Generate baseline proportions for desired number of genes -----
+  ngenes <- 10000
+  baselineprop <- qAbundanceDist( (1:ngenes)/(ngenes+1) )
+  baselineprop <- baselineprop/sum(baselineprop)
+
+  # Design ----
+  n <- 18
+  n1 <- n/2
+  n2 <- n1
+  nindiv <- 6
+  ngroup <- 2
+  ntime <- 3
+  group <- factor(rep(1:2, each=n/ngroup))
+  indiv <- factor(rep(1:nindiv, each=n/nindiv))
+  time <- rep(1:ntime, nindiv)
+  design <- model.matrix(~ group + time)
+  #design <- design[, 1, drop=FALSE]
+  nlibs <- n
+
+  # Library size ----
+  # Use equal or unequal library sizes
+  equal <- FALSE
+  if(equal){
+    expected.lib.size <- rep(11e6,nlibs)
+  } else {
+    expected.lib.size <- 20e6 * rep(c(1,0.1), n1)
+  }
+
+  # Set seed ----
+  if(!is.null(seed)){set.seed(seed*54321)}
+  u <- runif(100)
+
+  # Expected counts, group basis ----
+  i <- sample(1:ngenes,200)
+  i1 <- i[1:100]
+  i2 <- i[101:200]
+  fc <- 2
+  baselineprop1 <- baselineprop2 <- baselineprop
+  baselineprop1[i1] <- baselineprop1[i1]*fc
+  baselineprop2[i2] <- baselineprop2[i2]*fc
+  mu0.1 <- matrix(baselineprop1,ngenes,1) %*% matrix(expected.lib.size[1:n1],1,n1)
+  mu0.2 <- matrix(baselineprop2,ngenes,1) %*% matrix(expected.lib.size[(n1+1):(n1+n2)],1,n2)
+  mu0 <- cbind(mu0.1,mu0.2)
+  status <- rep(0,ngenes)
+  status[i1] <- -1
+  status[i2] <- 1
+
+  # Biological variation ----
+  BCV0 <- 0.2+1/sqrt(mu0)
+
+  # Use inverse chi-square or log-normal dispersion
+  invChisq <- TRUE
+
+  if(invChisq){
+    df.BCV <- 40
+    BCV <- BCV0*sqrt(df.BCV/rchisq(ngenes,df=df.BCV))
+  } else {
+    BCV <- BCV0*exp( rnorm(ngenes,mean=0,sd=0.25)/2 )
+  }
+  if(NCOL(BCV)==1) BCV <- matrix(BCV, ngenes, nlibs)
+  shape <- 1/BCV^2
+  scale <- mu0/shape
+  mu <- matrix(rgamma(ngenes*nlibs,shape=shape,scale=scale), ngenes, nlibs)
+
+  # Technical variation
+  counts <- matrix(rpois(ngenes*nlibs,lambda=mu), ngenes, nlibs)
+
+  # Filter
+  keep <- rowSums(counts)>=10
+  nkeep <- sum(keep)
+  counts2 <- counts[keep,]
+
+  S_temp <- cor(t(counts2))
+  rownames(S_temp) <- as.character(1:nrow(S_temp))
+  colnames(S_temp) <- as.character(1:ncol(S_temp))
+  rownames(counts2) <- rownames(S_temp)
+  GS <- list()
+  for(i in 1:ncol(S_temp)){
+    GS[[i]] <-which(abs(S_temp[,1])>0.8)
+    #if(inherits(GS[[i]], "try-error")){browser()}
+    S_temp <- S_temp[-GS[[i]], -GS[[i]]]
+    if(is.null(dim(S_temp)) || nrow(S_temp)==0){
+      break()
+    }
+  }
+  gs_keep <- lapply(GS[sapply(GS, length)<maxGSsize & sapply(GS, length)>minGSsize], names)
+
+  return(list("counts"=counts2, "design"=design, "gs_keep"=gs_keep, "indiv"=indiv))
+}
+
diff --git a/R/qAbundanceDist.R b/R/qAbundanceDist.R
@@ -0,0 +1,37 @@
+#' Gene abundance proportion distribution of RNA-seq data.
+#'
+#' An example of gene abundance proportion distribution function of RNA-seq data,
+#' generated from a real dataset. See supplmenetary material of Law \emph{et al}.
+#' 
+#'@name qAbundanceDist
+#'@rdname qAbundanceDist
+#'@aliases qAbundanceDist
+#'
+#'@usage data(qAbundanceDist)
+#'
+#'@references Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision 
+#'weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome 
+#'Biology}, 15(2), R29.
+#'
+#' @format A function: \code{qAbundanceDist}.
+#' 
+#' @examples
+#' \dontrun{
+#' # Get distribution function of abundance proportions
+#' # This distribution was generated from a real dataset
+#' #load(url("http://bioinf.wehi.edu.au/voom/qAbundanceDist.RData"))
+#' data("qAbundanceDist")
+#' 
+#' # Generate baseline proportions for desired number of genes
+#' ngenes <- 10000
+#' baselineprop <- qAbundanceDist( (1:ngenes)/(ngenes+1) )
+#' baselineprop <- baselineprop/sum(baselineprop)
+#' }
+#' 
+#' 
+#' @source \url{http://bioinf.wehi.edu.au/voom/}
+#' 
+#' @keywords datasets
+#' @docType data
+NULL
+