version 1.1.0

cran · Dec 13, 2008 · 2f39698 · 2f39698
1 parent ab04015
commit 2f39698
Show file tree

Hide file tree

Showing 14 changed files with 267 additions and 141 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,17 @@
 Package: st
-Version: 1.0.3
-Date: 2008-10-27
-Title: Shrinkage t Statistic
-Author: Rainer Opgen-Rhein and Korbinian Strimmer.
+Version: 1.1.0
+Date: 2008-12-13
+Title: Shrinkage t Statistic and Correlation-Adjusted t Score
+Author: Rainer Opgen-Rhein, Verena Zuber, and Korbinian Strimmer.
 Maintainer: Korbinian Strimmer <strimmer@uni-leipzig.de>
-Depends: R (>= 2.7.0), corpcor (>= 1.4.7), fdrtool (>= 1.2.5)
+Depends: R (>= 2.7.0), sda (>= 1.0.3), fdrtool (>= 1.2.5)
 Suggests: limma, samr
 Description: This package implements the "shrinkage t" statistic 
-   described in Opgen-Rhein and Strimmer (2007).  It also offers
-   a convenient interface to a number of other regularized t-type 
-   statistics often used in high-dimensional case-control studies.    
+   introduced in Opgen-Rhein and Strimmer (2007) and a shrinkage estimate
+   of the "correlation-adjusted t-score" (cat score) described in
+   Zuber and Strimmer (2008).  It also offers a convenient interface 
+   to a number of other regularized t-statistics commonly 
+   employed in high-dimensional case-control studies.    
 License: GPL (>= 3)
 URL: http://strimmerlab.org/software/st/
-Packaged: Mon Oct 27 00:07:25 2008; strimmer
+Packaged: Sat Dec 13 16:36:07 2008; strimmer
diff --git a/CHANGES → NEWS b/CHANGES → NEWS
@@ -1,6 +1,16 @@
 
-                   Release History of "st" Package
-               ========================================
+                 RELEASE HISTORY OF THE "st" PACKAGE
+               =======================================
+
+
+		CHANGES IN `st' PACKAGE VERSION 1.1.0
+
+- new functions shrinkcat.stat() und shrinkcat.fun() implementing
+  a shrinkage estimate of the "correlation-adjusted t-score" described
+  in Zuber and Strimmer (2008).
+- part of the code was rewritten to share code with the "sda" package 
+  (exploiting the link between discriminant analysis and gene ranking). 
+
 
 		CHANGES IN `st' PACKAGE VERSION 1.0.3
 
@@ -11,22 +21,19 @@
 
 		CHANGES IN `st' PACKAGE VERSION 1.0.2
 
-
 - some corrections and extensions have been made in the documentation,
   in particular, how to compute q-values and local FDR values
 - shrinkt() now returns a proper vector (one for which is.vector() is TRUE).
 - the license was changed from "GNU GPL 2 or later" to "GNU GPL 3 or later".
 
 
-
 		CHANGES IN `st' PACKAGE VERSION 1.0.1
 
 - reference to Opgen-Rhein and Strimmer (2007) updated.
 - updated email address
 - updated minimum R versions and version of corpcor package
 
 
-
 		CHANGES IN `st' PACKAGE VERSION 1.0.0
 
 

diff --git a/R/efront.R b/R/efront.R
@@ -1,4 +1,4 @@
-### efront.R  (2008-10-27)
+### efront.R  (2008-11-19)
 ###
 ###    Efron t Statistic (2001)
 ###
@@ -32,23 +32,20 @@ efront.stat = function (X, L, verbose=TRUE)
 
 efront.fun <- function (L, verbose=TRUE)
 {
-    if (missing(L)) stop("class labels are missing!")
-    L = factor(L)
-    cl = levels(L)
-    if (length(cl) != 2) stop("class labels must be specified for two groups, not more or less!")
-    idx1 = (L == cl[1])
-    idx2 = (L == cl[2])
+    if (missing(L)) stop("Class labels are missing!")
 
     function(X)
     {
-      tmp = pvt.group.moments(X, idx1, idx2, variances=TRUE)
+      tmp = centroids(X, L, var.pooled=TRUE, var.groups=FALSE, shrink=FALSE, verbose=verbose)
 
       # differences between the two groups
-      diff = tmp$mu1-tmp$mu2
+      diff = tmp$means[,1]-tmp$means[,2]
 
       # standard error of diff
-      n1 = sum(idx1); n2 = sum(idx2)      
-      sd = sqrt( (1/n1 + 1/n2)*tmp$v.pooled )
+      n1 = tmp$samples[1]
+      n2 = tmp$samples[2]
+      v =  tmp$var.pooled   
+      sd = sqrt( (1/n1 + 1/n2)*v )
 
       # tuning parameter
       a0 <- quantile(sd, probs=c(0.9))

diff --git a/R/modt.R b/R/modt.R
@@ -37,10 +37,10 @@ modt.fun <- function (L)
 {
     require("limma")
 
-    if (missing(L)) stop("class labels are missing!")
+    if (missing(L)) stop("Class labels are missing!")
     L = factor(L)
     cl = levels(L)
-    if (length(cl) != 2) stop("class labels must be specified for two groups, not more or less!")
+    if (length(cl) != 2) stop("Class labels must be specified for two groups, not more or less!")
 
     function(X)
     {

diff --git a/R/sam.R b/R/sam.R
@@ -36,10 +36,10 @@ sam.fun <- function(L)
 {
    require("samr")
 
-    if (missing(L)) stop("class labels are missing!")
+    if (missing(L)) stop("Class labels are missing!")
     L = factor(L)
     cl = levels(L)
-    if (length(cl) != 2) stop("class labels must be specified for two groups, not more or less!")
+    if (length(cl) != 2) stop("Class labels must be specified for two groups, not more or less!")
 
    function(X)
     {

diff --git a/R/samL1.R b/R/samL1.R
@@ -1,4 +1,4 @@
-### samL1.R  (2008-10-27)
+### samL1.R  (2008-11-19)
 ###
 ###    Wu (2005)Improved SAM Statistic
 ###
@@ -44,23 +44,19 @@ samL1.fun <- function (L, method=c("lowess", "cor"),
 {
     method = match.arg(method)
 
-    if (missing(L)) stop("class labels are missing!")
-    L = factor(L)
-    cl = levels(L)
-    if (length(cl) != 2) stop("class labels must be specified for two groups, not more or less!")
-    idx1 = (L == cl[1])
-    idx2 = (L == cl[2])
-
+    if (missing(L)) stop("Class labels are missing!")
+
     function(X)
     {
-      tmp = pvt.group.moments(X, idx1, idx2, variances=TRUE)
+      tmp = centroids(X, L, var.pooled=TRUE, var.groups=FALSE, shrink=FALSE, verbose=verbose)
 
       # differences between the two groups
-      diff = tmp$mu1-tmp$mu2
+      diff = tmp$means[,1]-tmp$means[,2]
 
       # variance of diff
-      n1 = sum(idx1); n2 = sum(idx2)      
-      v.diff = (1/n1 + 1/n2)*tmp$v.pooled
+      n1 = tmp$samples[1]
+      n2 = tmp$samples[2]
+      v.diff = (1/n1 + 1/n2)*tmp$var.pooled  
       sd = sqrt(v.diff)
 
       lambda = pvt.samL1.get.lambda(diff, sd, method=method, verbose=verbose, plot=plot)

diff --git a/R/shrinkcat.R b/R/shrinkcat.R
@@ -0,0 +1,73 @@
+### shrinkcat.R  (2008-12-01)
+###
+###    Shrinkage Estimation of Correlation-Adjusted t Statistic
+###
+### Copyright 2008 Verena Zuber and Korbinian Strimmer
+###
+###
+### This file is part of the `st' library for R and related languages.
+### It is made available under the terms of the GNU General Public
+### License, version 3, or at your option, any later version,
+### incorporated herein by reference.
+### 
+### This program is distributed in the hope that it will be
+### useful, but WITHOUT ANY WARRANTY; without even the implied
+### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+### PURPOSE.  See the GNU General Public License for more
+### details.
+### 
+### You should have received a copy of the GNU General Public
+### License along with this program; if not, write to the Free
+### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+### MA 02111-1307, USA
+
+
+shrinkcat.stat = function (X, L, verbose=TRUE)
+{
+  FUN = shrinkcat.fun(L=L, verbose=verbose)
+  score = FUN(X)
+
+  return( score )
+}
+
+
+shrinkcat.fun = function (L, verbose=TRUE)
+{
+    if (missing(L)) stop("Class labels are missing!")
+
+    function(X)
+    {
+      p = ncol(X)
+      n = nrow(X)   
+
+      tmp = centroids(X, L, var.pooled=TRUE, var.groups=FALSE, 
+                         powcor.pooled=TRUE, alpha=-1/2, shrink=TRUE, verbose=verbose)
+      n1 = tmp$samples[1]
+      n2 = tmp$samples[2]
+
+      # differences between the two groups
+      diff = tmp$means[,1]-tmp$means[,2]
+
+      # standard error of diff
+      n1 = tmp$samples[1]
+      n2 = tmp$samples[2]
+      v =  tmp$var.pooled   
+      sd = sqrt( (1/n1 + 1/n2)*v )
+
+
+      # t statistic
+      t = diff/sd
+
+      # correlation-adjusted statistic
+      if (is.null(dim(tmp$powcor.pooled))) # if there is no correlation
+        cat = t
+      else
+        cat = crossprod(tmp$powcor.pooled, t) # decorrelate t
+
+      cat = as.vector(cat)
+      attr(cat, "lambda.var") = attr(tmp$var.pooled, "lambda.var")
+      attr(cat, "lambda") = attr(tmp$powcor.pooled, "lambda")
+
+      return(cat)
+    }
+}
diff --git a/R/shrinkt.R b/R/shrinkt.R
@@ -33,46 +33,39 @@ shrinkt.stat = function (X, L, var.equal=TRUE, verbose=TRUE)
 
 shrinkt.fun = function (L, var.equal=TRUE, verbose=TRUE)
 {
-    if (missing(L)) stop("class labels are missing!")
-    L = factor(L)
-    cl = levels(L)
-    if (length(cl) != 2) stop("class labels must be specified for two groups, not more or less!")
-    idx1 = (L == cl[1])
-    idx2 = (L == cl[2])
-
+    if (missing(L)) stop("Class labels are missing!")
+
     function(X)
     {
       p = ncol(X)
       n = nrow(X)   
-      n1 = sum(idx1)
-      n2 = sum(idx2)  
-
-      tmp = pvt.group.moments(X, idx1, idx2, variances=FALSE)
-
-      # differences between the two groups
-      diff = tmp$mu1-tmp$mu2
-
-      #adiff = abs(diff)
-      #cutoff = quantile(adiff, probs=c(0.5))
-      #diff[ (adiff < cutoff) ] = 0   # hard thresholding
-
+
       if (var.equal) # compute pooled variance
-      {	
-	# center data
-        xc1 = sweep(X[idx1,], 2, tmp$mu1)
-        xc2 = sweep(X[idx2,], 2, tmp$mu2)
-
-        v = as.vector( var.shrink(rbind(xc1, xc2), verbose=verbose)*(n-1)/(n-2) )
+      {
+        tmp = centroids(X, L, var.pooled=TRUE, var.groups=FALSE, shrink=TRUE, verbose=verbose)
+        n1 = tmp$samples[1]
+        n2 = tmp$samples[2]
 
-        # standard error of diff    
+        # differences between the two groups
+        diff = tmp$means[,1]-tmp$means[,2]
+
+        # standard error of diff
+        n1 = tmp$samples[1]
+        n2 = tmp$samples[2]
+        v =  tmp$var.pooled   
         sd = sqrt( (1/n1 + 1/n2)*v )
       }
       else # allow different variances in each class
       {
-        X1 = X[idx1,]
-        X2 = X[idx2,]
-        v1 = as.vector(var.shrink(X1, verbose=verbose))
-        v2 = as.vector(var.shrink(X2, verbose=verbose))
+        tmp = centroids(X, L, var.pooled=FALSE, var.groups=TRUE, shrink=TRUE, verbose=verbose)
+        n1 = tmp$samples[1]
+        n2 = tmp$samples[2]
+
+        # differences between the two groups
+        diff = tmp$means[,1]-tmp$means[,2]
+
+        v1 = as.vector(tmp$var.groups[,1])
+        v2 = as.vector(tmp$var.groups[,2])
 
         # standard error of diff 
         sd = sqrt( v1/n1 + v2/n2 )