version 1.7

cran · Sep 23, 2011 · 2c89a43 · 2c89a43
1 parent c5591f7
commit 2c89a43
Show file tree

Hide file tree

Showing 8 changed files with 61 additions and 61 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rrBLUP
 Title: Ridge regression-BLUP and related methods for genomic selection
-Version: 1.6
+Version: 1.7
 Author: Jeffrey Endelman
 Maintainer: Jeffrey Endelman <j.endelman@gmail.com>
 Description: One application is to estimate marker effects by ridge
@@ -10,6 +10,6 @@ Description: One application is to estimate marker effects by ridge
         models.
 License: GPL-3
 LazyLoad: yes
-Packaged: 2011-09-20 21:43:55 UTC; Jeffrey
+Packaged: 2011-09-23 13:26:27 UTC; Jeffrey
 Repository: CRAN
-Date/Publication: 2011-09-21 13:18:15
+Date/Publication: 2011-09-23 15:01:37
diff --git a/MD5 b/MD5
@@ -1,12 +1,12 @@
-d38a5c6de9cacbf2cc7c99053aec59d2 *DESCRIPTION
+9d51b1e41204f16325554d62ceac9591 *DESCRIPTION
 45ceeb22353b35f781a704b3515b18e7 *NAMESPACE
-2e8de839ef7da1aada45c7bff305ea92 *R/GWA.R
+01b1faae985de4c92f3ddd1f771a2fbb *R/GWA.R
 5792b7a01d77e442834c329800f1c1d6 *R/distance.R
 bc7bd2a82366d5afaced115c7affe55a *R/impute.R
-da1cf7c29db700b91130120a34bbfeb0 *R/kinship.BLUP.R
-c030bb3b53c888a5a19dca22b8b792e1 *R/mixed.solve.R
-9a7c2c1a70d89793d88f5ddc61d19b6b *man/GWA.Rd
+127e1fca4bf0315d36bdfd65cba5f5c0 *R/kinship.BLUP.R
+6ea68871acb02034b53908d3059f237c *R/mixed.solve.R
+be5fbfb20ec993da916e34e007aabbe0 *man/GWA.Rd
 bf294ef1e7f065e4f31b4750b5075689 *man/distance.Rd
 d9523fd7251239954e08678bceba04f1 *man/impute.Rd
-1b3c555abde4812db1b9e3d2f5ef4080 *man/kinship.BLUP.Rd
-72d1044467f8f477d2341ffec3526c66 *man/mixed.solve.Rd
+26b3a4ec5ec4c620af07cfd46c5444b9 *man/kinship.BLUP.Rd
+1f3fecaf5e035db508bcb53ef6478a79 *man/mixed.solve.Rd
diff --git a/R/GWA.R b/R/GWA.R
@@ -20,28 +20,27 @@ if (is.null(p)) {
   X <- matrix(X,length(X),1)
 }
 stopifnot(nrow(X)==n)
-
 m <- ncol(G)  # number of markers
 if (is.null(m)) {
   m <- 1
   G <- matrix(G,length(G),1)
 }
-t <- dim(G)[1]
+t <- nrow(G)
 
 if (is.null(Z)) {Z <- diag(n)}
 
-stopifnot(dim(Z)[1]==n)
-stopifnot(dim(Z)[2]==t)
+stopifnot(nrow(Z)==n)
+stopifnot(ncol(Z)==t)
 
 if (is.null(K)) {
-  K <- G %*% t(G)/m
+  K <- tcrossprod(G,G)/m
 }
 
 stopifnot(nrow(K)==ncol(K))
 stopifnot(nrow(K)==t)
 
 out <- mixed.solve(y,X=X,Z=Z,K=K)  
-H <- out$Ve/out$Vu*diag(n)+Z%*%K%*%t(Z)
+H <- out$Ve/out$Vu*diag(n)+tcrossprod(Z%*%K,Z)
 
 Hinv <- solve(H)
 df <- p + 1
@@ -74,7 +73,6 @@ for (i in 1:m) {
   pvalue <- 1 - pf(F,1,n-df)
 
   if (pvalue==0) {
-  # use normal approximation to the t-distribution
   u <- 1/(1+AS2*sqrt(F))
   logp <- (-F/2-log(2*3.14159)/2+log(as.double(crossprod(AS1,c(u,u^2,u^3,u^4,u^5)))))/log(10)
   scores[i] <- -logp

diff --git a/R/kinship.BLUP.R b/R/kinship.BLUP.R
@@ -37,12 +37,11 @@ if (!is.null(G.pred)) {
 }
 
 t <- n.pred + n.train #total number of lines
-
 Z <- cbind(Z.train,matrix(rep(0,n.obs*n.pred),n.obs,n.pred))
 G <- rbind(G.train,G.pred)
 
 if (K.method == "RR") {
-   K <- G %*% t(G)/m
+   K <- tcrossprod(G,G)/m
    soln <- mixed.solve(y=y,X=X,Z=Z,K=K,method=mixed.method)
    if (n.pred > 0) {
      list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta)
@@ -79,7 +78,7 @@ if (K.method == "RR") {
       MarkerList <- ScoreIdx[1:NumMarkers[k]]
       Zaug <- cbind(Z.train[CV.train.obs,CV.train.lines],matrix(rep(0,CV.n.train*CV.n.test),CV.n.train,CV.n.test))
       Gaug <- rbind(G.train[CV.train.lines,MarkerList],G.train[CV.test.lines,MarkerList])
-      soln <- mixed.solve(y[CV.train.obs],X=X[CV.train.obs,],Z=Zaug,K=Gaug%*%t(Gaug)/m,method=mixed.method)
+      soln <- mixed.solve(y[CV.train.obs],X=X[CV.train.obs,],Z=Zaug,K=tcrossprod(Gaug,Gaug)/m,method=mixed.method)
       g.pred <- Z.train[CV.test.obs,CV.test.lines]%*%soln$u[CV.n.train+1:CV.n.test]
       r.gy[k,j] <- cor(g.pred,y[CV.test.obs]-X[CV.test.obs,]%*%soln$beta)
     } #for k
@@ -93,7 +92,7 @@ if (K.method == "RR") {
   ScoreIdx <- sort(Scores,decreasing=TRUE,index.return=TRUE)$ix
   MarkerList <- ScoreIdx[1:opt.n.mark]
 
-  soln <- mixed.solve(y=y,X=X,Z=Z,K=G[,MarkerList]%*%t(G[,MarkerList])/m,method=mixed.method)
+  soln <- mixed.solve(y=y,X=X,Z=Z,K=tcrossprod(G[,MarkerList],G[,MarkerList])/m,method=mixed.method)
 
   if (n.pred > 0) {
     list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta,profile=cbind(NumMarkers,r.gy.mean))

diff --git a/R/mixed.solve.R b/R/mixed.solve.R
@@ -1,43 +1,64 @@
 mixed.solve <- function (y, Z, K, X = NULL, method = "REML", bounds = c(1e-09,1e+09), SE = FALSE) {
 pi <- 3.14159
 n <- length(y)
-y <- matrix(y, n, 1)
+y <- matrix(y,n,1)
 if (is.null(X)) {
 p <- 1
-X <- matrix(rep(1, n), n, 1)
+X <- matrix(rep(1,n),n,1)
 }
 p <- ncol(X)
 if (is.null(p)) {
 p <- 1
-X <- matrix(X, length(X), 1)
+X <- matrix(X,length(X),1)
 }
 m <- ncol(Z)
 if (is.null(m)) {
 m <- 1
-Z <- matrix(Z, length(Z), 1)
+Z <- matrix(Z,length(Z),1)
 }
 stopifnot(nrow(Z) == n)
 stopifnot(nrow(X) == n)
 stopifnot(nrow(K) == m)
 stopifnot(ncol(K) == m)
-Xt <- t(X)
-Zt <- t(Z)
 XtX <- crossprod(X, X)
 rank.X <- qr(XtX)$rank
 stopifnot(p == rank.X)
 XtXinv <- solve(XtX)
-S <- diag(n) - X %*% XtXinv %*% Xt
+S <- diag(n) - tcrossprod(X%*%XtXinv,X)
+if (n <= m) {
+  spectral.method <- "eigen"
+} else {
+  spectral.method <- "cholesky"
+  B <- try(chol(K),silent=TRUE)
+  if (class(B)=="try-error") {
+    stop("Error: K not positive definite.")
+  }
+}
+if (spectral.method=="cholesky") {
+ZBt <- tcrossprod(Z,B) 
+svd.ZBt <- svd(ZBt,nu=n)
+U <- svd.ZBt$u
+phi <- c(svd.ZBt$d^2,rep(0,n-m))
+SZBt <- S %*% ZBt
+svd.SZBt <- svd(SZBt)
+QR <- qr(cbind(X,svd.SZBt$u))
+Q <- qr.Q(QR,complete=TRUE)[,(p+1):n]
+R <- qr.R(QR)[(p+1):min(m+p,n),(p+1):(m+p)]
+theta <- c(forwardsolve(t(R^2),svd.SZBt$d^2),rep(0,max(0,n-p-m)))
+} else {
+# spectral.method is "eigen"
 offset <- sqrt(n)
-Hb <- Z %*% K %*% Zt + offset * diag(n)
+Hb <- tcrossprod(Z%*%K,Z) + offset*diag(n)
 Hb.system <- eigen(Hb, symmetric = TRUE)
 phi <- Hb.system$values - offset
 min.phi <- min(phi)
-if (min.phi < 0) {warning(paste("K not positive semi-definite; min(phi) =",min.phi))}
+if (min.phi < -1e-6) {stop("Error: K is not positive semi-definite.")}
 U <- Hb.system$vectors
 SHbS <- S %*% Hb %*% S
 SHbS.system <- eigen(SHbS, symmetric = TRUE)
 theta <- SHbS.system$values[1:(n - p)] - offset
 Q <- SHbS.system$vectors[, 1:(n - p)]
+}  #if (n > m)
 omega <- crossprod(Q, y)
 omega.sq <- omega^2
 if (method == "ML") {
@@ -58,35 +79,19 @@ df <- n - p
 Vu.opt <- sum(omega.sq/(theta + lambda.opt))/df
 Ve.opt <- lambda.opt * Vu.opt
 UtX <- crossprod(U,X)
-W <- 0*diag(p)
-if (p > 1) {
-for (i in 1:(p-1)) {
-for (j in (i+1):p) {
-W[i,j] <- sum(UtX[,i]*UtX[,j]/(phi+lambda.opt))
-W[j,i] <- W[i,j]
-}
-} #for i
-}  #if p > 1
-for (i in 1:p) {W[i,i] <- sum(UtX[,i]^2/(phi+lambda.opt))}
+W <- crossprod(UtX,UtX/(phi+lambda.opt))
 beta <- solve(W,crossprod(UtX,crossprod(U,y)/(phi+lambda.opt)))
-u <- K %*% Zt %*% U %*% (crossprod(U,(y - X %*% beta))/(phi+lambda.opt))	    
+C <- K %*% crossprod(Z,U)
+u <-  C %*% (crossprod(U,(y - X %*% beta))/(phi+lambda.opt))	    
 LL = -0.5 * (soln$objective + df + df * log(2 * pi/df))
 if (SE == FALSE) {
 list(Vu = Vu.opt, Ve = Ve.opt, beta = as.vector(beta), u = as.vector(u), LL = LL)
 } else {
 beta.var <- Vu.opt * solve(W)
 beta.SE <- sqrt(diag(beta.var))
-C <- K %*% Zt
-CU <- C %*% U
-WW <- rep(0,m)
-for (i in 1:m) {WW[i]<-sum(CU[i,]^2/(phi+lambda.opt))}
-WWW <- matrix(rep(0,m*p),m,p)
-for (i in 1:m) {
-for (j in 1:p) {
-WWW[i,j] <- sum(CU[i,]*UtX[,j]/(phi+lambda.opt))
-}
-}  #for i 
-u.SE <- sqrt(Vu.opt * (diag(K) - WW + diag(WWW %*% beta.var %*% t(WWW))))
+WW <- tcrossprod(C%*%D,C)
+WWW <- C%*%(UtX/(phi+lambda.opt))
+u.SE <- sqrt(Vu.opt * (diag(K) - diag(WW) + diag(tcrossprod(WWW%*%beta.var,WWW))))
 list(Vu = Vu.opt, Ve = Ve.opt, beta = as.vector(beta), beta.SE = as.vector(beta.SE), u = as.vector(u), u.SE = as.vector(u.SE), LL = LL)
 }
 }
diff --git a/man/GWA.Rd b/man/GWA.Rd
@@ -76,7 +76,7 @@ for (i in 1:200) {
 QTL <- 100*(1:5) #pick 5 QTL
 u <- rep(0,1000) #marker effects
 u[QTL] <- 1
-g <- crossprod(t(G),u)
+g <- as.vector(crossprod(t(G),u))
 h2 <- 0.5
 y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g)))
 

diff --git a/man/kinship.BLUP.Rd b/man/kinship.BLUP.Rd
@@ -85,7 +85,7 @@ for (i in 1:200) {
 }
 
 #random phenotypes
-g <- crossprod(t(G),rnorm(1000))
+g <- as.vector(crossprod(t(G),rnorm(1000)))
 h2 <- 0.5 
 y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g)))
 

diff --git a/man/mixed.solve.Rd b/man/mixed.solve.Rd
@@ -27,7 +27,7 @@ Vector (\eqn{n \times 1}) of observations.
 Design matrix (\eqn{n \times m}) for the random effects.
 }
   \item{K}{
-Covariance matrix (\eqn{m \times m}) for random effects; must be positive semi-definite.
+Covariance matrix (\eqn{m \times m}) for random effects; must be positive definite.
 }
   \item{X}{
 Design matrix (\eqn{n \times p}) for the fixed effects.  If not passed, a vector of 1's is used
@@ -45,10 +45,8 @@ If TRUE, standard errors are calculated.
 }
 \details{
 This function can be used to predict marker effects or breeding values (see examples).  The numerical method 
-uses the spectral decomposition of \eqn{Z K Z'} and \eqn{S Z K Z' S}, where \eqn{S = I - X (X' X)^{-1} X'} is 
-the projection operator for the nullspace of \eqn{X} (Kang et al., 2008). Since the computational complexity scales 
-with \eqn{n}, with replicated measurements it is faster to first calculate line means and then use these for \eqn{y}.  
-If the genotypes are unreplicated with respect to environmental factors, these can be modeled as fixed effects.
+is based on the spectral decomposition of \eqn{Z K Z'} and \eqn{S Z K Z' S}, where \eqn{S = I - X (X' X)^{-1} X'} is 
+the projection operator for the nullspace of \eqn{X} (Kang et al., 2008). 
 }
 \value{
 If SE=FALSE, the function returns a list containing
@@ -82,7 +80,7 @@ for (i in 1:200) {
 
 #random phenotypes
 u <- rnorm(1000)
-g <- crossprod(t(G),u)
+g <- as.vector(crossprod(t(G),u))
 h2 <- 0.5  #heritability
 y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g)))
 
@@ -91,7 +89,7 @@ ans <- mixed.solve(y,Z=G,K=diag(1000))
 accuracy <- cor(u,ans$u)
 
 #predict breeding values
-ans <- mixed.solve(y,Z=diag(200),K=crossprod(t(G),t(G)))
+ans <- mixed.solve(y,Z=diag(200),K=tcrossprod(G,G))
 accuracy <- cor(g,ans$u)
 
 }