diff --git a/DESCRIPTION b/DESCRIPTION index 656f569..01aa95d 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rrBLUP Title: Ridge regression-BLUP and related methods for genomic selection -Version: 1.6 +Version: 1.7 Author: Jeffrey Endelman Maintainer: Jeffrey Endelman Description: One application is to estimate marker effects by ridge @@ -10,6 +10,6 @@ Description: One application is to estimate marker effects by ridge models. License: GPL-3 LazyLoad: yes -Packaged: 2011-09-20 21:43:55 UTC; Jeffrey +Packaged: 2011-09-23 13:26:27 UTC; Jeffrey Repository: CRAN -Date/Publication: 2011-09-21 13:18:15 +Date/Publication: 2011-09-23 15:01:37 diff --git a/MD5 b/MD5 index e9ed812..11bcaed 100644 --- a/MD5 +++ b/MD5 @@ -1,12 +1,12 @@ -d38a5c6de9cacbf2cc7c99053aec59d2 *DESCRIPTION +9d51b1e41204f16325554d62ceac9591 *DESCRIPTION 45ceeb22353b35f781a704b3515b18e7 *NAMESPACE -2e8de839ef7da1aada45c7bff305ea92 *R/GWA.R +01b1faae985de4c92f3ddd1f771a2fbb *R/GWA.R 5792b7a01d77e442834c329800f1c1d6 *R/distance.R bc7bd2a82366d5afaced115c7affe55a *R/impute.R -da1cf7c29db700b91130120a34bbfeb0 *R/kinship.BLUP.R -c030bb3b53c888a5a19dca22b8b792e1 *R/mixed.solve.R -9a7c2c1a70d89793d88f5ddc61d19b6b *man/GWA.Rd +127e1fca4bf0315d36bdfd65cba5f5c0 *R/kinship.BLUP.R +6ea68871acb02034b53908d3059f237c *R/mixed.solve.R +be5fbfb20ec993da916e34e007aabbe0 *man/GWA.Rd bf294ef1e7f065e4f31b4750b5075689 *man/distance.Rd d9523fd7251239954e08678bceba04f1 *man/impute.Rd -1b3c555abde4812db1b9e3d2f5ef4080 *man/kinship.BLUP.Rd -72d1044467f8f477d2341ffec3526c66 *man/mixed.solve.Rd +26b3a4ec5ec4c620af07cfd46c5444b9 *man/kinship.BLUP.Rd +1f3fecaf5e035db508bcb53ef6478a79 *man/mixed.solve.Rd diff --git a/R/GWA.R b/R/GWA.R index c3076c2..6e41742 100755 --- a/R/GWA.R +++ b/R/GWA.R @@ -20,28 +20,27 @@ if (is.null(p)) { X <- matrix(X,length(X),1) } stopifnot(nrow(X)==n) - m <- ncol(G) # number of markers if (is.null(m)) { m <- 1 G <- matrix(G,length(G),1) } -t <- dim(G)[1] +t <- nrow(G) if (is.null(Z)) {Z <- diag(n)} -stopifnot(dim(Z)[1]==n) -stopifnot(dim(Z)[2]==t) +stopifnot(nrow(Z)==n) +stopifnot(ncol(Z)==t) if (is.null(K)) { - K <- G %*% t(G)/m + K <- tcrossprod(G,G)/m } stopifnot(nrow(K)==ncol(K)) stopifnot(nrow(K)==t) out <- mixed.solve(y,X=X,Z=Z,K=K) -H <- out$Ve/out$Vu*diag(n)+Z%*%K%*%t(Z) +H <- out$Ve/out$Vu*diag(n)+tcrossprod(Z%*%K,Z) Hinv <- solve(H) df <- p + 1 @@ -74,7 +73,6 @@ for (i in 1:m) { pvalue <- 1 - pf(F,1,n-df) if (pvalue==0) { - # use normal approximation to the t-distribution u <- 1/(1+AS2*sqrt(F)) logp <- (-F/2-log(2*3.14159)/2+log(as.double(crossprod(AS1,c(u,u^2,u^3,u^4,u^5)))))/log(10) scores[i] <- -logp diff --git a/R/kinship.BLUP.R b/R/kinship.BLUP.R index cd4499f..0994a69 100755 --- a/R/kinship.BLUP.R +++ b/R/kinship.BLUP.R @@ -37,12 +37,11 @@ if (!is.null(G.pred)) { } t <- n.pred + n.train #total number of lines - Z <- cbind(Z.train,matrix(rep(0,n.obs*n.pred),n.obs,n.pred)) G <- rbind(G.train,G.pred) if (K.method == "RR") { - K <- G %*% t(G)/m + K <- tcrossprod(G,G)/m soln <- mixed.solve(y=y,X=X,Z=Z,K=K,method=mixed.method) if (n.pred > 0) { list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta) @@ -79,7 +78,7 @@ if (K.method == "RR") { MarkerList <- ScoreIdx[1:NumMarkers[k]] Zaug <- cbind(Z.train[CV.train.obs,CV.train.lines],matrix(rep(0,CV.n.train*CV.n.test),CV.n.train,CV.n.test)) Gaug <- rbind(G.train[CV.train.lines,MarkerList],G.train[CV.test.lines,MarkerList]) - soln <- mixed.solve(y[CV.train.obs],X=X[CV.train.obs,],Z=Zaug,K=Gaug%*%t(Gaug)/m,method=mixed.method) + soln <- mixed.solve(y[CV.train.obs],X=X[CV.train.obs,],Z=Zaug,K=tcrossprod(Gaug,Gaug)/m,method=mixed.method) g.pred <- Z.train[CV.test.obs,CV.test.lines]%*%soln$u[CV.n.train+1:CV.n.test] r.gy[k,j] <- cor(g.pred,y[CV.test.obs]-X[CV.test.obs,]%*%soln$beta) } #for k @@ -93,7 +92,7 @@ if (K.method == "RR") { ScoreIdx <- sort(Scores,decreasing=TRUE,index.return=TRUE)$ix MarkerList <- ScoreIdx[1:opt.n.mark] - soln <- mixed.solve(y=y,X=X,Z=Z,K=G[,MarkerList]%*%t(G[,MarkerList])/m,method=mixed.method) + soln <- mixed.solve(y=y,X=X,Z=Z,K=tcrossprod(G[,MarkerList],G[,MarkerList])/m,method=mixed.method) if (n.pred > 0) { list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta,profile=cbind(NumMarkers,r.gy.mean)) diff --git a/R/mixed.solve.R b/R/mixed.solve.R index 7eddad7..e9ca6b1 100755 --- a/R/mixed.solve.R +++ b/R/mixed.solve.R @@ -1,43 +1,64 @@ mixed.solve <- function (y, Z, K, X = NULL, method = "REML", bounds = c(1e-09,1e+09), SE = FALSE) { pi <- 3.14159 n <- length(y) -y <- matrix(y, n, 1) +y <- matrix(y,n,1) if (is.null(X)) { p <- 1 -X <- matrix(rep(1, n), n, 1) +X <- matrix(rep(1,n),n,1) } p <- ncol(X) if (is.null(p)) { p <- 1 -X <- matrix(X, length(X), 1) +X <- matrix(X,length(X),1) } m <- ncol(Z) if (is.null(m)) { m <- 1 -Z <- matrix(Z, length(Z), 1) +Z <- matrix(Z,length(Z),1) } stopifnot(nrow(Z) == n) stopifnot(nrow(X) == n) stopifnot(nrow(K) == m) stopifnot(ncol(K) == m) -Xt <- t(X) -Zt <- t(Z) XtX <- crossprod(X, X) rank.X <- qr(XtX)$rank stopifnot(p == rank.X) XtXinv <- solve(XtX) -S <- diag(n) - X %*% XtXinv %*% Xt +S <- diag(n) - tcrossprod(X%*%XtXinv,X) +if (n <= m) { + spectral.method <- "eigen" +} else { + spectral.method <- "cholesky" + B <- try(chol(K),silent=TRUE) + if (class(B)=="try-error") { + stop("Error: K not positive definite.") + } +} +if (spectral.method=="cholesky") { +ZBt <- tcrossprod(Z,B) +svd.ZBt <- svd(ZBt,nu=n) +U <- svd.ZBt$u +phi <- c(svd.ZBt$d^2,rep(0,n-m)) +SZBt <- S %*% ZBt +svd.SZBt <- svd(SZBt) +QR <- qr(cbind(X,svd.SZBt$u)) +Q <- qr.Q(QR,complete=TRUE)[,(p+1):n] +R <- qr.R(QR)[(p+1):min(m+p,n),(p+1):(m+p)] +theta <- c(forwardsolve(t(R^2),svd.SZBt$d^2),rep(0,max(0,n-p-m))) +} else { +# spectral.method is "eigen" offset <- sqrt(n) -Hb <- Z %*% K %*% Zt + offset * diag(n) +Hb <- tcrossprod(Z%*%K,Z) + offset*diag(n) Hb.system <- eigen(Hb, symmetric = TRUE) phi <- Hb.system$values - offset min.phi <- min(phi) -if (min.phi < 0) {warning(paste("K not positive semi-definite; min(phi) =",min.phi))} +if (min.phi < -1e-6) {stop("Error: K is not positive semi-definite.")} U <- Hb.system$vectors SHbS <- S %*% Hb %*% S SHbS.system <- eigen(SHbS, symmetric = TRUE) theta <- SHbS.system$values[1:(n - p)] - offset Q <- SHbS.system$vectors[, 1:(n - p)] +} #if (n > m) omega <- crossprod(Q, y) omega.sq <- omega^2 if (method == "ML") { @@ -58,35 +79,19 @@ df <- n - p Vu.opt <- sum(omega.sq/(theta + lambda.opt))/df Ve.opt <- lambda.opt * Vu.opt UtX <- crossprod(U,X) -W <- 0*diag(p) -if (p > 1) { -for (i in 1:(p-1)) { -for (j in (i+1):p) { -W[i,j] <- sum(UtX[,i]*UtX[,j]/(phi+lambda.opt)) -W[j,i] <- W[i,j] -} -} #for i -} #if p > 1 -for (i in 1:p) {W[i,i] <- sum(UtX[,i]^2/(phi+lambda.opt))} +W <- crossprod(UtX,UtX/(phi+lambda.opt)) beta <- solve(W,crossprod(UtX,crossprod(U,y)/(phi+lambda.opt))) -u <- K %*% Zt %*% U %*% (crossprod(U,(y - X %*% beta))/(phi+lambda.opt)) +C <- K %*% crossprod(Z,U) +u <- C %*% (crossprod(U,(y - X %*% beta))/(phi+lambda.opt)) LL = -0.5 * (soln$objective + df + df * log(2 * pi/df)) if (SE == FALSE) { list(Vu = Vu.opt, Ve = Ve.opt, beta = as.vector(beta), u = as.vector(u), LL = LL) } else { beta.var <- Vu.opt * solve(W) beta.SE <- sqrt(diag(beta.var)) -C <- K %*% Zt -CU <- C %*% U -WW <- rep(0,m) -for (i in 1:m) {WW[i]<-sum(CU[i,]^2/(phi+lambda.opt))} -WWW <- matrix(rep(0,m*p),m,p) -for (i in 1:m) { -for (j in 1:p) { -WWW[i,j] <- sum(CU[i,]*UtX[,j]/(phi+lambda.opt)) -} -} #for i -u.SE <- sqrt(Vu.opt * (diag(K) - WW + diag(WWW %*% beta.var %*% t(WWW)))) +WW <- tcrossprod(C%*%D,C) +WWW <- C%*%(UtX/(phi+lambda.opt)) +u.SE <- sqrt(Vu.opt * (diag(K) - diag(WW) + diag(tcrossprod(WWW%*%beta.var,WWW)))) list(Vu = Vu.opt, Ve = Ve.opt, beta = as.vector(beta), beta.SE = as.vector(beta.SE), u = as.vector(u), u.SE = as.vector(u.SE), LL = LL) } } \ No newline at end of file diff --git a/man/GWA.Rd b/man/GWA.Rd index fb86c2e..43a8a08 100755 --- a/man/GWA.Rd +++ b/man/GWA.Rd @@ -76,7 +76,7 @@ for (i in 1:200) { QTL <- 100*(1:5) #pick 5 QTL u <- rep(0,1000) #marker effects u[QTL] <- 1 -g <- crossprod(t(G),u) +g <- as.vector(crossprod(t(G),u)) h2 <- 0.5 y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g))) diff --git a/man/kinship.BLUP.Rd b/man/kinship.BLUP.Rd index ec18e6e..bccf737 100755 --- a/man/kinship.BLUP.Rd +++ b/man/kinship.BLUP.Rd @@ -85,7 +85,7 @@ for (i in 1:200) { } #random phenotypes -g <- crossprod(t(G),rnorm(1000)) +g <- as.vector(crossprod(t(G),rnorm(1000))) h2 <- 0.5 y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g))) diff --git a/man/mixed.solve.Rd b/man/mixed.solve.Rd index 06e0e6f..5b7ef8d 100755 --- a/man/mixed.solve.Rd +++ b/man/mixed.solve.Rd @@ -27,7 +27,7 @@ Vector (\eqn{n \times 1}) of observations. Design matrix (\eqn{n \times m}) for the random effects. } \item{K}{ -Covariance matrix (\eqn{m \times m}) for random effects; must be positive semi-definite. +Covariance matrix (\eqn{m \times m}) for random effects; must be positive definite. } \item{X}{ Design matrix (\eqn{n \times p}) for the fixed effects. If not passed, a vector of 1's is used @@ -45,10 +45,8 @@ If TRUE, standard errors are calculated. } \details{ This function can be used to predict marker effects or breeding values (see examples). The numerical method -uses the spectral decomposition of \eqn{Z K Z'} and \eqn{S Z K Z' S}, where \eqn{S = I - X (X' X)^{-1} X'} is -the projection operator for the nullspace of \eqn{X} (Kang et al., 2008). Since the computational complexity scales -with \eqn{n}, with replicated measurements it is faster to first calculate line means and then use these for \eqn{y}. -If the genotypes are unreplicated with respect to environmental factors, these can be modeled as fixed effects. +is based on the spectral decomposition of \eqn{Z K Z'} and \eqn{S Z K Z' S}, where \eqn{S = I - X (X' X)^{-1} X'} is +the projection operator for the nullspace of \eqn{X} (Kang et al., 2008). } \value{ If SE=FALSE, the function returns a list containing @@ -82,7 +80,7 @@ for (i in 1:200) { #random phenotypes u <- rnorm(1000) -g <- crossprod(t(G),u) +g <- as.vector(crossprod(t(G),u)) h2 <- 0.5 #heritability y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g))) @@ -91,7 +89,7 @@ ans <- mixed.solve(y,Z=G,K=diag(1000)) accuracy <- cor(u,ans$u) #predict breeding values -ans <- mixed.solve(y,Z=diag(200),K=crossprod(t(G),t(G))) +ans <- mixed.solve(y,Z=diag(200),K=tcrossprod(G,G)) accuracy <- cor(g,ans$u) }