diff --git a/DESCRIPTION b/DESCRIPTION index 8eaa26a..5ebb985 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rrBLUP -Title: Ridge regression and other kernels for genomic selection -Version: 4.3 +Title: Ridge Regression and Other Kernels for Genomic Selection +Version: 4.4 Author: Jeffrey Endelman Maintainer: Jeffrey Endelman Depends: R (>= 2.14) @@ -8,7 +8,7 @@ Suggests: parallel Description: Software for genomic prediction with the RR-BLUP mixed model. One application is to estimate marker effects by ridge regression; alternatively, BLUPs can be calculated based on an additive relationship matrix or a Gaussian kernel. License: GPL-3 URL: http://potatobreeding.cals.wisc.edu/software -Packaged: 2014-03-05 23:29:59 UTC; endelman NeedsCompilation: no +Packaged: 2015-10-28 15:06:56 UTC; endelman Repository: CRAN -Date/Publication: 2014-03-06 08:05:31 +Date/Publication: 2015-10-28 18:39:27 diff --git a/MD5 b/MD5 index 4ef94b6..86cebea 100644 --- a/MD5 +++ b/MD5 @@ -1,15 +1,15 @@ -41cd4dd2098e98459558ad6b204cfb04 *DESCRIPTION -2db41218a311fcff4ac4a7e935824847 *NAMESPACE -cbda85063d13017022f0a258b3ecbbf6 *NEWS -d6b3136d8ba559a1bbd8543f8126a82b *R/A.mat.R -208cc38bf26fc1428e39ffb4b545b2a9 *R/GWAS.R -fdc99f8f2baadb6af59e3ddd5c75b80e *R/kin.blup.R -14ce41254b4d8d2dbb5726e5568a5058 *R/kinship.BLUP.R +a5defd66a48c047942a4bbff085c2422 *DESCRIPTION +b0b53ae1d38c5b5ae5fcf4de70fc2ed0 *NAMESPACE +6c94f1b7f2e660c15d3b07d50c439544 *NEWS +8820918511720645c96d67b9dc3c3896 *R/A.mat.R +946cff66e41193cac43598c65e498c18 *R/GWAS.R +3ac7786ac6bef4539e1fa3b31e524292 *R/kin.blup.R +8337be307fa8255206b4c505abb2a2e5 *R/kinship.BLUP.R 30987d73531406ecaed9b86ae0299a6f *R/mixed.solve.R b74e4e9e2c643e550a83c5ee4eb7e905 *inst/CITATION 67d2f31d86e1890252474ee480426638 *man/A.mat.Rd -180b5e342a77ba0d6b8bd0dd5759643d *man/GWAS.Rd -8d3fe7d8f7b4c240531c65db2c5dfad8 *man/kin.blup.Rd +8873b1be71d10607ffd207d29d03415a *man/GWAS.Rd +0c21053c4ce0e0e1f7e0f1b1b563c7af *man/kin.blup.Rd aeaff872c68712be50b3941de3873b50 *man/kinship.BLUP.Rd c2d59e14f73fae955f3cd17eecd6145e *man/mixed.solve.Rd bbc78a796618896c11a8627c2611dd22 *man/rrBLUP-package.Rd diff --git a/NAMESPACE b/NAMESPACE index 6c8bd9b..bce9700 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,6 @@ +importFrom("grDevices", "dev.cur", "dev.new", "dev.next", "dev.set") +importFrom("graphics", "axis", "lines", "par", "points", "title") +importFrom("stats", "cov", "dist", "median", "model.matrix", "optimize", "pbeta", "ppoints", "predict", "smooth.spline") export(mixed.solve,GWAS,kinship.BLUP,A.mat,kin.blup) diff --git a/NEWS b/NEWS index 5fc8925..6b5cb21 100755 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +Changes in 4.4: +* Modified GWAS plot functionality to play nice with RStudio +* kin.blup returns predicted values, averaged over the fixed effects +* kin.blup allows specification of heterogeneous error variance + Changes in 4.3: * Old-style vignettes are no longer allowed on CRAN, so they have been moved to http://potatobreeding.cals.wisc.edu/software. diff --git a/R/A.mat.R b/R/A.mat.R index 8e9dd6c..641d6f4 100755 --- a/R/A.mat.R +++ b/R/A.mat.R @@ -94,10 +94,9 @@ if (!missing) { A.old <- A.new cov.mat.old <- cov.mat.new mean.vec.old <- mean.vec.new - if (n.core > 1) { - library(parallel) - it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE))) - pieces <- mclapply(it,function(mark2){impute.EM(W[,mark2],cov.mat.old,mean.vec.old)},mc.cores=n.core) + if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) { + it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE))) + pieces <- parallel::mclapply(it,function(mark2){impute.EM(W[,mark2],cov.mat.old,mean.vec.old)},mc.cores=n.core) } else { pieces <- list() pieces[[1]] <- impute.EM(W,cov.mat.old,mean.vec.old) diff --git a/R/GWAS.R b/R/GWAS.R index 64c8584..7d2fa93 100755 --- a/R/GWAS.R +++ b/R/GWAS.R @@ -242,10 +242,9 @@ for (i in 1:n.phenos) { print("Variance components estimated. Testing markers.") } - if (n.core > 1) { +if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) { it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE))) - library(parallel) - scores <- unlist(mclapply(it,function(markers){score.calc(M[ix.pheno,markers])},mc.cores=n.core)) + scores <- unlist(parallel::mclapply(it,function(markers){score.calc(M[ix.pheno,markers])},mc.cores=n.core)) } else { scores <- score.calc(M[ix.pheno,]) } @@ -257,11 +256,13 @@ for (i in 1:n.phenos) { } if (plot) { - if (dev.cur()==dev.next()) { - dev.new() - } else { - dev.set(dev.next()) - } + if (length(grep("RStudio",names(dev.cur())))==0) { + if (dev.cur()==dev.next()) { + dev.new() + } else { + dev.set(dev.next()) + } + } par(mfrow=c(p1,p2)) for (j in 1:n.phenos) { manhattan(cbind(map,all.scores[,j])) diff --git a/R/kin.blup.R b/R/kin.blup.R old mode 100755 new mode 100644 index 67a2972..4c7b350 --- a/R/kin.blup.R +++ b/R/kin.blup.R @@ -1,4 +1,4 @@ -kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NULL,PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE) { +kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NULL,PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE,R=NULL) { make.full <- function(X) { svd.X <- svd(X) @@ -11,15 +11,22 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL if (is.na(ypos)) { stop("Phenotype name does not appear in data.") } else { - y <- data[,ypos] + y <- as.numeric(data[,ypos]) } + + if (!is.null(R)&(length(R)!=length(y))) { + stop("Length of R does not equal length of y") + } + not.miss <- which(!is.na(y)) + resid <- rep(NA,length(y)) if (length(not.miss) v)&(reduce)) { #transform - w <- sqrt(diag(crossprod(Z))) - X2 <- make.full(crossprod(Z,X)/w) - y2 <- crossprod(Z,y)/w + w <- sqrt(diag(crossprod(Z2))) + X2 <- make.full(crossprod(Z2,X2)/w) + y2 <- crossprod(Z2,y2)/w Z2 <- cbind(diag(w),matrix(0,v,nrow(K)-v)) reduced <- TRUE } else { - X2 <- make.full(X) - y2 <- y - Z2 <- cbind(Z,matrix(0,n,nrow(K)-v)) + X2 <- make.full(X2) + Z2 <- cbind(Z2,matrix(0,n,nrow(K)-v)) reduced <- FALSE } + rm(X,Z,y) if (!GAUSS) { @@ -93,12 +111,12 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL if (reduced) { resid <- NULL } else { - resid <- y2-X2%*%ans$beta-Z2%*%ans$u + resid[not.miss] <- y2-X2%*%ans$beta-Z2%*%ans$u } if (PEV) { - return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid)) + return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta))) } else { - return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],resid=resid)) + return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta))) } } else { @@ -118,10 +136,9 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL return(soln) } - if (n.core > 1) { - library(parallel) + if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) { it <- split(theta,factor(cut(theta,n.core,labels=FALSE))) - soln <- unlist(mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE) + soln <- unlist(parallel::mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE) } else { soln <- ms.fun(theta) } @@ -135,13 +152,13 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL if (reduced) { resid <- NULL } else { - resid <- y2-X2%*%ans$beta-Z2%*%ans$u + resid[not.miss] <- y2-X2%*%ans$beta-Z2%*%ans$u } if (PEV) { - return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid)) + return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta))) } else { - return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],resid=resid)) + return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta))) } } #else GAUSS diff --git a/R/kinship.BLUP.R b/R/kinship.BLUP.R index e3cdecc..f79dae0 100755 --- a/R/kinship.BLUP.R +++ b/R/kinship.BLUP.R @@ -64,10 +64,9 @@ if (K.method == "RR") { return(soln) } - if (n.core > 1) { - library(parallel) + if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) { it <- split(theta,factor(cut(theta,n.core,labels=FALSE))) - soln <- unlist(mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE) + soln <- unlist(parallel::mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE) } else { soln <- ms.fun(theta) } diff --git a/man/GWAS.Rd b/man/GWAS.Rd index dff7b4e..91e8878 100755 --- a/man/GWAS.Rd +++ b/man/GWAS.Rd @@ -7,9 +7,11 @@ Genome-wide association analysis \description{ Performs genome-wide association analysis based on the mixed model (Yu et al. 2006): -\deqn{y = X \beta + Z g + \varepsilon} +\deqn{y = X \beta + Z g + S \tau + \varepsilon} -where \eqn{\beta} is a vector of fixed effects that can model both environmental factors and population structure. The variable \eqn{g} models the genetic background of each line as a random effect with \eqn{Var[g] = K \sigma^2}. The residual variance is \eqn{Var[\varepsilon] = I \sigma_e^2}. +where \eqn{\beta} is a vector of fixed effects that can model both environmental factors and population structure. +The variable \eqn{g} models the genetic background of each line as a random effect with \eqn{Var[g] = K \sigma^2}. +The variable \eqn{\tau} models the additive SNP effect as a fixed effect. The residual variance is \eqn{Var[\varepsilon] = I \sigma_e^2}. } \usage{ GWAS(pheno, geno, fixed=NULL, K=NULL, n.PC=0, diff --git a/man/kin.blup.Rd b/man/kin.blup.Rd index 4c2527b..97c6a7a 100755 --- a/man/kin.blup.Rd +++ b/man/kin.blup.Rd @@ -9,7 +9,7 @@ Genotypic value prediction by G-BLUP, where the genotypic covariance G can be ad } \usage{ kin.blup(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NULL, - PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE) + PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE,R=NULL) } \arguments{ @@ -49,12 +49,15 @@ The scale parameter for the Gaussian kernel is set by maximizing the restricted \item{reduce}{ To speed up one-step predictions when there are repeated observations, try reduce=TRUE to reduce the dimension of the problem to equal the number of lines (see details). } +\item{R}{ +Optional vector to allow for heterogeneous error variance: \eqn{Var[\varepsilon_i] = R_i \sigma^2_e}. The length of R must equal the number of rows in the phenotype data frame. +} } \details{ This function is a wrapper for \code{\link{mixed.solve}} and thus solves mixed models of the form: \deqn{y = X \beta + [Z \: 0] g + \varepsilon} where \eqn{\beta} is a vector of fixed effects, \eqn{g} is a vector of random genotypic values with covariance -\eqn{G = Var[g]}, and the residuals are i.i.d. (\eqn{Var[\varepsilon] = I \: V_e}). The design matrix for the genetic values has been partitioned to illustrate that not all lines need phenotypes (i.e., for genomic selection). Unlike \code{\link{mixed.solve}}, this function does not return estimates of the fixed effects, only the BLUP solution for the genotypic values. It was designed to replace \code{\link{kinship.BLUP}} and to relieve the user of having to explicitly construct design matrices. Variance components are estimated by REML and BLUP values are returned for every entry in K, regardless of whether it has been phenotyped. The rownames of K must match the genotype labels in the data frame for phenotyped lines; missing phenotypes (NA) are simply omitted. +\eqn{G = Var[g]}, and the residuals follow \eqn{Var[\varepsilon_i] = R_i \sigma^2_e}, with \eqn{R_i = 1} by default. The design matrix for the genetic values has been partitioned to illustrate that not all lines need phenotypes (i.e., for genomic selection). Unlike \code{\link{mixed.solve}}, this function does not return estimates of the fixed effects, only the BLUP solution for the genotypic values. It was designed to replace \code{\link{kinship.BLUP}} and to relieve the user of having to explicitly construct design matrices. Variance components are estimated by REML and BLUP values are returned for every entry in K, regardless of whether it has been phenotyped. The rownames of K must match the genotype labels in the data frame for phenotyped lines; missing phenotypes (NA) are simply omitted. Unlike its predecessor, this function does not handle marker data directly. For breeding value prediction, the user must supply a relationship matrix, which can be calculated from markers with \code{\link{A.mat}}. For Gaussian kernel predictions, pass the Euclidean distance matrix for K, which can be calculated with \code{\link{dist}}. @@ -71,6 +74,7 @@ The function always returns \item{$Ve}{REML estimate of the error variance} \item{$g}{BLUP solution for the genetic values} \item{$resid}{residuals} +\item{$pred}{predicted genetic values, averaged over the fixed effects} } If PEV = TRUE, the list also includes \describe{