version 4.4

cran · Oct 28, 2015 · 4497059 · 4497059
1 parent 0957439
commit 4497059
Show file tree

Hide file tree

Showing 10 changed files with 83 additions and 53 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: rrBLUP
-Title: Ridge regression and other kernels for genomic selection
-Version: 4.3
+Title: Ridge Regression and Other Kernels for Genomic Selection
+Version: 4.4
 Author: Jeffrey Endelman 
 Maintainer: Jeffrey Endelman <endelman@wisc.edu>
 Depends: R (>= 2.14)
 Suggests: parallel
 Description: Software for genomic prediction with the RR-BLUP mixed model. One application is to estimate marker effects by ridge regression; alternatively, BLUPs can be calculated based on an additive relationship matrix or a Gaussian kernel.
 License: GPL-3
 URL: http://potatobreeding.cals.wisc.edu/software
-Packaged: 2014-03-05 23:29:59 UTC; endelman
 NeedsCompilation: no
+Packaged: 2015-10-28 15:06:56 UTC; endelman
 Repository: CRAN
-Date/Publication: 2014-03-06 08:05:31
+Date/Publication: 2015-10-28 18:39:27
diff --git a/MD5 b/MD5
@@ -1,15 +1,15 @@
-41cd4dd2098e98459558ad6b204cfb04 *DESCRIPTION
-2db41218a311fcff4ac4a7e935824847 *NAMESPACE
-cbda85063d13017022f0a258b3ecbbf6 *NEWS
-d6b3136d8ba559a1bbd8543f8126a82b *R/A.mat.R
-208cc38bf26fc1428e39ffb4b545b2a9 *R/GWAS.R
-fdc99f8f2baadb6af59e3ddd5c75b80e *R/kin.blup.R
-14ce41254b4d8d2dbb5726e5568a5058 *R/kinship.BLUP.R
+a5defd66a48c047942a4bbff085c2422 *DESCRIPTION
+b0b53ae1d38c5b5ae5fcf4de70fc2ed0 *NAMESPACE
+6c94f1b7f2e660c15d3b07d50c439544 *NEWS
+8820918511720645c96d67b9dc3c3896 *R/A.mat.R
+946cff66e41193cac43598c65e498c18 *R/GWAS.R
+3ac7786ac6bef4539e1fa3b31e524292 *R/kin.blup.R
+8337be307fa8255206b4c505abb2a2e5 *R/kinship.BLUP.R
 30987d73531406ecaed9b86ae0299a6f *R/mixed.solve.R
 b74e4e9e2c643e550a83c5ee4eb7e905 *inst/CITATION
 67d2f31d86e1890252474ee480426638 *man/A.mat.Rd
-180b5e342a77ba0d6b8bd0dd5759643d *man/GWAS.Rd
-8d3fe7d8f7b4c240531c65db2c5dfad8 *man/kin.blup.Rd
+8873b1be71d10607ffd207d29d03415a *man/GWAS.Rd
+0c21053c4ce0e0e1f7e0f1b1b563c7af *man/kin.blup.Rd
 aeaff872c68712be50b3941de3873b50 *man/kinship.BLUP.Rd
 c2d59e14f73fae955f3cd17eecd6145e *man/mixed.solve.Rd
 bbc78a796618896c11a8627c2611dd22 *man/rrBLUP-package.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,3 +1,6 @@
+importFrom("grDevices", "dev.cur", "dev.new", "dev.next", "dev.set")
+importFrom("graphics", "axis", "lines", "par", "points", "title")
+importFrom("stats", "cov", "dist", "median", "model.matrix", "optimize", "pbeta", "ppoints", "predict", "smooth.spline")
 export(mixed.solve,GWAS,kinship.BLUP,A.mat,kin.blup)
 
 
diff --git a/NEWS b/NEWS
@@ -1,3 +1,8 @@
+Changes in 4.4:
+* Modified GWAS plot functionality to play nice with RStudio
+* kin.blup returns predicted values, averaged over the fixed effects
+* kin.blup allows specification of heterogeneous error variance
+
 Changes in 4.3:
 * Old-style vignettes are no longer allowed on CRAN, so they have been moved to http://potatobreeding.cals.wisc.edu/software.
 

diff --git a/R/A.mat.R b/R/A.mat.R
@@ -94,10 +94,9 @@ if (!missing) {
 				A.old <- A.new
 				cov.mat.old <- cov.mat.new
 				mean.vec.old <- mean.vec.new
-				if (n.core > 1) {
-            		library(parallel)
-					it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE)))
-					pieces <- mclapply(it,function(mark2){impute.EM(W[,mark2],cov.mat.old,mean.vec.old)},mc.cores=n.core)
+				if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) {
+ 					it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE)))
+					pieces <- parallel::mclapply(it,function(mark2){impute.EM(W[,mark2],cov.mat.old,mean.vec.old)},mc.cores=n.core)
 				} else {
 					pieces <- list()
 					pieces[[1]] <- impute.EM(W,cov.mat.old,mean.vec.old)

diff --git a/R/GWAS.R b/R/GWAS.R
@@ -242,10 +242,9 @@ for (i in 1:n.phenos) {
 		print("Variance components estimated. Testing markers.")
 	}
 
-	if (n.core > 1) {
+if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) {
     	it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE)))
-	    library(parallel)
-    	scores <- unlist(mclapply(it,function(markers){score.calc(M[ix.pheno,markers])},mc.cores=n.core))
+    	scores <- unlist(parallel::mclapply(it,function(markers){score.calc(M[ix.pheno,markers])},mc.cores=n.core))
 	 } else {
     	scores <- score.calc(M[ix.pheno,])
 	 }
@@ -257,11 +256,13 @@ for (i in 1:n.phenos) {
 }
 
 if (plot) {
-	if (dev.cur()==dev.next()) {
-      dev.new()   
-    } else {
-      dev.set(dev.next())
-    }
+	if (length(grep("RStudio",names(dev.cur())))==0) { 
+		if (dev.cur()==dev.next()) {
+    	  dev.new()   
+	    } else {
+    	  dev.set(dev.next())
+	    }
+	}
 	par(mfrow=c(p1,p2))
 	for (j in 1:n.phenos) {
 		manhattan(cbind(map,all.scores[,j]))

diff --git a/R/kin.blup.R b/R/kin.blup.R
@@ -1,4 +1,4 @@
-kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NULL,PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE)  {
+kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NULL,PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE,R=NULL)  {
 
 	make.full <- function(X) {
 		svd.X <- svd(X)
@@ -11,15 +11,22 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
 	if (is.na(ypos)) {
 		stop("Phenotype name does not appear in data.")
 	} else {
-		y <- data[,ypos]
+		y <- as.numeric(data[,ypos])
 	}
+
+	if (!is.null(R)&(length(R)!=length(y))) {
+		stop("Length of R does not equal length of y")
+	}
+
 	not.miss <- which(!is.na(y))
+	resid <- rep(NA,length(y))
 	if (length(not.miss)<length(y)) {
 		data <- data[not.miss,]
 		y <- y[not.miss]
+		if (!is.null(R)) {R <- R[not.miss]}
 	} 
 	n <- length(y)
-    
+
 	X <- matrix(1,n,1)		
 	if (!is.null(fixed)) {
 		p <- length(fixed)
@@ -50,11 +57,11 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
 		Z[cbind(1:n,match(data[,gid.pos],gid))] <- 1
 		X2 <- make.full(X)
 		ans <- mixed.solve(y=y,X=X2,Z=Z,SE=PEV)
-		resid <- y-X2%*%ans$beta-Z%*%ans$u
+		resid[not.miss] <- y-X2%*%ans$beta-Z%*%ans$u
 		if (PEV) {
-			return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u,PEV=ans$u.SE^2,resid=resid))
+			return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u,PEV=ans$u.SE^2,resid=resid,pred=ans$u+as.numeric(colMeans(X2)%*%ans$beta)))
 		} else {
-			return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u,resid=resid))
+			return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u,resid=resid,pred=ans$u+as.numeric(colMeans(X2)%*%ans$beta)))
 		}
 	} else {
 
@@ -72,19 +79,30 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
 		Z <- matrix(0,n,v)
 		Z[cbind(1:n,match(data[,gid.pos],not.miss.gid))] <- 1
 
+		if (!is.null(R)) {
+			sqrt.R <- sqrt(R)
+			X2 <- X/sqrt.R
+			y2 <- y/sqrt.R
+			Z2 <- Z/sqrt.R
+		} else {
+			X2 <- X
+			y2 <- y
+			Z2 <- Z
+		}
+
 		if ((n > v)&(reduce)) {
 			#transform
-			w <- sqrt(diag(crossprod(Z)))
-			X2 <- make.full(crossprod(Z,X)/w)
-			y2 <- crossprod(Z,y)/w
+			w <- sqrt(diag(crossprod(Z2)))
+			X2 <- make.full(crossprod(Z2,X2)/w)
+			y2 <- crossprod(Z2,y2)/w
 			Z2 <- cbind(diag(w),matrix(0,v,nrow(K)-v))
 			reduced <- TRUE
 		} else {
-			X2 <- make.full(X)
-			y2 <- y
-			Z2 <- cbind(Z,matrix(0,n,nrow(K)-v))
+			X2 <- make.full(X2)
+			Z2 <- cbind(Z2,matrix(0,n,nrow(K)-v))			
 			reduced <- FALSE
 		}
+
 		rm(X,Z,y)
 
 		if (!GAUSS) {
@@ -93,12 +111,12 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
 			if (reduced) {
 				resid <- NULL
 			} else {			
-				resid <- y2-X2%*%ans$beta-Z2%*%ans$u
+				resid[not.miss] <- y2-X2%*%ans$beta-Z2%*%ans$u
 			}
 			if (PEV) {
-				return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid))
+				return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta)))
 			} else {
-				return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],resid=resid))
+				return(list(Vg=ans$Vu,Ve=ans$Ve,g=ans$u[ix],resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta)))
 			}
 
 		} else {
@@ -118,10 +136,9 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
 	    	return(soln)
   		}
 
-		if (n.core > 1) {
-    		library(parallel)
+	if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) {
     		it <- split(theta,factor(cut(theta,n.core,labels=FALSE)))
-	    	soln <- unlist(mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE)
+	    	soln <- unlist(parallel::mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE)
   		} else {
     		soln <- ms.fun(theta)
 	  	}      
@@ -135,13 +152,13 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
 		if (reduced) {
 			resid <- NULL
 		} else {			
-			resid <- y2-X2%*%ans$beta-Z2%*%ans$u
+			resid[not.miss] <- y2-X2%*%ans$beta-Z2%*%ans$u
 		}
 
 		if (PEV) {
-			return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid))
+			return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],PEV=ans$u.SE[ix]^2,resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta)))
 		} else {
-			return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],resid=resid))
+			return(list(Vg=ans$Vu,Ve=ans$Ve,profile=profile,g=ans$u[ix],resid=resid,pred=ans$u[ix]+as.numeric(colMeans(X2)%*%ans$beta)))
 		}	
 
 		} #else GAUSS

diff --git a/R/kinship.BLUP.R b/R/kinship.BLUP.R
@@ -64,10 +64,9 @@ if (K.method == "RR") {
     return(soln)
   }
 
-  if (n.core > 1) {
-    library(parallel)
+  if ((n.core > 1) & requireNamespace("parallel",quietly=TRUE)) {
     it <- split(theta,factor(cut(theta,n.core,labels=FALSE)))
-    soln <- unlist(mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE)
+    soln <- unlist(parallel::mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE)
   } else {
     soln <- ms.fun(theta)
   }      

diff --git a/man/GWAS.Rd b/man/GWAS.Rd
@@ -7,9 +7,11 @@ Genome-wide association analysis
 \description{
 Performs genome-wide association analysis based on the mixed model (Yu et al. 2006):
 
-\deqn{y = X \beta + Z g + \varepsilon}
+\deqn{y = X \beta + Z g + S \tau + \varepsilon}
 
-where \eqn{\beta} is a vector of fixed effects that can model both environmental factors and population structure.  The variable \eqn{g} models the genetic background of each line as a random effect with \eqn{Var[g] = K \sigma^2}.  The residual variance is \eqn{Var[\varepsilon] = I \sigma_e^2}.
+where \eqn{\beta} is a vector of fixed effects that can model both environmental factors and population structure.  
+The variable \eqn{g} models the genetic background of each line as a random effect with \eqn{Var[g] = K \sigma^2}.  
+The variable \eqn{\tau} models the additive SNP effect as a fixed effect.  The residual variance is \eqn{Var[\varepsilon] = I \sigma_e^2}.
 }
 \usage{
 GWAS(pheno, geno, fixed=NULL, K=NULL, n.PC=0,

diff --git a/man/kin.blup.Rd b/man/kin.blup.Rd
@@ -9,7 +9,7 @@ Genotypic value prediction by G-BLUP, where the genotypic covariance G can be ad
 }
 \usage{
 kin.blup(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NULL,
-         PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE)
+         PEV=FALSE,n.core=1,theta.seq=NULL,reduce=FALSE,R=NULL)
 }
 
 \arguments{
@@ -49,12 +49,15 @@ The scale parameter for the Gaussian kernel is set by maximizing the restricted
 \item{reduce}{
 To speed up one-step predictions when there are repeated observations, try reduce=TRUE to reduce the dimension of the problem to equal the number of lines (see details). 
 }
+\item{R}{
+Optional vector to allow for heterogeneous error variance: \eqn{Var[\varepsilon_i] = R_i \sigma^2_e}.  The length of R must equal the number of rows in the phenotype data frame. 
+}
 }
 \details{
 This function is a wrapper for \code{\link{mixed.solve}} and thus solves mixed models of the form:
 \deqn{y = X \beta + [Z \: 0] g + \varepsilon}
 where \eqn{\beta} is a vector of fixed effects, \eqn{g} is a vector of random genotypic values with covariance 
-\eqn{G = Var[g]}, and the residuals are i.i.d. (\eqn{Var[\varepsilon] = I \: V_e}).  The design matrix for the genetic values has been partitioned to illustrate that not all lines need phenotypes (i.e., for genomic selection).  Unlike \code{\link{mixed.solve}}, this function does not return estimates of the fixed effects, only the BLUP solution for the genotypic values.  It was designed to replace \code{\link{kinship.BLUP}} and to relieve the user of having to explicitly construct design matrices.  Variance components are estimated by REML and BLUP values are returned for every entry in K, regardless of whether it has been phenotyped.  The rownames of K must match the genotype labels in the data frame for phenotyped lines; missing phenotypes (NA) are simply omitted.  
+\eqn{G = Var[g]}, and the residuals follow \eqn{Var[\varepsilon_i] = R_i \sigma^2_e}, with \eqn{R_i = 1} by default.  The design matrix for the genetic values has been partitioned to illustrate that not all lines need phenotypes (i.e., for genomic selection).  Unlike \code{\link{mixed.solve}}, this function does not return estimates of the fixed effects, only the BLUP solution for the genotypic values.  It was designed to replace \code{\link{kinship.BLUP}} and to relieve the user of having to explicitly construct design matrices.  Variance components are estimated by REML and BLUP values are returned for every entry in K, regardless of whether it has been phenotyped.  The rownames of K must match the genotype labels in the data frame for phenotyped lines; missing phenotypes (NA) are simply omitted.  
 
 Unlike its predecessor, this function does not handle marker data directly.  For breeding value prediction, the user must supply a relationship matrix, which can be calculated from markers with \code{\link{A.mat}}.  For Gaussian kernel predictions, pass the Euclidean distance matrix for K, which can be calculated with \code{\link{dist}}.  
 
@@ -71,6 +74,7 @@ The function always returns
 \item{$Ve}{REML estimate of the error variance}
 \item{$g}{BLUP solution for the genetic values}
 \item{$resid}{residuals}
+\item{$pred}{predicted genetic values, averaged over the fixed effects}
 }
 If PEV = TRUE, the list also includes
 \describe{