version 4.2

cran · Jun 15, 2013 · 0afb8c9 · 0afb8c9
1 parent e42582d
commit 0afb8c9
Show file tree

Hide file tree

Showing 16 changed files with 63 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,17 @@
 Package: rrBLUP
 Title: Ridge regression and other kernels for genomic selection
-Version: 4.1
+Version: 4.2
 Author: Jeffrey Endelman
-Maintainer: Jeffrey Endelman <j.endelman@gmail.com>
+Maintainer: Jeffrey Endelman <endelman@wisc.edu>
+Depends: R (>= 2.14)
+Suggests: parallel
 Description: Software for genomic prediction with the RR-BLUP mixed
         model. One application is to estimate marker effects by ridge
         regression; alternatively, BLUPs can be calculated based on an
         additive relationship matrix or a Gaussian kernel.
-Suggests: multicore
 License: GPL-3
 LazyLoad: yes
-Packaged: 2012-12-21 00:52:34 UTC; jeffendelman
+Packaged: 2013-06-15 18:49:09 UTC; jeffendelman
+NeedsCompilation: no
 Repository: CRAN
-Date/Publication: 2012-12-21 18:03:00
+Date/Publication: 2013-06-18 18:22:09
diff --git a/MD5 b/MD5
@@ -1,19 +1,21 @@
-440271e17dba48eed4dec7033b5f6dd7 *DESCRIPTION
+94fc0ac98bb704635b39e7fb4ad2296a *DESCRIPTION
 2db41218a311fcff4ac4a7e935824847 *NAMESPACE
-948cd609b59c2e956b18a647336e77c5 *NEWS
-9dd5f2f572cd8dd70f5bf40b72dba960 *R/A.mat.R
-a6ad4c1bdab9a16b5432d5ca43d454a9 *R/GWAS.R
-67b053df9f1a7548844b39a52042d69f *R/kin.blup.R
-19ca4dd8c3675164289dcd4d281718d1 *R/kinship.BLUP.R
+c3c9a4442dba309f5b46f2c55a8ca42b *NEWS
+d6b3136d8ba559a1bbd8543f8126a82b *R/A.mat.R
+208cc38bf26fc1428e39ffb4b545b2a9 *R/GWAS.R
+fdc99f8f2baadb6af59e3ddd5c75b80e *R/kin.blup.R
+14ce41254b4d8d2dbb5726e5568a5058 *R/kinship.BLUP.R
 30987d73531406ecaed9b86ae0299a6f *R/mixed.solve.R
 b74e4e9e2c643e550a83c5ee4eb7e905 *inst/CITATION
+289ffdafbe078405e751a31b52270f97 *inst/doc/GS_tutorial.R
 bbac1a12855ffe3bcaded37dc25c5990 *inst/doc/GS_tutorial.Rnw
-dae16ea09833e7d2f108fd21f6ba6c0c *inst/doc/GS_tutorial.pdf
+c98f6363ab1aad719128cdd2b7d710e3 *inst/doc/GS_tutorial.pdf
+d44e9d8551bd2ca6ecb3a37d0d6b7e04 *inst/doc/GWAS_tutorial.R
 100de153fff1f2a2735c29d099557c6e *inst/doc/GWAS_tutorial.Rnw
-fdb51983dbfac8241692068fede87cc0 *inst/doc/GWAS_tutorial.pdf
-cdd7beb99963bc3dbd134fda8f3e68cb *man/A.mat.Rd
-199ee3ab214d8191bd4ebd76fb795af7 *man/GWAS.Rd
-8daa97c0af3c1f20d0f8a00c9bab477c *man/kin.blup.Rd
-6679f528ebe5723f7bac70dcd81898ef *man/kinship.BLUP.Rd
+1626c03469f015bab30195da89fc45a3 *inst/doc/GWAS_tutorial.pdf
+67d2f31d86e1890252474ee480426638 *man/A.mat.Rd
+180b5e342a77ba0d6b8bd0dd5759643d *man/GWAS.Rd
+8d3fe7d8f7b4c240531c65db2c5dfad8 *man/kin.blup.Rd
+aeaff872c68712be50b3941de3873b50 *man/kinship.BLUP.Rd
 c2d59e14f73fae955f3cd17eecd6145e *man/mixed.solve.Rd
-6260ee867b33dec6702e238a989bbf31 *man/rrBLUP-package.Rd
+3808ce9d35394361e2096af3c05c25f6 *man/rrBLUP-package.Rd
diff --git a/NEWS b/NEWS
@@ -1,3 +1,7 @@
+Changes in 4.2:
+* shrink=FALSE is now the default in A.mat
+* Only return imputed markers in A.mat when impute=TRUE
+
 Changes in 4.1:
 * Replaced function GWA with GWAS for association mapping.  Improvements include (1) P3D is now optional, (2) multiple phenotypes handled, (3) Manhattan and qq plots are generated, (4) uses data frame for input.
 * Changed default to reduce=FALSE in kin.blup

diff --git a/R/A.mat.R b/R/A.mat.R
@@ -1,4 +1,4 @@
-A.mat <- function(X,min.MAF=NULL,max.missing=NULL,impute.method="mean",tol=0.02,n.core=1,shrink=NULL,return.imputed=FALSE){
+A.mat <- function(X,min.MAF=NULL,max.missing=NULL,impute.method="mean",tol=0.02,n.core=1,shrink=FALSE,return.imputed=FALSE){
 
 impute.EM <- function(W, cov.mat, mean.vec) {
 	n <- nrow(W)
@@ -35,6 +35,7 @@ cov.W.shrink <- function(W) {
 	print(paste("Shrinkage intensity:",round(delta,2)))
 	return(target*delta + (1-delta)*S)
 }
+
 X <- as.matrix(X)
 n <- nrow(X)
 frac.missing <- apply(X,2,function(x){length(which(is.na(x)))/n})
@@ -54,8 +55,6 @@ X[,mono] <- 2*tcrossprod(one,matrix(freq[mono],length(mono),1))-1
 freq.mat <- tcrossprod(one, matrix(freq[markers], m, 1))
 W <- X[, markers] + 1 - 2 *freq.mat 
 
-if (is.null(shrink)) {if (m < 5*n) {shrink <- TRUE} else {shrink <- FALSE}}
-
 if (!missing) {
     if (shrink) {
 		W.mean <- rowMeans(W)
@@ -65,6 +64,7 @@ if (!missing) {
 		A <- tcrossprod(W)/var.A/m	
 	}
 	rownames(A) <- rownames(X)
+	colnames(A) <- rownames(A)
 	if (return.imputed) {
 		return(list(A=A,imputed=X))		
 	} else {
@@ -74,7 +74,8 @@ if (!missing) {
     #impute
     isna <- which(is.na(W)) 
 	W[isna] <- 0
-	if ((impute.method=="em")|(impute.method=="EM")) {
+
+	if (toupper(impute.method)=="EM") {
 		if (m < n) {
 			print("Linear dependency among the lines: imputing with mean instead of EM algorithm.")
 		} else {
@@ -94,9 +95,9 @@ if (!missing) {
 				cov.mat.old <- cov.mat.new
 				mean.vec.old <- mean.vec.new
 				if (n.core > 1) {
-            		library(multicore)
+            		library(parallel)
 					it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE)))
-					pieces <- mclapply(it,function(mark2){impute.EM(W[,mark2],cov.mat.old,mean.vec.old)})
+					pieces <- mclapply(it,function(mark2){impute.EM(W[,mark2],cov.mat.old,mean.vec.old)},mc.cores=n.core)
 				} else {
 					pieces <- list()
 					pieces[[1]] <- impute.EM(W,cov.mat.old,mean.vec.old)
@@ -115,9 +116,13 @@ if (!missing) {
 				print(err,digits=3)
 			}
 			rownames(A.new) <- rownames(X)
+			colnames(A.new) <- rownames(A.new)
+
 			if (return.imputed) {
-				X[,markers] <- W.imp - 1 + 2*freq.mat
-				return(list(A=A.new,imputed=X))
+				Ximp <- W.imp - 1 + 2*freq.mat
+				colnames(Ximp) <- colnames(X)[markers]
+				rownames(Ximp) <- rownames(X)
+				return(list(A=A.new,imputed=Ximp))
 			} else {
 				return(A.new)
 			}
@@ -134,10 +139,13 @@ if (!missing) {
 		A <- tcrossprod(W)/var.A/m	
 	}
 	rownames(A) <- rownames(X)
+	colnames(A) <- rownames(A)
 
 	if (return.imputed) {
-		X[,markers] <- W - 1 + 2*freq.mat
-		return(list(A=A,imputed=X))		
+		Ximp <- W - 1 + 2*freq.mat
+		colnames(Ximp) <- colnames(X)[markers]
+		rownames(Ximp) <- rownames(X)
+		return(list(A=A,imputed=Ximp))		
 	} else {
 		return(A)
 	}

diff --git a/R/GWAS.R b/R/GWAS.R
@@ -244,8 +244,8 @@ for (i in 1:n.phenos) {
 
 	if (n.core > 1) {
     	it <- split(1:m,factor(cut(1:m,n.core,labels=FALSE)))
-	    library(multicore)
-    	scores <- unlist(mclapply(it,function(markers){score.calc(M[ix.pheno,markers])}))
+	    library(parallel)
+    	scores <- unlist(mclapply(it,function(markers){score.calc(M[ix.pheno,markers])},mc.cores=n.core))
 	 } else {
     	scores <- score.calc(M[ix.pheno,])
 	 }

diff --git a/R/kin.blup.R b/R/kin.blup.R
@@ -119,9 +119,9 @@ kin.blup <- function(data,geno,pheno,GAUSS=FALSE,K=NULL,fixed=NULL,covariate=NUL
   		}
 
 		if (n.core > 1) {
-    		library(multicore)
+    		library(parallel)
     		it <- split(theta,factor(cut(theta,n.core,labels=FALSE)))
-	    	soln <- unlist(mclapply(it,ms.fun),recursive=FALSE)
+	    	soln <- unlist(mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE)
   		} else {
     		soln <- ms.fun(theta)
 	  	}      

diff --git a/R/kinship.BLUP.R b/R/kinship.BLUP.R
@@ -65,9 +65,9 @@ if (K.method == "RR") {
   }
 
   if (n.core > 1) {
-    library(multicore)
+    library(parallel)
     it <- split(theta,factor(cut(theta,n.core,labels=FALSE)))
-    soln <- unlist(mclapply(it,ms.fun),recursive=FALSE)
+    soln <- unlist(mclapply(it,ms.fun,mc.cores=n.core),recursive=FALSE)
   } else {
     soln <- ms.fun(theta)
   }      

diff --git a/inst/doc/GS_tutorial.R b/inst/doc/GS_tutorial.R
@@ -0,0 +1,2 @@
+### R code from vignette source 'GS_tutorial.Rnw'
+
diff --git a/inst/doc/GS_tutorial.pdf b/inst/doc/GS_tutorial.pdf
diff --git a/inst/doc/GWAS_tutorial.R b/inst/doc/GWAS_tutorial.R
@@ -0,0 +1,2 @@
+### R code from vignette source 'GWAS_tutorial.Rnw'
+
diff --git a/inst/doc/GWAS_tutorial.pdf b/inst/doc/GWAS_tutorial.pdf
diff --git a/man/A.mat.Rd b/man/A.mat.Rd
@@ -9,7 +9,7 @@ Calculates the realized additive relationship matrix.
 }
 \usage{
 A.mat(X,min.MAF=NULL,max.missing=NULL,impute.method="mean",tol=0.02,
-      n.core=1,shrink=NULL,return.imputed=FALSE)
+      n.core=1,shrink=FALSE,return.imputed=FALSE)
 }
 
 \arguments{
@@ -24,16 +24,16 @@ Minimum minor allele frequency. The A matrix is not sensitive to rare alleles, s
 Maximum proportion of missing data; default removes completely missing markers.
 }
 \item{impute.method}{
-There are two options.  The default is "mean", which imputes with the mean for each marker.  The "EM" option imputes with an EM algorithm (see details).
+There are two options. The default is "mean", which imputes with the mean for each marker.  The "EM" option imputes with an EM algorithm (see details).
 }
 \item{tol}{
 Specifies the convergence criterion for the EM algorithm (see details). 
 }
 \item{n.core}{
-Specifies the number of cores to use for parallel execution of the EM algorithm. 
+Specifies the number of cores to use for parallel execution of the EM algorithm (use only at UNIX command line).
 }
 \item{shrink}{
-Default behavior (shrink=NULL) is to use shrinkage estimation (see details) when the # of markers is less than 5 times the # of lines (m < 5n).  To use shrinkage when m > 5n, set shrink=TRUE.  To turn off shrinkage when m < 5n, set shrink=FALSE. 
+Set shrink=TRUE to use the shrinkage estimation procedure (see Details).  
 }
 \item{return.imputed}{
 When TRUE, the imputed marker matrix is returned.  
@@ -44,12 +44,10 @@ At high marker density, the relationship matrix is estimated as \eqn{A=W W'/c},
 
 The EM imputation algorithm is based on the multivariate normal distribution and was designed for use with GBS (genotyping-by-sequencing) markers, which tend to be high density but with lots of missing data.  Details are given in Poland et al. (2012). The EM algorithm stops at iteration \eqn{t} when the RMS error = \eqn{n^{-1} \|A_{t} - A_{t-1}\|_2} < tol.  
 
-At low marker density, shrinkage estimation can improve the estimate of the relationship matrix and the accuracy of GEBV for lines with low accuracy phenotypes (Endelman and Jannink 2012).  The shrinkage intensity ranges from 0 (no shrinkage, same estimator as high density formula) to 1 (completely shrunk to \eqn{(1+f)I}). The shrinkage intensity is chosen to minimize the expected mean-squared error and printed to the screen as output. 
+At low marker density (m < n), shrinkage estimation can improve the estimate of the relationship matrix and the accuracy of GEBVs for lines with low accuracy phenotypes (Endelman and Jannink 2012).  The shrinkage intensity ranges from 0 (no shrinkage, same estimator as high density formula) to 1 (completely shrunk to \eqn{(1+f)I}). The shrinkage intensity is chosen to minimize the expected mean-squared error and printed to the screen as output. 
 
 The shrinkage and EM options are designed for opposite scenarios (low vs. high density) and cannot be used simultaneously. 
 
-The multicore functionality only works for Mac, Linux, and UNIX users who have installed R package multicore. Do not use this option from the R GUI; you must execute from the command line.
-
 When the EM algorithm is used, the imputed alleles can lie outside the interval [-1,1].  Polymorphic markers that do not meet the min.MAF and max.missing criteria are not imputed.
 }
 \value{

diff --git a/man/GWAS.Rd b/man/GWAS.Rd
@@ -36,7 +36,7 @@ Number of principal components to include as fixed effects.  Default is 0 (equal
 Specifies the minimum minor allele frequency (MAF).  If a marker has a MAF less than min.MAF, it is assigned a zero score.  
 }
 \item{n.core}{
-For Mac, Linux, and UNIX users, setting n.core > 1 will enable parallel execution on a machine with multiple cores. R package multicore must be installed for this to work.   
+Setting n.core > 1 will enable parallel execution on a machine with multiple cores (use only at UNIX command line).  
 }
 \item{P3D}{
 When P3D=TRUE, variance components are estimated by REML only once, without any markers in the model.  When P3D=FALSE, variance components are estimated by REML for each marker separately. 

diff --git a/man/kin.blup.Rd b/man/kin.blup.Rd
@@ -41,7 +41,7 @@ An array of strings containing the names of columns that should be included as c
 When PEV=TRUE, the function returns the prediction error variance for the genotypic values (\eqn{PEV_i = Var[g^*_i-g_i]}). 
 }
 \item{n.core}{
-Specifies the number of cores to use for parallel execution of the Gaussian kernel method.  Requires R package multicore and cannot be used with the R GUI.
+Specifies the number of cores to use for parallel execution of the Gaussian kernel method (use only at UNIX command line).
 }
 \item{theta.seq}{
 The scale parameter for the Gaussian kernel is set by maximizing the restricted log-likelihood over a grid of values. By default, the grid is constructed by dividing the interval (0,max(K)] into 10 points.  Passing a numeric array to this variable (theta.seq = "theta sequence") will specify a different set of grid points (e.g., for large problems you might want fewer than 10).  

diff --git a/man/kinship.BLUP.Rd b/man/kinship.BLUP.Rd
@@ -43,7 +43,7 @@ For K.method = "GAUSS" or "EXP", the number of points to use in the log-likeliho
 Either "REML" (default) or "ML".
 }
 \item{n.core}{
-For Mac, Linux, and UNIX users, setting n.core > 1 will enable parallel execution on a machine with multiple cores. R package multicore must be installed for this to work.  Do not run multicore from within the R GUI; you must use the command line.   
+Setting n.core > 1 will enable parallel execution of the Gaussian kernel computation (use only at UNIX command line).   
 }
 }
 \value{

diff --git a/man/rrBLUP-package.Rd b/man/rrBLUP-package.Rd
@@ -4,9 +4,7 @@
 \title{Ridge regression and other kernels for genomic selection}
 \description{This package has been developed primarily for genomic prediction with mixed models (but it can also do genome-wide association mapping with \code{\link{GWAS}}). The heart of the package is the function \code{\link{mixed.solve}}, which is a general-purpose solver for mixed models with a single variance component other than the error.  Genomic predictions can be made by estimating marker effects (RR-BLUP) or by estimating line effects (G-BLUP). In Endelman (2011) I made the poor choice of using the letter G to denotype the genotype or marker data.  To be consistent with Endelman (2011) I have retained this notation in \code{\link{kinship.BLUP}}.  However, that function has now been superseded by \code{\link{kin.blup}} and \code{\link{A.mat}}, the latter being a utility for estimating the additive relationship matrix (A) from markers.  In these newer functions I adopt the usual convention that G is the genetic covariance (not the marker data), which is also consistent with the notation in Endelman and Jannink (2012).
 }
-\section{Parallel computing}{
-For Mac, Linux, and UNIX users, R package multicore can be used in conjunction with rrBLUP to take advantage of multiple processors on a single machine.  This is useful for Gaussian kernel predictions and when using the EM imputation algorithm in \code{\link{A.mat}}.  You need R >= 2.14.1 for this to work properly, and you must also use R from the command line (not the GUI).  
-}
+
 \references{
 Endelman, J.B. 2011. Ridge regression and other kernels for genomic selection with R package rrBLUP. Plant Genome 4:250-255. doi: 10.3835/plantgenome2011.08.0024