version 1.2

cran · Aug 17, 2011 · 705d7b4 · 705d7b4
1 parent f29d828
commit 705d7b4
Show file tree

Hide file tree

Showing 16 changed files with 414 additions and 343 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,17 @@
 Package: rrBLUP
-Title: Genomic selection and association analysis
-Version: 1.1
+Title: Ridge Regression-BLUP and related methods for genomic selection
+Version: 1.2
 Author: Jeffrey Endelman
 Maintainer: Jeffrey Endelman <j.endelman@gmail.com>
-Description: Contains tools for genomics research: a maximum-likelihood
-        algorithm for ridge regression-BLUP (RR.BLUP), a function for
-        genome-wide association analysis (GWA), and a genomic selection
-        algorithm that combines ridge regression and subset selection
-        (RR.BLUP.MR).
+Description: At the core of this package is the function mixed.solve,
+        which contains a fast ML/REML algorithm for mixed models with
+        one variance component (other than the residual error).  This
+        algorithm can be used for association analysis (function GWA)
+        and genomic prediction (function kinship.BLUP), for which
+        several kinship models are available (e.g., realized
+        relationship and Gaussian).
 License: GPL-3
 LazyLoad: yes
-Packaged: 2011-05-19 18:06:15 UTC; Jeffrey
+Packaged: 2011-08-17 15:52:24 UTC; Jeffrey
 Repository: CRAN
-Date/Publication: 2011-05-20 12:05:42
+Date/Publication: 2011-08-17 20:27:22
diff --git a/MD5 b/MD5
@@ -0,0 +1,8 @@
+eb256e62bd0337e6f2e53cea67a004e3 *DESCRIPTION
+68765541f0ed0a69fc4e38167978c876 *NAMESPACE
+abbfda1cf497e494357264a5e38e14f9 *R/GWA.R
+2fe153cb593874e9401b8309cd4f6875 *R/kinship.BLUP.R
+1ef41630295e87aea7c79c8e0c469079 *R/mixed.solve.R
+fb7f5f4673fb2409ea852ed925b211d1 *man/GWA.Rd
+f4a0a5df6600c33f8eb15ab851a3ed7b *man/kinship.BLUP.Rd
+17ec7f6c8c1252e88ba800a932f9e0ea *man/mixed.solve.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1 @@
+export(mixed.solve,GWA,kinship.BLUP)
diff --git a/R/GWA.R b/R/GWA.R
@@ -1,31 +1,34 @@
 GWA <-
-function(y,G,Z,X = NULL,K = NULL,min.MAF=0.01,check.rank="FALSE") {
+function(y,G,Z=NULL,X = NULL,K = NULL,min.MAF=0.05,check.rank=FALSE) {
 
 pi <- 3.14159
 AS1 <- c(0.31938,-0.35656,1.78148,-1.82126,1.33027)  #for p-value approximation
 AS2 <- 0.2316419
 
 n <- length(y)
+y <- matrix(y,n,1)
 if (is.null(X)) {
   p <- 1
   X <- matrix(rep(1,n),n,1)
 }
-p <- dim(X)[2]  
+p <- ncol(X)
 if (is.null(p)) {
   p <- 1
   X <- matrix(X,length(X),1)
 }
 rX <- qr(X)$rank
 stopifnot(rX==p)  #must be full rank design matrix
-stopifnot(dim(X)[1]==n)
+stopifnot(nrow(X)==n)
 
-m <- dim(G)[2]  # number of markers
+m <- ncol(G)  # number of markers
 if (is.null(m)) {
   m <- 1
   G <- matrix(G,length(G),1)
 }
 t <- dim(G)[1]
 
+if (is.null(Z)) {Z <- diag(n)}
+
 stopifnot(dim(Z)[1]==n)
 stopifnot(dim(Z)[2]==t)
 
@@ -35,14 +38,14 @@ if (is.null(K)) {
 stopifnot(nrow(K)==ncol(K))
 stopifnot(nrow(K)==t)
 
-out <- RR.BLUP(y,X=X,Z=Z,K=K)  
-H <- out$Ve/out$Vg*diag(n)+Z%*%K%*%t(Z)
+out <- mixed.solve(y,X=X,Z=Z,K=K)  
+H <- out$Ve/out$Vu*diag(n)+Z%*%K%*%t(Z)
 
 Hinv <- solve(H)
 df <- p + 1
 
 scores <- rep(0,m)
-freq <- colMeans((G+1)/2)
+freq <- colMeans(G+1)/2
 for (i in 1:m) {
   MAF <- min(freq[i],1-freq[i])
   if (MAF < min.MAF) {
@@ -51,7 +54,7 @@ for (i in 1:m) {
 
   Xsnp <- cbind(X,Z%*%G[,i])
 
-  if (check.rank=="TRUE") {
+  if (check.rank==TRUE) {
     rXsnp <- qr(Xsnp)$rank
   } else {
     rXsnp <- df

diff --git a/R/RR.BLUP.MR.R b/R/RR.BLUP.MR.R
diff --git a/R/kinship.BLUP.R b/R/kinship.BLUP.R
@@ -0,0 +1,145 @@
+kinship.BLUP <- function(y,G.train,G.pred=NULL,X=NULL,Z.train=NULL,K.method="RR",n.profile=10,mixed.method="REML") {
+#assumes genotypes coded as {-1,0,1}
+
+K.method <- toupper(K.method)
+
+n.obs <- length(y)
+y <- matrix(y,n.obs,1)
+
+if (is.null(X)) {
+  p <- 1
+  X <- matrix(rep(1,n.obs),n.obs,1)
+}
+p <- ncol(X)  
+if (is.null(p)) {
+  p <- 1
+  X <- matrix(X,length(X),1)
+}
+
+rX <- qr(X)$rank
+stopifnot(rX==p)  #must be full rank design matrix
+stopifnot(nrow(X)==n.obs)
+
+if (is.null(Z.train)) {
+  Z.train <- diag(n.obs)
+}
+
+m <- ncol(G.train)
+n.train <- nrow(G.train)
+
+stopifnot(ncol(Z.train)==n.train)
+stopifnot(nrow(Z.train)==n.obs)
+
+if (!is.null(G.pred)) {
+  stopifnot(ncol(G.pred)==m)
+  n.pred <- nrow(G.pred)
+} else {
+  n.pred <- 0
+}
+
+t <- n.pred + n.train #total number of lines
+
+Z <- cbind(Z.train,matrix(rep(0,n.obs*n.pred),n.obs,n.pred))
+G <- rbind(G.train,G.pred)
+
+if (K.method == "RR") {
+   freq <- colMeans(G+1)/2
+   K.RR <- G %*% t(G)/2/sum(freq*(1-freq))
+   soln <- mixed.solve(y=y,X=X,Z=Z,K=K.RR,method=mixed.method)
+   if (n.pred > 0) {
+     list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve)
+   } else {
+     list(g.train=soln$u[1:n.train],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve)
+   }
+} else if (K.method == "MR") {
+
+  #Partition lines in training set for cross-validation
+  n.fold <- 5
+  sets <- rep(0,n.train)
+  ix <- sort(runif(n.train),index.return=TRUE)$ix
+  SetSize <- floor(n.train/n.fold)
+  for (k in 1:(n.fold-1)) {sets[ix[((k-1)*SetSize+1):(k*SetSize)]] <- k}
+  sets[ix[((n.fold-1)*SetSize+1):n.train]] <- n.fold
+
+  NumMarkers <- round(exp(seq(log(10),log(m),length.out=n.profile)))
+
+  r.yy <- matrix(rep(0,n.fold*n.profile),n.profile,n.fold)
+  for (j in 1:n.fold) {
+    CV.test.lines <- which(sets==j)
+    CV.train.lines <- setdiff(1:n.train,CV.test.lines)
+    CV.test.obs <- NULL
+    for (line in CV.test.lines) {
+       CV.test.obs <- union(CV.test.obs,which(Z.train[,line]==1))
+    }
+    CV.train.obs <- setdiff(1:n.obs,CV.test.obs)
+    CV.n.train <- length(CV.train.obs)
+    CV.n.test <- length(CV.test.obs)
+
+    Scores <- GWA(y=y[CV.train.obs],X=X[CV.train.obs,],Z=Z.train[CV.train.obs,CV.train.lines],G=G.train[CV.train.lines,])
+    ScoreIdx <- sort(Scores,decreasing=TRUE,index.return=TRUE)$ix
+
+    for (k in 1:n.profile) {
+      MarkerList <- ScoreIdx[1:NumMarkers[k]]
+      Zaug <- cbind(Z.train[CV.train.obs,CV.train.lines],matrix(rep(0,CV.n.train*CV.n.test),CV.n.train,CV.n.test))
+      Gaug <- rbind(G.train[CV.train.lines,MarkerList],G.train[CV.test.lines,MarkerList])
+      soln <- mixed.solve(y[CV.train.obs],X=X[CV.train.obs,],Z=Zaug,K=Gaug%*%t(Gaug),method=mixed.method)
+      g.pred <- Z.train[CV.test.obs,CV.test.lines]%*%soln$u[CV.n.train+1:CV.n.test]
+      r.yy[k,j] <- cor(g.pred,y[CV.test.obs])
+    } #for k
+  } #for j
+
+  #find CV maximum
+  r.yy.mean <- rowMeans(r.yy)
+  opt.n.mark <- NumMarkers[which.max(r.yy.mean)]
+
+  Scores <- GWA(y=y,X=X,Z=Z.train,G=G.train)
+  ScoreIdx <- sort(Scores,decreasing=TRUE,index.return=TRUE)$ix
+  MarkerList <- ScoreIdx[1:opt.n.mark]
+
+  freq <- colMeans(G[,MarkerList]+1)/2
+  K.RR <- G[,MarkerList] %*% t(G[,MarkerList])/2/sum(freq*(1-freq))
+  soln <- mixed.solve(y=y,X=X,Z=Z,K=K.RR,method=mixed.method)
+
+  if (n.pred > 0) {
+    list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve,profile=cbind(NumMarkers,r.yy.mean))
+  } else {
+    list(g.train=soln$u[1:n.train],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve,profile=cbind(NumMarkers,r.yy.mean))
+  }
+
+} else {
+  # "exp" or "gauss"
+  theta <- setdiff(seq(0,2,length.out=n.profile+1),0)
+  LL <- rep(0,n.profile)
+  soln <- list()
+
+  D <- 0*diag(t)
+  for (i in 1:(t-1)) {
+   for (j in (i+1):t) {
+      D[i,j] <- sqrt(sum((G[i,]-G[j,])^2))
+   } #for j
+  }  #for i
+
+  D <- (D + t(D))/sqrt(m)
+  for (i in 1:n.profile) {
+    if (K.method == "EXP") {K <- exp(-D/theta[i])} 
+    if (K.method == "GAUSS") {K <- exp(-(D/theta[i])^2) }
+    soln[[i]] <- mixed.solve(y=y,X=X,Z=Z,K=K,method=mixed.method)
+    LL[i] <- soln[[i]]$LL
+  } #for i
+
+  #find maximum LL soln
+  max.LL <- which.max(LL)
+  g.train <- soln[[max.LL]]$u[1:n.train]
+  if (n.pred > 0) {
+    g.pred <- soln[[max.LL]]$u[n.train+1:n.pred]
+    list(profile=cbind(theta,LL),g.train=g.train,g.pred=g.pred,beta=soln[[max.LL]]$beta,Vg=soln[[max.LL]]$Vu,Ve=soln[[max.LL]]$Ve)
+  } else {
+    list(profile=cbind(theta,LL),g.train=g.train,beta=soln[[max.LL]]$beta,Vg=soln[[max.LL]]$Vu,Ve=soln[[max.LL]]$Ve)
+  }
+
+} #if K.method
+} #function 
+
+
+
+