Skip to content

Commit

Permalink
version 2.7
Browse files Browse the repository at this point in the history
  • Loading branch information
jbe907 authored and gaborcsardi committed Dec 1, 2011
1 parent fa3c345 commit e100c93
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 58 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
@@ -1,11 +1,12 @@
Package: rrBLUP
Title: Ridge regression and other kernels for genomic selection
Version: 2.5
Version: 2.7
Author: Jeffrey Endelman
Maintainer: Jeffrey Endelman <j.endelman@gmail.com>
Description: One application is to estimate marker effects by ridge regression; alternatively, BLUPs can be calculated based on kinship. The genetic covariance between lines can be modeled using a marker-based, additive relationship matrix, or via Gaussian and exponential kernels.
Depends: Matrix
License: GPL-3
LazyLoad: yes
Packaged: 2011-11-23 15:18:07 UTC; jeffendelman
Packaged: 2011-12-01 21:44:59 UTC; jeffendelman
Repository: CRAN
Date/Publication: 2011-11-23 16:04:50
Date/Publication: 2011-12-02 16:23:08
20 changes: 10 additions & 10 deletions MD5
@@ -1,13 +1,13 @@
ebd0f441dd7cc78ac5317311a1315f55 *DESCRIPTION
725cf45b6b4fc35a46619125c9a56ed5 *NAMESPACE
c0b87a2036de8c52f506910c514b523e *R/A.mat.R
7ffecd7c7d19252b56998ccd147228c4 *DESCRIPTION
cb2bbc1c5b8ede3c305c7f88a3534ab4 *NAMESPACE
1b905944441d29705c33c76a49c037b0 *R/A.mat.R
ed40c1ee328c5c1a360f8f19e1c3da6f *R/GWA.R
1eb18cf7496f7a40675c50be85207f5b *R/impute.R
12d0dfead32ae78381bb56fe1b09573e *R/kinship.BLUP.R
2a44ea8e534f08600a1cb26458a8ae28 *R/mixed.solve.R
17386c1cd7a17192e8fd416a61490556 *R/kinship.BLUP.R
e6a37c2566e88f73c91bd18120dd5923 *R/mixed.solve.R
b74e4e9e2c643e550a83c5ee4eb7e905 *inst/CITATION
ac7f617ca9597ea9a6c7dae0442cf0b6 *man/A.mat.Rd
be5fbfb20ec993da916e34e007aabbe0 *man/GWA.Rd
5f2221a320a663d862e94a88b6b6f8db *man/impute.Rd
4c7aa162fd21063299e894fed121f018 *man/kinship.BLUP.Rd
db24d688f1817159d86971b2500c5a54 *man/mixed.solve.Rd
cdbdc329a3d94433d03b1cfbc9b4fe22 *man/A.mat.Rd
30f584df388d3bd33156b8b0d23c3f1e *man/GWA.Rd
10f9448d8e12fd7ae964728e953fb2aa *man/impute.Rd
9e7862d767ca15b2e4416b9bce8254b0 *man/kinship.BLUP.Rd
8b32359363bbf005fb6ee26ea7dd6725 *man/mixed.solve.Rd
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1 +1,2 @@
importFrom(Matrix,nearPD)
export(mixed.solve,impute,GWA,kinship.BLUP,A.mat)
82 changes: 60 additions & 22 deletions R/A.mat.R 100755 → 100644
@@ -1,22 +1,60 @@
A.mat <- function(G,method,min.MAF=0.01) {
method = toupper(method)
freq <- colMeans(G+1)/2
markers <- which((freq >= min.MAF)&(1-freq >= min.MAF))
freq <- freq[markers]
G <- G[,markers]
m <- ncol(G)
n <- nrow(G)

if (method=="IBS") {
A <- tcrossprod(G)/m+1
} else if (method=="UAR") {
X <- G + 1
Y <- t(t(X)/sqrt(2*freq*(1-freq)))
c <- sqrt(2*freq/(1-freq))
z <- matrix(colSums(t(Y)*c),n,1)
one <- matrix(rep(1,n),n,1)
q <- sum(c^2)
A <- (tcrossprod(Y)-tcrossprod(one,z)-tcrossprod(z,one)+tcrossprod(one)*q)/m
} else {stop("Invalid method")}
A
}
A.mat <- function(G,method="IBS",min.MAF=0.01,max.missing=1,PD=TRUE) {

crossprod.na <- function(x1,x2) {
mean(x1 * x2,na.rm=TRUE)
}
tcrossprod.na <- function(G) {
apply(G,1,function(x,G){apply(G,1,crossprod.na,x)},G)
}

method <- toupper(method)
n <- nrow(G)

if (length(which(is.na(G))) > 0) {

freq <- apply(G+1,2,function(x){mean(x,na.rm=TRUE)})/2
frac.missing <- apply(G,2,function(x){length(which(is.na(x)))})/n
markers <- which((freq >= min.MAF)&(1-freq >= min.MAF)&(frac.missing < max.missing))
freq <- freq[markers]
G <- G[,markers]
m <- ncol(G)

if (method=="IBS") {
A <- tcrossprod.na(G)+1
} else if (method=="UAR") {
X <- G + 1
Y <- t(t(X)/sqrt(2*freq*(1-freq)))
c <- sqrt(2*freq/(1-freq))
z <- matrix(colMeans(t(Y)*c,na.rm=TRUE),n,1)
one <- matrix(rep(1,n),n,1)
q <- mean(c^2)
A <- tcrossprod.na(Y)-tcrossprod(one,z)-tcrossprod(z,one)+tcrossprod(one)*q
} else {stop("Invalid method")}

if (PD) {
as.matrix(nearPD(A)$mat)
} else {A}

} else {

freq <- colMeans(G+1)/2
markers <- which((freq >= min.MAF)&(1-freq >= min.MAF))
freq <- freq[markers]
G <- G[,markers]
m <- ncol(G)

if (method=="IBS") {
A <- tcrossprod(G)/m+1
} else if (method=="UAR") {
X <- G + 1
Y <- t(t(X)/sqrt(2*freq*(1-freq)))
c <- sqrt(2*freq/(1-freq))
z <- matrix(colSums(t(Y)*c),n,1)
one <- matrix(rep(1,n),n,1)
q <- sum(c^2)
A <- (tcrossprod(Y)-tcrossprod(one,z)-tcrossprod(z,one)+tcrossprod(one)*q)/m
} else {stop("Invalid method")}

A
} # if missing data
}
4 changes: 2 additions & 2 deletions R/kinship.BLUP.R
Expand Up @@ -41,7 +41,7 @@ Z <- cbind(Z.train,matrix(rep(0,n.obs*n.pred),n.obs,n.pred))
G <- rbind(G.train,G.pred)

if (K.method == "RR") {
K <- tcrossprod(G,G)/m
K <- A.mat(G,min.MAF=0)
soln <- mixed.solve(y=y,X=X,Z=Z,K=K,method=mixed.method)
if (n.pred > 0) {
list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta)
Expand Down Expand Up @@ -75,4 +75,4 @@ if (K.method == "RR") {
}

} #if K.method
} #function
} #function
16 changes: 14 additions & 2 deletions R/mixed.solve.R
@@ -1,7 +1,10 @@
mixed.solve <- function (y, Z, K = NULL, X = NULL, method = "REML", bounds = c(1e-09,1e+09), SE = FALSE, return.Hinv = FALSE) {
mixed.solve <- function (y, Z = NULL, K = NULL, X = NULL, method = "REML", bounds = c(1e-09,1e+09), SE = FALSE, return.Hinv = FALSE) {
pi <- 3.14159
n <- length(y)
y <- matrix(y,n,1)

not.NA <- which(!is.na(y))

if (is.null(X)) {
p <- 1
X <- matrix(rep(1,n),n,1)
Expand All @@ -11,6 +14,9 @@ if (is.null(p)) {
p <- 1
X <- matrix(X,length(X),1)
}
if (is.null(Z)) {
Z <- diag(n)
}
m <- ncol(Z)
if (is.null(m)) {
m <- 1
Expand All @@ -22,6 +28,12 @@ if (!is.null(K)) {
stopifnot(nrow(K) == m)
stopifnot(ncol(K) == m)
}

Z <- Z[not.NA,]
X <- X[not.NA,]
n <- length(not.NA)
y <- matrix(y[not.NA],n,1)

XtX <- crossprod(X, X)
rank.X <- qr(XtX)$rank
stopifnot(p == rank.X)
Expand Down Expand Up @@ -133,4 +145,4 @@ if (!SE) {
list(Vu = Vu.opt, Ve = Ve.opt, beta = as.vector(beta), beta.SE = as.vector(beta.SE), u = as.vector(u), u.SE = as.vector(u.SE), LL = LL)
}
}
}
}
14 changes: 11 additions & 3 deletions man/A.mat.Rd
Expand Up @@ -8,25 +8,33 @@ Additive relationship matrix
Calculates an additive relationship matrix.
}
\usage{
A.mat(G,method,min.MAF=0.01)
A.mat(G,method="IBS",min.MAF=0.01,max.missing=1,PD=TRUE)
}

\arguments{
\item{G}{
Matrix (\eqn{n \times m}) of unphased genotypes for \eqn{n} lines and \eqn{m} biallelic markers,
coded as \{-1,0,1\} = \{aa,Aa,AA\}. Missing data are not allowed (\code{\link{impute}} first).
coded as \{-1,0,1\} = \{aa,Aa,AA\}. Fractional (imputed) and missing values (NA) are allowed.
}
\item{method}{
Must be either "IBS" or "UAR", depending on what reference population is desired (see details).
}
\item{min.MAF}{
Minimum minor allele frequency; default is 0.01.
}
\item{max.missing}{
Maximum proportion of entries that can be missing for each marker. Markers above this limit are not used.
}
\item{PD}{
This option only applies when alleles are missing, in which case A is not positive semidefinite by construction. If PD=TRUE, the function \code{\link{nearPD}} is used to return the closest positive definite matrix, which can then be used in \code{\link{mixed.solve}}
}
}
\details{
For method "IBS", \eqn{A = 1 + G G' / m }, which means the coefficient of coancestry equals the identity-by-state (IBS). The assumption that two alleles IBS are also identical by descent (IBD) is consistent with coalescence theory, in which all alleles are derived from a common ancestor but at different times in the past (Powell et al., 2010).
For method "UAR" (Unified Additive Relationship), \eqn{A = W W'/m}, where \eqn{W_{ij} = (X_{ij} - 2 p_i)/\sqrt{2 p_i (1 - p_i)}} and \eqn{X = G + 1}. This \eqn{A} matrix has the property that the average of its off-diagonal elements is zero, which can be interpreted as meaning the IBD relationships are relative to the current population as the reference population (Powell et al., 2010). In the raw UAR model of Powell et al. (2010), the diagonal elements of \eqn{A} are calculated as \eqn{1 + F}, where \eqn{F} is the correlation between the two gametes in a diploid individual. This model, although unbiased, is in general not positive semi-definite. The diagonal elements of \eqn{A = W W'/m} are slightly different than the raw UAR model of Powell et al. (2010), but the two models have the same average F for the population.
When alleles are missing, each pairwise calculation is based only on the markers that are present for both lines.
}
\value{
\eqn{n \times n} additive relationship matrix
Expand All @@ -42,6 +50,6 @@ for (i in 1:200) {
}
#Additive relationship matrix
A <- A.mat(G,method="UAR")
A <- A.mat(G)
}
4 changes: 2 additions & 2 deletions man/GWA.Rd
Expand Up @@ -21,7 +21,7 @@ GWA(y, G, Z=NULL, X=NULL, K=NULL, min.MAF=0.05,

\arguments{
\item{y}{
Vector (\eqn{n \times 1}) of observations
Vector (\eqn{n \times 1}) of observations. Missing values (NA) are omitted.
}
\item{G}{
Matrix (\eqn{t \times m}) of genotypes for \eqn{t} lines with \eqn{m} bi-allelic markers.
Expand Down Expand Up @@ -56,7 +56,7 @@ The use of a minimum MAF is typically adequate to ensure the problem is well-pos
if an error message indicates the problem is singular, set check.rank to TRUE. This will slow
down the algorithm but should fix the error.

Missing data are not allowed (\code{\link{impute}} first).
Missing marker data are not allowed (\code{\link{impute}} first).
}
\value{
Returns \eqn{m \times 1} vector of the marker scores, which equal \eqn{-log_{10}}(p-value)
Expand Down
2 changes: 1 addition & 1 deletion man/impute.Rd
Expand Up @@ -19,7 +19,7 @@ Matrix (\eqn{n \times m}) of unphased genotypes for \eqn{n} lines and \eqn{m} ma
Number of nearest neighbors to use for imputation. If not specified, the population mean is used.
}
\item{D}{
Distance matrix (\eqn{n \times n}) between lines. If not specified, calculated by \code{dist}.
Distance matrix (\eqn{n \times n}) between lines. If not specified, calculated by \code{\link{dist}}.
}
}
\details{
Expand Down
14 changes: 5 additions & 9 deletions man/kinship.BLUP.Rd
Expand Up @@ -14,15 +14,15 @@ kinship.BLUP(y, G.train, G.pred=NULL, X=NULL, Z.train=NULL,

\arguments{
\item{y}{
Vector (\eqn{n.obs \times 1}) of observations
Vector (\eqn{n.obs \times 1}) of observations. Missing values (NA) are omitted (see \code{\link{mixed.solve}}).
}
\item{G.train}{
Matrix (\eqn{n.train \times m}) of unphased genotypes for the training population: \eqn{n.train} lines with \eqn{m} bi-allelic markers.
Genotypes should be coded as \{-1,0,1\} = \{aa,Aa,AA\}; fractional (imputed) alleles are allowed.
Genotypes should be coded as \{-1,0,1\} = \{aa,Aa,AA\}; fractional (imputed) and missing (NA) alleles are allowed.
}
\item{G.pred}{
Matrix (\eqn{n.pred \times m}) of unphased genotypes for the prediction population: \eqn{n.pred} lines with \eqn{m} bi-allelic markers.
Genotypes should be coded as \{-1,0,1\} = \{aa,Aa,AA\}; fractional (imputed) alleles are allowed.
Genotypes should be coded as \{-1,0,1\} = \{aa,Aa,AA\}; fractional (imputed) and missing (NA) alleles are allowed.
}
\item{X}{
Design matrix (\eqn{n.obs \times p}) of fixed effects. If not passed, a vector of 1's is used
Expand All @@ -33,8 +33,7 @@ to model the intercept.
the identity matrix is used.
}
\item{K.method}{
"RR" (default) is ridge regression (\eqn{K = G G'/m}), "GAUSS" is a Gaussian kernel (\eqn{K = e^{-D^2/\theta^2}}) and
"EXP" is an exponential kernel (\eqn{K = e^{-D/\theta}}), where Euclidean distances \eqn{D} are calculated with \code{dist}.
"RR" (default) is ridge regression, for which K is the identity-by-state additive relationship matrix computed with \code{\link{A.mat}}. The option "GAUSS" is a Gaussian kernel (\eqn{K = e^{-D^2/\theta^2}}) and "EXP" is an exponential kernel (\eqn{K = e^{-D/\theta}}), where Euclidean distances \eqn{D} are computed with \code{\link{dist}}.
}
\item{n.profile}{
Expand All @@ -44,9 +43,6 @@ For K.method = "GAUSS" or "EXP", the number of points to use in the log-likeliho
Either "REML" (default) or "ML".
}
}
\details{
Missing alleles (NA) are permitted with GAUSS and EXP but not with RR (\code{\link{impute}} first).
}
\value{
\describe{
\item{$g.train}{BLUP solution for the training set}
Expand Down Expand Up @@ -79,7 +75,7 @@ y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g)))
#split in half for training and prediction
train <- 1:100
pred <- 1:100
pred <- 101:200
ans <- kinship.BLUP(y=y[train],G.train=G[train,],G.pred=G[pred,],K.method="GAUSS")
#correlation accuracy
Expand Down
8 changes: 4 additions & 4 deletions man/mixed.solve.Rd
Expand Up @@ -15,16 +15,16 @@ of mixed models, in which there is a single variance component other than the re
has a close relationship with ridge regression (ridge parameter \eqn{\lambda = \sigma_e^2 / \sigma^2_u}).
}
\usage{
mixed.solve(y, Z, K=NULL, X=NULL, method="REML",
mixed.solve(y, Z=NULL, K=NULL, X=NULL, method="REML",
bounds=c(1e-09, 1e+09), SE=FALSE, return.Hinv=FALSE)
}

\arguments{
\item{y}{
Vector (\eqn{n \times 1}) of observations.
Vector (\eqn{n \times 1}) of observations. Missing values (NA) are omitted, along with the corresponding rows of X and Z.
}
\item{Z}{
Design matrix (\eqn{n \times m}) for the random effects.
Design matrix (\eqn{n \times m}) for the random effects. If not passed, assumed to be the identity matrix.
}
\item{K}{
Covariance matrix (\eqn{m \times m}) for random effects; must be positive semi-definite. If not passed, assumed to
Expand Down Expand Up @@ -99,7 +99,7 @@ ans <- mixed.solve(y,Z=G) #By default K = I
accuracy <- cor(u,ans$u)

#predict breeding values
ans <- mixed.solve(y,Z=diag(200),K=tcrossprod(G,G))
ans <- mixed.solve(y,K=A.mat(G))
accuracy <- cor(g,ans$u)

}
Expand Down

0 comments on commit e100c93

Please sign in to comment.