Skip to content

Commit

Permalink
version 1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
jbe907 authored and gaborcsardi committed Aug 17, 2011
1 parent f29d828 commit 705d7b4
Show file tree
Hide file tree
Showing 16 changed files with 414 additions and 343 deletions.
20 changes: 11 additions & 9 deletions DESCRIPTION
@@ -1,15 +1,17 @@
Package: rrBLUP
Title: Genomic selection and association analysis
Version: 1.1
Title: Ridge Regression-BLUP and related methods for genomic selection
Version: 1.2
Author: Jeffrey Endelman
Maintainer: Jeffrey Endelman <j.endelman@gmail.com>
Description: Contains tools for genomics research: a maximum-likelihood
algorithm for ridge regression-BLUP (RR.BLUP), a function for
genome-wide association analysis (GWA), and a genomic selection
algorithm that combines ridge regression and subset selection
(RR.BLUP.MR).
Description: At the core of this package is the function mixed.solve,
which contains a fast ML/REML algorithm for mixed models with
one variance component (other than the residual error). This
algorithm can be used for association analysis (function GWA)
and genomic prediction (function kinship.BLUP), for which
several kinship models are available (e.g., realized
relationship and Gaussian).
License: GPL-3
LazyLoad: yes
Packaged: 2011-05-19 18:06:15 UTC; Jeffrey
Packaged: 2011-08-17 15:52:24 UTC; Jeffrey
Repository: CRAN
Date/Publication: 2011-05-20 12:05:42
Date/Publication: 2011-08-17 20:27:22
8 changes: 8 additions & 0 deletions MD5
@@ -0,0 +1,8 @@
eb256e62bd0337e6f2e53cea67a004e3 *DESCRIPTION
68765541f0ed0a69fc4e38167978c876 *NAMESPACE
abbfda1cf497e494357264a5e38e14f9 *R/GWA.R
2fe153cb593874e9401b8309cd4f6875 *R/kinship.BLUP.R
1ef41630295e87aea7c79c8e0c469079 *R/mixed.solve.R
fb7f5f4673fb2409ea852ed925b211d1 *man/GWA.Rd
f4a0a5df6600c33f8eb15ab851a3ed7b *man/kinship.BLUP.Rd
17ec7f6c8c1252e88ba800a932f9e0ea *man/mixed.solve.Rd
1 change: 1 addition & 0 deletions NAMESPACE
@@ -0,0 +1 @@
export(mixed.solve,GWA,kinship.BLUP)
19 changes: 11 additions & 8 deletions R/GWA.R
@@ -1,31 +1,34 @@
GWA <-
function(y,G,Z,X = NULL,K = NULL,min.MAF=0.01,check.rank="FALSE") {
function(y,G,Z=NULL,X = NULL,K = NULL,min.MAF=0.05,check.rank=FALSE) {

pi <- 3.14159
AS1 <- c(0.31938,-0.35656,1.78148,-1.82126,1.33027) #for p-value approximation
AS2 <- 0.2316419

n <- length(y)
y <- matrix(y,n,1)
if (is.null(X)) {
p <- 1
X <- matrix(rep(1,n),n,1)
}
p <- dim(X)[2]
p <- ncol(X)
if (is.null(p)) {
p <- 1
X <- matrix(X,length(X),1)
}
rX <- qr(X)$rank
stopifnot(rX==p) #must be full rank design matrix
stopifnot(dim(X)[1]==n)
stopifnot(nrow(X)==n)

m <- dim(G)[2] # number of markers
m <- ncol(G) # number of markers
if (is.null(m)) {
m <- 1
G <- matrix(G,length(G),1)
}
t <- dim(G)[1]

if (is.null(Z)) {Z <- diag(n)}

stopifnot(dim(Z)[1]==n)
stopifnot(dim(Z)[2]==t)

Expand All @@ -35,14 +38,14 @@ if (is.null(K)) {
stopifnot(nrow(K)==ncol(K))
stopifnot(nrow(K)==t)

out <- RR.BLUP(y,X=X,Z=Z,K=K)
H <- out$Ve/out$Vg*diag(n)+Z%*%K%*%t(Z)
out <- mixed.solve(y,X=X,Z=Z,K=K)
H <- out$Ve/out$Vu*diag(n)+Z%*%K%*%t(Z)

Hinv <- solve(H)
df <- p + 1

scores <- rep(0,m)
freq <- colMeans((G+1)/2)
freq <- colMeans(G+1)/2
for (i in 1:m) {
MAF <- min(freq[i],1-freq[i])
if (MAF < min.MAF) {
Expand All @@ -51,7 +54,7 @@ for (i in 1:m) {

Xsnp <- cbind(X,Z%*%G[,i])

if (check.rank=="TRUE") {
if (check.rank==TRUE) {
rXsnp <- qr(Xsnp)$rank
} else {
rXsnp <- df
Expand Down
93 changes: 0 additions & 93 deletions R/RR.BLUP.MR.R

This file was deleted.

145 changes: 145 additions & 0 deletions R/kinship.BLUP.R
@@ -0,0 +1,145 @@
kinship.BLUP <- function(y,G.train,G.pred=NULL,X=NULL,Z.train=NULL,K.method="RR",n.profile=10,mixed.method="REML") {
#assumes genotypes coded as {-1,0,1}

K.method <- toupper(K.method)

n.obs <- length(y)
y <- matrix(y,n.obs,1)

if (is.null(X)) {
p <- 1
X <- matrix(rep(1,n.obs),n.obs,1)
}
p <- ncol(X)
if (is.null(p)) {
p <- 1
X <- matrix(X,length(X),1)
}

rX <- qr(X)$rank
stopifnot(rX==p) #must be full rank design matrix
stopifnot(nrow(X)==n.obs)

if (is.null(Z.train)) {
Z.train <- diag(n.obs)
}

m <- ncol(G.train)
n.train <- nrow(G.train)

stopifnot(ncol(Z.train)==n.train)
stopifnot(nrow(Z.train)==n.obs)

if (!is.null(G.pred)) {
stopifnot(ncol(G.pred)==m)
n.pred <- nrow(G.pred)
} else {
n.pred <- 0
}

t <- n.pred + n.train #total number of lines

Z <- cbind(Z.train,matrix(rep(0,n.obs*n.pred),n.obs,n.pred))
G <- rbind(G.train,G.pred)

if (K.method == "RR") {
freq <- colMeans(G+1)/2
K.RR <- G %*% t(G)/2/sum(freq*(1-freq))
soln <- mixed.solve(y=y,X=X,Z=Z,K=K.RR,method=mixed.method)
if (n.pred > 0) {
list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve)
} else {
list(g.train=soln$u[1:n.train],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve)
}
} else if (K.method == "MR") {

#Partition lines in training set for cross-validation
n.fold <- 5
sets <- rep(0,n.train)
ix <- sort(runif(n.train),index.return=TRUE)$ix
SetSize <- floor(n.train/n.fold)
for (k in 1:(n.fold-1)) {sets[ix[((k-1)*SetSize+1):(k*SetSize)]] <- k}
sets[ix[((n.fold-1)*SetSize+1):n.train]] <- n.fold

NumMarkers <- round(exp(seq(log(10),log(m),length.out=n.profile)))

r.yy <- matrix(rep(0,n.fold*n.profile),n.profile,n.fold)
for (j in 1:n.fold) {
CV.test.lines <- which(sets==j)
CV.train.lines <- setdiff(1:n.train,CV.test.lines)
CV.test.obs <- NULL
for (line in CV.test.lines) {
CV.test.obs <- union(CV.test.obs,which(Z.train[,line]==1))
}
CV.train.obs <- setdiff(1:n.obs,CV.test.obs)
CV.n.train <- length(CV.train.obs)
CV.n.test <- length(CV.test.obs)

Scores <- GWA(y=y[CV.train.obs],X=X[CV.train.obs,],Z=Z.train[CV.train.obs,CV.train.lines],G=G.train[CV.train.lines,])
ScoreIdx <- sort(Scores,decreasing=TRUE,index.return=TRUE)$ix

for (k in 1:n.profile) {
MarkerList <- ScoreIdx[1:NumMarkers[k]]
Zaug <- cbind(Z.train[CV.train.obs,CV.train.lines],matrix(rep(0,CV.n.train*CV.n.test),CV.n.train,CV.n.test))
Gaug <- rbind(G.train[CV.train.lines,MarkerList],G.train[CV.test.lines,MarkerList])
soln <- mixed.solve(y[CV.train.obs],X=X[CV.train.obs,],Z=Zaug,K=Gaug%*%t(Gaug),method=mixed.method)
g.pred <- Z.train[CV.test.obs,CV.test.lines]%*%soln$u[CV.n.train+1:CV.n.test]
r.yy[k,j] <- cor(g.pred,y[CV.test.obs])
} #for k
} #for j

#find CV maximum
r.yy.mean <- rowMeans(r.yy)
opt.n.mark <- NumMarkers[which.max(r.yy.mean)]

Scores <- GWA(y=y,X=X,Z=Z.train,G=G.train)
ScoreIdx <- sort(Scores,decreasing=TRUE,index.return=TRUE)$ix
MarkerList <- ScoreIdx[1:opt.n.mark]

freq <- colMeans(G[,MarkerList]+1)/2
K.RR <- G[,MarkerList] %*% t(G[,MarkerList])/2/sum(freq*(1-freq))
soln <- mixed.solve(y=y,X=X,Z=Z,K=K.RR,method=mixed.method)

if (n.pred > 0) {
list(g.train=soln$u[1:n.train],g.pred=soln$u[n.train+1:n.pred],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve,profile=cbind(NumMarkers,r.yy.mean))
} else {
list(g.train=soln$u[1:n.train],beta=soln$beta,Vg=soln$Vu,Ve=soln$Ve,profile=cbind(NumMarkers,r.yy.mean))
}

} else {
# "exp" or "gauss"
theta <- setdiff(seq(0,2,length.out=n.profile+1),0)
LL <- rep(0,n.profile)
soln <- list()

D <- 0*diag(t)
for (i in 1:(t-1)) {
for (j in (i+1):t) {
D[i,j] <- sqrt(sum((G[i,]-G[j,])^2))
} #for j
} #for i

D <- (D + t(D))/sqrt(m)
for (i in 1:n.profile) {
if (K.method == "EXP") {K <- exp(-D/theta[i])}
if (K.method == "GAUSS") {K <- exp(-(D/theta[i])^2) }
soln[[i]] <- mixed.solve(y=y,X=X,Z=Z,K=K,method=mixed.method)
LL[i] <- soln[[i]]$LL
} #for i

#find maximum LL soln
max.LL <- which.max(LL)
g.train <- soln[[max.LL]]$u[1:n.train]
if (n.pred > 0) {
g.pred <- soln[[max.LL]]$u[n.train+1:n.pred]
list(profile=cbind(theta,LL),g.train=g.train,g.pred=g.pred,beta=soln[[max.LL]]$beta,Vg=soln[[max.LL]]$Vu,Ve=soln[[max.LL]]$Ve)
} else {
list(profile=cbind(theta,LL),g.train=g.train,beta=soln[[max.LL]]$beta,Vg=soln[[max.LL]]$Vu,Ve=soln[[max.LL]]$Ve)
}

} #if K.method
} #function




0 comments on commit 705d7b4

Please sign in to comment.