-
Notifications
You must be signed in to change notification settings - Fork 67
/
FW_BGLR.R
166 lines (126 loc) · 4.89 KB
/
FW_BGLR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#Function to fit Finlay-Wilkinson model in 2 steps using BGLR
#The model is:
# y[ijk] = mu + g[i] + h[j] + b[i] * h[j] + e[ijk]
#i indexed genotypes, j indexes environments and k indexes replicate.
# We assume g ~ N(0, sigma^2_g G), b ~ N(0, sigma^2_ G)
#
#Function arguments
#pheno: A data.frame with 3 columns
# $VAR: variety
# $ENV: environment
# $y: response variable
# X: matrix of markers
# G: Genomic relationship matrix
FW.BGLR<-function(pheno,X=NULL,G=NULL,model1='BRR',model2='BRR',saveAt='',...){
varieties<-unique(as.character(pheno$VAR))
environments<-unique(as.character(pheno$ENV))
common<-intersect(rownames(G),varieties)
pheno<-pheno[pheno$VAR%in%common,]
if(!is.null(G)){
tmp<-rownames(G)%in%common
G<-G[tmp,tmp]
}
if(!is.null(X)){
tmp<-rownames(X)%in%common
X<-X[tmp,]
X<-scale(X,center=TRUE,scale=FALSE)
G<-tcrossprod(X)/ncol(X)
}
#Incidence matrix for environments
pheno$ENV<-as.factor(pheno$ENV)
ZE<-model.matrix(~pheno$ENV-1)
ZE<-scale(ZE,center=T,scale=F)
#Incidenve matrix for main effect of Genotypes
pheno$VAR<-factor(x=pheno$VAR,levels=rownames(G),ordered=TRUE)
ZVAR<-model.matrix(~pheno$VAR-1)
if(!is.null(G))
{
#Eigen-value decomposition for G
evdG<-eigen(G)
PC<-sweep(evdG$vector,STATS=sqrt(evdG$values),MARGIN=2,FUN="*")
XVAR<-ZVAR%*%PC
colnames(XVAR)=colnames(ZVAR)
}
#Finlay-Wilkinson
#Step 1 Compute environmental means
eta1<-list(list(X=ZE,model="BRR"),#*# centering ZE should help convergence
list(X=XVAR,model=model1))
fm1<-BGLR(y=pheno$y,ETA=eta1,saveAt=paste0(saveAt,'step1_'),...)
hHat1<-fm1$ETA[[1]]$b #*# Why don't we add intercept? #because these are interpreted as deviations from general mean
#due to environmental effects?
names(hHat1)<-levels(pheno$ENV)
#Step 2
hHatExpanded<-as.vector(ZE%*%hHat1)
yc<-pheno$y-hHatExpanded # why not centering yc? #Once that we estimated the environmental effect we fit the model
#y[ijk]-hat(h[j]) = mu + g[i] + b[i]*hat(h[j]) + e[ijk]
#Weight each row in ZVAR
XInteraction<-sweep(ZVAR,1L,hHatExpanded,"*")
XInteraction<-XInteraction%*%PC
eta2<-list( int=list(X=XVAR,model=model2,saveEffects=TRUE),
slope=list( X=XInteraction,model=model2,saveEffects=TRUE))
saveAt=paste0(saveAt,'step2_')
fm2<-BGLR(y=yc,ETA=eta2,saveAt=saveAt,...)
BInt=readBinMat(paste0(saveAt,'ETA_int_b.bin'))
BSlope=readBinMat(paste0(saveAt,'ETA_slope_b.bin'))
INT=tcrossprod(PC,BInt)
SLOPE=tcrossprod(PC,BSlope)
INT<-INT + mean(pheno$y,na.rm=TRUE)
SLOPE<-SLOPE+1
rownames(INT)<-levels(pheno$VAR)
rownames(SLOPE)<-levels(pheno$VAR)
Int=rowMeans(INT)
Slope=rowMeans(SLOPE)
yHat=fm2$mu+ZE%*%hHat1+ZVAR%*%(Int-mean(pheno$y,na.rm=TRUE))+XInteraction%*%Slope
#return the goodies
out<-list(yHat=yHat,VAR=data.frame( ID=rownames(INT),int=Int,intSD=apply(FUN=sd,X=INT,MARGIN=1),
slope=Slope,slopeSD=apply(FUN=sd,X=SLOPE,MARGIN=1),stringsAsFactors=FALSE),ENV=hHat1)
return(out)
}
plot.FW <- function(out,pheno)
{
## set par settings
par(mar=c(5,5,2,2))
xlim <- range(out$ENV)
ylim <- range(out$VAR$int)
ylim[1]<-ylim[1]-0.35*ylim[1]
ylim[2]<-1.35*ylim[2]
## draw an empty plot
plot(as.numeric()~1, ylab = "Genotype performance",
xlab = "Environment effect", cex.axis=1.2, cex.lab=1.5, xlim=xlim, ylim=ylim)
## draw regression lines for all the genotypes
for(i in 1:nrow(out$VAR))
{
#we can add intercept to match scale of original data if needed
abline(a=out$VAR$int[i],b=out$VAR$slope[i],col="lightgrey")
}
## add vertical line @ hHat=0 to see G effect
abline(v=0, lty=2)
## add dashed line for b=1
abline(mean(pheno$y, na.rm=TRUE), 1, lty=2, lwd=1)
## select genotypes to plot
selgeno <- out$VAR$slope
names(selgeno) <- rownames(out$VAR)
selgeno <- selgeno[order(selgeno)]
## plot genotypes ith min/close to 1/max slope
selgeno <- c(selgeno[1], selgeno[floor(length(selgeno)/2)], selgeno[length(selgeno)])
mycols <- c("blue", "violetred", "orange")
count <- 1
for (i in names(selgeno))
{
points(out$ENV[pheno$ENV[pheno$VAR == i]],
pheno$y[pheno$VAR == i],
pch=19, col=mycols[count])
abline(a=out$VAR$int[rownames(out$VAR) == i],
b=out$VAR$slope[ rownames(out$VAR) == i],
col=mycols[count], lwd=2)
count <- count+1
}
## add legend
z <- match(names(selgeno), rownames(out$VAR))
mylegend <- paste0(names(selgeno), ": ", "g=", round(out$VAR$int[z], 2), "; b=", round(out$VAR$slope[z], 2))
legend("topleft", bty="n", cex=1.2, legend=mylegend, col=mycols, pch=rep(19, length(selgeno)))
## just to get nice and neat box
box()
## reset par settings
par(mar=c(5,4,4,2))
}