---
### **Data Bootcamp for Genomic Prediction in Plant Breeding** ###
### **University of Minnesota Plant Breeding Center** ###
#### **June 20 - 22, 2022** ####
---

### **Practical 5:  Modeling Genotype-Environment Interactions (GxE)** ###

#### **Source Scripts and Load Data**


In [1]:
WorkDir <- getwd()

##Source in functions to be used
source("GS_Pipeline_Jan_2022_FnsApp.R")
source("bootcamp_functions.R")





   *****       ***   vcfR   ***       *****
   This is vcfR 1.12.0 
     browseVignettes('vcfR') # Documentation
     citation('vcfR') # Citation
   *****       *****      *****       *****



Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Attaching package: 'bWGR'


The following objects are masked from 'package:NAM':

    CNT, GAU, GRM, IMP, KMUP, KMUP2, SPC, SPM, emBA, emBB, emBC, emBL,
    emCV, emDE, emEN, emGWA, emML, emML2, emRR, markov, mkr, mkr2X,
    mrr, mrr2X, mrrFast, wgr


Loading required package: AlgDesign

Loading required package: scales

Loading required package: scatterplot3d

Loading required package: emoa


Attaching package: 'emoa'


The following object is masked from 'package:dplyr':

    coalesce


Loading required package: Matrix

Loading required package: MASS


Attaching package: 'MASS'


The following ob

#### **Read Genotype File using vcfR** ####

In [2]:

##Load in genotype data. Use package vcfR to read in and work with vcf file.
infileVCF <- "SoyNAM_Geno.vcf"
genotypes_VCF <- read.table(infileVCF)
vcf <- read.vcfR(infileVCF, verbose = FALSE)
vcf

***** Object of Class vcfR *****
5189 samples
20 CHROMs
4,292 variants
Object size: 171.1 Mb
25.41 percent missing data
*****        *****         *****


#### **Convert VCF file format to numerical matrix format.**
#### Final genotype matrix is geno_num

In [3]:
gt <- extract.gt(vcf, element = "GT", as.numeric = F)
fix_T <- as_tibble(getFIX(vcf))
gt2 <- matrix(0, ncol = ncol(gt), nrow = nrow(gt))
colnames(gt2) <- colnames(gt)
rownames(gt2) <- rownames(gt)
gt2a <- apply(gt,2, function(x) gsub("1/1","1",x))
gt2b <- gsub("0[/|]0","0",gt2a)
gt2c <- gsub("[10][/|][10]","0.5",gt2b)
gt2d <- gsub("\\.[/|]\\.","NA",gt2c)

gt2d_num<- apply(gt2d,2,as.numeric)
rownames(gt2d_num)<- rownames(gt2d)
geno_num <- t(gt2d_num)
dim(geno_num)
rm(gt2d_num)
rm(gt2d)
rm(gt2c)
rm(gt2b)
rm(gt2a)
rm(gt2)


#### **Filter Genotypic Data**

In [4]:
##Filter markers on % missing
miss <- function(x){length(which(is.na(x)))}
mrkNA <- (apply(geno_num, MARGIN=2, FUN=miss))/dim(geno_num)[1]
ndx <- which(mrkNA > 0.2)

if (length(ndx)>0) geno_num2 <- geno_num[, -ndx] else geno_num2 <- geno_num

##Filter individuals on % missing
indNA <- (apply(geno_num2, MARGIN=1, FUN=miss))/dim(geno_num2)[2]
ndx2 <- which(indNA > 0.5)

 if(length(ndx2)>0) geno_num3 <- geno_num2[-ndx2, ] else geno_num3 <- geno_num2


##Filter markers based on MAF
maf <- apply(geno_num3, MARGIN=2, FUN=mean, na.rm=T)
ndx3 <- which(maf<0.05 | maf>0.95) 

if (length(ndx3)>0) geno_num4 <- geno_num2[, -ndx3] else geno_num4 <- geno_num3
  
dim(geno_num4)

#### **Import Phenotypic Data**

In [5]:

pheno <- read.csv("SoyNAM_Pheno.csv")
geno_num4_x <- cbind(rownames(geno_num4),geno_num4)

colnames(geno_num4_x)[1]<- "strain"

## Merge Geno and Pheno Data
Data <- merge(geno_num4_x,pheno,by="strain",all=TRUE)

## Remove with missing yiled_blup values 

YldNA_Indices <- which(is.na(Data$yield))
Data_Sub <- Data[-YldNA_Indices,]


genoStrain <- unique(geno_num4_x[,"strain"])
genoStrainIndices <- which(Data_Sub[,"strain"] %in% genoStrain)

genoIndices <- grep("ss",colnames(geno_num4_x))
initGenoIndx <- genoIndices[1]
finalGenoIndx <- genoIndices[length(genoIndices)]
phenoIndices <- c(1,c((finalGenoIndx+1):ncol(Data_Sub)))

pheno_sub <- Data_Sub[genoStrainIndices,phenoIndices]
geno_num4b <- Data_Sub[genoStrainIndices,c(1,genoIndices)]

uniqueStrainIndices<- which(!duplicated(geno_num4b[,"strain"]))
geno_num5 <- geno_num4b[uniqueStrainIndices,]


### set 'yield' colname to 'Yield_blup'

yldCol <- which(colnames(pheno_sub) %in% "yield")
colnames(pheno_sub)[yldCol] <- "Yield_blup" 

dim(geno_num5)

In [6]:
### Select 3 environs with largest number of evaluations (lines)  

env_sub <-  names(which(table(pheno_sub[,"environ"])>5100)[1:3])

env_sub_indices <- which(pheno_sub[,"environ"] %in% env_sub)

## Subset Data and Geno tables 
DT <- pheno_sub[env_sub_indices,]

DT$environ <- as.factor(DT$environ)

dim(DT)

In [7]:
#### Impute genotable

geno_imp <- markov(apply(geno_num5[,-1],2,as.numeric))
rownames(geno_imp) <- geno_num5[,"strain"]
dim(geno_imp)

In [8]:
### 
env_geno_sub_indices <- which(rownames(geno_imp) %in% unique(DT[,"strain"]))
geno_imp_sub <- geno_imp[env_geno_sub_indices,]

dim(geno_imp_sub)

In [9]:
K_rr <- A.mat(geno_imp_sub)
colnames(K_rr) <-rownames(geno_imp_sub)
rownames(K_rr) <- rownames(geno_imp_sub)
A <- K_rr

dim(A)

### Limit no_of_environments to create EA matrix of reasonable mem size.. 

A_Sub <- A[1:500,1:500]
DT_Sub <- DT[which(DT[,"strain"] %in% rownames(A_Sub)),]

E <- diag(length(unique(DT$environ)))
rownames(E) <- colnames(E) <- unique(DT$environ)
dim(E)

In [10]:

### Same set of strains in each of the environments 

rmStrains <- names(which(table(DT_Sub[,"strain"]) <3))
DT_Sub1 <- DT_Sub[-which(DT_Sub[,"strain"] %in% rmStrains),]

A_Sub1 <- A_Sub[-which(rownames(A_Sub) %in% rmStrains),-which(rownames(A_Sub) %in% rmStrains)]
dim(A_Sub1)

### **Exercise - Compare a few of the var-covar structures in SOMMER package**


#### **Main Effect** #### 
##### Model environment as fixed effect (estimate population mean for each of the environments) and estimate random effects for genotypes 


In [11]:

fitMain <- mmer(Yield_blup~environ-1,
                random=~vs(strain,Gu=A_Sub1),
                rcov=~units,
                data=DT_Sub1,verbose=FALSE)
summary(fitMain)


Unnamed: 0,Yield_blup
u:strain,494

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
u:strain.Yield_blup-Yield_blup,13376.89,7393.675,1.809235,Positive
units.Yield_blup-Yield_blup,372917.05,13908.256,26.81264,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3133.676,28.00058,111.9147
Yield_blup,environIA_2013,2800.65,28.00058,100.0211
Yield_blup,environIL_2012,3617.185,28.00058,129.1825

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-545.1281,1096.256,1112.16,NR,True


In [12]:

m <- model.matrix(~ environ-1 ,data=DT_Sub1)
m_beta <- m %*% as.numeric(fitMain$Beta[,3]) 
PredMain <- m_beta+fitMain$U$`u:strain`$Yield_blup
cor(PredMain,DT_Sub1[,"Yield_blup"]) 

0
0.4803591


#### **Model with Compound Symmetry var-covar structure** ####
##### Compound symmetry assumes GxE effects and also assumes constant correlation among environments

In [13]:

E <- diag(length(unique(DT_Sub1$environ)))
rownames(E) <- colnames(E) <- unique(DT_Sub1$environ)

EA <- kronecker(E,A_Sub1, make.dimnames = TRUE)
DT_Sub1$environ <- as.factor(DT_Sub1$environ)
DT_Sub1$strain <- as.factor(DT_Sub1$strain)

fitCS <- mmer(Yield_blup~environ-1,
              random= ~ vs(strain, Gu=A_Sub1) + vs(environ:strain, Gu=EA),
              rcov= ~ units,
              data=DT_Sub1, verbose = FALSE)
summary(fitCS)

Unnamed: 0,Yield_blup
u:strain,494
u:environ:strain,1482

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
u:strain.Yield_blup-Yield_blup,0.0,11503.43,0.0,Positive
u:environ:strain.Yield_blup-Yield_blup,65969.28,20118.21,3.279082,Positive
units.Yield_blup-Yield_blup,343674.11,13300.93,25.838345,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3146.983,28.54727,110.23762
Yield_blup,environIA_2013,2778.546,28.54727,97.33139
Yield_blup,environIL_2012,3633.347,28.54727,127.27474

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-523.9647,1053.929,1069.833,NR,True


In [14]:

m <- model.matrix(~ environ-1 ,data=DT_Sub1)
m_beta <- m %*% as.numeric(fitCS$Beta[,3]) 
PredCS <- m_beta+fitCS$U$`u:environ:strain`$Yield_blup
cor(PredCS,DT_Sub1[,"Yield_blup"]) 


0
0.4676182


#### **CS - Diagonal Structure** ####

In [15]:
fitCSDG <- mmer(Yield_blup~environ-1,
                random=~vs(strain,Gu=A_Sub1) +vs(ds(environ),strain,Gu=A_Sub1),
                rcov=~units,
                data=DT_Sub1,verbose=FALSE) 

summary(fitCSDG)

Unnamed: 0,Yield_blup
u:strain,494
IA_2012:strain,494
IA_2013:strain,494
IL_2012:strain,494

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
u:strain.Yield_blup-Yield_blup,10027.1,12112.62,0.8278228,Positive
IA_2012:strain.Yield_blup-Yield_blup,10797.13,17422.86,0.6197106,Positive
IA_2013:strain.Yield_blup-Yield_blup,102207.76,40069.62,2.5507542,Positive
IL_2012:strain.Yield_blup-Yield_blup,84568.31,35697.88,2.3690005,Positive
units.Yield_blup-Yield_blup,341746.84,13228.61,25.8339227,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3137.91,27.2589,115.11509
Yield_blup,environIA_2013,2779.264,29.2888,94.89168
Yield_blup,environIL_2012,3637.888,28.99965,125.44593

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-521.4306,1048.861,1064.765,NR,True


In [16]:

m2 <- cbind(c(rep(1,nrow(DT_Sub1)/3),rep(0,2*nrow(DT_Sub1)/3)),c(rep(1,nrow(DT_Sub1)/3),rep(0,nrow(DT_Sub1)/3),rep(1,nrow(DT_Sub1)/3)),
c(rep(1,nrow(DT_Sub1)/3),rep(1,nrow(DT_Sub1)/3),rep(0,nrow(DT_Sub1)/3)))
dim(m2)

m_beta <- m2 %*% as.numeric(fitCSDG$Beta[,3]) 
length(m_beta)
m_env_strain <- do.call(cbind,lapply(fitCSDG$U,function(x) x$Yield_blup))
dim(m_env_strain)
envStrain_blup <-c(m_env_strain[,2:4])                              
                        
dim(m)
length(envStrain_blup)
strain_blup <- rep(fitCSDG$U$`u:strain`$Yield_blup,3)
length(strain_blup)

In [17]:
PredCSDG <- m_beta+strain_blup+envStrain_blup

indES <-  sort.int(as.numeric(DT_Sub1[,"environ"]),decreasing=FALSE,index.return=TRUE)[[2]]
DT_Sub1[1:10,"environ"]
DT_Sub1[indES[1:10],"environ"]
cor(PredCSDG,DT_Sub1[indES,"Yield_blup"]) 


0
-0.08983118


#### **US - Unstructured Variance-Covariance** ####

In [18]:
fitUS <- mmer(Yield_blup~environ-1,
                random=~vs(us(environ),strain,Gu=A_Sub1),
                rcov=~units,
                data=DT_Sub1,verbose=FALSE) 
summary(fitUS)

Unnamed: 0,Yield_blup
IA_2012:strain,494
IA_2013:IA_2012:strain,988
IA_2013:strain,494
IL_2012:IA_2012:strain,988
IL_2012:IA_2013:strain,988
IL_2012:strain,494

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
IA_2012:strain.Yield_blup-Yield_blup,27496.322,17108.41,1.6071816,Positive
IA_2013:IA_2012:strain.Yield_blup-Yield_blup,9695.035,17110.28,0.5666205,Unconstr
IA_2013:strain.Yield_blup-Yield_blup,88862.509,33186.87,2.6776402,Positive
IL_2012:IA_2012:strain.Yield_blup-Yield_blup,23197.485,17989.24,1.2895199,Unconstr
IL_2012:IA_2013:strain.Yield_blup-Yield_blup,-69692.222,26894.77,-2.5912923,Unconstr
IL_2012:strain.Yield_blup-Yield_blup,95836.031,34748.23,2.7580122,Positive
units.Yield_blup-Yield_blup,342784.165,13207.12,25.9544898,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3139.304,27.37833,114.66383
Yield_blup,environIA_2013,2776.395,28.61196,97.03615
Yield_blup,environIL_2012,3643.257,28.7032,126.92861

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-515.6439,1037.288,1053.191,NR,True


In [19]:

#m <- model.matrix(~ environ-1 ,data=DT_Sub1)
#m_beta <- m %*% as.numeric(fitUS$Beta[,3]) 
#PredUS <- m_beta+fitUS$U$`u:environ:strain`$Yield_blup
#cor(PredUS,DT_Sub1[,"Yield_blup"]) 


In [78]:
names(fitUS$U)
envNames <- levels(factor(DT_Sub1$environ))
env1Ind <- c(1,3,6)
U_envStrain <- list()
  for(i in 1:length(envNames)){
       envInd <-  grep(envNames[i],names(fitUS$U))
       U_envStrain[[i]] <-  as.numeric(fitUS$U[[env1Ind[i]]]$Yield_blup)
     for(j  in 2:length(envInd)){ 
         indJ <- envInd[j]
         b <- cbind(names(fitUS$U[[indJ]]$Yield_blup),fitUS$U[[indJ]]$Yield_blup)
         colnames(b) <- c("strain","Yield_blup")
         b_group <- as_tibble(b) %>% group_by(strain)
         YldBlup_group <- b_group %>% summarise(Yield_blup = sum(as.numeric(Yield_blup)))
         U_envStrain[[i]] <- U_envStrain[[i]] +YldBlup_group[,2] + fitUS$Beta[i,3]
       } 
     }
    

lapply(U_envStrain,function(x) length(x[[1]]))
lapply(U_envStrain,summary)
PredUS <- c(unlist(U_envStrain))
        
indES <-  sort.int(as.numeric(DT_Sub1[,"environ"]),decreasing=FALSE,index.return=TRUE)[[2]]
DT_Sub1[1:10,"environ"]
DT_Sub1[indES[tstIndices3][1:10],"environ"]
length(PredUS)                         
cor(PredUS[indES[tstIndices3]],DT_Sub1[indES[tstIndices3],"Yield_blup"]) 



[[1]]
   Yield_blup  
 Min.   :5768  
 1st Qu.:6123  
 Median :6236  
 Mean   :6256  
 3rd Qu.:6394  
 Max.   :6822  

[[2]]
   Yield_blup  
 Min.   :4961  
 1st Qu.:5431  
 Median :5592  
 Mean   :5582  
 3rd Qu.:5740  
 Max.   :6021  

[[3]]
   Yield_blup  
 Min.   :6603  
 1st Qu.:7093  
 Median :7283  
 Mean   :7265  
 3rd Qu.:7425  
 Max.   :7990  


### **Exercise - Predict performance of tested and untested genotypes in tested and untested environments** ###

#### **Tested Genotypes in Untested Environment**

In [21]:
### Remove lines from IA2013 and train the model using IA2012 and IL2013 only and predict 
### performance of lines for IA2013 (untested environ) and compare accuracy with model 
### incorporating data from IA2013 in the training model 

tstIndices1 <- which(DT_Sub1[,"environ"] %in% "IA_2013") 

DT_Sub1A <- DT_Sub1
DT_Sub1A[tstIndices1 ,"Yeild_blup"] <- NA
#DT_Sub1A[tstIndices1 ,"environ"] <- NA

dim(DT_Sub1A)

In [22]:

E <- diag(length(unique(DT_Sub1A$environ)))
rownames(E) <- colnames(E) <- unique(DT_Sub1A$environ)
dim(E)

EA <- kronecker(E,A_Sub1, make.dimnames = TRUE)
DT_Sub1$Aenviron <- as.factor(DT_Sub1A$environ)
DT_Sub1A$strain <- as.factor(DT_Sub1A$strain) 

dim(EA)

In [23]:
fitDG1A <- mmer(Yield_blup~environ-1,
              random=~vs(ds(environ),strain,Gu=A_Sub1),
              rcov=~units,
              data=DT_Sub1A,verbose=FALSE)

summary(fitDG1A)


Unnamed: 0,Yield_blup
IA_2012:strain,494
IA_2013:strain,494
IL_2012:strain,494

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
IA_2012:strain.Yield_blup-Yield_blup,21787.27,15810.61,1.378016,Positive
IA_2013:strain.Yield_blup-Yield_blup,95090.5,35633.2,2.668593,Positive
IL_2012:strain.Yield_blup-Yield_blup,91532.63,34818.99,2.628814,Positive
units.Yield_blup-Yield_blup,342872.02,13262.48,25.852781,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3137.698,27.36345,114.66749
Yield_blup,environIA_2013,2777.267,29.06154,95.56506
Yield_blup,environIL_2012,3636.143,29.0003,125.38295

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-521.5774,1049.155,1065.058,NR,True


In [24]:
names(fitDG1A$U)

In [25]:

m2 <- cbind(c(rep(1,nrow(DT_Sub1A)/3),rep(0,2*nrow(DT_Sub1A)/3)),c(rep(1,nrow(DT_Sub1A)/3),rep(0,nrow(DT_Sub1A)/3),rep(1,nrow(DT_Sub1A)/3)),
c(rep(1,nrow(DT_Sub1A)/3),rep(1,nrow(DT_Sub1A)/3),rep(0,nrow(DT_Sub1A)/3)))
dim(m2)

m_beta <- m2 %*% as.numeric(fitDG1A$Beta[,3]) 
length(m_beta)
m_env_strain <- do.call(cbind,lapply(fitDG1A$U,function(x) x$Yield_blup))
dim(m_env_strain)
envStrain_blup <- c(m_env_strain)                        
                        

length(envStrain_blup)

PredDG1A <- m_beta+envStrain_blup

indES <-  sort.int(as.numeric(DT_Sub1A[,"environ"]),decreasing=FALSE,index.return=TRUE)[[2]]
DT_Sub1A[1:10,"environ"]
DT_Sub1A[indES[1:10],"environ"]
length(PredDG1A)                         
                                     
cor(PredDG1A,DT_Sub1A[indES,"Yield_blup"]) 
                                     

0
-0.09042033


In [26]:
m <- model.matrix(~ environ-1 ,data=DT_Sub1)
m_beta <- m %*% as.numeric(fitDG1A$Beta[,3]) 
tstPred1 <- (m_beta+fitDG1A$U$`u:environ:strain`$Yield_blup)[tstIndices1]
length(tstPred1)
cor(tstPred1,DT_Sub1[tstIndices1,"Yield_blup"])


In [27]:
fitCS1A <- mmer(Yield_blup~environ-1,
              random= ~ vs(strain, Gu=A_Sub1) + vs(environ:strain, Gu=EA),
              rcov= ~ units,
              data=DT_Sub1A, verbose = FALSE)
summary(fitCS1A)


Unnamed: 0,Yield_blup
u:strain,494
u:environ:strain,1482

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
u:strain.Yield_blup-Yield_blup,0.0,11503.43,0.0,Positive
u:environ:strain.Yield_blup-Yield_blup,65969.28,20118.21,3.279082,Positive
units.Yield_blup-Yield_blup,343674.11,13300.93,25.838345,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3146.983,28.54727,110.23762
Yield_blup,environIA_2013,2778.546,28.54727,97.33139
Yield_blup,environIL_2012,3633.347,28.54727,127.27474

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-523.9647,1053.929,1069.833,NR,True


In [28]:
m <- model.matrix(~ environ-1 ,data=DT_Sub1)
m_beta <- m %*% as.numeric(fitCS1A$Beta[,3]) 
tstPred1 <- (m_beta+fitCS1A$U$`u:environ:strain`$Yield_blup)[tstIndices1]
length(tstPred1)
cor(tstPred1,DT_Sub1[tstIndices1,"Yield_blup"])


#### **Untested Genotypes in Tested Environments** ####

In [29]:

set.seed(125)
tstStrain <- sample(unique(DT_Sub1[,"strain"]),0.2*length(unique(DT_Sub1[,"strain"])))
length(tstStrain)
tstIndices2 <- which(DT_Sub1[,"strain"] %in% tstStrain)
DT_Sub1B <- DT_Sub1
DT_Sub1B[tstIndices2 ,"Yeild_blup"] <- NA
dim(DT_Sub1B)

In [30]:

E <- diag(length(unique(DT_Sub1B$environ)))
rownames(E) <- colnames(E) <- unique(DT_Sub1B$environ)
dim(E)

EA <- kronecker(E,A_Sub1, make.dimnames = TRUE)
DT_Sub1B$environ <- as.factor(DT_Sub1B$environ)
DT_Sub1B$strain <- as.factor(DT_Sub1B$strain) 

dim(EA)

In [31]:
fitCS1B <- mmer(Yield_blup~environ-1,
              random= ~ vs(strain, Gu=A_Sub1) + vs(environ:strain, Gu=EA),
              rcov= ~ units,
              data=DT_Sub1B, verbose = FALSE)
summary(fitCS1B)

Unnamed: 0,Yield_blup
u:strain,494
u:environ:strain,1482

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
u:strain.Yield_blup-Yield_blup,0.0,11503.43,0.0,Positive
u:environ:strain.Yield_blup-Yield_blup,65969.28,20118.21,3.279082,Positive
units.Yield_blup-Yield_blup,343674.11,13300.93,25.838345,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3146.983,28.54727,110.23762
Yield_blup,environIA_2013,2778.546,28.54727,97.33139
Yield_blup,environIL_2012,3633.347,28.54727,127.27474

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-523.9647,1053.929,1069.833,NR,True


In [32]:
m <- model.matrix(~ environ-1 ,data=DT_Sub1)
m_beta <- m %*% as.numeric(fitCS1B$Beta[,3]) 
tstPred2 <- (m_beta+fitCS1B$U$`u:environ:strain`$Yield_blup)[tstIndices2]
length(tstPred2)
cor(tstPred2,DT_Sub1[tstIndices2,"Yield_blup"])


#### **Untested Genotypes in Untested Environment**

In [33]:
set.seed(125)
tstStrain <- sample(unique(DT_Sub1[,"strain"]),0.2*length(unique(DT_Sub1[,"strain"])))
length(tstStrain)
tstIndices2 <- which(DT_Sub1[,"strain"] %in% tstStrain)
DT_Sub1B <- DT_Sub1
DT_Sub1B[tstIndices2 ,"Yeild_blup"] <- NA
dim(DT_Sub1B) 

tstIndices1 <- which(DT_Sub1[,"environ"] %in% "IA_2013") 

tstIndices3 <- intersect(tstIndices2,tstIndices1)
DT_Sub1C <- DT_Sub1B
DT_Sub1C[tstIndices3 ,"Yeild_blup"] <- NA



In [34]:
fitUS1C <- mmer(Yield_blup ~ environ-1,
              random=~vs(us(environ),strain,Gu=A_Sub1),
              rcov=~units,
              data=DT_Sub1C,verbose=FALSE)

summary(fitUS1C)

Unnamed: 0,Yield_blup
IA_2012:strain,494
IA_2013:IA_2012:strain,988
IA_2013:strain,494
IL_2012:IA_2012:strain,988
IL_2012:IA_2013:strain,988
IL_2012:strain,494

Unnamed: 0_level_0,VarComp,VarCompSE,Zratio,Constraint
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
IA_2012:strain.Yield_blup-Yield_blup,27496.322,17108.41,1.6071816,Positive
IA_2013:IA_2012:strain.Yield_blup-Yield_blup,9695.035,17110.28,0.5666205,Unconstr
IA_2013:strain.Yield_blup-Yield_blup,88862.509,33186.87,2.6776402,Positive
IL_2012:IA_2012:strain.Yield_blup-Yield_blup,23197.485,17989.24,1.2895199,Unconstr
IL_2012:IA_2013:strain.Yield_blup-Yield_blup,-69692.222,26894.77,-2.5912923,Unconstr
IL_2012:strain.Yield_blup-Yield_blup,95836.031,34748.23,2.7580122,Positive
units.Yield_blup-Yield_blup,342784.165,13207.12,25.9544898,Positive

Trait,Effect,Estimate,Std.Error,t.value
<fct>,<fct>,<dbl>,<dbl>,<dbl>
Yield_blup,environIA_2012,3139.304,27.37833,114.66383
Yield_blup,environIA_2013,2776.395,28.61196,97.03615
Yield_blup,environIL_2012,3643.257,28.7032,126.92861

Unnamed: 0_level_0,logLik,AIC,BIC,Method,Converge
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<lgl>
Value,-515.6439,1037.288,1053.191,NR,True


In [35]:
ls()

In [36]:
unsBLUP <- function(blups){
  l <- unlist(lapply(blups,function(x){length(x[[1]])}))
  lmin <- min(l); lmax <- max(l)
  indexCov1 <- 1:lmin
  indexCov2 <- (lmin+1):lmax
  ntraits <- length(blups[[1]])
  # blups follow the order of a lower triangula matrix
  # (n*(n-1))/2 = l
  # l*2 = n2 - n
  # n2 = l*2 - n
  n <- 1:100
  possibilities <- ((n*(n-1))/2) + n
  ntrue <- n[which(possibilities == length(l))]
  ## index to know how to add them up
  base <- matrix(NA,ntrue,ntrue)
  base[lower.tri(base, diag=TRUE)] <- 1:length(l)
  index <- which(!is.na(base), arr.ind = TRUE)
  index <- index[order(index[,1]), ]
  
  
  for(i in 1:ntrue){ # for each main blup
    main <- which(index[,1] == i & index[,2] == i, arr.ind = TRUE)
    cov1 <- which(index[,1] == i & index[,2] != i, arr.ind = TRUE)
    cov2 <- which(index[,1] != i & index[,2] == i, arr.ind = TRUE)
    for(itrait in 1:ntraits){
      start <- blups[[main]][[itrait]]  
      for(icov1 in cov1){
        start <- start + blups[[icov1]][[itrait]][indexCov1]
      }
      for(icov2 in cov2){
        start <- start + blups[[icov2]][[itrait]][indexCov2]
      }
      # store adjusted blup adding covariance effects in the same structure
      blups[[main]][[itrait]] <- start
    }
  }
  return(blups)
}


In [45]:
unsBLUP.adj <- unsBLUP(fitUS1C$U[1:6])
summary(unsBLUP.adj)
length(unsBLUP.adj)
lapply(unsBLUP.adj,function(x) summary(x[[1]]))
lapply(fitUS1C$U,function(x) summary(x[[1]]))
       


                       Length Class  Mode
IA_2012:strain         1      -none- list
IA_2013:IA_2012:strain 1      -none- list
IA_2013:strain         1      -none- list
IL_2012:IA_2012:strain 1      -none- list
IL_2012:IA_2013:strain 1      -none- list
IL_2012:strain         1      -none- list

$`IA_2012:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-264.231  -73.301  -14.213   -9.368   51.705  288.099 

$`IA_2013:IA_2012:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-87.5527 -20.6533  -1.1400  -0.7297  18.9677  80.5767 

$`IA_2013:strain`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-449.58 -113.20   29.67   20.51  150.08  408.02 

$`IL_2012:IA_2012:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-209.489  -54.998   -5.697   -5.018   44.858  192.797 

$`IL_2012:IA_2013:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-480.367 -130.110  -13.772   -7.158  110.909  521.530 

$`IL_2012:strain`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-556.24 -163.85  -38.04  -29.81  113.98  546.08 


$`IA_2012:strain`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-248.31  -80.88  -15.79  -10.84   60.79  228.53 

$`IA_2013:IA_2012:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-87.5527 -20.6533  -1.1400  -0.7297  18.9677  80.5767 

$`IA_2013:strain`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-664.99 -115.51   24.34   21.66  164.37  528.20 

$`IL_2012:IA_2012:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-209.489  -54.998   -5.697   -5.018   44.858  192.797 

$`IL_2012:IA_2013:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-480.367 -130.110  -13.772   -7.158  110.909  521.530 

$`IL_2012:strain`
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-702.181 -169.649   12.860   -3.678  180.908  660.569 


#### **Discussion** ####

In [51]:
summary(fitUS1C$U$`IA_2013:IA_2012:strain`$Yield_blup)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-87.5527 -20.6533  -1.1400  -0.7297  18.9677  80.5767 

In [None]:
  b <- fitUS1C$U[[j]]$Yield_blup

In [73]:

j <- 2 
b <- cbind(names(fitUS1C$U[[j]]$Yield_blup),fitUS1C$U[[j]]$Yield_blup)
         colnames(b) <- c("strain","Yield_blup")
         b_group <- as_tibble(b) %>% group_by(strain)
        dim(b_group)
        b_group[1:5,]
         YldBlup_group <- b_group %>% summarise(Yield_blup = sum(as.numeric(Yield_blup)))
        

strain,Yield_blup
<chr>,<chr>
DS11-02003,-42.5794143903061
DS11-02005,-39.4872288477985
DS11-02006,-17.6516449194388
DS11-02007,12.6482888794692
DS11-02014,-19.9116932695286


In [76]:
envNames <- levels(factor(DT_Sub1C$environ))
env1Ind <- c(1,3,6)
U_envStrain <- list()
  for(i in 1:length(envNames)){
    # envInd <- which(names(fitUS1C$U) %in% envNames[i])
      envInd <-  grep(envNames[i],names(fitUS1C$U))
       U_envStrain[[i]] <-  as.numeric(fitUS1C$U[[env1Ind[i]]]$Yield_blup)
     for(j  in 2:length(envInd)){ 
         indJ <- envInd[j]
         b <- cbind(names(fitUS1C$U[[indJ]]$Yield_blup),fitUS1C$U[[indJ]]$Yield_blup)
         colnames(b) <- c("strain","Yield_blup")
         b_group <- as_tibble(b) %>% group_by(strain)
         YldBlup_group <- b_group %>% summarise(Yield_blup = sum(as.numeric(Yield_blup)))
         U_envStrain[[i]] <- U_envStrain[[i]] +YldBlup_group[,2] + fitUS1C$Beta[i,3]
       } 
     }
    

 lapply(U_envStrain,function(x) length(x[[1]]))
 lapply(U_envStrain,summary)
 PredUS1C <- c(unlist(U_envStrain))
        
indES <-  sort.int(as.numeric(DT_Sub1C[,"environ"]),decreasing=FALSE,index.return=TRUE)[[2]]
DT_Sub1C[1:10,"environ"]
DT_Sub1C[indES[tstIndices3][1:10],"environ"]
length(PredUS1C)                         
                                     
cor(PredUS1C[indES[tstIndices3]],DT_Sub1[indES[tstIndices3],"Yield_blup"]) 

[[1]]
   Yield_blup  
 Min.   :5768  
 1st Qu.:6123  
 Median :6236  
 Mean   :6256  
 3rd Qu.:6394  
 Max.   :6822  

[[2]]
   Yield_blup  
 Min.   :4961  
 1st Qu.:5431  
 Median :5592  
 Mean   :5582  
 3rd Qu.:5740  
 Max.   :6021  

[[3]]
   Yield_blup  
 Min.   :6603  
 1st Qu.:7093  
 Median :7283  
 Mean   :7265  
 3rd Qu.:7425  
 Max.   :7990  
