In [1]:
# Report the machine and R build this notebook was run on.
library(benchmarkme)
get_platform_info()[["OS.type"]]
get_r_version()[["version.string"]]
# CPU model name and core count, shown as two separate values.
get_cpu()[["model_name"]]
get_cpu()[["no_of_cores"]]
get_ram()

8.59 GB

In [2]:
##Package was modified to allow complete cross mating
#library("devtools")
#install_github("gglinzijie/xbreed")
library("xbreed")

("|-----------------------------------------------------|")
("|                      xbreed                         |")
("|    Genomic simulation of purebreds and crossbreds   |")
("|               March 2017 Version 1.0.1              |")
("|                                                     |")
("|             H.Esfandyari,A.C.Sorensen               |")
("| Center for Quantitative Qenetics and Genomics (QGG) |")
("|             Aarhus University,Denmark               |")
("|                                                     |")
("|-----------------------------------------------------|")
("|Questions and bugs: esfandyari.hadi@gmail.com        |")
("|Development of xbreed was supported by GenSAP.       |")
("|-----------------------------------------------------|")


# Genome specification

In [6]:
# Number of markers per chromosome, taken from the chapter 1 linkage map.
# Column V1 of "raw.map" is matched against the chromosome numbers 1 to 22.
lin_map <- read.table("raw.map")
m <- vapply(
  1:22,
  function(chr) nrow(lin_map[lin_map$V1 == chr, ]),
  integer(1)
)
# Total number of markers over all 22 chromosomes.
sum(m)

In [4]:
# Data from chapter 1: genotype table and phenotype records.
geno <- read.table(file = "Hetero_realigned_cov10_filtered3.raw")
pheno <- read.csv(file = "2017heteroPheno.csv", header = TRUE)
# NOTE(review): attach() exposes the phenotype columns on the search path;
# `pheno` itself is reassigned later in this notebook -- confirm the attached
# columns are still needed.
attach(pheno)

In [5]:
# Genome parameters: one row per chromosome, passed to make_hp() below.
no.chr <- 22
genome <- data.frame(
  chr = seq_len(no.chr),               # chromosome id, 1 to 22
  len = c(200, rep(100, no.chr - 1)),  # chromosome length in cM
  nmrk = 3 * m,                        # number of markers, 3*3928 in total
  mpos = "even",
  nqtl = 40,                           # number of QTL, 40*22 = 880 in total
  qpos = "rnd",
  stringsAsFactors = FALSE             # keep mpos/qpos as character columns
)

# Historical population

In [6]:
# Simulate the historical population from the genome table defined above.
hp <- make_hp(
  hpsize = 1000,
  ng = 5000,
  h2 = 0.654,
  d2 = 0,
  phen_var = 84,
  genome = genome,
  mutr = 2.5 * 10^-4,
  laf = 1
)

---sel_seq_qtl is missing, it has been set to default value of 0
---sel_seq_mrk is missing, it has been set to default value of 0
Historical pop is initialized...
Simulating trait ...
Output data preparation ...
Establishment of historical population completed


In [7]:
# Validation: compare an expected heterozygosity with the value observed in
# the simulated historical population.
mutr <- 2.5 * 10^-4
ne <- 1000
k <- 2
Fneu <- 4 * ne * mutr
# Expected value from Fneu and k; the outer parentheses make R print it.
(Expected_het1 <- 1 - (1 + Fneu / (k - 1)) / (1 + (Fneu * k) / (k - 1)))
# Mean of 2 * (column 3 * column 4) of hp$freqMrk.
# NOTE(review): presumably these two columns hold the allele frequencies of
# each marker -- confirm against the make_hp() output description.
(het_observed <- mean(2 * (hp$freqMrk[, 3] * hp$freqMrk[, 4])))

In [8]:
# Founder specification for sample_hp(): 10 animals per sex, select = "rnd".
# Both sexes use the same settings, so the female table is a copy of the male.
Male_founders <- data.frame(number = 10, select = "rnd")
Female_founders <- Male_founders

In [9]:
# Selection design passed to sample_hp(): two rows, each with Number = 10,
# type = "rnd" and Value = "l".
Selection <- data.frame(
  Number = c(10, 10),
  type = c("rnd", "rnd"),
  Value = c("l", "l"),
  stringsAsFactors = FALSE
)

In [10]:
# Output request passed to sample_hp(): a one-row table with the value 1 in
# each of the columns data, qtl, freq_mrk and marker.
# NOTE(review): presumably each value is the generation whose output is
# requested -- confirm against the sample_hp() documentation.
sh_output <- data.frame(data = 1, qtl = 1, freq_mrk = 1, marker = 1)

In [11]:
# Sample the recent population from the historical population: 10 sires and
# 10 dams mated as a complete cross, for one generation.
# NOTE(review): the earlier note said 200 eggs were produced per dam, but the
# call uses litter_size = 10 -- confirm which figure is intended.
RP <- sample_hp(
  hp_out = hp,
  Male_founders = Male_founders,
  Female_founders = Female_founders,
  ng = 1,
  Selection = Selection,
  litter_size = 10,
  saveAt = "SNP3928",
  sh_output = sh_output,
  Display = FALSE
)

Controlling input data ...
Intializing base population ...
Generation 0 started ......... 
Generation 0 is finished. Time taken: 14.86296
Generation 1 started ......... 
Generation 1 is finished. Time taken: 21.10412
Output data preparation ...
Writing output files ...
Sampling hp is done!


In [12]:
# Collapse paired allele columns into a single genotype code per marker.
#
# mat: matrix or data frame whose columns come in consecutive pairs
#      (columns 1-2, 3-4, ...), one pair per marker.
# Returns the pairwise column sums, with a sum of 3 recoded to 1 and a sum of
# 4 recoded to 0; any other sum is left unchanged (so 2 stays 2).
bin_snp <- function(mat) {
  first_allele <- seq(1, ncol(mat), by = 2)
  second_allele <- seq(2, ncol(mat), by = 2)
  snp_code <- mat[, first_allele] + mat[, second_allele]
  snp_code <- replace(snp_code, snp_code == 3, 1)
  snp_code <- replace(snp_code, snp_code == 4, 0)
  snp_code
}

In [13]:
## Genotypes and phenotypes taken from the simulated data (second element of
## RP$output).
pheno <- RP$output[[2]]$data$phen
tbvp <- RP$output[[2]]$data$tbvp
# Columns 3 to 23570 of the marker table are the paired allele columns
# (2 * 11784 columns); bin_snp() collapses each pair into one code.
n <- bin_snp(RP$output[[2]]$mrk[, 3:23570])
# Shift the 0/1/2 codes to -1/0/1.
x <- as.matrix(n) - 1
# Label individuals 1..1000 (rows) and markers 1..11784 (columns).
dimnames(x) <- list(1:1000, 1:(3928 * 3))

In [14]:
# Save the simulated genotypes, phenotypes and tbvp values to text files so
# the analysis below can be rerun without repeating the simulation.
write.table(x, file = "2.4_geno.txt")
write.table(pheno, file = "2.4_pheno.txt")
write.table(tbvp, file = "2.4_tbvp.txt")

In [1]:
# Reload the simulation result written earlier: the genotype matrix plus the
# phenotype and tbvp vectors (each flattened to a plain numeric vector).
x <- as.matrix(read.table(file = "2.4_geno.txt"))
pheno <- as.numeric(unlist(read.table(file = "2.4_pheno.txt")))
tbvp <- as.numeric(unlist(read.table(file = "2.4_tbvp.txt")))

In [2]:
## Load packages
library(doParallel)
library(foreach)
library(rrBLUP)

## Cross-validation settings shared by the functions below
cl <- makeCluster(8)  # worker cluster handed to registerDoParallel()
repeats <- 10         # number of cross-validation repeats
n.fold <- 5           # number of folds per repeat
acc <- list()

packageVersion("rrBLUP")

Loading required package: foreach
Loading required package: iterators
Loading required package: parallel


[1] '4.6'

In [3]:
# Standard error used for the cross-validation summaries.
#
# acc:   numeric vector of per-fold statistics.
# n.rep: number of cross-validation repeats; defaults to the global `repeats`,
#        so existing one-argument calls (e.g. via apply()) behave as before.
# Returns sd(acc) / sqrt(n.rep - 1).
cal_se <- function(acc, n.rep = repeats) {
  sd(acc) / sqrt(n.rep - 1)
}

In [4]:
# Accuracy of GBLUP genomic prediction for one reference-population size and
# one SNP-panel density, estimated by repeated k-fold cross-validation.
#
# Arguments:
#   x        genotype matrix; rows are sampled as individuals and columns as
#            SNPs.
#   n.sample number of rows of x drawn at random.
#   n.snp    number of columns of x drawn at random.
#
# Reads the globals pheno, tbvp, repeats, n.fold and cl, and calls cal_se().
#
# Returns a numeric vector of length 8: the means of heritability, bias_r,
# unbias_r and true_r over all repeats * n.fold runs, followed by their
# cal_se() values.
cal_acc <- function(x, n.sample, n.snp) {
  set.seed(100)
  id <- sample(seq_len(nrow(x)), n.sample)
  snpId <- sample(seq_len(ncol(x)), n.snp)
  # Relationship matrix (Endelman et al. 2011)
  A <- A.mat(x[id, snpId], n.core = 8)
  row.names(A) <- 1:n.sample
  colnames(A) <- 1:n.sample
  cv_data <- data.frame(tbcw = pheno[id], tbvp = tbvp[id], gid = 1:n.sample)

  # One result row per (repeat, fold) pair. The size follows repeats and
  # n.fold instead of a fixed 50, so changing either global stays consistent.
  n.run <- repeats * n.fold
  result <- data.frame(
    heritability = rep(NA, n.run),
    bias_r = rep(NA, n.run),
    unbias_r = rep(NA, n.run),
    true_r = rep(NA, n.run)
  )

  registerDoParallel(cl)
  # NOTE(review): %do% runs these loops sequentially in this function's
  # environment, which is what lets the assignments to `result` persist; the
  # registered cluster is not used by the loops themselves.
  foreach(j = seq_len(repeats), .combine = "rbind") %do% {
    set.seed(100 + 3 * j + 1)
    # Random fold label (1..n.fold) for every sampled individual.
    fold <- sample(1:n.sample %% n.fold) + 1
    foreach(i = seq_len(n.fold), .packages = "rrBLUP") %do% {
      held_out <- fold == i
      # Mask the phenotypes of the held-out fold before fitting.
      bcw_test <- cv_data
      bcw_test$tbcw[held_out] <- NA
      res <- kin.blup(bcw_test, K = A, geno = "gid", pheno = "tbcw")

      h2 <- res$Vg / (res$Vg + res$Ve)
      r_pheno <- cor(cv_data$tbcw[held_out], res$pred[held_out])
      slot <- n.fold * (j - 1) + i
      result[slot, 1] <- h2
      result[slot, 2] <- r_pheno
      # Correlation with the phenotype divided by sqrt(h2).
      result[slot, 3] <- r_pheno / sqrt(h2)
      # Correlation of the prediction with tbvp.
      result[slot, 4] <- cor(cv_data$tbvp[held_out], res$pred[held_out])
    }
  }
  stopImplicitCluster()
  return(c(apply(result, 2, mean), apply(result, 2, cal_se)))
}

In [7]:
# SNP-panel sizes to evaluate: 10% to 90% of the total marker count, then the
# full count and two and three times the full count.
sum_m <- sum(m)
panel_fraction <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
n.snp <- c(round(panel_fraction * sum_m), sum_m, 2 * sum_m, 3 * sum_m)

# Every panel size is evaluated with a reference population of 500.
n.sample <- rep(500, length(n.snp))
pair <- data.frame(n.snp = n.snp, n.sample = n.sample)

In [8]:
# Run the GBLUP calculation for every SNP-panel size.
# Each column receives the 8 values returned by cal_acc() (4 means, 4 SEs).
all_acc <- matrix(NA, nrow = 8, ncol = length(n.snp))
# Empty result matrices of the same shape for the Bayesian models.
all_accBA <- all_accBB <- all_accBC <- all_accBL <- all_acc
# seq_along() gives an empty loop (rather than 1, 0) if n.snp were empty.
for (i in seq_along(n.snp)) {
  all_acc[, i] <- cal_acc(x, n.sample[i], n.snp[i])
}

In [19]:
# Label the GBLUP results: rows are the four statistics followed by their
# standard errors, columns are the SNP-panel sizes.
dimnames(all_acc) <- list(
  c("h2", "bias_r", "unbias_r", "true_r",
    "h2_se", "bias_r_se", "unbias_r_se", "true_r_se"),
  n.snp
)

In [10]:
library("BGLR")
packageVersion("BGLR") 

[1] '1.0.5'

In [11]:
# Accuracy of Bayesian genomic prediction (BGLR) for one reference-population
# size and one SNP-panel density, estimated by repeated k-fold
# cross-validation.
#
# Arguments:
#   x        genotype matrix; rows are sampled as individuals and columns as
#            SNPs.
#   n.sample number of rows of x drawn at random.
#   n.snp    number of columns of x drawn at random.
#   M        BGLR model name for the marker term (e.g. "BayesA", "BayesB",
#            "BayesC", "BL").
#
# Reads the globals pheno, tbvp, repeats, n.fold and cl, and calls cal_se().
#
# Returns a numeric vector of length 8: the means of heritability, bias_r,
# unbias_r and true_r over all repeats * n.fold runs, followed by their
# cal_se() values.
cal_accB <- function(x, n.sample, n.snp, M) {
  set.seed(100)
  id <- sample(seq_len(nrow(x)), n.sample)
  snpId <- sample(seq_len(ncol(x)), n.snp)
  x1 <- x[id, snpId]
  cv_data <- data.frame(tbcw = pheno[id], tbvp = tbvp[id], gid = 1:n.sample)

  # One result row per (repeat, fold) pair. The size follows repeats and
  # n.fold instead of a fixed 50, so changing either global stays consistent.
  n.run <- repeats * n.fold
  result <- data.frame(
    heritability = rep(NA, n.run),
    bias_r = rep(NA, n.run),
    unbias_r = rep(NA, n.run),
    true_r = rep(NA, n.run)
  )

  registerDoParallel(cl)
  # NOTE(review): %do% runs these loops sequentially in this function's
  # environment, which is what lets the assignments to `result` persist; the
  # registered cluster is not used by the loops themselves.
  foreach(j = seq_len(repeats), .combine = "rbind") %do% {
    set.seed(100 + 3 * j + 1)
    # Random fold label (1..n.fold) for every sampled individual.
    fold <- sample(1:n.sample %% n.fold) + 1
    foreach(i = seq_len(n.fold), .packages = "BGLR") %do% {
      held_out <- fold == i
      # Mask the phenotypes of the held-out fold before fitting.
      bcw_test <- cv_data
      bcw_test$tbcw[held_out] <- NA
      # NOTE(review): saveAt is supplied inside the ETA term rather than as a
      # BGLR() argument -- confirm this is intended.
      fm <- BGLR(
        y = bcw_test$tbcw,
        ETA = list(list(X = x1, model = M, saveEffects = TRUE, saveAt = "fm_")),
        nIter = 2000,
        burnIn = 1000,
        verbose = FALSE
      )
      # var() of the n x 1 product is a 1 x 1 matrix; reduce it to a scalar.
      varU <- as.numeric(var(x1 %*% fm$ETA[[1]]$b))
      varE <- fm$varE
      h2 <- varU / (varU + varE)

      r_pheno <- cor(cv_data$tbcw[held_out], fm$yHat[held_out])
      slot <- n.fold * (j - 1) + i
      result[slot, 1] <- h2
      result[slot, 2] <- r_pheno
      # Correlation with the phenotype divided by sqrt(h2).
      result[slot, 3] <- r_pheno / sqrt(h2)
      # Correlation of the prediction with tbvp.
      result[slot, 4] <- cor(cv_data$tbvp[held_out], fm$yHat[held_out])
    }
  }
  stopImplicitCluster()
  return(c(apply(result, 2, mean), apply(result, 2, cal_se)))
}

In [12]:
# BayesA: one column of results per SNP-panel size.
# seq_along() gives an empty loop (rather than 1, 0) if n.snp were empty.
for (i in seq_along(n.snp)) {
  all_accBA[, i] <- cal_accB(x, n.sample[i], n.snp[i], "BayesA")
}

In [13]:
# BayesB: one column of results per SNP-panel size.
# seq_along() gives an empty loop (rather than 1, 0) if n.snp were empty.
for (i in seq_along(n.snp)) {
  all_accBB[, i] <- cal_accB(x, n.sample[i], n.snp[i], "BayesB")
}

In [14]:
# BayesC: one column of results per SNP-panel size.
# seq_along() gives an empty loop (rather than 1, 0) if n.snp were empty.
for (i in seq_along(n.snp)) {
  all_accBC[, i] <- cal_accB(x, n.sample[i], n.snp[i], "BayesC")
}

In [None]:
# BL: one column of results per SNP-panel size.
# seq_along() gives an empty loop (rather than 1, 0) if n.snp were empty.
for (i in seq_along(n.snp)) {
  all_accBL[, i] <- cal_accB(x, n.sample[i], n.snp[i], "BL")
}

In [16]:
# Label the four Bayesian result matrices identically: columns are the
# SNP-panel sizes, rows are the four statistics followed by their standard
# errors. A chained assignment gives every matrix the same labels.
colnames(all_accBA) <- colnames(all_accBB) <-
  colnames(all_accBC) <- colnames(all_accBL) <- n.snp
row.names(all_accBA) <- row.names(all_accBB) <-
  row.names(all_accBC) <- row.names(all_accBL) <-
  c("h2", "bias_r", "unbias_r", "true_r",
    "h2_se", "bias_r_se", "unbias_r_se", "true_r_se")

In [18]:
# Collect the result matrices of all five models in one named list; the outer
# parentheses make R print the list as well as assign it.
# NOTE(review): the variable is named `sum`, like the base function; the name
# is kept because the export step reads it.
(sum <- list(
  GBLUP = all_acc,
  BA = all_accBA,
  BB = all_accBB,
  BC = all_accBC,
  BL = all_accBL
))

Unnamed: 0,393,786,1178,1571,1964,2357,2750,3142,3535,3928,7856,11784
h2,0.363310541,0.4497649,0.50848833,0.49554165,0.51455202,0.52579918,0.53275068,0.51051618,0.5112396,0.52808219,0.55792586,0.54972511
bias_r,0.435327191,0.46824804,0.48504048,0.47648301,0.48073862,0.48390766,0.48682878,0.47732866,0.47688085,0.48109928,0.48787441,0.48279206
unbias_r,0.72710882,0.70122527,0.6830168,0.6798345,0.67279724,0.67003669,0.6699476,0.67141752,0.67022667,0.66517193,0.65685898,0.6553533
true_r,0.603680908,0.62532324,0.64376388,0.63969405,0.64593359,0.64612511,0.651059,0.64418767,0.64377149,0.64709544,0.65177439,0.6490659
h2_se,0.009880405,0.01037233,0.01360362,0.01293085,0.01376099,0.01478906,0.01534238,0.01535742,0.0152241,0.01555636,0.01951229,0.02028223
bias_r_se,0.024242081,0.02230301,0.02213494,0.02257555,0.02193064,0.02207663,0.02229069,0.02316927,0.02318196,0.02275252,0.02242564,0.02258238
unbias_r_se,0.047232227,0.03831738,0.03522041,0.0365885,0.03433702,0.03430243,0.03494157,0.03715073,0.03690602,0.03575086,0.03487741,0.035789
true_r_se,0.018344938,0.01885384,0.01762707,0.01836251,0.01783496,0.01797581,0.01765945,0.01821076,0.01853163,0.01785488,0.01727918,0.01730412

Unnamed: 0,393,786,1178,1571,1964,2357,2750,3142,3535,3928,7856,11784
h2,0.261422272,0.33691048,0.38656288,0.38563216,0.39029099,0.41026821,0.41485776,0.40542715,0.40338227,0.43397677,0.48590287,0.490152
bias_r,0.43352809,0.46597317,0.48993733,0.47962319,0.48140242,0.48641454,0.49000986,0.47867546,0.47619632,0.4815313,0.49087029,0.48662654
unbias_r,0.856964219,0.80994087,0.79637977,0.78169169,0.77780616,0.76438712,0.77006558,0.76199017,0.76129825,0.7390152,0.70974282,0.7009957
true_r,0.59902358,0.6192568,0.64085469,0.63740978,0.64284538,0.64572965,0.65079259,0.64328962,0.64262656,0.64633091,0.65125759,0.65104441
h2_se,0.009123414,0.01289417,0.01628844,0.01957944,0.01851311,0.01721825,0.02148388,0.02105453,0.024498,0.022053,0.023159,0.02173439
bias_r_se,0.024104258,0.02199523,0.02156292,0.02253154,0.0220442,0.02245592,0.0220759,0.0220587,0.02344797,0.02258351,0.0217471,0.02296211
unbias_r_se,0.059462784,0.04786029,0.04685663,0.0475517,0.04315605,0.0404359,0.04563594,0.04695656,0.04892665,0.0435327,0.03616681,0.03953214
true_r_se,0.018718606,0.01864124,0.01796454,0.01820245,0.01834277,0.01868864,0.0172635,0.01814282,0.01922628,0.01808371,0.01671852,0.01737887

Unnamed: 0,393,786,1178,1571,1964,2357,2750,3142,3535,3928,7856,11784
h2,0.266753413,0.3314492,0.40115229,0.39079871,0.40225208,0.40669947,0.42976112,0.42357203,0.40650199,0.42045423,0.45618528,0.45101705
bias_r,0.43449349,0.46926378,0.49209898,0.48443387,0.48331787,0.48772538,0.49261601,0.48260829,0.47897299,0.48296699,0.49178513,0.48688875
unbias_r,0.850406855,0.82309614,0.78258488,0.78241769,0.76868121,0.76989612,0.75976429,0.74964415,0.76014894,0.75034777,0.73602637,0.73141142
true_r,0.592078212,0.6159776,0.63367578,0.63413479,0.63984079,0.64113814,0.64856802,0.64406691,0.64114338,0.64592535,0.6528962,0.65102018
h2_se,0.009672289,0.01224396,0.0149645,0.01614697,0.0150731,0.01502973,0.01940411,0.01952781,0.01841774,0.01819044,0.02119979,0.01849331
bias_r_se,0.024007254,0.02207014,0.02153158,0.02233918,0.02147075,0.02196739,0.02126814,0.0224738,0.02336795,0.02328153,0.02186292,0.02171174
unbias_r_se,0.059041799,0.05062534,0.04188547,0.04611026,0.04338203,0.04114575,0.04446837,0.04484918,0.04877351,0.04180863,0.04229817,0.04119455
true_r_se,0.019653163,0.01885724,0.01821224,0.01959548,0.01906572,0.01861276,0.01753673,0.01847607,0.01925085,0.01895782,0.01707054,0.01651115

Unnamed: 0,393,786,1178,1571,1964,2357,2750,3142,3535,3928,7856,11784
h2,0.271493254,0.337377075,0.38637825,0.37046251,0.38850086,0.39095005,0.39810183,0.38481682,0.3846151,0.39148989,0.42744127,0.42084807
bias_r,0.436332112,0.470455932,0.49379882,0.47937379,0.48174177,0.48437185,0.48914214,0.47970694,0.47832631,0.48211181,0.489582,0.48258365
unbias_r,0.845516441,0.813970995,0.80003156,0.79416384,0.77721055,0.78164944,0.78001,0.78026232,0.77899676,0.77773144,0.75679668,0.75074626
true_r,0.597985291,0.6198091,0.6432175,0.63653582,0.64575183,0.64356433,0.65137362,0.64398012,0.64279086,0.64630398,0.652279,0.64762563
h2_se,0.009140729,0.008068787,0.0132131,0.01395112,0.0140973,0.01554817,0.01297412,0.01542625,0.01716526,0.01578615,0.01867713,0.0211658
bias_r_se,0.024088139,0.022954699,0.02156381,0.02246556,0.02203973,0.02258453,0.02228425,0.02313511,0.02289354,0.02196765,0.02356289,0.02353916
unbias_r_se,0.057447919,0.04605635,0.04309909,0.04603807,0.04050371,0.04566401,0.04236389,0.04681475,0.04650312,0.04519661,0.0467904,0.04219058
true_r_se,0.0189672,0.019025288,0.01740178,0.01820951,0.01812834,0.01867814,0.01780247,0.01827756,0.01851811,0.0173157,0.0178292,0.01813067

Unnamed: 0,393,786,1178,1571,1964,2357,2750,3142,3535,3928,7856,11784
h2,0.24646461,0.30277414,0.34755792,0.3352723,0.35307615,0.35579692,0.36955515,0.35515848,0.34962798,0.3691204,0.4356515,0.4597096
bias_r,0.43483008,0.47023219,0.48754319,0.47711396,0.47913709,0.48142069,0.4888953,0.47767693,0.47693143,0.4822236,0.48924243,0.48683691
unbias_r,0.88982503,0.86437245,0.83583826,0.83291256,0.82002798,0.81565231,0.81498614,0.81184018,0.81880166,0.80279333,0.74868839,0.72774985
true_r,0.59921738,0.62097851,0.639297,0.63657584,0.64196983,0.64257937,0.64970584,0.64369956,0.64136225,0.64645369,0.65196534,0.64954343
h2_se,0.01127377,0.01197376,0.01645107,0.01623301,0.02114777,0.0181482,0.01924497,0.01841056,0.01975917,0.01821738,0.02142664,0.02436858
bias_r_se,0.02441423,0.02221192,0.0222565,0.02289488,0.02183776,0.02282409,0.02180984,0.0235124,0.02361574,0.02267446,0.02163194,0.02176317
unbias_r_se,0.06655572,0.05494118,0.04883836,0.04941354,0.05216961,0.04695764,0.04882537,0.05117838,0.05381656,0.04728181,0.04148848,0.04422894
true_r_se,0.01888144,0.01918698,0.01807389,0.01872343,0.01832461,0.01844652,0.01683831,0.01796367,0.01929765,0.0177971,0.01704868,0.01658078


In [20]:
# Save the collected results to an xlsx file.
library("xlsx")
write.xlsx(x = sum, file = "2_4_result.xlsx")

In [None]:
# Read the saved results back in.
# NOTE(review): the variable is named `plot`, like the base function.
library("readxl")
plot <- read_excel(path = "2_4_result.xlsx")