diff --git a/DESCRIPTION b/DESCRIPTION index 515b858..1734035 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,18 @@ Package: CrypticIBDcheck Type: Package -Title: Identifying cryptic relatedness in genetic association studies -Version: 0.3-1 -Date: 2012-04-14 +Title: Identifying Cryptic Relatedness in Genetic Association Studies +Version: 0.3-3 +Date: 2018-05-14 Author: Annick Joelle Nembot-Simo, Jinko Graham and Brad McNeney Maintainer: Brad McNeney Depends: R (>= 2.14.0), rJPSGCS (>= 0.2-5), car (>= 2.0-9), ellipse (>= 0.3-5) -Imports: chopsticks +Imports: chopsticks, methods Suggests: parallel Description: Exploratory tools to identify closely related subjects using autosomal genetic marker data. License: GPL-3 -LazyData: yes -Packaged: 2013-09-20 21:38:23 UTC; mcneney +LazyData: true NeedsCompilation: yes +Packaged: 2018-05-17 08:25:12 UTC; mcneney Repository: CRAN -Date/Publication: 2013-09-21 08:05:29 +Date/Publication: 2018-05-17 09:01:55 UTC diff --git a/MD5 b/MD5 index b80b5ed..aed69dd 100644 --- a/MD5 +++ b/MD5 @@ -1,43 +1,45 @@ -c9e1369b3052a28ab6e5c29871f59a9f *DESCRIPTION -ae6f75a3bfe1fb6b19e7df3326d80a04 *NAMESPACE +cb5dc2fed06a71aadba624d7765f5447 *DESCRIPTION +78bad058e0b14992204d55dccccfe2d4 *NAMESPACE c167da9c7d2348be9e64e76300fc03cd *R/IBD-class.R 5323a70bc518f476eceec57898a586be *R/IBDcheck.R -1744013d2d0a486bfa17a149290a6f6a *R/SNPgenmap.R +f1e89194f810ed51af67a6f3fab55ba7 *R/SNPgenmap.R 3e252d6126e262418d5ded38ad4c1cc5 *R/cdlIBS.R -403d47447311294933407f27e2cbeec1 *R/countIBS.R -ca814b6e6587d75f2bd1955bde57fa01 *R/estIBD.R -d099b23b9abe86b9152c21bee57d114c *R/filters.R +57447c03248b5786e8cc4edb67ce9a0f *R/countIBS.R +98150553fa3605aab1f6d34c8d89a057 *R/estIBD.R +227d208765f76d1c14b6eb9cc7194e96 *R/filters.R 5ba5635674011597fa380a74af480be3 *R/plotIBD.R -cfc9f31f8fb4efeb134ce813576016c9 *R/simIBD.R +01a917e915046457a52bfea0b1300534 *R/simIBD.R 027e232095609bfdc25372ee00aca23f *R/snpinfos.R 5f437b99811e93a818b3aa3b3db26647 *R/zzz.R -70188af9e6c3778de620efc00f369473 *build/vignette.rds +8864223892c8609e62161f48b2f873a5 *build/vignette.rds f8e6a03a523edea22a42cafd23765630 *data/Nhlsim.RData 56867363d807115eef7760673f3803b7 *data/RutgersMapB36.RData +a93d8e49de436746a32b274041e0ee80 *data/datalist fe946fb4d7ddb1edcc37e987c7899ee4 *inst/CITATION -baa6acd62946cf31338b348bc713eba5 *inst/doc/CrypticIBDcheck.Rnw -33ffb287358af405c9f9b55345a9ee78 *inst/doc/CrypticIBDcheck.pdf +28c888e13e4f8bce5e261251437c11b5 *inst/doc/CrypticIBDcheck.Rnw +fe609367f79f3696faddecb6a973373f *inst/doc/CrypticIBDcheck.pdf 293d211d4c739711fee2b0b4ce0606e5 *inst/doc/IBDcheck-hapmap.R ac6a847d6b4918ee9d88fdec8b0c7a5c *inst/doc/IBDcheck-hapmap.Rnw -62248e95904c151e349c87984e30a9fe *inst/doc/IBDcheck-hapmap.pdf +9454119f3882881e210a8887f8f2a61a *inst/doc/IBDcheck-hapmap.pdf 2f5dbbbd1d759d8467a3a1c60be01650 *inst/scripts/thin.R 00ab1ba102fc885fb385a2046dfe3b8b *man/CrypticIBDcheck-package.Rd 4bbecb413eb3b22244ecae0b0acd7add *man/IBD.Rd -9429978fc495e28e80a813cd00883201 *man/IBDcheck.Rd +7308e6838e402ba0559cac1464d2ef98 *man/IBDcheck.Rd cd88caf8b810aa62626930aa61979ea7 *man/Nhlsim.Rd -c48a8e94ab43dd4b25927b802d4528c7 *man/RutgersMapB36.Rd -7390c26f0ef433e7d654a176b32355b9 *man/SNPgenmap.Rd +9c8935d02b3295f628fe56e7328fdb12 *man/RutgersMapB36.Rd +bb043bb68cf677890f42da44e1c54917 *man/SNPgenmap.Rd 4789438d1e62867b99a57b562d4e6d9f *man/countIBS.Rd c19f88ffa790c88b3295d8af31399fcf *man/filter.control.Rd c992fd8d496636234b9f3a1fdae59822 *man/new.IBD.Rd e7599dc866e1a087bcfd4bb49d32332f *man/plot.IBD.Rd -bfac2693c606256f19dda0fab87e7240 *man/sim.control.Rd +175df0713b31c118aa9bc4daee521f1e *man/sim.control.Rd 079411548301c4221142b6f234130acb *src/IBDest.sim.c 49d5b5b0238d4cf21dc8a58857c29ff4 *src/IBDest.study.c e4fdd2a3e75e08a61eb714e9c6a8c3cf *src/IBDestp.c 714573746c49f96028fd59ee5f4e494f *src/IBDestp.h -39a289061f81ca5107c75432f4dfc9b5 *src/countIBS.c -baa6acd62946cf31338b348bc713eba5 *vignettes/CrypticIBDcheck.Rnw +8263108717ce545678a08096c16325d6 *src/countIBS.c +0d5bc11a2ab048216f59c6cbe01078c0 *src/init.c +28c888e13e4f8bce5e261251437c11b5 *vignettes/CrypticIBDcheck.Rnw ac6a847d6b4918ee9d88fdec8b0c7a5c *vignettes/IBDcheck-hapmap.Rnw 4a6038abb1810b39143cd14f24c058f8 *vignettes/IBDcheck-hapmap.bib 1f5aeb0993bbc807aa59c56181a9efa5 *vignettes/fig005_MZ.pdf diff --git a/NAMESPACE b/NAMESPACE index f7f771c..9125223 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,12 @@ export(countIBS,IBDcheck,new.IBD,IBD,SNPgenmap,filter.control,sim.control) +importFrom("grDevices", "devAskNewPage") +importFrom("graphics", "lines", "plot", "title") +importFrom("methods", "is", "new") +importFrom("stats", "approxfun", "cor", "pnorm", "qchisq", "quantile", + "rbinom", "sd", "var") +importFrom("utils", "flush.console") +importFrom("rJPSGCS","GeneDrops","read.pedfile","write.pedfile","write.parfile","FitGMLD") +importFrom("car","showLabels") +importFrom("ellipse","ellipse") S3method(plot,IBD) +useDynLib("CrypticIBDcheck", .registration = TRUE) diff --git a/R/SNPgenmap.R b/R/SNPgenmap.R index af59c92..f3b0b43 100755 --- a/R/SNPgenmap.R +++ b/R/SNPgenmap.R @@ -1,8 +1,8 @@ SNPgenmap <- function(physmap, chromosomes) { genmap <- rep(NA, length(chromosomes)) for (chr in 1:22) { - physpos <- RutgersMapB36[[paste("chr", chr, sep="")]]$Build36_map_physical_position - genmappos<-RutgersMapB36[[paste("chr", chr, sep="")]]$Sex.averaged_map_position + physpos <- CrypticIBDcheck::RutgersMapB36[[paste0("chr", chr)]]$Build36_map_physical_position + genmappos<-CrypticIBDcheck::RutgersMapB36[[paste0("chr", chr)]]$Sex.averaged_map_position chrmap <- approxfun(physpos,genmappos) ind <- which(chromosomes == chr) genmap[ind] <- chrmap(physmap[ind]) diff --git a/R/countIBS.R b/R/countIBS.R index 04a527e..cec7797 100644 --- a/R/countIBS.R +++ b/R/countIBS.R @@ -5,8 +5,8 @@ countIBS <- function(x) { mni0=matrix(0,n,n) #matrix of count of snps with IBS=0 for pairs of individuals mni1=matrix(0,n,n) mni2=matrix(0,n,n) - out <- .C("countIBS", t(x@.Data), as.integer(n), as.integer(ncol(x)), - as.integer(mni0), as.integer(mni1), as.integer(mni2), PACKAGE="CrypticIBDcheck") + out <- .C(count_IBS, t(x@.Data), as.integer(n), as.integer(ncol(x)), + as.integer(mni0), as.integer(mni1), as.integer(mni2)) out[[4]] <- matrix(out[[4]], nrow=n, byrow=TRUE) out[[5]] <- matrix(out[[5]], nrow=n, byrow=TRUE) out[[6]] <- matrix(out[[6]], nrow=n, byrow=TRUE) diff --git a/R/estIBD.R b/R/estIBD.R index 6648701..3ddf764 100755 --- a/R/estIBD.R +++ b/R/estIBD.R @@ -5,11 +5,11 @@ IBDest.study <- function(snpobjtr, cdlibs) { excl<-is.na(cdlibs[,1]) snpobjtr<-snpobjtr[,!excl] cdlibs<-cdlibs[!excl,] - out <- .Call("IBDest_study", t(snpobjtr@.Data), + out <- .Call(IBDest_study, t(snpobjtr@.Data), nrow(snpobjtr), ncol(snpobjtr), t(cdlibs), - new.env(), PACKAGE="CrypticIBDcheck") + new.env()) pz0 <- matrix(out[[1]], nrow(snpobjtr), nrow(snpobjtr), byrow=TRUE) pz1 <- matrix(out[[2]], nrow(snpobjtr), nrow(snpobjtr), byrow=TRUE) pz2 <- matrix(out[[3]], nrow(snpobjtr), nrow(snpobjtr), byrow=TRUE) @@ -28,11 +28,11 @@ IBDest.sim <- function(snpmat, cdlibs) { excl<-is.na(cdlibs[,1]) snpmat<-snpmat[,!excl] cdlibs<-cdlibs[!excl,] - out <- .Call("IBDest_sim", t(snpmat@.Data), + out <- .Call(IBDest_sim, t(snpmat@.Data), nrow(snpmat)/2, ncol(snpmat), t(cdlibs), - new.env(), PACKAGE="CrypticIBDcheck") + new.env()) names(out) <- c("pz0", "pz1", "pz2") data.frame(out) } diff --git a/R/filters.R b/R/filters.R index ff42988..8fece9a 100755 --- a/R/filters.R +++ b/R/filters.R @@ -1,52 +1,52 @@ -## filter.control is a function to set parameters that control quality control -## filters: snpcallrate, MAF, samplecallrate and HWEp - -filter.control<-function(filter=TRUE,snpcallrate=0.9,MAF=0.01, - samplecallrate=0.9, HWEp=0.001) { - list(filter=filter,snpcallrate=snpcallrate,MAF=MAF, - samplecallrate=samplecallrate, HWEp=HWEp) -} - -################################################################ -snpfilter <- function(snpmatlist1,filter){ - -SNP.support=snpmatlist1$snp.support -SNP.support=remove.cdlIBS(SNP.support) #remove cdlIBS columns, if present -snpobjt=snpmatlist1$snp.data - -sumsnp<-chopsticks::summary(snpobjt) -nbs=ncol(snpobjt) - -#snps call rate test - callr=(sumsnp$Call.rate>=filter$snpcallrate) - snpobjt1<-snpobjt[,(callr)] - SNP.support1<-SNP.support[callr,] - -#MAF test - sumsnp1<-chopsticks::summary(snpobjt1) - maft=(sumsnp1$MAF>=filter$MAF) - snpobjt1<-snpobjt1[,(maft)] - SNP.support1<-SNP.support1[maft,] - -#samples call rate test - callrvt=(row.summary(snpobjt1)$Call.rate>=filter$samplecallrate) - snpobjt1<-snpobjt1[(callrvt),] - snpmatlist1$subject.support<-snpmatlist1$subject.support[(callrvt),] - -#Remove SNPs not found by SNPgenmap (NA genetic location) - ind<-is.na(SNP.support1$Gen_loc) - SNP.support1<-SNP.support1[!ind,] - snpobjt1<-snpobjt1[,(!ind)] - -#Do test of HWE and keep SNPs that pass - vecpval=as.numeric(as.vector(SNP.support1$pvalue_HWE)) - hwvec<-( vecpval>filter$HWEp & !is.na(SNP.support1$pvalue_HWE) ) - - snpmatlist1$snp.data<-snpobjt1[,(hwvec)] - snpmatlist1$snp.support<-SNP.support1[hwvec,] - - - return(snpmatlist1) - -} - +## filter.control is a function to set parameters that control quality control +## filters: snpcallrate, MAF, samplecallrate and HWEp + +filter.control<-function(filter=TRUE,snpcallrate=0.9,MAF=0.01, + samplecallrate=0.9, HWEp=0.001) { + list(filter=filter,snpcallrate=snpcallrate,MAF=MAF, + samplecallrate=samplecallrate, HWEp=HWEp) +} + +################################################################ +snpfilter <- function(snpmatlist1,filter){ + +SNP.support=snpmatlist1$snp.support +SNP.support=remove.cdlIBS(SNP.support) #remove cdlIBS columns, if present +snpobjt=snpmatlist1$snp.data + +sumsnp<-chopsticks::summary(snpobjt) +nbs=ncol(snpobjt) + +#snps call rate test + callr=(sumsnp$Call.rate>=filter$snpcallrate) + snpobjt1<-snpobjt[,(callr)] + SNP.support1<-SNP.support[callr,] + +#MAF test + sumsnp1<-chopsticks::summary(snpobjt1) + maft=(sumsnp1$MAF>=filter$MAF) + snpobjt1<-snpobjt1[,(maft)] + SNP.support1<-SNP.support1[maft,] + +#samples call rate test + callrvt=(chopsticks::row.summary(snpobjt1)$Call.rate>=filter$samplecallrate) + snpobjt1<-snpobjt1[(callrvt),] + snpmatlist1$subject.support<-snpmatlist1$subject.support[(callrvt),] + +#Remove SNPs not found by SNPgenmap (NA genetic location) + ind<-is.na(SNP.support1$Gen_loc) + SNP.support1<-SNP.support1[!ind,] + snpobjt1<-snpobjt1[,(!ind)] + +#Do test of HWE and keep SNPs that pass + vecpval=as.numeric(as.vector(SNP.support1$pvalue_HWE)) + hwvec<-( vecpval>filter$HWEp & !is.na(SNP.support1$pvalue_HWE) ) + + snpmatlist1$snp.data<-snpobjt1[,(hwvec)] + snpmatlist1$snp.support<-SNP.support1[hwvec,] + + + return(snpmatlist1) + +} + diff --git a/R/simIBD.R b/R/simIBD.R index cf597ee..a432d40 100755 --- a/R/simIBD.R +++ b/R/simIBD.R @@ -1,391 +1,389 @@ -sim.control<-function( - simulate=FALSE, # do we want to simulate? - # relationships to simulate -- by default, don't do cousins - rships=c("unrelated","MZtwins","parent-offspring","full-sibs","half-sibs"), - nsim=rep(200,length(rships)), # number of gene drops per rship - userdat=NULL, - geno.err=1/1000, - hom2hom.err=0, - fitLD=TRUE, #fit LD models? - LDfiles=NULL, # list of LD files - cl=NULL) #SNOW cluster object -{ - names(nsim)<-rships - if(!simulate) { - rships<-nsim<-geno.err<-hom2hom.err<-fitLD<-LDfiles<-cl<-NULL - } - list(simulate=simulate,rships=rships,nsim=nsim,userdat=userdat, - geno.err=geno.err, hom2hom.err=hom2hom.err,fitLD=fitLD, - LDfiles=LDfiles,cl=cl) -} - -################################################################ -simIBD=function(snpmatlist,simparams){ - - # Lots of decisions to be made in this function: - # - User wants LD? - # * If yes, do we have to fit LD models? - # But first, since both LD fitting and gene drops are done separately - # for each chromosome. Create a list of data needed for each chrom. - - dat.by.chrom=split.by.chrom(snpmatlist,simparams) - if(simparams$fitLD) { - if(is.null(simparams$LDfiles)) { - # Need to fit the LD models and save in parfiles - GMLDfct(dat.by.chrom,simparams) - } # else do nothing, because LD model files already exist - } else { - # User doesn't want LD model -- write regular parfiles - lapply(dat.by.chrom,parfile.noLD) - } - snpmats<-genedrops(dat.by.chrom,simparams) - return(snpmats) -} - -split.by.chrom=function(snpmatlist,simparams){ - - popsam=snpmatlist$subject.support$popsam - snpobjr=snpmatlist$snp.data[popsam,] - genloc=snpmatlist$snp.support$Gen_loc - chrvec=snpmatlist$snp.support$Chromosome - - # We used to set up two lists: snpobjlr and genlocl. - # Changed recently to set up one list called gdat with all info in it. - # This makes it easier to re-write the code to use a cluster. - gdat<-list(); c.count<-0 - for (i in 1:22){ - if(is.element(i,chrvec)=="TRUE"){ - c.count<-c.count+1 #found another chromosome - rslist=snpobjr[,chrvec==i] - loclist=genloc[chrvec==i] - if(simparams$fitLD) { #parfile for gene drops will include LD model - if(!is.null(simparams$LDfiles)) { - GDparfile=simparams$LDfiles[c.count] - } else { - GDparfile=paste("GD",i,".ld.par",sep="") - } - } else { - GDparfile=paste("GD",i,".par",sep="") - } - gdat[[c.count]]<-list(snp.data=rslist,genmap=loclist,chrom=i, - GDparfile=GDparfile) - } - } - - nl=length(gdat) - #ordering the snps according to genetic map position - for (j in 1:nl){ - snpchr=gdat[[j]]$snp.data - locsnps=as.numeric(as.vector(gdat[[j]]$genmap)) - ordsnps=order(locsnps) - gdat[[j]]$snp.data=snpchr[,ordsnps] - gdat[[j]]$genmap=locsnps[ordsnps] - } - return(gdat) -} - -GMLDfct=function(gdat,simparams){ - - if(is.null(simparams$cl)) { # no cluster: use lapply - unlist(lapply(gdat,parfile.fitLD)) - } else { # can use clusterApplyLB to distribute work over cluster - require(parallel) - unlist(clusterApplyLB(simparams$cl,gdat,parfile.fitLD)) - } -} - -parfile.fitLD<-function(gdat) { - # Use Thomas' FitGMLD to fit an LD model for founder haplotypes. - # Computations become too much of a burden for more than about 100 - # subjects, so subset if necessary. - if(nrow(gdat$snp.data)>100) { - s.ind<-sample(1:nrow(gdat$snp.data)) - gdat$snp.data<-gdat$snp.data[s.ind,] - } - fp<-paste("flipped",gdat$chrom,".ped",sep="") - write.pedfile(pedinf="unrelated",snp.data=gdat$snp.data, - file=fp,transpose=TRUE) - pf<-paste("parfile",gdat$chrom,".par",sep="") - write.parfile(gdat$snp.data,gdat$genmap,file=pf) - FitGMLD(pf,fp,gdat$GDparfile) - unlink(c(pf,fp)) - return(gdat$GDparfile) #Will need to output names of parfiles with LD models -} - -parfile.noLD<-function(gdat) { - # No fitted LD model, so just write a regular LINKAGE parfile. - write.parfile(gdat$snp.data,gdat$genmap,file=gdat$GDparfile) -} - -####################### -### gendrop simulations - -genedrops<-function(gdat,simparams){ - - # Do gene drops for all relationships of interest - ursnpmat<-mzsnpmat<-posnpmat<-fssnpmat<-hssnpmat<-cosnpmat<-usersnpmat<-NULL - if("unrelated" %in% simparams$rships) { - ursnpmat=gendrp(gdat,peddat=ur.peddat(), - simparams$nsim["unrelated"],simparams) - } - if("MZtwins" %in% simparams$rships) { - # Won't do gene drops, just sample subjects and apply genotyping error - mzsnpmat=mztwinsim(gdat, simparams$nsim["MZtwins"],simparams) - } - if("parent-offspring" %in% simparams$rships) { - posnpmat=gendrp(gdat,peddat=po.peddat(), - simparams$nsim["parent-offspring"],simparams) - } - if("full-sibs" %in% simparams$rships) { - fssnpmat=gendrp(gdat,peddat=fs.peddat(), - simparams$nsim["full-sibs"],simparams) - } - if("half-sibs" %in% simparams$rships) { - hssnpmat=gendrp(gdat,peddat=hs.peddat(), - simparams$nsim["half-sibs"],simparams) - } - if("cousins" %in% simparams$rships) { - cosnpmat=gendrp(gdat,peddat=co.peddat(), - simparams$nsim["cousins"],simparams) - } - if("user" %in% simparams$rships) { - usersnpmat=gendrp(gdat,peddat=user.peddat(simparams$userdat), - simparams$nsim["user"],simparams) - } - snpmats=list(mz=mzsnpmat,ur=ursnpmat,po=posnpmat,fs=fssnpmat,hs=hssnpmat,co=cosnpmat,user=usersnpmat, - LDfiles=findLDfiles(gdat))#Store LDfiles for output from simIBD - return(snpmats) -} - -mztwinsim=function(gdat,nreps,simparams){ - if(length(gdat) ==1) { - snpmat<-doTwinsim(gdat[[1]],nreps,simparams) - } else { - snpmatlist<-lapply(gdat,doTwinsim,nreps,simparams) - # cbind results into one big snp.matrix object - snpmat<-Reduce(cbind,snpmatlist) - } - return(snpmat) -} - - - -gendrp=function(gdat,peddat,nreps,simparams){ - - if(is.null(simparams$cl)) { - snpmatlist<-lapply(gdat,doGeneDrops,peddat,nreps,simparams) - } else { - require(parallel) - snpmatlist<-clusterApply(simparams$cl,gdat,doGeneDrops,peddat,nreps,simparams) - } - - # cbind results into one big snp.matrix object - nl=length(snpmatlist) - snpmat<-snpmatlist[[1]] - if(nl > 1){ - for (j in 2:nl){ - snpmat=cbind(snpmat,snpmatlist[[j]]) - } - } - - return(snpmat) -} -doTwinsim<-function(gdat,n,simparams) { - # First draw a sample of n, with replacement, from the study data. - s.inds <- sample(1:nrow(gdat$snp.data),n,replace=TRUE) - # Replicate each sample to make it a pair. - so1<-gdat$snp.data[s.inds,] - row.names(so1)<-as.character(1:n) - so2<-gdat$snp.data[s.inds,] - row.names(so2)<-as.character((n+1):(2*n)) - # Convention for output is all pairmember1's followed by all pairmember2's. - snpout<-rbind(so1,so2) - # Apply genotyping errors - nc<-ncol(gdat$snp.data) - for(i in 1:nrow(snpout)) { - # Sample genotypes to have errors with rate geno.err - bb<-as.logical(rbinom(nc,size=1,prob=simparams$geno.err)) - snpout[i,bb]<-apply.errs(snpout[i,bb],simparams$hom2hom.err) - } - return(snpout) -} - -# Worker function to doGeneDrops to do n gene drops given the pedigree -# information. This function is called by the functions specific to each -# relationship of interest -doGeneDrops<-function(gdat,pedinfo,n,simparams) { - - # We simulate complete data on the pedigree and then impose (i) genotyping - # errors and (ii) missing - # data patterns of randomly sampled pairs of subjects. GeneDrops needs - # a LINKAGE pedfile as input. The pedigree information - # for a regular LINKAGE pedfile (and that's it) is in pedinfo. - # To complete the pedfile, we need to add marker information to this - # pedigree info. As GeneDrops to simulate complete data on all subjects, - # the marker data we pass to GeneDrops is not actually used. Therefore, - # an empty (all missings) snp.matrix object of the right dimension will - # work as marker info. - # The snp.matrix missing data code can be obtained by coercing 0 - # to the data-type "raw" with as.raw(). Use as.raw(0) to create the - # missing data code for snp.matrix objects. To create a snp.matrix object - # of missing values, the command is: - nr<-nrow(pedinfo); nc<-ncol(gdat$snp.data) - sdat<-new("snp.matrix",data=as.raw(0),nrow=nr, ncol=nc, - dimnames=list(as.character(1:nr),as.character(1:nc))) - - - # Create input pedfile - inped<-paste("in",gdat$chrom,".ped",sep="") - write.pedfile(pedinfo,sdat,inped, sep=" ", eol="\n", na="0") - # Do gene drops and save in an output pedfile - outped<-paste("GDout",gdat$chrom,".ped",sep="") - GeneDrops(gdat$GDparfile,inped,n,complete.data=TRUE, outped) - # GeneDrops can fail but not report any error -- the output file may - # not exist. - if(!file.exists(outped)) { - warning("Gene drops failed for chromosome",gdat$chrom,"-- GeneDrops java program \n may have run out of memory and IBD coefficient clusters may not be valid.\n See the Note in the GeneDrops help file for instructions on how to increase java heap space.") - return(NULL) - } - snpout=suppressWarnings(read.pedfile(file=outped,snp.names=colnames(gdat$snp.data))$snp.data) - unlink(inped) - unlink(outped) - row.names(snpout)<-as.character(1:nrow(snpout)) - # Keep just the two subjects of interest, having ID's 1 or 2 (IDs are stored - # in col 2 of pedinfo). Annick want's all ID=1's first, then ID=2's - snpout<-rbind(snpout[pedinfo[,2]==1,],snpout[pedinfo[,2]==2,]) - #pairs<-rep(1:2,n) - #snpout<-rbind(snpout[pairs==1,],snpout[pairs==2,]) - - # Apply genotyping errors - for(i in 1:nrow(snpout)) { - # Sample genotypes to have errors with rate gerr - bb<-as.logical(rbinom(nc,size=1,prob=simparams$geno.err)) - snpout[i,bb]<-apply.errs(snpout[i,bb],simparams$hom2hom.err) - } - - # Select missing data patterns from study subjects and impose these on the - # simulated data. - # First draw a random sample of n pairs from the study data. - s.inds <- sample(1:nrow(gdat$snp.data),2*n,replace=TRUE) - # Now loop over simulated data and impose missingness patterns from sampled - # pairs - for(i in 1:(2*n)) { - # Need to test for the missing value in a snp.matrix object. - miss.ind<-gdat$snp.data[s.inds[i],] == as.raw(0) - snpout[i,miss.ind]<-as.raw(0) - } - - return(snpout) -} - -# Functions to set up pedigree information part of pedfiles -po.peddat<-function() { # Parent-offspring - pedsize <- 3 - pedids <- rep(1,pedsize) - ids <- (1:pedsize) - dadids <- c(0,3,0) - momids <- c(0,1,0) - zeros <- rep(0,pedsize) - genders <- c(2,2,1) - ones <- rep(1,pedsize) #9th column(1 for the starting ind of the pedigree) - peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) - return(peddat) -} - -fs.peddat=function(){ # Full sibs - pedsize <- 4 - pedids <- rep(1,pedsize) - ids <- (1:pedsize) - dadids <- c(3,3,0,0) - momids <- c(4,4,0,0) - zeros <- rep(0,pedsize) - genders <- c(2,2,1,2) - ones <- rep(1,pedsize) - peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) - return(peddat) -} - -hs.peddat=function(){ # Half sibs - pedsize <- 5 - pedids <- rep(1,pedsize) - ids <- (1:pedsize) - dadids <- c(3,5,0,0,0) - momids <- c(4,4,0,0,0) - zeros <- rep(0,pedsize) - genders <- c(2,2,1,2,1) - ones <- rep(1,pedsize) - peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) - return(peddat) -} - -ur.peddat=function(){ # unrelated - pedsize <- 2 - pedids <- rep(1,pedsize) - ids <- (1:pedsize) - dadids <- c(0,0) - momids <- c(0,0) - zeros <- rep(0,pedsize) - genders <- c(2,2) - ones <- rep(1,pedsize) - peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) - return(peddat) -} - -co.peddat=function(){ # cousins - pedsize <- 8 - pedids <- rep(1,pedsize) - ids <- (1:pedsize) - dadids <- c(3,6,0,8,8,0,0,0) - momids <- c(4,5,0,7,7,0,0,0) - zeros <- rep(0,pedsize) - genders <- c(2,2,1,2,2,1,2,1) - ones <- rep(1,pedsize) - peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) - return(peddat) -} - -user.peddat=function(userdat) { # user-defined - if(is.null(userdat)) { - return(NULL) - } else { - return(cbind(1,userdat$ids,userdat$dadids,userdat$momids,0,0,0, - userdat$gender,1)) - } -} - -apply.errs<-function(x,hom2hom.err) { - # x is a vector of raw's. - x.orig<-x - # homozygotes called heterozygous - x[x.orig==as.raw(1)]<-hom1.err(sum(x.orig==1),hom2hom.err) - x[x.orig==as.raw(2)]<-het.err(sum(x.orig==2)) - x[x.orig==as.raw(3)]<-hom3.err(sum(x.orig==3),hom2hom.err) - return(x) -} - - -hom1.err<-function(n,hom2hom.err) { - # homoz 01 have prob 1-hom2hom.err of being 02 and hom2hom.err of being 03 - return(as.raw(2+rbinom(n,size=1,prob=(1-hom2hom.err)))) -} - -het.err<-function(n) { - # heterozygotes equally likely to be one of two homozygotes - return(as.raw(1+2*rbinom(n,size=1,prob=1/2))) -} - -hom3.err<-function(n,hom2hom.err) { - # homoz 03 have prob hom2hom.err of being 01 and 1-hom2hom.err of being 02 - return(as.raw(1+rbinom(n,size=1,prob=hom2hom.err))) -} - - -findLDfiles<-function(gdat) { - return(unlist(lapply(gdat,getLDfile))) -} - -getLDfile<-function(gdat) { - return(gdat$GDparfile) -} - - +sim.control<-function( + simulate=FALSE, # do we want to simulate? + # relationships to simulate -- by default, don't do cousins + rships=c("unrelated","MZtwins","parent-offspring","full-sibs","half-sibs"), + nsim=rep(200,length(rships)), # number of gene drops per rship + userdat=NULL, + geno.err=1/1000, + hom2hom.err=0, + fitLD=TRUE, #fit LD models? + LDfiles=NULL, # list of LD files + cl=NULL) #SNOW cluster object +{ + names(nsim)<-rships + if(!simulate) { + rships<-nsim<-geno.err<-hom2hom.err<-fitLD<-LDfiles<-cl<-NULL + } + list(simulate=simulate,rships=rships,nsim=nsim,userdat=userdat, + geno.err=geno.err, hom2hom.err=hom2hom.err,fitLD=fitLD, + LDfiles=LDfiles,cl=cl) +} + +################################################################ +simIBD=function(snpmatlist,simparams){ + + # Lots of decisions to be made in this function: + # - User wants LD? + # * If yes, do we have to fit LD models? + # But first, since both LD fitting and gene drops are done separately + # for each chromosome. Create a list of data needed for each chrom. + + dat.by.chrom=split.by.chrom(snpmatlist,simparams) + if(simparams$fitLD) { + if(is.null(simparams$LDfiles)) { + # Need to fit the LD models and save in parfiles + GMLDfct(dat.by.chrom,simparams) + } # else do nothing, because LD model files already exist + } else { + # User doesn't want LD model -- write regular parfiles + lapply(dat.by.chrom,parfile.noLD) + } + snpmats<-genedrops(dat.by.chrom,simparams) + return(snpmats) +} + +split.by.chrom=function(snpmatlist,simparams){ + + popsam=snpmatlist$subject.support$popsam + snpobjr=snpmatlist$snp.data[popsam,] + genloc=snpmatlist$snp.support$Gen_loc + chrvec=snpmatlist$snp.support$Chromosome + + # We used to set up two lists: snpobjlr and genlocl. + # Changed recently to set up one list called gdat with all info in it. + # This makes it easier to re-write the code to use a cluster. + gdat<-list(); c.count<-0 + for (i in 1:22){ + if(is.element(i,chrvec)=="TRUE"){ + c.count<-c.count+1 #found another chromosome + rslist=snpobjr[,chrvec==i] + loclist=genloc[chrvec==i] + if(simparams$fitLD) { #parfile for gene drops will include LD model + if(!is.null(simparams$LDfiles)) { + GDparfile=simparams$LDfiles[c.count] + } else { + GDparfile=paste("GD",i,".ld.par",sep="") + } + } else { + GDparfile=paste("GD",i,".par",sep="") + } + gdat[[c.count]]<-list(snp.data=rslist,genmap=loclist,chrom=i, + GDparfile=GDparfile) + } + } + + nl=length(gdat) + #ordering the snps according to genetic map position + for (j in 1:nl){ + snpchr=gdat[[j]]$snp.data + locsnps=as.numeric(as.vector(gdat[[j]]$genmap)) + ordsnps=order(locsnps) + gdat[[j]]$snp.data=snpchr[,ordsnps] + gdat[[j]]$genmap=locsnps[ordsnps] + } + return(gdat) +} + +GMLDfct=function(gdat,simparams){ + + if(is.null(simparams$cl)) { # no cluster: use lapply + unlist(lapply(gdat,parfile.fitLD)) + } else { # can use clusterApplyLB to distribute work over cluster + unlist(parallel::clusterApplyLB(simparams$cl,gdat,parfile.fitLD)) + } +} + +parfile.fitLD<-function(gdat) { + # Use Thomas' FitGMLD to fit an LD model for founder haplotypes. + # Computations become too much of a burden for more than about 100 + # subjects, so subset if necessary. + if(nrow(gdat$snp.data)>100) { + s.ind<-sample(1:nrow(gdat$snp.data)) + gdat$snp.data<-gdat$snp.data[s.ind,] + } + fp<-paste("flipped",gdat$chrom,".ped",sep="") + write.pedfile(pedinf="unrelated",snp.data=gdat$snp.data, + file=fp,transpose=TRUE) + pf<-paste("parfile",gdat$chrom,".par",sep="") + write.parfile(gdat$snp.data,gdat$genmap,file=pf) + FitGMLD(pf,fp,gdat$GDparfile) + unlink(c(pf,fp)) + return(gdat$GDparfile) #Will need to output names of parfiles with LD models +} + +parfile.noLD<-function(gdat) { + # No fitted LD model, so just write a regular LINKAGE parfile. + write.parfile(gdat$snp.data,gdat$genmap,file=gdat$GDparfile) +} + +####################### +### gendrop simulations + +genedrops<-function(gdat,simparams){ + + # Do gene drops for all relationships of interest + ursnpmat<-mzsnpmat<-posnpmat<-fssnpmat<-hssnpmat<-cosnpmat<-usersnpmat<-NULL + if("unrelated" %in% simparams$rships) { + ursnpmat=gendrp(gdat,peddat=ur.peddat(), + simparams$nsim["unrelated"],simparams) + } + if("MZtwins" %in% simparams$rships) { + # Won't do gene drops, just sample subjects and apply genotyping error + mzsnpmat=mztwinsim(gdat, simparams$nsim["MZtwins"],simparams) + } + if("parent-offspring" %in% simparams$rships) { + posnpmat=gendrp(gdat,peddat=po.peddat(), + simparams$nsim["parent-offspring"],simparams) + } + if("full-sibs" %in% simparams$rships) { + fssnpmat=gendrp(gdat,peddat=fs.peddat(), + simparams$nsim["full-sibs"],simparams) + } + if("half-sibs" %in% simparams$rships) { + hssnpmat=gendrp(gdat,peddat=hs.peddat(), + simparams$nsim["half-sibs"],simparams) + } + if("cousins" %in% simparams$rships) { + cosnpmat=gendrp(gdat,peddat=co.peddat(), + simparams$nsim["cousins"],simparams) + } + if("user" %in% simparams$rships) { + usersnpmat=gendrp(gdat,peddat=user.peddat(simparams$userdat), + simparams$nsim["user"],simparams) + } + snpmats=list(mz=mzsnpmat,ur=ursnpmat,po=posnpmat,fs=fssnpmat,hs=hssnpmat,co=cosnpmat,user=usersnpmat, + LDfiles=findLDfiles(gdat))#Store LDfiles for output from simIBD + return(snpmats) +} + +mztwinsim=function(gdat,nreps,simparams){ + if(length(gdat) ==1) { + snpmat<-doTwinsim(gdat[[1]],nreps,simparams) + } else { + snpmatlist<-lapply(gdat,doTwinsim,nreps,simparams) + # cbind results into one big snp.matrix object + snpmat<-Reduce(cbind,snpmatlist) + } + return(snpmat) +} + + + +gendrp=function(gdat,peddat,nreps,simparams){ + + if(is.null(simparams$cl)) { + snpmatlist<-lapply(gdat,doGeneDrops,peddat,nreps,simparams) + } else { + snpmatlist<-parallel::clusterApply(simparams$cl,gdat,doGeneDrops,peddat,nreps,simparams) + } + + # cbind results into one big snp.matrix object + nl=length(snpmatlist) + snpmat<-snpmatlist[[1]] + if(nl > 1){ + for (j in 2:nl){ + snpmat=cbind(snpmat,snpmatlist[[j]]) + } + } + + return(snpmat) +} +doTwinsim<-function(gdat,n,simparams) { + # First draw a sample of n, with replacement, from the study data. + s.inds <- sample(1:nrow(gdat$snp.data),n,replace=TRUE) + # Replicate each sample to make it a pair. + so1<-gdat$snp.data[s.inds,] + row.names(so1)<-as.character(1:n) + so2<-gdat$snp.data[s.inds,] + row.names(so2)<-as.character((n+1):(2*n)) + # Convention for output is all pairmember1's followed by all pairmember2's. + snpout<-rbind(so1,so2) + # Apply genotyping errors + nc<-ncol(gdat$snp.data) + for(i in 1:nrow(snpout)) { + # Sample genotypes to have errors with rate geno.err + bb<-as.logical(rbinom(nc,size=1,prob=simparams$geno.err)) + snpout[i,bb]<-apply.errs(snpout[i,bb],simparams$hom2hom.err) + } + return(snpout) +} + +# Worker function to doGeneDrops to do n gene drops given the pedigree +# information. This function is called by the functions specific to each +# relationship of interest +doGeneDrops<-function(gdat,pedinfo,n,simparams) { + + # We simulate complete data on the pedigree and then impose (i) genotyping + # errors and (ii) missing + # data patterns of randomly sampled pairs of subjects. GeneDrops needs + # a LINKAGE pedfile as input. The pedigree information + # for a regular LINKAGE pedfile (and that's it) is in pedinfo. + # To complete the pedfile, we need to add marker information to this + # pedigree info. As GeneDrops to simulate complete data on all subjects, + # the marker data we pass to GeneDrops is not actually used. Therefore, + # an empty (all missings) snp.matrix object of the right dimension will + # work as marker info. + # The snp.matrix missing data code can be obtained by coercing 0 + # to the data-type "raw" with as.raw(). Use as.raw(0) to create the + # missing data code for snp.matrix objects. To create a snp.matrix object + # of missing values, the command is: + nr<-nrow(pedinfo); nc<-ncol(gdat$snp.data) + sdat<-new("snp.matrix",data=as.raw(0),nrow=nr, ncol=nc, + dimnames=list(as.character(1:nr),as.character(1:nc))) + + + # Create input pedfile + inped<-paste("in",gdat$chrom,".ped",sep="") + write.pedfile(pedinfo,sdat,inped, sep=" ", eol="\n", na="0") + # Do gene drops and save in an output pedfile + outped<-paste("GDout",gdat$chrom,".ped",sep="") + GeneDrops(gdat$GDparfile,inped,n,complete.data=TRUE, outped) + # GeneDrops can fail but not report any error -- the output file may + # not exist. + if(!file.exists(outped)) { + warning("Gene drops failed for chromosome",gdat$chrom,"-- GeneDrops java program \n may have run out of memory and IBD coefficient clusters may not be valid.\n See the Note in the GeneDrops help file for instructions on how to increase java heap space.") + return(NULL) + } + snpout=suppressWarnings(read.pedfile(file=outped,snp.names=colnames(gdat$snp.data))$snp.data) + unlink(inped) + unlink(outped) + row.names(snpout)<-as.character(1:nrow(snpout)) + # Keep just the two subjects of interest, having ID's 1 or 2 (IDs are stored + # in col 2 of pedinfo). Annick want's all ID=1's first, then ID=2's + snpout<-rbind(snpout[pedinfo[,2]==1,],snpout[pedinfo[,2]==2,]) + #pairs<-rep(1:2,n) + #snpout<-rbind(snpout[pairs==1,],snpout[pairs==2,]) + + # Apply genotyping errors + for(i in 1:nrow(snpout)) { + # Sample genotypes to have errors with rate gerr + bb<-as.logical(rbinom(nc,size=1,prob=simparams$geno.err)) + snpout[i,bb]<-apply.errs(snpout[i,bb],simparams$hom2hom.err) + } + + # Select missing data patterns from study subjects and impose these on the + # simulated data. + # First draw a random sample of n pairs from the study data. + s.inds <- sample(1:nrow(gdat$snp.data),2*n,replace=TRUE) + # Now loop over simulated data and impose missingness patterns from sampled + # pairs + for(i in 1:(2*n)) { + # Need to test for the missing value in a snp.matrix object. + miss.ind<-gdat$snp.data[s.inds[i],] == as.raw(0) + snpout[i,miss.ind]<-as.raw(0) + } + + return(snpout) +} + +# Functions to set up pedigree information part of pedfiles +po.peddat<-function() { # Parent-offspring + pedsize <- 3 + pedids <- rep(1,pedsize) + ids <- (1:pedsize) + dadids <- c(0,3,0) + momids <- c(0,1,0) + zeros <- rep(0,pedsize) + genders <- c(2,2,1) + ones <- rep(1,pedsize) #9th column(1 for the starting ind of the pedigree) + peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) + return(peddat) +} + +fs.peddat=function(){ # Full sibs + pedsize <- 4 + pedids <- rep(1,pedsize) + ids <- (1:pedsize) + dadids <- c(3,3,0,0) + momids <- c(4,4,0,0) + zeros <- rep(0,pedsize) + genders <- c(2,2,1,2) + ones <- rep(1,pedsize) + peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) + return(peddat) +} + +hs.peddat=function(){ # Half sibs + pedsize <- 5 + pedids <- rep(1,pedsize) + ids <- (1:pedsize) + dadids <- c(3,5,0,0,0) + momids <- c(4,4,0,0,0) + zeros <- rep(0,pedsize) + genders <- c(2,2,1,2,1) + ones <- rep(1,pedsize) + peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) + return(peddat) +} + +ur.peddat=function(){ # unrelated + pedsize <- 2 + pedids <- rep(1,pedsize) + ids <- (1:pedsize) + dadids <- c(0,0) + momids <- c(0,0) + zeros <- rep(0,pedsize) + genders <- c(2,2) + ones <- rep(1,pedsize) + peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) + return(peddat) +} + +co.peddat=function(){ # cousins + pedsize <- 8 + pedids <- rep(1,pedsize) + ids <- (1:pedsize) + dadids <- c(3,6,0,8,8,0,0,0) + momids <- c(4,5,0,7,7,0,0,0) + zeros <- rep(0,pedsize) + genders <- c(2,2,1,2,2,1,2,1) + ones <- rep(1,pedsize) + peddat <- cbind(pedids,ids,dadids,momids,zeros,zeros,zeros,genders,ones) + return(peddat) +} + +user.peddat=function(userdat) { # user-defined + if(is.null(userdat)) { + return(NULL) + } else { + return(cbind(1,userdat$ids,userdat$dadids,userdat$momids,0,0,0, + userdat$gender,1)) + } +} + +apply.errs<-function(x,hom2hom.err) { + # x is a vector of raw's. + x.orig<-x + # homozygotes called heterozygous + x[x.orig==as.raw(1)]<-hom1.err(sum(x.orig==1),hom2hom.err) + x[x.orig==as.raw(2)]<-het.err(sum(x.orig==2)) + x[x.orig==as.raw(3)]<-hom3.err(sum(x.orig==3),hom2hom.err) + return(x) +} + + +hom1.err<-function(n,hom2hom.err) { + # homoz 01 have prob 1-hom2hom.err of being 02 and hom2hom.err of being 03 + return(as.raw(2+rbinom(n,size=1,prob=(1-hom2hom.err)))) +} + +het.err<-function(n) { + # heterozygotes equally likely to be one of two homozygotes + return(as.raw(1+2*rbinom(n,size=1,prob=1/2))) +} + +hom3.err<-function(n,hom2hom.err) { + # homoz 03 have prob hom2hom.err of being 01 and 1-hom2hom.err of being 02 + return(as.raw(1+rbinom(n,size=1,prob=hom2hom.err))) +} + + +findLDfiles<-function(gdat) { + return(unlist(lapply(gdat,getLDfile))) +} + +getLDfile<-function(gdat) { + return(gdat$GDparfile) +} + + diff --git a/build/vignette.rds b/build/vignette.rds index 676e346..43408b1 100644 Binary files a/build/vignette.rds and b/build/vignette.rds differ diff --git a/data/datalist b/data/datalist new file mode 100644 index 0000000..3856aa1 --- /dev/null +++ b/data/datalist @@ -0,0 +1,2 @@ +Nhlsim: Nhlsim +RutgersMapB36: RutgersMapB36 diff --git a/inst/doc/CrypticIBDcheck.Rnw b/inst/doc/CrypticIBDcheck.Rnw index 2f56818..827a24c 100755 --- a/inst/doc/CrypticIBDcheck.Rnw +++ b/inst/doc/CrypticIBDcheck.Rnw @@ -10,4 +10,10 @@ Medicine, and available at \\ \bigskip \url{http://www.scfbm.org/content/8/1/5} + +\bigskip +NB: As of version 0.3-1, {\tt snow} has been replaced by {\tt parallel} +for splitting computations across a cluster. {\tt parallel} +includes all functions from {\tt snow} that are used by +{\tt CrypticIBDcheck}. \end{document} diff --git a/inst/doc/CrypticIBDcheck.pdf b/inst/doc/CrypticIBDcheck.pdf index 67f3a58..d9650f8 100644 Binary files a/inst/doc/CrypticIBDcheck.pdf and b/inst/doc/CrypticIBDcheck.pdf differ diff --git a/inst/doc/IBDcheck-hapmap.pdf b/inst/doc/IBDcheck-hapmap.pdf index e24c2a1..8f27da2 100644 Binary files a/inst/doc/IBDcheck-hapmap.pdf and b/inst/doc/IBDcheck-hapmap.pdf differ diff --git a/man/IBDcheck.Rd b/man/IBDcheck.Rd index 4d87991..95cb247 100644 --- a/man/IBDcheck.Rd +++ b/man/IBDcheck.Rd @@ -81,17 +81,12 @@ components of this class. } \references{ Leung H-T (2012). chopsticks: The snp.matrix and X.snp.matrix -classes. R package version 1.20.0. URL -\url{http://outmodedbonsai.sourceforge.net/} +classes. R package version 1.20.0. Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MA, Bender D, Maller J, Sklar P, de Bakker PI, Daly MJ, Sham PC. PLINK: a tool set for whole-genome association and population-based linkage analyses. Am J Hum Genet. 2007 Sep;81(3):559-75. - -Tierney L, Rossini AJ, Li N, Sevcikova H (2011). -snow: Simple Network of Workstations. R package version 0.3-8, URL -\url{http://CRAN.R-project.org/package=snow}. } \author{ Annick Joelle Nembot-Simo, Jinko Graham and Brad McNeney @@ -104,15 +99,14 @@ be computationally demanding for data sets with more than about 1000 SNPs. In Appendix B of the vignette \code{CrypticIBDcheck} we describe strategies for making computations feasible by -use of a \pkg{snow} cluster (Tierney \emph{et al.}, 2011). +use of a \pkg{parallel} cluster (Tierney \emph{et al.}, 2011). Users may also need to increase the amount of java heap space for some computations. The computation for fitting the LD model is done in java, using functions from Alun Thomas' suite of Java Programs for Statistical Genetics and -Computational Statistics (JPSGCS), available at the website -\url{http://balance.med.utah.edu/wiki/index.php/JPSGCS}. +Computational Statistics (JPSGCS). The JPSGCS java programs are accessed by R-wrappers provided by the \pkg{rJPSGCS} R package. When \pkg{rJPSGCS} is loaded (automatically by loading \pkg{CrypticIBDcheck}), it @@ -221,7 +215,7 @@ ff<-filter.control(filter=FALSE) # No need to re-filter the SNP data in cibd8 cibd9<-IBDcheck(cibd8,simparams=ss,filterparams=ff) # Example 10) Distribute fitting of LD models and gene drop simulations for -# chromosomes 20, 21 and 22 across a snow cluster running on a local computer. +# chromosomes 20, 21 and 22 across a cluster running on a local computer. # See the package vignette, vignette("CrypticIBDcheck") for an example of # running IBDcheck in batch mode on a compute cluster. Simulate pairs of # subjects from each of the default relationships: unrelated, MZ twins/duplicate, @@ -229,7 +223,7 @@ cibd9<-IBDcheck(cibd8,simparams=ss,filterparams=ff) cind<-(Nhlsim$chromosome == 20 | Nhlsim$chromosome == 21 | Nhlsim$chromosome == 22) dat<-new.IBD(Nhlsim$snp.data[,cind],Nhlsim$chromosome[cind], Nhlsim$physmap[cind],popsam) -library(snow) +library(parallel) cl<-makeCluster(3,type="SOCK") clusterEvalQ(cl,library("CrypticIBDcheck")) ss<-sim.control(simulate=TRUE,fitLD=TRUE,cl=cl) diff --git a/man/RutgersMapB36.Rd b/man/RutgersMapB36.Rd index 8130cd3..96596df 100644 --- a/man/RutgersMapB36.Rd +++ b/man/RutgersMapB36.Rd @@ -9,7 +9,7 @@ physical and genetic map positions for a collection of markers on chromosome 1 t This dataset is list comprised of data frames corresponding to chromosomes 1 to 22. Each data frame has information on physical and genetic map positions for a collection of markers on the corresponding chromosome. } -\usage{data(RutgersMapB36)} +\usage{data("RutgersMapB36")} \format{ The format of the dataset is a list of 22 elements. Each element is a data frame corresponding to a chromosome with nine columns: \tabular{rlll}{ @@ -26,7 +26,7 @@ The format of the dataset is a list of 22 elements. Each element is a data frame } \source{ The data were obtained from the Rutgers map (see -\url{http://compgen.rutgers.edu/RutgersMap/DownloadMap.aspx}) +\url{http://compgen.rutgers.edu/download_maps.shtml}) } \references{ A second-generation combined linkage physical map of the human genome. @@ -35,6 +35,6 @@ Kong X, Murray SS, Ziegle JS, Stewart WC, Buyske S. Genome Res. 2007 Dec;17(12):1783-6. Epub 2007 Nov 7. } \examples{ -data(RutgersMapB36) +data("RutgersMapB36") } \keyword{datasets} diff --git a/man/SNPgenmap.Rd b/man/SNPgenmap.Rd index ec3a5bd..5e5a4af 100755 --- a/man/SNPgenmap.Rd +++ b/man/SNPgenmap.Rd @@ -58,6 +58,7 @@ gmap <- SNPgenmap(Nhlsim$physmap,Nhlsim$chromosome) # interpolation of genetic map positions on chromosome 1. # NB: Interpolant is not necessarily monotone increasing, which can lead to a # genetic map on which markers are re-ordered relative to the physical map. +data("RutgersMapB36") chrmap<-splinefun(RutgersMapB36[["chr1"]]$Build36_map_physical_position, RutgersMapB36[["chr1"]]$Sex.averaged_map_position) c1ind<-(Nhlsim$chromosome=="chr1") diff --git a/man/sim.control.Rd b/man/sim.control.Rd index a578e05..bcf3198 100755 --- a/man/sim.control.Rd +++ b/man/sim.control.Rd @@ -150,11 +150,11 @@ ff<-filter.control(filter=FALSE) # No need to re-filter the SNP data in cibd3 cibd4<-IBDcheck(cibd3,simparams=ss,filterparams=ff) # Distribute fitting of LD models and gene drop simulations for each -# chromosome across a snow cluster running on a local computer. See the +# chromosome across a cluster running on a local computer. See the # package vignette, vignette("CrypticIBDcheck") for an example of running # IBDcheck in batch mode on a compute cluster. Save the updated IBD object # in cibd5. -library(snow) +library(parallel) cl<-makeCluster(3,type="SOCK") clusterEvalQ(cl,library("CrypticIBDcheck")) ss<-sim.control(simulate=TRUE,cl=cl) # Leave all other sim params at defaults diff --git a/src/countIBS.c b/src/countIBS.c index fff6c4d..7bd517d 100644 --- a/src/countIBS.c +++ b/src/countIBS.c @@ -1,7 +1,7 @@ #include #include -void countIBS(char *x, int *rows, int *ncol, int *m0, int *m1, int *m2) { +void count_IBS(char *x, int *rows, int *ncol, int *m0, int *m1, int *m2) { int nrow = *rows; int nsnp = *ncol; int i, j, i1, j1, i2, k, d; diff --git a/src/init.c b/src/init.c new file mode 100644 index 0000000..04d3d04 --- /dev/null +++ b/src/init.c @@ -0,0 +1,32 @@ +#include +#include +#include // for NULL +#include + +/* FIXME: + Check these declarations against the C/Fortran source code. +*/ + +/* .C calls */ +extern void count_IBS(void *, void *, void *, void *, void *, void *); + +/* .Call calls */ +extern SEXP IBDest_sim(SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP IBDest_study(SEXP, SEXP, SEXP, SEXP, SEXP); + +static const R_CMethodDef CEntries[] = { + {"count_IBS", (DL_FUNC) &count_IBS, 6}, + {NULL, NULL, 0} +}; + +static const R_CallMethodDef CallEntries[] = { + {"IBDest_sim", (DL_FUNC) &IBDest_sim, 5}, + {"IBDest_study", (DL_FUNC) &IBDest_study, 5}, + {NULL, NULL, 0} +}; + +void R_init_CrypticIBDcheck(DllInfo *dll) +{ + R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/vignettes/CrypticIBDcheck.Rnw b/vignettes/CrypticIBDcheck.Rnw index 2f56818..827a24c 100755 --- a/vignettes/CrypticIBDcheck.Rnw +++ b/vignettes/CrypticIBDcheck.Rnw @@ -10,4 +10,10 @@ Medicine, and available at \\ \bigskip \url{http://www.scfbm.org/content/8/1/5} + +\bigskip +NB: As of version 0.3-1, {\tt snow} has been replaced by {\tt parallel} +for splitting computations across a cluster. {\tt parallel} +includes all functions from {\tt snow} that are used by +{\tt CrypticIBDcheck}. \end{document}