Permalink
Browse files

fixing man pages for check 2.15

  • Loading branch information...
1 parent 26e50ec commit 86bdfc05fa8e5750e18637e0810143716949cc71 @byandell committed May 18, 2012
View
@@ -1,11 +1,11 @@
Package: qtlhot
-Version: 0.4.4
-Date: 2012-05-16
+Version: 0.4.5
+Date: 2012-05-17
Author: Elias Chaibub Neto <echaibub@hotmail.com> and Brian S Yandell <byandell@wisc.edu>
Title: Inference for QTL Hotspots
Description: Functions to Determine significance of co-mapping trait hotspots
Maintainer: Brian S. Yandell <yandell@stat.wisc.edu>
-Depends: qtl
+Depends: qtl,lattice
LazyLoad: yes
LazyData: yes
License: GPL (>= 2)
View
25 R/big.R
@@ -89,6 +89,9 @@ big.phase1 <- function(dirpath = ".", cross.index = 0, params.file,
batch.effect = NULL,
drop.lod = 1.5, lod.min = min(lod.thrs),
window = 5, seed = 0, big = TRUE, ...,
+ ## Following are loaded with Trait0.RData created in big.phase0.
+ trait.index, trait.names, all.traits, size.set, seeds, lod.sums,
+ ##
verbose = FALSE)
{
## Want this to depend on parallel.phase1 as much as possible.
@@ -101,7 +104,7 @@ big.phase1 <- function(dirpath = ".", cross.index = 0, params.file,
eval(parse(file.path(dirpath, params.file)))
## Calculate genotype probabilities.
- cross <- calc.genoprob(cross, step=0.5, err = 0.002,
+ cross <- calc.genoprob(cross, step=0.5, error.prob = 0.002,
map.function = "c-f", stepwidth = "max")
## Subset to individuals with batch if used.
@@ -132,7 +135,12 @@ big.phase1 <- function(dirpath = ".", cross.index = 0, params.file,
invisible()
}
#############################################################################################
-big.phase2 <- function(dirpath = ".", index, ..., remove.files = TRUE, verbose = FALSE)
+big.phase2 <- function(dirpath = ".", index, ...,
+ ## Following are loaded with Phase1.RData created in big.phase1.
+ batch.effect, all.traits, trait.index, lod.min,
+ drop.lod, Nmax, lod.thrs, cross.index, seeds,
+ ##
+ remove.files = TRUE, verbose = FALSE)
{
## Phase 2.
## Loop on sets of phenotypes, adding only what is needed.
@@ -169,7 +177,10 @@ big.phase2 <- function(dirpath = ".", index, ..., remove.files = TRUE, verbose =
}
}
do.big.phase2 <- function(dirpath, cross, covars, perms, index, trait.index,
- lod.min, drop.lod, remove.files, Nmax, lod.thrs, window, n.traits, cross.index, verbose)
+ lod.min, drop.lod, remove.files, Nmax, lod.thrs, window, n.traits, cross.index,
+ verbose,
+ ## Following supplied by Trait.*.RData created in big.phase0.
+ trait.data, offset, pheno.set)
{
## Cycle through all the phenotypes in sets of size size.set. Keeps R object smaller.
## Assume large trait matrix has been broken into Trait.i.RData, each with trait.data.
@@ -182,7 +193,8 @@ do.big.phase2 <- function(dirpath, cross, covars, perms, index, trait.index,
if(verbose)
cat(pheno.index, "\n")
- ## This is not working correctly. Either Trait.n.RData are all the same or ??.
+ if(exists("trait.data"))
+ rm("trait.data")
attach(file.path(dirpath, filenames[pheno.index]))
perm.cross <- add.phenos(cross, trait.data, index = trait.index)
pheno.col <- find.pheno(perm.cross, dimnames(trait.data)[[2]])
@@ -216,6 +228,9 @@ do.big.phase2 <- function(dirpath, cross, covars, perms, index, trait.index,
}
#############################################################################################
big.phase3 <- function(dirpath = ".", index, cross.index, ...,
+ ## Following are loaded with Phase1.RData created in big.phase1.
+ n.perm, lod.thrs, Nmax, lod.sums,
+ ##
outfile = phase3name, verbose = FALSE)
{
## Phase 3. Merge back together.
@@ -244,6 +259,8 @@ big.phase3 <- function(dirpath = ".", index, cross.index, ...,
for(i.perm in seq(n.file)) {
if(verbose)
cat(i.perm, "\n")
+ if(exists("lod.sums"))
+ rm("lod.sums")
attach(file.path(dirpath, filenames[i.perm]), warn.conflicts = FALSE)
n.quant <- length(lod.sums$max.lod.quant)
max.lod.quant[i.perm, seq(n.quant)] <- lod.sums$max.lod.quant
View
@@ -114,13 +114,15 @@ scanone.permutations <- function(cross, pheno.col = seq(3, nphe(cross)),
cat.scanone <- function(dirpath = ".", filenames = permfiles, chr.pos)
{
permfiles <- list.files(dirpath, paste("per.scan", "*", "RData", sep = "."))
+
+ ## Make and remove per.scan.hl. Below use version from attached files.
+ per.scan.hl <- NULL
+ rm(per.scan.hl)
- if(exists("per.scan.hl"))
- rm(per.scan.hl)
for(i in 1:length(filenames)){
attach(filenames[i], warn.conflicts = FALSE)
if(i==1) cat.scan.hl <- per.scan.hl else
- cat.scan.hl <- rbind.data.frame(cat.scan.hl,per.scan.hl)
+ cat.scan.hl <- rbind.data.frame(cat.scan.hl, per.scan.hl)
detach()
}
cbind.data.frame(chr.pos[cat.scan.hl$row,],cat.scan.hl)
View
@@ -180,7 +180,11 @@ qtlhot.phase1 <- function(dirpath, index = 0,
row.names = FALSE, col.names = FALSE, quote = FALSE)
}
####################################################################################
-qtlhot.phase2 <- function(dirpath, index = NULL, ..., big = FALSE, verbose = FALSE)
+qtlhot.phase2 <- function(dirpath, index = NULL, ...,
+ ## Following are loaded with Phase1.RData created in big.phase1.
+ n.split, cross, Nmax, n.perm, lod.thrs, n.phe, alpha.levels,
+ ##
+ big = FALSE, verbose = FALSE)
{
## PHASE 2: NL,N and WW permutations.
## Slow. Run on condor nodes. Sized by n.perm.
@@ -238,6 +242,10 @@ qtlhot.phase2 <- function(dirpath, index = NULL, ..., big = FALSE, verbose = FAL
}
####################################################################################
qtlhot.phase3 <- function(dirpath, index = NULL, ...,
+ ## Following are loaded with Phase1.RData created in big.phase1.
+ cross.index, n.split, n.perm, lod.thrs, Nmax, NLN, WW,
+ alpha.levels, cross, n.phe, latent.eff, res.var,
+ ##
dirpath.save = dirpath, big = FALSE, verbose = FALSE)
{
## PHASE 3: Sample Markov chain (MCMC). Parallelize.
@@ -264,7 +272,7 @@ qtlhot.phase3 <- function(dirpath, index = NULL, ...,
## This could be done once, but it would require splitting this phase in two.
## Besides, it is quite fast.
## Read in saved BIC scores and combine into one object.
- outfile <- paste("perm", ".", cross.index, "_", ".*RData", sep = "")
+ outfile <- paste("perm", ".", cross.index, "_", "*.RData", sep = "")
filenames <- list.files(dirpath, outfile)
if(!length(filenames))
parallel.error(5, 3, index)
View
@@ -22,7 +22,6 @@
pull.hotspots <- function(cross, scan.hl, chr.pos = NULL, lod.thr = 5, slide.thr = NULL, verbose = FALSE)
{
if(is.null(chr.pos)) {
- n.traits <- length(all.traits)
cross$pheno$trait <- rnorm(nind(cross))
chr.pos <- scanone(cross, pheno.col= find.pheno(cross, "trait"))[,1:2]
}
@@ -179,7 +178,7 @@ hotspot.scan <- function(cross, scan.hl, lod.thr, quant.level, window = 5, verbo
## Kludge to get chr and pos[
cross$pheno$trait <- rnorm(nind(cross))
if(is.null(cross$geno[[1]]$prob))
- cross <- calc.genoprob(cross, step=0.5, err = 0.002,
+ cross <- calc.genoprob(cross, step=0.5, error.prob = 0.002,
map.function = "c-f", stepwidth = "max")
chr.pos <- scanone(cross, pheno.col = find.pheno(cross, "trait"))[,1:2]
@@ -200,6 +199,7 @@ hotspot.scan <- function(cross, scan.hl, lod.thr, quant.level, window = 5, verbo
hot.scan
}
+#############################################################################################################
scan.hl.plot <- function(scan.hl)
{
for(i in levels(scan.hl$chr)) {
@@ -208,6 +208,7 @@ scan.hl.plot <- function(scan.hl)
}
invisible()
}
+#############################################################################################################
hotspot.plot <- function(hot.scan, quant.thr = NULL, maps = NULL, main = "")
{
for(i in levels(hot.scan$chr)) {
View
@@ -30,7 +30,9 @@
## the genome.
##
mySimulations <- function(...) sim.hotspot(...)
-get.hotspot <- function(filenames)
+get.hotspot <- function(filenames,
+ ## Following supplied in filenames[1].
+ Nmax, out.sim)
{
## filenames = list.files(".", paste(prefix, latent.eff, sets, "RData", sep = "."))
## latent.eff = 0, prefix = "Pilot", sets = "[0-9][0-9]*"
File renamed without changes.
File renamed without changes.
View
@@ -0,0 +1,20 @@
+Vignette qtlhot.pdf
+sim.null.cross
+ -> cross object
+include.hotspots
+ -> cross object
+set.to.zero.beyond.drop.int
+ -> matrix of scanone with zero below 1.5 LOD support intervals
+WW.perm
+ -> matrix of max count (permutation by LOD threshold)
+WW.summary
+ -> matrix of thresholds (LOD threshold by percentile)
+NL.N.perm
+ -> list
+ max.lod.quant matrix of max LOD (permutation by hotspot size)
+ max.N matrix of max count (permutation by LOD threshold)
+NL.N.summary
+ -> list
+ NL.thr matrix of thresholds (hotspot size by percentile)
+ N.thr matrix of thresholds (LOD threshold by percentile)
+sliding.bar.plot
View
@@ -0,0 +1,53 @@
+The code in this package can be run for SMALL datasets on a single machine. However, it is most effective
+to use many processors in parallel. The code in parallel.R and big.R is designed for this task. However,
+it must be combined with other materials and external engines such as Condor (www.cs.wisc.edu/condor).
+While we have running versions, it is not currently in "user friendly" format. This will be coming in
+later releases of R/qtlhot. Brian Yandell 17 may 2012
+
+===========================================================================================
+The runparallel.sh shell script is a mockup of what could be done on a parallel system. We use SOAR from CHTC,
+which automates use of Conder. See http://submit.chtc.wisc.edu/SOAR/SOAR.html .
+
+Phase0.R Initialization of dataset (done off line)
+Phase1.R Initialization of run. Use cross.RData and params.txt.
+ Create Phase1.RData and groups.txt. Run nruns times.
+Phase2.R Permutation runs. Parallelize. Use Phase1.RData and argument based on groups.txt
+ Create perm.R_N.RData files. Run nruns*n.split times with n.perm permutations each.
+Phase3.R Wrapup. Wait for all the Phase2 jobs to complete. Use Phase1.RData and perm*.RData.
+ Create Phase3.RData. Run nruns times.
+
+===========================================================================================
+There are actually two ways this is used, depending on the hidden parameter "big".
+===========================================================================================
+big = FALSE (use routines in parallel.R)
+ Used for studying properties of qtlhot. Can do double resampling: nruns * (n.split * n.perm)
+ each run R = 1:nruns is executed as a separate job
+ run = 1 uses cross object
+ run = 2:nruns uses map from cross object, resimulates genos, phenos
+
+ for each run, Phase2 starts index N = 1:n.split invocations
+ each invocation has n.perm random permutations of data
+ thus each run has n.split * n.perm permutated data sets
+ the n.split invocations are combined together in Phase3
+
+===========================================================================================
+big = TRUE (use routines in big.R)
+ Used for analysis of large datasets
+ run R = 1 (not recommended for nruns > 1)
+
+ index = 1 is original data
+ index = 2:n.perm are randomly permuted data
+
+ For convenience, n.perm = 1 and n.split = number of permutations.
+ That is, each permutation is invoked separately.
+ (code has n.perm=n.split for arcane reasons)
+
+ Data are split in big.phase0 into convenient number of traits (~250) to keep cross object small.
+ Each of the split data (Trait.*.RData) is run for the same permutation in big.phase2
+
+ Permutations are combined together in big.phase3 to produce one Phase31.RData.
+
+There is pre-processing using big.phase0(), and post-processing Phase31.RData for SOAR.
+These are currently specific to the SOAR/CHTC implementation.
+See SOAR.phase0.R and SOAR.post.R for scripts that currently depend on local files.
+
@@ -0,0 +1,80 @@
+## From ~/soar/attie_mouse
+## Phase 0.
+## Run-specific settings:
+## Must specify "tissue" and "runnum" (and "subset.sex" if used) before sourcing this file.
+
+library(B6BTBR07n)
+library(qtlhot)
+
+offset <- 0
+if(!exists("subset.sex")) subset.sex <- NULL
+
+Nmax <- 2000
+drop.lod <- 1.5
+size.set <- 250
+datadir <- "~/soar/diabetes48" ## Aimee's diabetes4 folder.
+
+cat("link needed data\n")
+## tissue.dir/control_probes.txt
+tissue.dir <- ifelse(datadir == ".", ".", file.path(datadir, "final.Rdata"))
+
+## lod.thr is in null.dir/quantiles.csv
+null.dir <- ifelse(datadir == ".", ".", file.path(datadir, "nullsims"))
+system(paste("ln -s", file.path(null.dir, "quantiles.csv"), "."))
+lod.thrs <- read.csv(file.path(null.dir, "quantiles.csv"), header = TRUE)
+tmp <- substring(lod.thrs[,1], 1, 2)
+if(is.null(subset.sex)) {
+ lod.thrs <- rev(sort(lod.thrs$full))
+} else {
+ lod.thrs <- rev(sort(lod.thrs[[tolower(subset.sex)]]))
+ if(tolower(subset.sex) == "full")
+ subset.sex <- NULL
+}
+names(lod.thrs) <- rev(tmp)
+
+## mlratio.file = tissue.dir/tissue_mlratio_final.RData
+## contains tissue.mlratio data.frame.
+trait.file <- paste(tissue, "mlratio", "final.RData", sep = "_")
+trait.mlratio <- file.path(tissue.dir, trait.file)
+trait.file <- "trait.RData"
+system(paste("ln -s", trait.mlratio, trait.file))
+trait.matrix <- paste(tissue, "mlratio", sep = ".")
+trait.index <- "MouseNum"
+
+## Control probes. Not needed any more.
+## droptrait.file <- "control_probes.txt"
+## system(paste("ln -s", file.path(tissue.dir, droptrait.file), "."))
+## droptrait.names <- read.table(file.path(tissue.dir, droptrait.file), header = FALSE)[,1]
+droptrait.names <- NULL
+## Get cross with Sex and batch.
+sex <- "Sex"
+trait.index <- "MouseNum"
+batch.effect <- paste(tissue, "batch", sep = ".")
+
+########################################################################################
+## Annotation
+cat("annotation\n")
+keeptrait.names <- B6BTBR07n.annotation$a_gene_id[B6BTBR07n.annotation$probe_use >= 0]
+cat("big phase0\n")
+big.phase0(".", B6BTBR07n, trait.file, trait.matrix, droptrait.names,
+ keeptrait.names,
+ lod.thrs,
+ sex = sex, trait.index = trait.index,
+ batch.effect = batch.effect, size.set = size.set,
+ offset = offset, subset.sex = subset.sex, verbose = TRUE)
+
+########################################################################################
+# Need to set runnum, tissue.
+cat("make local SOAR directories\n")
+run <- paste(tissue, runnum, sep = "")
+
+system(paste("mkdir", run))
+system(paste("mkdir ", run, "/shared", sep = ""))
+system("rm -f Trait.RData")
+system(paste("mv *.RData ", run, "/shared", sep = ""))
+system(paste("sed -e 's/TISSUE/", tissue, "/' < ../params.txt > ", run, "/params.txt", sep = ""))
+
+cat("submit SOAR job to simon.stat\n")
+## must be on simon.stat for these:
+system(paste("cp -r ", run, "~/soar/rundata/stage_qtlhot"))
+system(paste("cd ~/soar/rundata/stage_qtlhot; ./stage.pl --input", run))
Oops, something went wrong.

0 comments on commit 86bdfc0

Please sign in to comment.