Skip to content

Commit

Permalink
fixed big.phase0
Browse files Browse the repository at this point in the history
  • Loading branch information
byandell committed Oct 12, 2011
1 parent ceac8fe commit 7578ad7
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 58 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: qtlhot
Version: 0.2.0
Date: 2011-07-26
Version: 0.3.1
Date: 2011-10-12
Author: Elias Chaibub Neto <echaibub@hotmail.com> and Brian S Yandell <byandell@wisc.edu>
Title: Inference for QTL Hotspots
Description: Functions to Determine significance of co-mapping trait hotspots
Expand Down
144 changes: 107 additions & 37 deletions R/big.R
@@ -1,8 +1,69 @@
big.phase0 <- function(dirpath, cross, trait.file, trait.matrix,
droptrait.names = NULL,
keeptrait.names = NULL,
lod.thrs,
sex = "Sex", trait.index, batch.effect = NULL, size.set = 250,
verbose = TRUE)
{
## Set up cross object and trait.data objects.

## Cross object with sex, trait.index and batch.effect as only phenotypes.
traits <- match(c(sex,trait.index, batch.effect), names(cross$pheno), nomatch = 0)
cross$pheno <- cross$pheno[, traits, drop = FALSE]

## Subset to individuals with batch if used.
if(!is.null(batch.effect))
cross <- subset(cross, ind = !is.na(cross$pheno[[batch.effect]]))

## Save cross object and other pertinent information.
if(verbose)
cat("Saving cross object in cross.RData\n")
save(cross, lod.thrs, file = "cross.RData", compress = TRUE)

## Get trait values for selected traits.
attach(file.path(dirpath, trait.file))
trait.names <- dimnames(get(trait.matrix))[[2]]
all.traits <- seq(trait.names)
if(!is.null(droptrait.names))
all.traits[match(droptrait.names, trait.names, nomatch = 0)] <- 0
if(!is.null(keeptrait.names))
all.traits[-match(keeptrait.names, trait.names, nomatch = 0)] <- 0
all.traits <- all.traits[all.traits > 0]
n.traits <- length(all.traits)

num.sets <- ceiling(n.traits / size.set)
trait.nums <- seq(size.set)

## Save trait names, including those dropped, and size of sets.
tmp <- paste("Trait", 0, "RData", sep = ".")
if(verbose)
cat("Saving trait metadata in", tmp, "\n")
save(trait.file, trait.matrix, droptrait.names, size.set,
trait.names, all.traits, num.sets,
file = tmp, compress = TRUE)

## Cycle through all the phenotypes in sets of size size.set. Keeps R object smaller.
for(pheno.set in seq(num.sets)) {
traits <- all.traits[trait.nums[trait.nums <= n.traits]]

trait.data <- get(trait.matrix)[, trait.names[traits], drop = FALSE]

tmp <- paste("Trait", pheno.set, "RData", sep = ".")
if(verbose)
cat("Saving", length(traits), "trait data as set", pheno.set, "in", tmp, "\n")
save(trait.data, pheno.set, file = tmp, compress = TRUE)

## Get next size.set traits.
trait.nums <- trait.nums + size.set
}
detach()
}
#############################################################################################
big.phase1 <- function(dirpath = ".", cross.index = 0, params.file,
nruns, cross, lod.thrs, Nmax = 2000, n.perm = 1,
trait.file, trait.matrix, trait.index, droptrait.names, batch.effect = NULL,
cross, lod.thrs, Nmax = 2000, n.perm = 1, n.split = 1,
batch.effect = NULL,
drop.lod = 1.5, lod.min = min(lod.thrs),
size.set = 250, window = 5, seed = 0, ...,
window = 5, seed = 0, big = TRUE, ...,
verbose = FALSE)
{
## Want this to depend on parallel.phase1 as much as possible.
Expand All @@ -13,23 +74,17 @@ big.phase1 <- function(dirpath = ".", cross.index = 0, params.file,

## Get any parameters in file. These will overwrite passed arguments.
eval(parse(file.path(dirpath, params.file)))

## Calculate genotype probabilities.
cross <- calc.genoprob(cross, step=0.5, err = 0.002,
map.function = "c-f", stepwidth = "max")

## Subset to individuals with batch if used.
if(!is.null(batch.effect))
cross <- subset(cross, ind = !is.na(cross$pheno[[batch.effect]]))

## Get trait values for selected traits.

attach(file.path(dirpath, trait.file))
trait.names <- dimnames(get(trait.matrix))[[2]]
detach()
all.traits <- seq(trait.names)[-match(droptrait.names, trait.names)]

cross.index <- as.numeric(cross.index)
## Load trait.
load(file.path(dirpath, "Trait.0.RData"))

## Random numbers.
if(n.perm > 1) {
Expand All @@ -40,94 +95,109 @@ big.phase1 <- function(dirpath = ".", cross.index = 0, params.file,
seeds <- sample(c(98765:987654321), n.perm, replace = FALSE)
}
else
seeds <- rep(0, n.perm)
seeds <- 0

## Big assumes n.split is n.perm and does one perm each run.
n.split <- n.perm

save(cross, n.perm, seeds, Nmax, drop.lod, lod.thrs, lod.min, batch.effect,
control.probes, cross, trait.file, trait.matrix, trait.index,
trait.names, all.traits, size.set, window, cross.index,
trait.index, trait.names, all.traits, size.set, ## From Trait.0.RData
window, cross.index, n.split, big,
file = "Phase1.RData", compress = TRUE)
invisible()
}
#############################################################################################
big.phase2 <- function(dirpath = ".", index, ..., verbose = FALSE)
big.phase2 <- function(dirpath = ".", index, ..., remove.files = TRUE, verbose = FALSE)
{
## Phase 2.
## Loop on sets of phenotypes, adding only what is needed.
## Loop on permutations internal to scanone.permutations.

load("Phase1.RData")

covars <- sexbatch.covar(cross, batch.effect)
load(file.path(dirpath, "Phase1.RData"))

if(verbose)
cat("compute covariates\n")
covars <- sexbatch.covar(cross, batch.effect, verbose = TRUE)

n.ind <- nind(cross)
n.phe <- nphe(cross)
n.traits <- length(all.traits)
num.sets <- ceiling(n.traits / size.set)
trait.nums <- seq(size.set)

if(n.perm > 1) {
## Random permutation. Use preset seed if provided.
seed <- seeds[index]
if(seed > 0)
set.seed(seed[1])
if(verbose)
cat("sample permutation", seed[1], "\n")
perms <- sample(seq(n.ind), n.ind, replace = FALSE)
cross$pheno <- cross$pheno[perms,]
}

## Cycle through all the phenotypes in sets of size size.set. Keeps R object smaller.
for(pheno.set in seq(num.sets)) {
cat(pheno.set, "\n")
traits <- all.traits[trait.nums[trait.nums <= n.traits]]

attach(file.path(dirpath, trait.file))
perm.cross <- add.phenos(cross, get(trait.matrix)[, trait.names[traits], drop = FALSE],
index = trait.index)
## Assume large trait matrix has been broken into Trait.i.RData, each with trait.data.

filenames <- list.files(dirpath, "Trait.[1-9][0-9]*.RData")
if(!length(filenames))
parallel.error(5, 5, index)

for(pheno.set in seq(length(filenames))) {
if(verbose)
cat(pheno.set, "\n")

## This is not working correctly. Either Trait.n.RData are all the same or ??.
attach(file.path(dirpath, filenames[pheno.set]))
perm.cross <- add.phenos(cross, trait.data, index = trait.index)
pheno.col <- find.pheno(perm.cross, dimnames(trait.data)[[2]])
detach()

pheno.col <- find.pheno(perm.cross, trait.names[traits])
per.scan <- scanone(perm.cross, pheno.col=pheno.col, method="hk",
addcovar=covars$addcovar, intcovar=covars$intcovar)

per.scan.hl <- pull.highlods(per.scan, lod = lod.min, drop.lod = drop.lod)

save(per.scan.hl, perms,
file=file.path(dirpath, paste("per.scan",pheno.set, index,"RData",sep=".")))

trait.nums <- trait.nums + size.set
}

chr.pos <- per.scan[,1:2]

filenames <- list.files(dirpath, paste("per.scan", "[0-9][0-9]*", index, "RData",
sep = "."))

scan.hl <- cat.scanone(".", filenames, chr.pos)
scan.hl <- cat.scanone(dirpath, filenames, chr.pos)

if(remove.files)
file.remove(dirpath, filenames)

## Has some problem with missing values for (any(diff(pos) < 0)) in runningmean.
## problem is in calc of maxlod.hl[[k]]: NA should be 0
lod.sums <- lod.quantile.permutation(scan.hl,Nmax,lod.thrs,window,chr.pos,n.traits)

save(scan.hl, lod.sums, perm.set,
save(scan.hl, lod.sums, cross.index, index,
file = paste("perm", ".", cross.index, "_", index, ".RData", sep = ""))
}
#############################################################################################
big.phase3 <- function(dirpath = ".", index, ..., verbose = FALSE)
big.phase3 <- function(dirpath = ".", index, cross.index, ..., verbose = FALSE)
{
## Phase 3. Merge back together.

load("Phase1.RData")
load(file.path(dirpath, "Phase1.RData"))

n.thrs <- length(lod.thrs)

max.lod.quant <- matrix(NA, n.perm, Nmax)
max.N <- max.N.window <- matrix(NA, n.perm, n.thrs)

for(i.perm in seq(n.perm)) {
attach(file.path(dirpath, "perm", ".", cross.index, "_", i.perm, ".RData", sep = ""))
attach(file.path(dirpath, paste("perm", ".", cross.index, "_", i.perm, ".RData", sep = "")))
n.quant <- length(lod.sums$max.lod.quant)
max.lod.quant[i.perm, seq(n.quant)] <- lod.sums$max.lod.quant
max.N[i.perm,] <- lod.sums$max.N$max.N
max.N.window[i.perm,] <- lod.sums$max.N$max.N.win
detach()
}
save(max.lod.quant, max.N, max.N.window, file = "Phase3.RData", compress = TRUE)
outfile <- paste("Phase3", ifelse(cross.index > 0, cross.index, ""), ".RData", sep = "")

save(max.lod.quant, max.N, max.N.window, file = outfile, compress = TRUE)
}
11 changes: 9 additions & 2 deletions R/highlod.R
Expand Up @@ -28,14 +28,20 @@ pull.highlods <- function(scans, pheno.col, lod=4.5, drop.lod = 1.5)
cbind.data.frame(row = rr, phenos = pheno.col[cc], lod = lod)
}

sexbatch.covar <- function(cross, batch.effect)
sexbatch.covar <- function(cross, batch.effect, verbose = FALSE)
{
ic <- getsex(cross)$sex

if(!is.null(batch.effect)){
batch <- cross$pheno[,batch.effect, drop = FALSE]
tmp <- formula(paste("~ factor(", batch.effect, ")"))
if(verbose)
cat("sexbatch.covar", names(tmp), levels(factor(batch[[1]])), "\n")
if(verbose)
cat("sexbatch.covar", dim(batch), "\n")
batch <- model.matrix(tmp,batch)[,-1, drop = FALSE]
if(verbose)
cat("sexbatch.covar", dim(batch), "\n")
ac <- cbind(batch,ic)
}
else
Expand Down Expand Up @@ -131,7 +137,8 @@ lod.quantile.permutation <- function(cat.scan.hl,N,lod.thr,window,chr.pos,n.phe)
for(j in 1:l.lod.thr){
XX <- cat.scan.hl$lod >= lod.thr[j]
max.N$max.N[j] <- max(tapply(XX, cat.scan.hl$row,sum,na.rm=T),na.rm=T)


## Is this working properly?
neqtl.pos <- smooth.neqtl(cat.scan.hl, chr.pos, max.hl, lod.thr[j], window)

max.N$max.N.win[j] <- max(neqtl.pos[,3])
Expand Down
39 changes: 22 additions & 17 deletions R/parallel.R
Expand Up @@ -91,7 +91,6 @@ qtlhot.phase1 <- function(dirpath, index = 0,
n.phe = nphe(cross), ## Number of traits.
nruns = 1,
big = FALSE,
droptrait.names = character(0),
...)
{
## PHASE 1: Set up cross object. Needed in phases 2.
Expand All @@ -107,27 +106,30 @@ qtlhot.phase1 <- function(dirpath, index = 0,
## Get any parameters in file. These will overwrite passed arguments.
eval(parse(file.path(dirpath, params.file)))

n.perm <- ceiling(n.perm / n.split)

## cross.index is used when multiple phase1 jobs are spawned.
cross.index <- as.numeric(index)

## Cross object. Load if not done already.
if(!exists(cross.name))
load(file.path(dirpath, cross.file))

## Change name of cross object to "cross" for internal use.
if(cross.name != "cross")
cross <- get(cross.name)

## cross.index is used when multiple phase1 jobs are spawned.
cross.index <- as.numeric(index)

if(big) {
## Used for big data run.
big.phase1(dirpath, cross.index, params.file, nruns, cross,
lod.thrs, Nmax, n.perm, droptrait.names = droptrait.names, ...)
## Each run is separate permutation.
## Make sure n.split is equal to n.perm. n.perm set to 1 in big.phase1.
n.split <- n.perm
big.phase1(dirpath, cross.index, params.file, cross, lod.thrs, Nmax, n.perm,
n.split, ...)
}
else {
## Used for studying properties of qtlhot.

n.perm <- ceiling(n.perm / n.split)

if(cross.index > 0 & nruns > 1) {
## Keep markers but re-simulate genotypes each time.
mymap <- pull.map(cross)
Expand Down Expand Up @@ -156,7 +158,7 @@ qtlhot.phase1 <- function(dirpath, index = 0,
row.names = FALSE, col.names = FALSE, quote = FALSE)
}
####################################################################################
qtlhot.phase2 <- function(dirpath, index = NULL, ..., verbose = FALSE)
qtlhot.phase2 <- function(dirpath, index = NULL, ..., big = FALSE, verbose = FALSE)
{
## PHASE 2: NL,N and WW permutations.
## Slow. Run on condor nodes. Sized by n.perm.
Expand All @@ -169,6 +171,12 @@ qtlhot.phase2 <- function(dirpath, index = NULL, ..., verbose = FALSE)
## permi.RData
##

cross.index <- scan("groups.txt", 0)[1]

## Load Phase 1 computations.
infile <- "Phase1.RData"
load(file.path(dirpath, infile))

## Quality check of index.
if(missing(index))
parallel.error(2, 2, index)
Expand All @@ -181,12 +189,6 @@ qtlhot.phase2 <- function(dirpath, index = NULL, ..., verbose = FALSE)
if(big)
return(big.phase2(dirpath, index))

cross.index <- scan("groups.txt", 0)[1]

## Load Phase 1 computations.
infile <- "Phase1.RData"
load(file.path(dirpath, infile))

outfile <- paste("perm", ".", cross.index, "_", index, ".RData", sep = "")

## Following is in NL.N.perm stuff.
Expand Down Expand Up @@ -214,7 +216,7 @@ qtlhot.phase2 <- function(dirpath, index = NULL, ..., verbose = FALSE)
}
####################################################################################
qtlhot.phase3 <- function(dirpath, index = NULL, ...,
dirpath.save = dirpath, verbose = FALSE)
dirpath.save = dirpath, big = FALSE, verbose = FALSE)
{
## PHASE 3: Sample Markov chain (MCMC). Parallelize.
## Fast: Run on scheduler.
Expand All @@ -234,6 +236,9 @@ qtlhot.phase3 <- function(dirpath, index = NULL, ...,
## Load Phase 1 computations.
load(file.path(dirpath, "Phase1.RData"))

if(big)
return(big.phase3(dirpath, index, cross.index))

## This could be done once, but it would require splitting this phase in two.
## Besides, it is quite fast.
## Read in saved BIC scores and combine into one object.
Expand Down

0 comments on commit 7578ad7

Please sign in to comment.