-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 07e0335
Showing
14 changed files
with
610 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
Package: qch | ||
Title: Query Composed Hypotheses | ||
Version: 1.0.0 | ||
Authors@R: | ||
person(given = "Tristan", | ||
family = "Mary-Huard", | ||
role = c("aut", "cre"), | ||
email = "tristan.mary-huard@agroparistech.fr", | ||
comment = c(ORCID = "0000-0002-3839-9067")) | ||
Description: Provides functions for the joint analysis of K sets | ||
of p-values obtained for a same list of items. This joint analysis is | ||
performed by querying a composed hypothesis, i.e. an arbitrary complex | ||
combination of simple hypotheses, as described in Mary-Huard et al. | ||
(2021) <arXiv:2104.14601>. The null distribution corresponding to the | ||
composed hypothesis of interest is obtained by fitting non-parametric | ||
mixtures models (one for each of the simple hypothesis of the complex | ||
combination). Type I error rate control is achieved through Bayesian | ||
False Discovery Rate control. The 3 main functions of the package | ||
GetHinfo(), qch.fit() and qch.test() correspond to the 3 steps for | ||
querying a composed hypothesis (composed H0/H1 formulation, inferring | ||
the null distribution and testing the null hypothesis). | ||
License: GPL-3 | ||
Depends: R (>= 2.10) | ||
Imports: graphics, ks, mclust, stats | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.1.1 | ||
NeedsCompilation: no | ||
Packaged: 2021-05-06 13:12:40 UTC; dsi | ||
Author: Tristan Mary-Huard [aut, cre] (<https://orcid.org/0000-0002-3839-9067>) | ||
Maintainer: Tristan Mary-Huard <tristan.mary-huard@agroparistech.fr> | ||
Repository: CRAN | ||
Date/Publication: 2021-05-07 12:10:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
eb74fd3746987b7711e8df9fae579209 *DESCRIPTION | ||
ba32463f4647e688dce1c43b869478bb *NAMESPACE | ||
1c242249e24450adfeb40c576d75b6b2 *R/Data.R | ||
d070e8e6490908159614a1268da8a11f *R/QCH_functions.R | ||
e0ef681901465d24f47f9139cb2c086a *R/qch-package.R | ||
aacbfe98ddfb739904382b9ad8e98aba *data/PvalSets.rda | ||
eac1213d953962608dd8a2bde7b23de5 *man/FastKerFdr.Rd | ||
6b4cfbe8331db12171abd96a6a00a4c2 *man/GetHinfo.Rd | ||
c7fa58a9ea95bd1e2929f3d0826b0b49 *man/GetHinfoEqual.Rd | ||
ecc1afc03d4c3aa8ff6541d20c35314f *man/PvalSets.Rd | ||
57d5aeceb926b756bf1fc00d86e45f03 *man/qch-package.Rd | ||
97de6d91273ddaab3d3a171626c2fc58 *man/qch.fit.Rd | ||
625b99d1355f866102a481f4c944f26f *man/qch.test.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(GetHinfo) | ||
export(GetHinfoEqual) | ||
export(qch.fit) | ||
export(qch.test) | ||
import(graphics) | ||
import(ks) | ||
import(mclust) | ||
import(stats) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
#' Synthetic example to illustrate the main qch functions | ||
#' | ||
#' PvalSets is a data.frame with 10,000 rows and 3 columns. Each row corresponds to an item, | ||
#' columns 'Pval1' and 'Pval2' each correspond to a test serie over the items, and column 'Class' | ||
#' provides the truth, i.e. if item \eqn{i} belongs to class 1 then the H0 hypothesis is true for the 2 tests, | ||
#' if item \eqn{i} belongs to class 2 (resp. 3) then the H0 hypothesis is true for the first (resp. second) | ||
#' test only, and if item \eqn{i} belongs to class 4 then both H0 hypotheses are false (for the first | ||
#' and the second test). | ||
#' | ||
#' @format A data.frame | ||
#' | ||
"PvalSets" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,319 @@ | ||
################################################################### | ||
#' Generate H0/H1 configurations and specify the ones corresponding to the composed H1 | ||
#' | ||
#' @param Q number of test series to be combined | ||
#' @param AtLeast How many H1 hypotheses at least for the item to be of interest ? | ||
#' @param Consecutive Should the significant test series be consecutive ? Default=FALSE | ||
#' | ||
#' @export | ||
#' @return A list of two objects 'Hconfig' and 'Hconfig.H1'. | ||
#' Hconfig is the list of all possible combination of H0 and H1 hypotheses among Q hypotheses tested. | ||
#' Hconfig.H1 is the vector of components of Hconfig that correspond to the 'AtLeast' specification. | ||
#' | ||
#' @examples | ||
#' GetHinfo(4,2) | ||
#' | ||
#' @seealso [GetHinfoEqual()] | ||
GetHinfo <- function(Q,AtLeast,Consecutive=FALSE){ | ||
|
||
## Build H configurations | ||
Hconfig <- as.matrix(expand.grid(lapply(1:Q, function(q) 0:1))) | ||
Hconfig <- split(Hconfig, seq(2^Q)) | ||
|
||
## Find the ones that match H1 | ||
if (!Consecutive){ | ||
MatchingH1 <- sapply(Hconfig, function(h){sum(h)>=AtLeast}) | ||
Hconfig.H1 <- which(MatchingH1) | ||
names(Hconfig.H1) <- NULL | ||
} else { | ||
Consec <- paste(rep(1,AtLeast),collapse='') | ||
Hconcat <- sapply(Hconfig, function(hh){paste(hh,collapse='')}) | ||
Hconfig.H1 <- grep(pattern = Consec,x = Hconcat) | ||
} | ||
|
||
## Collect results | ||
return(list(Hconfig=Hconfig,Hconfig.H1=Hconfig.H1)) | ||
|
||
} | ||
|
||
|
||
################################################################### | ||
#' Generate H0/H1 configurations and specify the ones corresponding to the composed H1 | ||
#' | ||
#' @param Q number of test series to be combined | ||
#' @param Equal How many H1 hypotheses exactly for the item to be of interest ? | ||
#' @param Consecutive Should the significant test series be consecutive ? Default=FALSE | ||
#' | ||
#' @export | ||
#' @return A list of two objects 'Hconfig' and 'Hconfig.H1'. | ||
#' Hconfig is the list of all possible combination of H0 and H1 hypotheses among Q hypotheses tested. | ||
#' Hconfig.H1 is the vector of components of Hconfig that correspond to the 'Equal' specification. | ||
#' | ||
#' @examples | ||
#' GetHinfoEqual(4,2) | ||
#' @seealso [GetHinfo()] | ||
GetHinfoEqual <- function(Q,Equal,Consecutive=FALSE){ | ||
|
||
## Build H configurations | ||
Hconfig <- as.matrix(expand.grid(lapply(1:Q, function(q) 0:1))) | ||
Hconfig <- split(Hconfig, seq(2^Q)) | ||
|
||
## Find the ones that match H1 | ||
if (!Consecutive){ | ||
MatchingH1 <- sapply(Hconfig, function(h){sum(h)==Equal}) | ||
Hconfig.H1 <- which(MatchingH1) | ||
names(Hconfig.H1) <- NULL | ||
} else { | ||
Consec <- paste(rep(1,Equal),collapse='') | ||
ConsecP1 <- paste(rep(1,Equal+1),collapse='') | ||
Hconcat <- sapply(Hconfig, function(hh){paste(hh,collapse='')}) | ||
Hconfig.H1 <- intersect(grep(pattern = Consec,x = Hconcat),which(sapply(Hconfig, function(h){sum(h)==Equal}))) | ||
} | ||
|
||
## Collect results | ||
return(list(Hconfig=Hconfig,Hconfig.H1=Hconfig.H1)) | ||
|
||
} | ||
|
||
|
||
|
||
################################################################### | ||
#' FastKerFdr | ||
#' | ||
#' @param Pval a vector of p-values (corresponding to a p-value serie) | ||
#' @param p0 a priori proportion of H0 hypotheses | ||
#' @param plotting boolean, should some diagnostic graphs be plotted. Default is FALSE. | ||
#' @param NbKnot The (maximum) number of knot for the kde procedure. Default is 1e5 | ||
#' @param tol a tolerance value for convergence. Default is 1e-5 | ||
#' @import ks mclust graphics stats | ||
#' | ||
#' @return A list of 3 objects. Object p0 is an estimate of the proportion of H0 hypotheses., | ||
#' tau is the vector of H1 posteriors. | ||
#' f1 is a numeric vector, each coordinate i corresponding to the evaluation of the H1 density at point pi, where pi is the ith p-value in Pval. | ||
FastKerFdr <- function(Pval,p0=NULL,plotting=FALSE,NbKnot=1e5,tol = 1e-5){ | ||
|
||
## Transform pvalues into N(0,1) quantiles | ||
n = length(Pval) | ||
X = -qnorm(Pval) | ||
|
||
## Get a p0 estimate | ||
if(is.null(p0)){ | ||
p0 = min(2*sum(X < 0)/n,1-1/n); | ||
} | ||
p1 = 1 - p0 | ||
|
||
## Knots, counts and initialization (using Mclust) | ||
if(length(X)>NbKnot){ | ||
Hist = hist(X, breaks=NbKnot, plot=FALSE) | ||
Knots = Hist$mids; ActualNbKnot = length(Knots); Counts = Hist$counts; | ||
Xsample = sample(X, NbKnot) | ||
GM = mclust::Mclust(Xsample, G=3, modelNames='E'); | ||
mu = max(GM$parameters$mean) | ||
} else { | ||
Knots = X; ActualNbKnot = length(X); Counts = rep(1,ActualNbKnot); | ||
GM = mclust::Mclust(X, G=3, modelNames='E'); | ||
mu = max(GM$parameters$mean) | ||
} | ||
if (plotting){ | ||
Order <- order(Knots) | ||
Knots <- Knots[Order] | ||
Counts <- Counts[Order] | ||
} | ||
|
||
## Initialize the taus using GM | ||
phi = dnorm(Knots); f1 = dnorm(Knots, mean=mu, sd=1) | ||
tau = p1*f1/(p0*phi + p1*f1) | ||
|
||
## Get the weighted kernel density estimate | ||
diff = 2*tol; iter = 0 | ||
while(diff > tol){ | ||
iter = iter + 1 | ||
weights = tau*Counts; weights = ActualNbKnot * weights / sum(weights) | ||
f1 = ks::kde(x=Knots, w=weights, eval.points=Knots)$estimate | ||
tauNew = p1*f1/(p0*phi + p1*f1) | ||
## Dirty job 1: get rid of the f1 mass on the left | ||
tauNew[Knots< -3] <- 0 | ||
diff = max(abs(tau - tauNew)) | ||
tau = tauNew | ||
} | ||
if(plotting){ | ||
Hist.fig <- hist(X, freq=TRUE, breaks=sqrt(n), main='', border=8, | ||
xlab="Q-transformed pvalues", ylab="Densities") | ||
bin.width <- mean(diff(Hist.fig$breaks)) | ||
lines(Knots, n*bin.width*p0*phi, type='l', col=4, lwd=2); | ||
lines(Knots, n*bin.width*p1*f1, col=2,lwd=2); | ||
lines(Knots, n*bin.width*(p0*phi+p1*f1), lwd=2) | ||
legend("topright", legend=c("H0 dist", "H1 dist","Mixture Dist"), | ||
col=c("blue","red", "black"), lty=c(1,1,2), cex=0.8) | ||
} | ||
|
||
## Now get the f1 estimate | ||
KDE = ks::kde(x=Knots, w=weights, eval.points=X) | ||
f1 = KDE$estimate | ||
|
||
## Dirty job 2: get rid of numeric problems | ||
f1[f1<0] <- 1e-30 | ||
tau = p1*f1 / (p1*f1 + p0*dnorm(X)) | ||
|
||
return(list(p0=p0,tau=tau,f1=f1)) | ||
} | ||
|
||
|
||
################################################################### | ||
#' Infer Hconfig posteriors | ||
#' | ||
#' @param pValMat a matrix of p-values, each column corresponding to a p-value serie. | ||
#' @param Hconfig an Hconfig list as generated by the [GetHinfo()] function. | ||
#' @param plotting a boolean. Should some diagnostic graphs be plotted ? Default is FALSE. | ||
#' @import stats | ||
#' | ||
#' @return A list of 2 objects 'prior' and 'posterior'. | ||
#' Object 'prior' is a vector of estimated prior probabilities for each of the H-configurations. | ||
#' Object 'posterior' is a matrix providing for each item (in row) its posterior probability to belong to each of the H-configurations (in columns). | ||
#' @export | ||
#' | ||
#' @examples | ||
#' data(PvalSets) | ||
#' PvalMat <- as.matrix(PvalSets[,-3]) | ||
#' ## Build the Hconfig objects | ||
#' Q <- 2 | ||
#' AtLeast <- 2 | ||
#' Hconfig <- GetHinfo(Q,AtLeast)$Hconfig | ||
#' | ||
#' ## Run the function | ||
#' res.fit <- qch.fit(PvalMat,Hconfig) | ||
#' | ||
#' ## Display the prior of each class of items | ||
#' res.fit$prior | ||
#' | ||
#' ## Display the first posteriors | ||
#' head(res.fit$posterior) | ||
qch.fit <- function(pValMat,Hconfig, plotting=FALSE){ | ||
|
||
n <- nrow(pValMat) | ||
Q <- ncol(pValMat) | ||
|
||
#### Step 1: Marginal density estimation | ||
|
||
## Get p0 estimates | ||
p0 <- rep(0, Q) | ||
for (q in 1:Q){ | ||
p0[q] = min(2*sum(pValMat[,q] > 0.5)/n,1-1/n); | ||
} | ||
SomeH1 <- which(p0<1) | ||
NoH1 <- which(p0==1-1/n); | ||
if(length(NoH1)==1){ | ||
message(paste("Pvalue serie",NoH1, "may have very few H1 (or a weird distribution)")) | ||
} | ||
if(length(NoH1)>1){ | ||
message(paste("Pvalue series",paste(NoH1,collapse=' '), "may have very few H1 (or a weird distribution)")) | ||
} | ||
|
||
## Fit a 2-component mixture to each test serie using kerFdr | ||
f1Mat <- matrix(1, n, Q); | ||
for(q in SomeH1){ | ||
ker <- FastKerFdr(pValMat[, q], p0=p0[q], plotting=FALSE) | ||
f1Mat[,q] <- ker$f1 | ||
} | ||
f0Mat <- matrix(dnorm(-qnorm(pValMat)),ncol=Q) | ||
|
||
#### Step 2: transform marginal densities into config densities | ||
|
||
Logf0Mat <- log(f0Mat); | ||
Logf1Mat <- log(f1Mat); | ||
f.Hconfig <- sapply(Hconfig, function(h){ | ||
f <- rep(0,nrow(Logf0Mat)) | ||
if (length(which(h==1)) > 0){f <- f + rowSums(Logf1Mat[, which(h==1), drop=FALSE])} | ||
if (length(which(h==0)) > 0){f <- f + rowSums(Logf0Mat[, which(h==0), drop=FALSE])} | ||
return(exp(f)) | ||
}) | ||
|
||
#### Step 3: Infer prior estimation using an EM procedure | ||
|
||
## Initialization: simple product of marginal priors estimator | ||
NewPrior <- sapply(1:length(Hconfig), function(c){ | ||
prod(p0[which(Hconfig[[c]]==0)]) * prod(1-p0[which(Hconfig[[c]]==1)]) | ||
}) | ||
PriorsAt0 <- which(NewPrior==0) | ||
|
||
## EM calibration | ||
NotOK <- TRUE | ||
Precision <- 1e-6 | ||
NoLowerThan <- 1e-7 | ||
while(NotOK){ | ||
|
||
## E step | ||
Tau <- f.Hconfig*(tcrossprod(rep(1:n),NewPrior)) | ||
Tau <- Tau/rowSums(Tau) | ||
|
||
## M step | ||
OldPrior <- NewPrior | ||
NewPrior <- colMeans(Tau) | ||
if(length(PriorsAt0)==0){ | ||
NewPrior[NewPrior<NoLowerThan] <- NoLowerThan | ||
} else { | ||
NoLowerCoord <- setdiff(which(NewPrior<NoLowerThan),PriorsAt0) | ||
if(length(NoLowerCoord)>0){ | ||
NewPrior[NoLowerCoord] <- NoLowerThan | ||
} | ||
} | ||
NewPrior <- NewPrior/sum(NewPrior) | ||
NotOK <- max((OldPrior-NewPrior)^2) > Precision | ||
|
||
} | ||
priorHconfigEM <- NewPrior | ||
|
||
#### Step 4: Posterior computation | ||
posterior <- f.Hconfig*(tcrossprod(rep(1:n),priorHconfigEM)) | ||
posterior <- posterior/rowSums(posterior) | ||
|
||
#### Last but not least: output results | ||
Res <- list(prior=priorHconfigEM, posterior=posterior) | ||
return(Res) | ||
} | ||
|
||
|
||
################################################################### | ||
#' Perform composed hypothesis testing with FDR control | ||
#' | ||
#' @param posterior a matrix of posterior probabilities for each item to belong the different H-configurations, as provided by the [qch.fit()] function. | ||
#' @param Hconfig.H1 a list of H1 config, as created by the [GetHinfo()] function. | ||
#' @param Alpha the nominal Type I error rate for FDR control. | ||
#' | ||
#' @return A list of 2 objects 'Rejection' and 'lFDR'. | ||
#' Object 'Rejection' is a vector providing for each item the result of the composed hypothesis test, after multiple testing correction. | ||
#' Object 'lFDR' is a vector providing for each item its local FDR estimate. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' data(PvalSets) | ||
#' PvalMat <- as.matrix(PvalSets[,-3]) | ||
#' Truth <- PvalSets[,3] | ||
#' | ||
#' ## Build the Hconfig objects | ||
#' Q <- 2 | ||
#' AtLeast <- 2 | ||
#' Hconfig <- GetHinfo(Q,AtLeast)$Hconfig | ||
#' Hconfig.H1 <- GetHinfo(Q,AtLeast)$Hconfig.H1 | ||
#' | ||
#' ## Infer the posteriors | ||
#' res.fit <- qch.fit(PvalMat,Hconfig) | ||
#' | ||
#' ## Run the test procedure with FDR control | ||
#' res.test <- qch.test(res.fit$posterior,Hconfig.H1) | ||
#' table(res.test$Rejection,Truth==4) | ||
qch.test <- function(posterior,Hconfig.H1,Alpha=0.05){ | ||
|
||
n <- nrow(posterior) | ||
localFDR <- 1-rowSums(posterior[,Hconfig.H1,drop=FALSE]) | ||
Order <- order(localFDR) | ||
FDR <- cumsum(localFDR[Order])/(1:n) | ||
NbReject <- max(which(FDR<=Alpha)) | ||
Rejection <- rep(0,n) | ||
if (NbReject>0){ | ||
Rejection[Order[1:NbReject]] <- 1 | ||
} | ||
return(list(Rejection=Rejection,lFDR=localFDR)) | ||
|
||
} | ||
|
Oops, something went wrong.