Skip to content

Commit

Permalink
version 2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Yanchun Bao authored and gaborcsardi committed Sep 24, 2013
1 parent a61d5b0 commit cabb2d3
Show file tree
Hide file tree
Showing 26 changed files with 6,884 additions and 61 deletions.
22 changes: 9 additions & 13 deletions DESCRIPTION
@@ -1,19 +1,15 @@
Package: enRich
Type: Package
Title: An R package for analysis of multiple ChIP-seq data
Version: 1.0
Date: 2013-05-22
Author: Yanchun Bao <yanchun.bao@brunel.ac.uk>, Veronica
Vinciotti<veronica.vinciotti@brunel.ac.uk>
Title: An R package for the analysis of multiple ChIP-seq data
Version: 2.0
Date: 2013-09-24
Author: Yanchun Bao <yanchun.bao@brunel.ac.uk>, Veronica Vinciotti<veronica.vinciotti@brunel.ac.uk>
Maintainer: Yanchun Bao <yanchun.bao@brunel.ac.uk>
Description: enRich is an R package for joint statistical modelling of
ChIP-seq data, accounting for technical/biological replicates,
multiple conditions and different ChIP efficiencies of the
individual experiments.
Depends: R(>= 2.14), parallel
Description: enRich is an R package for joint statistical modelling of ChIP-seq data, accounting for technical/biological replicates, multiple conditions and different ChIP efficiencies of the individual experiments.
Depends: R(>= 2.15.3), parallel
License: GPL (>= 2)
LazyLoad: yes
Packaged: 2013-05-31 12:00:04 UTC; masryyb
NeedsCompilation: no
Packaged: 2013-09-24 08:40:31 UTC; masryyb
NeedsCompilation: yes
Repository: CRAN
Date/Publication: 2013-05-31 14:22:44
Date/Publication: 2013-09-24 11:20:59
33 changes: 25 additions & 8 deletions MD5
@@ -1,9 +1,10 @@
73191c776a0d570e7d09da07f3b57729 *DESCRIPTION
8ccb633701f57d5020c962e5a68931ab *NAMESPACE
f550763321611ed296e47d377526862e *DESCRIPTION
1ccb6fceb6cbfb08e07a665a5417d64a *NAMESPACE
c2d161145fe5f54aaf850dd0b86b24b7 *R/FDR.R
0de35362a72a63dae57766d58329d476 *R/IPE.R
b9b79fd941de0876c76cfeefe19deac1 *R/NBlike_phi_offset.R
1e36698d0ceaf5bfe3174e2267056c9b *R/enrich.mix.R
c34bdb88566012459417c6eba19d5f8e *R/enrich.mrf.R
a8efc832733c327427778a866133bb17 *R/mix.R
b6b15cddafa3097b4c1dbc74d5a11edf *R/mix.joint.R
00f90607b7e2d8aab26eeaeeae286d40 *R/mix_offset.R
Expand All @@ -12,13 +13,29 @@ b6b15cddafa3097b4c1dbc74d5a11edf *R/mix.joint.R
00f90607b7e2d8aab26eeaeeae286d40 *R/mixfit_offset.R
257d3aaf3ebe645a537cc452c0d2c639 *R/mloglike_offsetNB.R
cb8c5f8392eb06ee89f5df9750d90fac *R/mloglike_offsetpois.R
321515d47aa8728300cf65da57f1f40d *R/mrf.R
64c4165f1231933aed8cd2f52dea8266 *R/mrf.joint.R
c883d500bb6e6e52539a2ca5ee4cfec6 *R/mrf_rep.R
8a6cbb16641baa94acf3b9e08e134fc1 *R/mrf_single.R
446c14c29533399f452491799b8eaf3e *R/mrf_sp.R
071b86a420dbab1172e04347bd2770e4 *R/mrf_srsp.R
9cb4b5212ff81b46d91c07733506e438 *R/pprob.R
76e34f89608959cbe3c821ed057edaf0 *R/pprob_joint.R
e27bec8e0ac77f83968df29a7d10e346 *data/datalist
91637870bd91aa78972be5d2e3e937e9 *data/p300cbp.1000bp.rda
b4a38667ca28859ecb5a237bd01ec1c5 *man/FDR.Rd
0dd607094201c7fe4e65b060045af624 *man/IPE.Rd
35b21f3f1aa099551c359db4b149c89f *man/enRich-package.Rd
8a6580e5cce23f7580da332410d93852 *man/enrich.mix.Rd
28f3499c42b1c79e62b739a03ceb1d84 *man/mix.Rd
78b118a4cb8bd7f631349bc4d81d9a1c *man/mix.joint.Rd
c0b8473aa163a8b9618c5bdad4a70000 *data/p300cbp.200bp.rda
a3776276a33eb28e30013b8d6ba3c0c2 *man/FDR.Rd
5a4626a21a350f8f7b18f24ab2610eb6 *man/IPE.Rd
082f13f9fb7b1e0517559f81cca0e7cb *man/enRich-package.Rd
d9428a68482fdf4570532c02be194057 *man/enrich.mix.Rd
f9a54e73e08a7bfb28a313fc13ada98f *man/enrich.mrf.Rd
d5383d7bcbc1a3126ddd485eb238935c *man/mix.Rd
76879c00478486154284fc882f849bb7 *man/mix.joint.Rd
8825c8b0e06e94c64092caa259c691c2 *man/mrf.Rd
0e02e27d05450bae7793db4ffd696914 *man/mrf.joint.Rd
494ad59e5cb59d8c588bde57d032c170 *man/p300cbp.1000bp.Rd
73b12c77e4daa17ae3e0a3ec7810c1ce *man/p300cbp.200bp.Rd
b58c20a29a46e0bc89c97cf18d0d684f *src/MRF.c
f2132364405cdc7a2890628e3d06b988 *src/MRFrep.c
44b576b8d9f433248739c2237f2cee3b *src/MRFsp.c
15395d5253a745bd32602ede508162de *src/MRFsrsp.c
3 changes: 2 additions & 1 deletion NAMESPACE
@@ -1,2 +1,3 @@
export(enrich.mix, FDR, IPE, mix.joint, mix)
useDynLib(enRich)
export(mrf, mrf.joint, enrich.mrf, enrich.mix, FDR, IPE, mix, mix.joint)

275 changes: 275 additions & 0 deletions R/enrich.mrf.R
@@ -0,0 +1,275 @@
enrich.mrf<-function(object, analysis="joint", differential=FALSE, diff.vec=NULL, cr=0.05, crdiff=0.05)
{
## INPUT
## object: the results of mixfit if analysis="separate" or mixfit.joint if analysis="joint"
## differential: If TRUE, it will compute the posterior probability of differential binding of any two experiments or two conditions.
## diff.vec: If differential = TRUE, diff.vec must be given to show which experiments are to be used in the comparison. At the moment, this is restricted to two conditions
## (e.g. two proteins at the same time point), so the value for diff.vec should be only 0, 1, 2, where, 0 indicates the experiments are not be used in the analysis,
## 1 and 2 stand for conditions 1 and 2, respectively. diff.vec should be of the same length as the number of experiments in object.
## cr: the level of FDR for identifying the enriched regions.
## crdiff: the level of FDR for identifying the differentially bound regions.

data=object$data
datacount=as.matrix(data$count)
Nexp=ncol(datacount)
N=nrow(datacount)
method=object$method
rep.vec=object$rep.vec
p.vec=object$p.vec
joint=ifelse(length(rep.vec)+length(p.vec)>0, 1, 0)
PP=as.matrix(object$PP)
results=object$parameters
exp.label=colnames(datacount)
if (is.null(exp.label)==1)
{
exp.label=paste("Experiment", 1:Nexp, sep="")
}
X=matrix(0, N, Nexp)
colnames(X)=exp.label
diffdata=NULL
diffprob1=NULL
diffprob0=NULL
diffX=NULL
diffX1=NULL
diffX2=NULL
if (analysis=="joint"&joint==0)
stop("You are using the joint method but you provide the separate modelling results. Please set analysis='separate'", call.=FALSE)
if (analysis=="separate"&joint==1)
stop("You are using the individual models for the joint modelling results. Please set analysis='joint'", call.=FALSE)
if (analysis=="joint"&all(summary(factor(rep.vec))==1))
{
warning("No replicates used", call.=FALSE)
analysis="separate"
}
if (differential==TRUE&length(diff.vec)==0)
stop("You must give the diff.vec vector, for detecting differentially bound regions", call.=FALSE)
if (differential==TRUE&Nexp==1)
stop("You must give results of two experiments when call differentially bound regions. Please use mrf.joint function for multiple experiments", call.=FALSE)
if (differential==TRUE&any(diff.vec>2)&any(diff.vec<0))
stop("Only 0, 1, 2 can used in diff.vec: 0 means not in differential analysis, 1 means condition 1 and 2 means condition 2", call.=FALSE)
for (i in 1:Nexp)
{
temp1=FDR(1-PP[,i], cr=cr)
X[,i]=temp1$X
}
if (differential==TRUE&all(diff.vec<=2&diff.vec>=0))
{
csindex=rep(0, Nexp)
diffexp1=NULL
diffexp2=NULL
for(i in 1:Nexp)
{
if (diff.vec[i]==1)
{
diffexp1=c(diffexp1, i)
}
}
for(i in 1:Nexp)
{
if (diff.vec[i]==2)
{
diffexp2=c(diffexp2, i)
}
}
diffexp1.label=exp.label[diffexp1]
diffexp2.label=exp.label[diffexp2]
diffexp1.name="condition1"
diffexp2.name="condition2"
if (analysis=="joint")
{
if (length(rep.vec)>0)
{
rlist=factor(rep.vec)
rlevel=levels(rlist)
rlevelvalue=summary(rlist)
srlevel=subset(rlevel, rlevelvalue>1&rlevel>0)
srlevelvalue=subset(rlevelvalue, rlevelvalue>1&rlevel>0)
srindex=matrix(0, Nexp, length(srlevelvalue))
for (j in 1:length(srlevel))
{
srindex[,j]=ifelse(rlist==srlevel[j], 1, 0)
}
csindex=rowSums(srindex)
}
}
diffindex=ifelse(diff.vec==1, -1, 0)+ifelse(diff.vec==2, 1, 0)
diff1px1=rep(1, N)
diff1px0=rep(1, N)
diff2px1=rep(1, N)
diff2px0=rep(1, N)
if (sum(diffindex*csindex<0)>1)
{
diffexp1rep=NULL
for (i in 1:Nexp)
{
if(diffindex[i]*csindex[i]==-1)
{
diffexp1rep=c(diffexp1rep,i)
}
}
temp=PP[,diffexp1rep[1]]
diff1px1=diff1px1*temp
diff1px0=diff1px0*(1-temp)
for (i in 1:Nexp)
{
if(diffindex[i]*csindex[i]>=0&diffindex[i]==-1)
{
temp=PP[,i]
diff1px1=diff1px1*temp
diff1px0=diff1px0*(1-temp)
}
}
}else
{
for (i in 1:Nexp)
{
if (diffindex[i]==-1)
{
temp=PP[,i]
diff1px1=diff1px1*temp
diff1px0=diff1px0*(1-temp)
}
}
}
if (sum(diffindex*csindex>0)>1)
{
diffexp2rep=NULL
for (i in 1:Nexp)
{
if(diffindex[i]*csindex[i]==1)
{
diffexp2rep=c(diffexp2rep, i)
}
}
temp=PP[, diffexp2rep[1]]
diff2px1=diff2px1*temp
diff2px0=diff2px0*(1-temp)
for (i in 1:Nexp)
{
if(diffindex[i]*csindex[i]>=0&diffindex[i]==1)
{
temp=PP[,i]
diff2px1=diff2px1*temp
diff2px0=diff2px0*(1-temp)
}
}
}else
{
for (i in 1:Nexp)
{
if (diffindex[i]==1)
{
temp=PP[,i]
diff2px1=diff2px1*temp
diff2px0=diff2px0*(1-temp)
}
}
}
diffprob1=diff1px1*diff2px0+diff1px0*diff2px1
diffprob0=1-diffprob1
temp1<-FDR(diffprob0, cr=crdiff)
diffX<-temp1$X
diffX1<-ifelse(diffX==1&diff1px1>diff2px1, 1, 0)
diffX2<-ifelse(diffX==1&diff1px1<diff2px1, 1, 0)
diffX1=as.matrix(diffX1)
colnames(diffX1)=diffexp1.name
diffX2=as.matrix(diffX2)
colnames(diffX2)=diffexp2.name
}

## enrich and differential bound regions
nenrich=colSums(X)
enrich<-list()
diffenrich1<-NULL
diffenrich2<-NULL
cat ("***************Enrich regions**************", "\n")
cat ("There are", Nexp, "experiments.", '\n')
cat ("The number of enriched regions for these experiments are:", '\n')
rlist=factor(rep.vec)
rlevel=levels(rlist)
rlevelvalue=summary(rlist)
srlevel=subset(rlevel, rlevelvalue>1&rlevel>0)
srlevelvalue=subset(rlevelvalue, rlevelvalue>1&rlevel>0)
srindex=matrix(0, Nexp, length(srlevelvalue))
flist=factor(p.vec)
level=levels(flist)
levelvalue=summary(flist)
splevel=subset(level, levelvalue>1)
splevelvalue=subset(levelvalue, levelvalue>1)
spindex=matrix(0, Nexp, length(splevelvalue))
if (joint==1&length(splevel)>0)
{
for (i in 1:length(splevel))
{
spindex[,i]=ifelse(flist==splevel[i], 1, 0)
spcode=which(spindex[,i]==1)
templabel=exp.label[spcode]
cat("Note: in previous analysis", templabel, "are assumed have same p", '\n')
}
}
if(joint==1&length(srlevel)>0)
{
for (i in 1:length(srlevel))
{
srindex[,i]=ifelse(rlist==srlevel[i], 1, 0)
jointcode=which(srindex[,i]==1)
templabel=exp.label[jointcode]
tempnenrich=nenrich[jointcode][1]
if(length(jointcode)==2)
{
templabel=paste(templabel[1], "and", templabel[2], sep=" ")
}
cat(templabel, " are replicates and they have", tempnenrich, "jointly identified enriched regions at FDR=", cr, '\n')
temp=subset(cbind(data$region, data$count[,jointcode], PP[,jointcode[1]]), X[,jointcode[1]]==1)
colnames(temp)=c(colnames(data$region), exp.label[jointcode], "poster.prob")
temp=temp[order(temp[,1], temp[,2]),]
enrich[[i]]=temp
}
sepcode=which(rowSums(srindex)==0)
if (length(sepcode)>0)
{
for (j in 1:length(sepcode))
{
i=sepcode[j]
cat (exp.label[i], "has", nenrich[i], "enriched regions at FDR=", cr, '\n')
temp=subset(cbind(data$region, data$count[,i], PP[,i]), X[,i]==1)
colnames(temp)=c(colnames(data$region), exp.label[i], "poster.prob")
temp=temp[order(temp[,1], temp[,2]),]
enrich[[j+length(srlevel)]]=temp
}
}
}else
{
for (i in 1:Nexp)
{
cat (exp.label[i], "has", nenrich[i], "enriched regions at FDR=", cr, '\n')
temp=subset(cbind(data$region, data$count[,i], PP[,i]), X[,i]==1)
colnames(temp)=c(colnames(data$region), exp.label[i], "poster.prob")
temp=temp[order(temp[,1], temp[,2]),]
enrich[[i]]=temp
}
}
if (length(diffX1)>0)
{
cat ("***************Differential bound regions**************", "\n")
cat ("Condition 1 uses data of", diffexp1.label, '\n')
cat ("Condition 2 uses data of", diffexp2.label, '\n')
cat ("The number of regions bound only by condition 1 at FDR=", crdiff, " is:", sum(diffX1), '\n')
cat ("The number of regions bound only by condition 2 at FDR=", crdiff, " is:", sum(diffX2), '\n')
diffenrich1<-subset(cbind(data$region, data$count[,diffexp1], diffprob1), diffX1==1)
colnames(diffenrich1)=c(colnames(data$region), exp.label[diffexp1], "poster.prob.diff")
diffenrich2<-subset(cbind(data$region, data$count[,diffexp2], diffprob1), diffX2==1)
colnames(diffenrich2)=c(colnames(data$region), exp.label[diffexp2], "poster.prob.diff")
}

## Output list
## enrich: the list of enrich regions.
## diffenrich1 and diffenrich2: the differential bound regions of the first and second conditions, respectively.
## X: n x p matrix of index of enrichment for each experiment (at the FDR cutoff), X=1 : enriched region, X=0 : background region
## IPE: p-dimensional vector of IP efficiency values for the p experiments.
## diffprob1: n-dimensional vector of posterior probabilities P(X_1 \ne X_2) for the two conditions under study; diffprob0=1-diffprob1
## diffX1: n-dimensional vector of index of differential bound regions for condition 1, diffX2 is the index of differential bound regions for condition 2.

object<-list(enrich=enrich, diffenrich1=diffenrich1, diffenrich2=diffenrich2, X=X, diffprob1=diffprob1, diffprob0=diffprob0, diffX1=diffX1, diffX2=diffX2)
return(object)
}

0 comments on commit cabb2d3

Please sign in to comment.