version 2.0

cran · Sep 24, 2013 · cabb2d3 · cabb2d3
1 parent a61d5b0
commit cabb2d3
Show file tree

Hide file tree

Showing 26 changed files with 6,884 additions and 61 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,19 +1,15 @@
 Package: enRich
 Type: Package
-Title: An R package for analysis of multiple ChIP-seq data
-Version: 1.0
-Date: 2013-05-22
-Author: Yanchun Bao <yanchun.bao@brunel.ac.uk>, Veronica
-        Vinciotti<veronica.vinciotti@brunel.ac.uk>
+Title: An R package for the analysis of multiple ChIP-seq data
+Version: 2.0
+Date: 2013-09-24
+Author: Yanchun Bao <yanchun.bao@brunel.ac.uk>, Veronica Vinciotti<veronica.vinciotti@brunel.ac.uk>
 Maintainer: Yanchun Bao <yanchun.bao@brunel.ac.uk>
-Description: enRich is an R package for joint statistical modelling of
-        ChIP-seq data, accounting for technical/biological replicates,
-        multiple conditions and different ChIP efficiencies of the
-        individual experiments.
-Depends: R(>= 2.14), parallel
+Description: enRich is an R package for joint statistical modelling of ChIP-seq data, accounting for technical/biological replicates, multiple conditions and different ChIP efficiencies of the individual experiments.
+Depends: R(>= 2.15.3), parallel
 License: GPL (>= 2)
 LazyLoad: yes
-Packaged: 2013-05-31 12:00:04 UTC; masryyb
-NeedsCompilation: no
+Packaged: 2013-09-24 08:40:31 UTC; masryyb
+NeedsCompilation: yes
 Repository: CRAN
-Date/Publication: 2013-05-31 14:22:44
+Date/Publication: 2013-09-24 11:20:59
diff --git a/MD5 b/MD5
@@ -1,9 +1,10 @@
-73191c776a0d570e7d09da07f3b57729 *DESCRIPTION
-8ccb633701f57d5020c962e5a68931ab *NAMESPACE
+f550763321611ed296e47d377526862e *DESCRIPTION
+1ccb6fceb6cbfb08e07a665a5417d64a *NAMESPACE
 c2d161145fe5f54aaf850dd0b86b24b7 *R/FDR.R
 0de35362a72a63dae57766d58329d476 *R/IPE.R
 b9b79fd941de0876c76cfeefe19deac1 *R/NBlike_phi_offset.R
 1e36698d0ceaf5bfe3174e2267056c9b *R/enrich.mix.R
+c34bdb88566012459417c6eba19d5f8e *R/enrich.mrf.R
 a8efc832733c327427778a866133bb17 *R/mix.R
 b6b15cddafa3097b4c1dbc74d5a11edf *R/mix.joint.R
 00f90607b7e2d8aab26eeaeeae286d40 *R/mix_offset.R
@@ -12,13 +13,29 @@ b6b15cddafa3097b4c1dbc74d5a11edf *R/mix.joint.R
 00f90607b7e2d8aab26eeaeeae286d40 *R/mixfit_offset.R
 257d3aaf3ebe645a537cc452c0d2c639 *R/mloglike_offsetNB.R
 cb8c5f8392eb06ee89f5df9750d90fac *R/mloglike_offsetpois.R
+321515d47aa8728300cf65da57f1f40d *R/mrf.R
+64c4165f1231933aed8cd2f52dea8266 *R/mrf.joint.R
+c883d500bb6e6e52539a2ca5ee4cfec6 *R/mrf_rep.R
+8a6cbb16641baa94acf3b9e08e134fc1 *R/mrf_single.R
+446c14c29533399f452491799b8eaf3e *R/mrf_sp.R
+071b86a420dbab1172e04347bd2770e4 *R/mrf_srsp.R
 9cb4b5212ff81b46d91c07733506e438 *R/pprob.R
 76e34f89608959cbe3c821ed057edaf0 *R/pprob_joint.R
+e27bec8e0ac77f83968df29a7d10e346 *data/datalist
 91637870bd91aa78972be5d2e3e937e9 *data/p300cbp.1000bp.rda
-b4a38667ca28859ecb5a237bd01ec1c5 *man/FDR.Rd
-0dd607094201c7fe4e65b060045af624 *man/IPE.Rd
-35b21f3f1aa099551c359db4b149c89f *man/enRich-package.Rd
-8a6580e5cce23f7580da332410d93852 *man/enrich.mix.Rd
-28f3499c42b1c79e62b739a03ceb1d84 *man/mix.Rd
-78b118a4cb8bd7f631349bc4d81d9a1c *man/mix.joint.Rd
+c0b8473aa163a8b9618c5bdad4a70000 *data/p300cbp.200bp.rda
+a3776276a33eb28e30013b8d6ba3c0c2 *man/FDR.Rd
+5a4626a21a350f8f7b18f24ab2610eb6 *man/IPE.Rd
+082f13f9fb7b1e0517559f81cca0e7cb *man/enRich-package.Rd
+d9428a68482fdf4570532c02be194057 *man/enrich.mix.Rd
+f9a54e73e08a7bfb28a313fc13ada98f *man/enrich.mrf.Rd
+d5383d7bcbc1a3126ddd485eb238935c *man/mix.Rd
+76879c00478486154284fc882f849bb7 *man/mix.joint.Rd
+8825c8b0e06e94c64092caa259c691c2 *man/mrf.Rd
+0e02e27d05450bae7793db4ffd696914 *man/mrf.joint.Rd
 494ad59e5cb59d8c588bde57d032c170 *man/p300cbp.1000bp.Rd
+73b12c77e4daa17ae3e0a3ec7810c1ce *man/p300cbp.200bp.Rd
+b58c20a29a46e0bc89c97cf18d0d684f *src/MRF.c
+f2132364405cdc7a2890628e3d06b988 *src/MRFrep.c
+44b576b8d9f433248739c2237f2cee3b *src/MRFsp.c
+15395d5253a745bd32602ede508162de *src/MRFsrsp.c
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,2 +1,3 @@
-export(enrich.mix, FDR, IPE, mix.joint, mix)
+useDynLib(enRich)
+export(mrf, mrf.joint, enrich.mrf, enrich.mix, FDR, IPE, mix, mix.joint)
 
diff --git a/R/enrich.mrf.R b/R/enrich.mrf.R
@@ -0,0 +1,275 @@
+enrich.mrf<-function(object, analysis="joint", differential=FALSE, diff.vec=NULL, cr=0.05, crdiff=0.05)
+{
+## INPUT
+## object: the results of  mixfit if analysis="separate" or mixfit.joint if analysis="joint"
+## differential: If TRUE, it will compute the posterior probability of differential binding of any two experiments or two conditions. 
+## diff.vec: If differential = TRUE, diff.vec must be given to show which experiments are to be used in the comparison. At the moment, this is restricted to two conditions
+## (e.g. two proteins at the same time point), so the value for diff.vec should be only 0, 1, 2, where, 0 indicates the experiments are not be used in the analysis, 
+## 1 and 2 stand for conditions 1 and 2, respectively. diff.vec should be of the same length as the number of experiments in object.
+## cr: the level of FDR for identifying the enriched regions.
+## crdiff: the level of FDR for identifying the differentially bound regions.
+
+	data=object$data
+	datacount=as.matrix(data$count)
+	Nexp=ncol(datacount)
+	N=nrow(datacount)
+	method=object$method
+	rep.vec=object$rep.vec
+	p.vec=object$p.vec
+	joint=ifelse(length(rep.vec)+length(p.vec)>0, 1, 0)
+	PP=as.matrix(object$PP)
+	results=object$parameters
+	exp.label=colnames(datacount)
+	if (is.null(exp.label)==1)
+	{	
+		exp.label=paste("Experiment", 1:Nexp, sep="")
+	}
+	X=matrix(0, N, Nexp)
+	colnames(X)=exp.label
+	diffdata=NULL	
+	diffprob1=NULL
+	diffprob0=NULL
+	diffX=NULL
+	diffX1=NULL
+	diffX2=NULL
+	if (analysis=="joint"&joint==0)
+		stop("You are using the joint method but you provide the separate modelling results. Please set analysis='separate'", call.=FALSE)
+	if (analysis=="separate"&joint==1)
+		stop("You are using the individual models for the joint modelling results. Please set analysis='joint'", call.=FALSE)
+	if (analysis=="joint"&all(summary(factor(rep.vec))==1))
+	{
+		warning("No replicates used", call.=FALSE)
+		analysis="separate"
+	}		
+	if (differential==TRUE&length(diff.vec)==0)
+		stop("You must give the diff.vec vector, for detecting differentially bound regions", call.=FALSE)
+	if (differential==TRUE&Nexp==1)
+		stop("You must give results of two experiments when call differentially bound regions. Please use mrf.joint function for multiple experiments", call.=FALSE)
+	if (differential==TRUE&any(diff.vec>2)&any(diff.vec<0))
+		stop("Only 0, 1, 2 can used in diff.vec: 0 means not in differential analysis, 1 means condition 1 and 2 means condition 2", call.=FALSE)
+	for (i in 1:Nexp)
+	{
+		temp1=FDR(1-PP[,i], cr=cr)
+		X[,i]=temp1$X 
+	}	
+	if (differential==TRUE&all(diff.vec<=2&diff.vec>=0))
+	{
+		csindex=rep(0, Nexp)
+		diffexp1=NULL
+		diffexp2=NULL
+		for(i in 1:Nexp)
+		{
+			if (diff.vec[i]==1)
+			{
+				diffexp1=c(diffexp1, i)
+			}
+		}
+		for(i in 1:Nexp)
+		{
+			if (diff.vec[i]==2)
+			{
+				diffexp2=c(diffexp2, i)
+			}
+		}
+		diffexp1.label=exp.label[diffexp1]
+		diffexp2.label=exp.label[diffexp2]
+		diffexp1.name="condition1"
+		diffexp2.name="condition2"
+		if (analysis=="joint")
+		{
+			if (length(rep.vec)>0)
+			{
+				rlist=factor(rep.vec)
+				rlevel=levels(rlist)
+				rlevelvalue=summary(rlist)
+				srlevel=subset(rlevel, rlevelvalue>1&rlevel>0)
+				srlevelvalue=subset(rlevelvalue, rlevelvalue>1&rlevel>0)
+				srindex=matrix(0, Nexp, length(srlevelvalue))
+				for (j in 1:length(srlevel))
+				{
+					srindex[,j]=ifelse(rlist==srlevel[j], 1, 0)
+				}			
+				csindex=rowSums(srindex)
+			}
+		}
+		diffindex=ifelse(diff.vec==1, -1, 0)+ifelse(diff.vec==2, 1, 0)
+		diff1px1=rep(1, N)
+		diff1px0=rep(1, N)
+		diff2px1=rep(1, N)
+		diff2px0=rep(1, N)
+		if (sum(diffindex*csindex<0)>1)
+		{
+			diffexp1rep=NULL
+			for (i in 1:Nexp)
+			{
+				if(diffindex[i]*csindex[i]==-1)
+				{
+					diffexp1rep=c(diffexp1rep,i)
+				}				
+			}
+			temp=PP[,diffexp1rep[1]]
+			diff1px1=diff1px1*temp
+			diff1px0=diff1px0*(1-temp)
+			for (i in 1:Nexp)
+			{
+				if(diffindex[i]*csindex[i]>=0&diffindex[i]==-1)
+				{
+					temp=PP[,i]
+					diff1px1=diff1px1*temp
+					diff1px0=diff1px0*(1-temp)
+				}
+			}
+		}else
+		{
+			for (i in 1:Nexp)
+			{
+				if (diffindex[i]==-1)
+				{
+					temp=PP[,i]
+					diff1px1=diff1px1*temp
+					diff1px0=diff1px0*(1-temp)
+				}
+			}
+		}
+		if (sum(diffindex*csindex>0)>1)
+		{
+			diffexp2rep=NULL
+			for (i in 1:Nexp)
+			{
+				if(diffindex[i]*csindex[i]==1)
+				{
+					diffexp2rep=c(diffexp2rep, i)
+				}				
+			}
+			temp=PP[, diffexp2rep[1]]
+			diff2px1=diff2px1*temp
+			diff2px0=diff2px0*(1-temp)
+			for (i in 1:Nexp)
+			{
+				if(diffindex[i]*csindex[i]>=0&diffindex[i]==1)
+				{
+					temp=PP[,i]
+					diff2px1=diff2px1*temp
+					diff2px0=diff2px0*(1-temp)
+				}
+			}
+		}else
+		{
+			for (i in 1:Nexp)
+			{
+				if (diffindex[i]==1)
+				{
+					temp=PP[,i]
+					diff2px1=diff2px1*temp
+					diff2px0=diff2px0*(1-temp)
+				}
+			}
+		}
+		diffprob1=diff1px1*diff2px0+diff1px0*diff2px1 
+		diffprob0=1-diffprob1
+		temp1<-FDR(diffprob0, cr=crdiff)
+		diffX<-temp1$X
+		diffX1<-ifelse(diffX==1&diff1px1>diff2px1, 1, 0)
+		diffX2<-ifelse(diffX==1&diff1px1<diff2px1, 1, 0)
+		diffX1=as.matrix(diffX1)
+		colnames(diffX1)=diffexp1.name
+		diffX2=as.matrix(diffX2)
+		colnames(diffX2)=diffexp2.name
+	}
+
+	## enrich and differential bound regions
+	nenrich=colSums(X)
+	enrich<-list()
+	diffenrich1<-NULL
+	diffenrich2<-NULL
+	cat ("***************Enrich regions**************", "\n")
+	cat ("There are", Nexp, "experiments.", '\n')
+	cat ("The number of enriched regions for these experiments are:", '\n')
+	rlist=factor(rep.vec)
+	rlevel=levels(rlist)
+	rlevelvalue=summary(rlist)
+	srlevel=subset(rlevel, rlevelvalue>1&rlevel>0)
+	srlevelvalue=subset(rlevelvalue, rlevelvalue>1&rlevel>0)
+	srindex=matrix(0, Nexp, length(srlevelvalue))
+	flist=factor(p.vec)
+	level=levels(flist)
+	levelvalue=summary(flist)
+	splevel=subset(level, levelvalue>1)
+	splevelvalue=subset(levelvalue, levelvalue>1)
+	spindex=matrix(0, Nexp, length(splevelvalue))
+	if (joint==1&length(splevel)>0)
+	{			
+		for (i in 1:length(splevel))
+		{
+			spindex[,i]=ifelse(flist==splevel[i], 1, 0)
+			spcode=which(spindex[,i]==1)
+			templabel=exp.label[spcode]
+			cat("Note: in previous analysis", templabel, "are assumed have same p", '\n')
+		}
+	}
+	if(joint==1&length(srlevel)>0)
+	{
+		for (i in 1:length(srlevel))
+		{
+			srindex[,i]=ifelse(rlist==srlevel[i], 1, 0)
+			jointcode=which(srindex[,i]==1)
+			templabel=exp.label[jointcode]
+			tempnenrich=nenrich[jointcode][1]
+			if(length(jointcode)==2)
+			{
+				templabel=paste(templabel[1], "and", templabel[2], sep=" ")
+			}
+			cat(templabel, " are replicates and they have", tempnenrich, "jointly identified enriched regions at FDR=", cr, '\n')
+			temp=subset(cbind(data$region, data$count[,jointcode], PP[,jointcode[1]]), X[,jointcode[1]]==1)
+			colnames(temp)=c(colnames(data$region), exp.label[jointcode], "poster.prob")
+			temp=temp[order(temp[,1], temp[,2]),]
+			enrich[[i]]=temp	
+		}
+		sepcode=which(rowSums(srindex)==0)
+		if (length(sepcode)>0)
+		{
+			for (j in 1:length(sepcode))
+			{
+				i=sepcode[j]
+				cat (exp.label[i], "has", nenrich[i], "enriched regions at FDR=", cr, '\n')
+				temp=subset(cbind(data$region, data$count[,i], PP[,i]), X[,i]==1)
+				colnames(temp)=c(colnames(data$region), exp.label[i], "poster.prob")
+				temp=temp[order(temp[,1], temp[,2]),]
+				enrich[[j+length(srlevel)]]=temp	
+			}
+		}
+	}else
+	{
+		for (i in 1:Nexp)
+		{
+			cat (exp.label[i], "has", nenrich[i], "enriched regions at FDR=", cr, '\n')
+			temp=subset(cbind(data$region, data$count[,i], PP[,i]), X[,i]==1)
+			colnames(temp)=c(colnames(data$region), exp.label[i], "poster.prob")
+			temp=temp[order(temp[,1], temp[,2]),]
+			enrich[[i]]=temp	
+		}
+	}
+	if (length(diffX1)>0)
+	{
+		cat ("***************Differential bound regions**************", "\n")
+		cat ("Condition 1 uses data of", diffexp1.label, '\n')
+		cat ("Condition 2 uses data of", diffexp2.label, '\n')
+		cat ("The number of regions bound only by condition 1 at FDR=", crdiff, " is:", sum(diffX1), '\n')
+		cat ("The number of regions bound only by condition 2 at FDR=", crdiff, " is:", sum(diffX2), '\n')
+		diffenrich1<-subset(cbind(data$region, data$count[,diffexp1], diffprob1), diffX1==1)
+		colnames(diffenrich1)=c(colnames(data$region), exp.label[diffexp1], "poster.prob.diff")
+		diffenrich2<-subset(cbind(data$region, data$count[,diffexp2], diffprob1),  diffX2==1)
+		colnames(diffenrich2)=c(colnames(data$region), exp.label[diffexp2], "poster.prob.diff")
+	}
+
+##    Output list
+##    enrich: the list of enrich regions. 
+##    diffenrich1 and diffenrich2: the differential bound regions of the first and second conditions, respectively.
+##    X: n x p matrix of index of enrichment for each experiment (at the FDR cutoff), X=1 : enriched region, X=0 : background region
+##    IPE: p-dimensional vector of IP efficiency values for the p experiments.
+##    diffprob1: n-dimensional vector of posterior probabilities P(X_1 \ne X_2) for the two conditions under study; diffprob0=1-diffprob1
+##    diffX1: n-dimensional vector of index of differential bound regions for condition 1, diffX2 is the index of differential bound regions for condition 2.
+
+	object<-list(enrich=enrich, diffenrich1=diffenrich1, diffenrich2=diffenrich2, X=X, diffprob1=diffprob1, diffprob0=diffprob0, diffX1=diffX1, diffX2=diffX2)
+	return(object)
+}