-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f6b8ec3
Showing
61 changed files
with
9,455 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
Type: Package | ||
Title: Multivariate Information Inductive Causation | ||
Version: 0.1 | ||
Date: 2017-10-09 | ||
Package: miic | ||
Description: We report an information-theoretic method which learns a large class of causal or non-causal graphical models from purely observational data, while including the effects of unobserved latent variables, commonly found in many datasets. Starting from a complete graph, the method iteratively removes dispensable edges, by uncovering significant information contributions from indirect paths, and assesses edge-specific confidences from randomization of available data. The remaining edges are then oriented based on the signature of causality in observational data. This approach can be applied to a wide range of datasets and provides new biological insights into regulatory networks from single cell expression data, genomic alterations during tumor development and co-evolving residues in protein structures. For more information you can refer to: Verny et al. PLoS Comput Biol. (2017) <doi:10.1371/journal.pcbi.1005662>. | ||
Authors@R: c(person("Nadir", "Sella", role = c("aut","cre"), email = "nadir.sella@curie.fr"), | ||
person("Louis", "Verny",role = "aut"), | ||
person("Severine", "Affeldt", role = "aut"), | ||
person("Hervé", "Isambert", role = c("aut"), email = "Herve.Isambert@curie.fr")) | ||
Maintainer: Nadir Sella <nadir.sella@curie.fr> | ||
Imports: MASS,igraph, methods, plotrix, bnlearn, Rcpp, ppcor | ||
License: GPL (>= 2) | ||
NeedsCompilation: yes | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 6.0.1 | ||
LinkingTo: Rcpp | ||
Packaged: 2017-10-09 14:35:54 UTC; nadir | ||
Author: Nadir Sella [aut, cre], | ||
Louis Verny [aut], | ||
Severine Affeldt [aut], | ||
Hervé Isambert [aut] | ||
Repository: CRAN | ||
Date/Publication: 2017-10-09 15:54:34 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
a6fdbc256cb2630ced8bbd4c7c218f6b *DESCRIPTION | ||
b50205a65608e6b31f7140a07576af20 *NAMESPACE | ||
8a83329490f3bc92cbb5a3aa459ab69c *R/data.R | ||
b87cae829aa82bad8fd7a2a25309885b *R/evaluate_efn.R | ||
3a37815bb41d603c82883dbcbcd99626 *R/gmPlot.lib.R | ||
710b1febf091e82b3f33f6f7f8b7eae6 *R/gmStatistics.orient.lib.R | ||
23a27aa69c6e7fdd30d8ead240e655a1 *R/gmStatistics.skeleton.lib.R | ||
0a6be7d93839500f11bc1991d97c78d1 *R/gmSummary.R | ||
c19120d1c18e25e86db5767dcfcc8baf *R/gmSummary.lib.R | ||
a394543dff5a97048508c9181e922912 *R/miic.R | ||
f338965e7fa825c25f4f266d335dfd55 *R/miic.orient.R | ||
f6cd4f12a2bed4020d51f6390ee3c7f5 *R/miic.plot.R | ||
5697c881c566cab959170e4c18c067e3 *R/miic.skeleton.R | ||
1db813647f8d46f894e6aa7c8640155e *R/miic.utils.R | ||
95897d5733d3844b947af2488d8d2ed4 *R/shared.utils.lib.R | ||
6e20ed7b7261c755396ccd6a2fb5b13e *R/write.cytoscape.R | ||
56f5fb69772a96249d878df39f9bd695 *R/write.style.R | ||
6ad2e090cca5b97d79d5fec0158dd22a *data/cosmicCancer.rda | ||
c45e16b9419f8c315e51e7a333a47620 *data/cosmicCancer_stateOrder.rda | ||
49c1015d0983b3acc95d9ed2ef9d56cc *data/datalist | ||
5fbb6445f81a441b194ceab0928a8afc *data/hematoData.rda | ||
eccda37fcc3f7d01fbfa3b5a4c5c6424 *data/ohno.rda | ||
f27116874bf4e6c19e8d178eb33cac92 *data/ohno_stateOrder.rda | ||
e9304c35e64bb0d689e9337ee62f3885 *man/cosmicCancer.Rd | ||
ee82115615692931b2533d7f9b01c67c *man/cosmicCancer_stateOrder.Rd | ||
6936b8176557a4dfc94e9fd1e3df4551 *man/hematoData.Rd | ||
7f43f6ac4c416548d6329f5b251470a7 *man/miic.Rd | ||
641f80420a52817ce17ece6671e66a15 *man/miic.evaluate.effn.Rd | ||
843ed9f299b77a3102519ab320a7d22c *man/miic.plot.Rd | ||
dd2b3833e4079d797dfb278008da3639 *man/miic.write.network.cytoscape.Rd | ||
d0a7e312ede8df92a1240855c6853240 *man/miic.write.style.cytoscape.Rd | ||
6edc3db98f08bb12a0d416417e6d344e *man/ohno.Rd | ||
7817c89c2c9177e6f5cbdeab9742ee56 *man/ohno_stateOrder.Rd | ||
9dfda68c998b80c1539be1ea141f1c6f *src/Makevars | ||
b0e0a870b963f94fb3d0e33504c91b65 *src/computeEnsInformation.cpp | ||
fd6cb900f21d3eade279ebcd8e7dbba8 *src/computeEnsInformation.h | ||
a6d7806d77368b2f2765c98c61c856e0 *src/computeInfo.cpp | ||
b92099fde178adad20c48b3d1b826928 *src/computeInfo.h | ||
9d3d5c5dd59800f6cc38a46581a875d1 *src/computeInfo_interface.cpp | ||
5cce2bdc080024287e6f2dcadd9f9fdc *src/computeInfo_interface.h | ||
e0825921467980d53160f447963cee2d *src/confidenceCut.cpp | ||
254a0cc0d3a35cfc7d8751ec67021a78 *src/confidenceCut.h | ||
ea8c2e36af98501e6c6a4d554efd2fe1 *src/crosscorrelation.cpp | ||
6f386fbfa1601b35da8b8f9921c8a600 *src/memory.h | ||
0d7b6ebdba5c16c9354e95da5b7466f1 *src/miic_init.c | ||
53af7dedc80e724265ec834e5e6dff96 *src/orientationProbability.cpp | ||
df81b15d577e2284c88218618f86c029 *src/orientationProbability.h | ||
15774d652f8b40cbda603e776f757c16 *src/probaOrientation.cpp | ||
bf4e298c2e147cfa6e17d239c9d3a381 *src/probaOrientation.h | ||
2a3d543c6b5132ac56366927a7b38f68 *src/probaOrientation_interface.cpp | ||
0631912416f071d2dff6da62f0c1fc77 *src/probaOrientation_interface.h | ||
3c958989b52b7268b3b4e137251ac817 *src/skeleton.cpp | ||
f10078f15d83c1321742cfad6aa75f6e *src/skeleton.h | ||
b174ec1055fb1cdcc13742af1041f9e3 *src/skeletonInitialization.cpp | ||
aad43dd4310bfbc15eca725c08f4db17 *src/skeletonInitialization.h | ||
99e7531c85e5e01872e2c86f91b277bb *src/skeletonIteration.cpp | ||
28a37bc6efab65d5b2d965a9b897f80f *src/skeletonIteration.h | ||
73800446d485674606d7151c9f78fcd4 *src/structure.h | ||
ffb184f3990522d7948fd3632d3c29e1 *src/utilities.cpp | ||
21e78659556c299a5e80d952126585f9 *src/utilities.h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(miic) | ||
export(miic.evaluate.effn) | ||
export(miic.plot) | ||
export(miic.write.network.cytoscape) | ||
export(miic.write.style.cytoscape) | ||
useDynLib(miic) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#' Early blood development: single cell binary gene expression data | ||
#' | ||
#' Binarized expression data of 33 transcription factors involved | ||
#' in early differentiation of primitive erythroid and endothelial | ||
#' cells (3934 cells). | ||
#' | ||
#' @docType data | ||
#' @name hematoData | ||
#' @usage data(hematoData) | ||
#' @format A data.frame object. | ||
#' | ||
#' @keywords datasets | ||
#' | ||
#' @references Moignard et al. (2015) Nat Biotechnol 33(3):269-76 | ||
#' (\href{https://www.ncbi.nlm.nih.gov/pubmed/25664528}{PubMed link}) | ||
#' | ||
#' @keywords data | ||
NULL | ||
|
||
|
||
|
||
#' Genomic and ploidy alterations in breast tumors | ||
#' | ||
#' The dataset contains 807 samples without predisposing Brca1/2 germline mutations | ||
#' and includes 204 somatic mutations (from whole exome sequencing) and expression | ||
#' level information for 91 genes. | ||
#' | ||
#' @docType data | ||
#' @name cosmicCancer | ||
#' @usage data(cosmicCancer) | ||
#' | ||
#' @format A data.frame object. | ||
#' | ||
#' @keywords datasets | ||
#' | ||
#' @references Forbes SA, Beare D, Gunasekaran P, Leung K, Bindal N, et al. (2015) | ||
#' Nucleic Acids Res 43:D805–D811. (\href{https://www.ncbi.nlm.nih.gov/pubmed/25355519}{PubMed link}) | ||
#' | ||
#' @keywords data | ||
NULL | ||
|
||
|
||
#' Genomic and ploidy alterations in breast tumors | ||
#' | ||
#' The dataset contains 807 samples without predisposing Brca1/2 germline mutations | ||
#' and includes 204 somatic mutations (from whole exome sequencing) and expression | ||
#' level information for 91 genes, category order file. | ||
#' | ||
#' @docType data | ||
#' @name cosmicCancer_stateOrder | ||
#' @usage data(cosmicCancer_stateOrder) | ||
#' | ||
#' @format A data.frame object. | ||
#' | ||
#' @keywords datasets | ||
#' | ||
#' @references Forbes SA, Beare D, Gunasekaran P, Leung K, Bindal N, et al. (2015) | ||
#' Nucleic Acids Res 43:D805–D811. (\href{https://www.ncbi.nlm.nih.gov/pubmed/25355519}{PubMed link}) | ||
#' | ||
#' @keywords data | ||
NULL | ||
|
||
|
||
|
||
|
||
|
||
#' Tetraploidization in vertebrate evolution | ||
#' | ||
#' 20,415 protein-coding genes in the human genome from Ensembl (v70) and information on the | ||
#' retention of duplicates originating either from the two whole genome duplications at | ||
#' the onset of vertebrates (‘ohnolog’) or from subsequent small scale duplications (‘SSD’) | ||
#' as well as copy number variants (‘CNV’). | ||
#' | ||
#' @docType data | ||
#' @name ohno | ||
#' @usage data(ohno) | ||
#' | ||
#' @format A data.frame object. | ||
#' | ||
#' @keywords datasets | ||
#' | ||
#' @references Verny et al., PLoS Comp. Bio. 2017. | ||
#' | ||
#' @keywords data | ||
NULL | ||
|
||
|
||
#' Tetraploidization in vertebrate evolution | ||
#' | ||
#' 20,415 protein-coding genes in the human genome from Ensembl (v70) and information on the | ||
#' retention of duplicates originating either from the two whole genome duplications at | ||
#' the onset of vertebrates (‘ohnolog’) or from subsequent small scale duplications (‘SSD’) | ||
#' as well as copy number variants (‘CNV’), category order. | ||
#' | ||
#' @docType data | ||
#' @usage data(ohno_stateOrder) | ||
#' @format A data.frame object. | ||
#' @keywords datasets | ||
#' @name ohno_stateOrder | ||
#' @references Verny et al., PLoS Comp. Bio. 2017. | ||
#' | ||
#' @keywords data | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#' Evaluate the effective number of samples
#' @description This function evaluates the effective number of samples in a
#' dataset.
#'
#' @param inputData [a data frame]
#' A data frame that contains the observational data. Each
#' column corresponds to one variable and each row is a sample that gives the
#' values for all the observed variables. The column names correspond to the
#' names of the observed variables. Data must be discrete-like.
#' @param plot [a boolean value] Whether the autocorrelation plot has to be
#' drawn. It is drawn only if all values of the correlation vector are
#' positive.
#' @return A list containing the autocorrelation decay, the effective number of
#' samples, and the result of an exponentiality test with alpha = 0.05
#' (element \code{exponential_decay}).
#' @export
#' @useDynLib miic
miic.evaluate.effn <- function(inputData = NULL, plot = TRUE) {
  #### Check the input arguments
  if (is.null(inputData)) {
    stop("The input data file is required")
  }
  # Fail with an explicit message instead of a later
  # "object 'res' not found" when Rcpp cannot be loaded.
  if (!requireNamespace("Rcpp", quietly = TRUE)) {
    stop("The Rcpp package is required to evaluate the effective number of samples")
  }

  # The compiled routine receives the column names followed by the data
  # flattened row by row, all as character strings.
  inData <- c(
    colnames(inputData),
    as.vector(as.character(t(as.matrix(inputData))))
  )
  res <- .Call("evaluateEffn", inData, ncol(inputData), nrow(inputData),
               PACKAGE = "miic")

  corr <- res$correlation
  # The exponential fit (and the log-scale plot) are only attempted on a
  # non-empty, strictly positive correlation vector; NA values count as
  # "not positive".
  if (length(corr) > 0 && isTRUE(all(corr > 0))) {
    fit1 <- MASS::fitdistr(corr, "exponential")
    pval <- stats::ks.test(corr, "pexp", fit1$estimate)$p.value
    if (pval < 0.05) {
      res$exponential_decay <- FALSE
    } else {
      res$exponential_decay <- TRUE
    }

    if (plot) {
      graphics::plot(corr, type = "l", log = "y",
                     ylab = "Autocorrelation with lag", xlab = "n")
      graphics::title("Autocorrelation between n distant samples")
    }
  } else {
    res$exponential_decay <- FALSE
  }
  res
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# Prepare an edge summary for plotting: reset its row names and remove the
# edges whose "type" is "TN", "N" or "FN".
#
# mySummary: edge summary table (data frame or matrix) with a "type" column.
# Returns the summary restricted to the remaining edges, keeping its
# two-dimensional shape even when a single edge is left.
plot.loadSummary <- function(mySummary) {
  #### Load the summary of the edges
  rownames(mySummary) <- NULL

  #### Ignore the TN, N and FN edges
  myTypesToIgnore <- c("TN", "N", "FN")
  keep <- !(mySummary[, "type"] %in% myTypesToIgnore)

  # drop = FALSE prevents a one-row (or one-column) result from collapsing
  # into a plain vector.
  mySummary[keep, , drop = FALSE]
}
|
||
# Build the default igraph object used for plotting.
#
# mySummary:  edge summary with columns "x" and "y" holding the node names
#             of each edge's end points.
# myAllGenes: vector of all node names; the position of a name in this
#             vector is the vertex id used in the graph.
# Returns a directed igraph graph with one vertex per entry of myAllGenes,
# one edge per row of mySummary, and default drawing attributes. Edges start
# with arrow.mode 0 (igraph's "no arrowhead" setting).
# Stops with an error when an edge refers to a node absent from myAllGenes.
plot.createDefaultGraph <- function(mySummary, myAllGenes) {
  #### Replace names by numbers to create the graph
  # match() is vectorised and always yields one id per end point, whereas a
  # per-cell which() returns a ragged result for unknown or duplicated names.
  from <- match(as.character(mySummary[, "x"]), myAllGenes)
  to <- match(as.character(mySummary[, "y"]), myAllGenes)
  if (anyNA(from) || anyNA(to)) {
    stop("All the nodes of the summary must be present in the list of nodes")
  }
  # Interleave as from1, to1, from2, to2, ... as expected by igraph::graph()
  inf.edgesList.nbr <- as.vector(rbind(from, to))

  #### Create a directed igraph with all the nodes
  inf.graph <- igraph::graph(inf.edgesList.nbr, n = length(myAllGenes),
                             directed = TRUE)

  #### Set the vertices options
  igraph::V(inf.graph)$label <- myAllGenes
  igraph::V(inf.graph)$shape <- "circle"
  igraph::V(inf.graph)$color <- "lightblue"
  igraph::V(inf.graph)$label.family <- "Helvetica"
  igraph::V(inf.graph)$label.cex <- 0.6
  igraph::V(inf.graph)$size <- 10

  #### Set the general edges options
  igraph::E(inf.graph)$arrow.size <- 0.5
  igraph::E(inf.graph)$arrow.width <- 3
  igraph::E(inf.graph)$width <- 3
  igraph::E(inf.graph)$curved <- FALSE
  igraph::E(inf.graph)$color <- "red2"
  igraph::E(inf.graph)$lty <- "solid"
  igraph::E(inf.graph)$arrow.mode <- 0
  inf.graph
}
|
||
# Set the arrow mode of the graph edges from the edge summary.
#
# mySummary: edge summary with columns "infOrt", "type" and "trueOrt"; row i
#            describes edge i of inf.graph.
# inf.graph: igraph graph whose edges receive the arrow.mode attribute.
# Returns the graph with arrow.mode set to 2 (forward), 1 (backward) or
# 3 (bidirectional) on the matching edges; other edges are left untouched.
plot.setOrientation <- function(mySummary, inf.graph) {
  inferred <- mySummary[, "infOrt"]
  is.fn <- mySummary[, "type"] == "FN"
  truth <- mySummary[, "trueOrt"]

  #### Forward oriented edges: inferred 2/4, or FN edges whose true
  #### orientation is 2
  forward <- which(inferred %in% c(2, 4) | (is.fn & truth == 2))
  if (length(forward) > 0) {
    igraph::E(inf.graph)[forward]$arrow.mode <- 2
  }

  #### Backward oriented edges: inferred -2/-4, or FN edges whose true
  #### orientation is -2
  backward <- which(inferred %in% c(-2, -4) | (is.fn & truth == (-2)))
  if (length(backward) > 0) {
    igraph::E(inf.graph)[backward]$arrow.mode <- 1
  }

  #### Bidirectional edges: inferred 6
  both <- which(inferred == 6)
  if (length(both) > 0) {
    igraph::E(inf.graph)[both]$arrow.mode <- 3
  }

  inf.graph
}
|
||
# Tell whether edge1 is edge2 read backwards: both are collapsed into
# comma-separated strings (edge2 reversed first) and the strings are compared.
littlefunc <- function(edge1, edge2) {
  straight <- paste(edge1, collapse = ",")
  flipped <- paste(rev(edge2), collapse = ",")
  straight == flipped
}
|
||
# ---- Function to color the edges of a graph according to their partial correlation
#
# summary:  edge summary with a numeric "partial_correlation" column and a
#           "sign" column (presumably a data frame, since the two columns
#           have different types).
# features: not used by this function; kept for interface compatibility.
# Returns a character vector with one color per row of `summary`:
#   - sign "+"           -> red gradient, indexed by round(|pcor| * 100)
#   - any other sign     -> blue gradient, same index
#   - NA sign or NA pcor -> "grey88"
# The gradient index is clamped to [1, 100].
pCor.edgeCol <- function(summary, features) {
  # Define the color gradients
  blue.gradient <- grDevices::rainbow(100, start = 3 / 6, end = 4 / 6)
  red.gradient <- rev(grDevices::rainbow(100, start = 0, end = 0.16))

  pcor <- summary[, "partial_correlation"]
  link.sign <- as.character(summary[, "sign"])

  # Use the absolute partial correlation, scaled to 1..100, as gradient index
  # (NA values stay NA through pmax/pmin).
  col.index <- pmin(pmax(round(abs(pcor) * 100), 1), 100)

  # Start from the "no information" color, then fill in the edges for which
  # both the sign and the partial correlation are known.
  myEdgesColor <- rep("grey88", nrow(summary))
  usable <- !is.na(link.sign) & !is.na(col.index)
  positive <- usable & link.sign == "+"
  negative <- usable & link.sign != "+"
  myEdgesColor[positive] <- red.gradient[col.index[positive]]
  myEdgesColor[negative] <- blue.gradient[col.index[negative]]

  myEdgesColor
}
|
||
# ---- Function to color the edges of a graph according to their confidence
# ---- ("log_confidence" column in summary)
#
# summary:  edge summary with a numeric "log_confidence" column and a "sign"
#           column (presumably a data frame, since the two columns have
#           different types).
# features: not used by this function; kept for interface compatibility.
# Returns a character vector with one color per row of `summary`. The rounded
# log confidence, clamped to [1, 100], is the gradient index:
#   - sign known and different from "+" -> blue gradient
#   - sign "+" or NA                    -> red gradient
#   - NA log confidence                 -> "grey88"
conf.edgeCol <- function(summary, features) {
  # Define the color gradients
  blue.gradient <- grDevices::rainbow(100, start = 3 / 6, end = 4 / 6)
  red.gradient <- rev(grDevices::rainbow(100, start = 0, end = 0.16))
  max.conf <- 100 # largest gradient index
  min.conf <- 1 # smallest gradient index

  link.sign <- as.character(summary[, "sign"])

  # Rounded confidence, clamped to the gradient range (NA values stay NA)
  col.index <- pmin(pmax(round(summary[, "log_confidence"]), min.conf),
                    max.conf)

  # Edges without a usable confidence keep the neutral color
  myEdgesColor <- rep("grey88", nrow(summary))
  known <- !is.na(col.index)
  negative <- known & !is.na(link.sign) & link.sign != "+"
  positive <- known & !negative
  myEdgesColor[positive] <- red.gradient[col.index[positive]]
  myEdgesColor[negative] <- blue.gradient[col.index[negative]]

  myEdgesColor
}
|
||
# ---- Function which builds a graph object from the edge summary and applies
# ---- the given edge colors and, optionally, node sizes and node colors
#
# summary:    edge summary passed to plot.createDefaultGraph() and
#             plot.setOrientation().
# features:   node names passed to plot.createDefaultGraph().
# edgeColors: value assigned to the "color" attribute of the edges.
# nodeSizes:  value assigned to the "size" attribute of the vertices.
# nodeColors: value assigned to the "color" attribute of the vertices.
# Returns the resulting igraph graph.
modif.Graph <- function(summary, features, edgeColors, nodeSizes = 10, nodeColors = 'lightblue')
{
  net <- plot.createDefaultGraph(summary, features)

  # Override the default edge and vertex appearance
  igraph::E(net)$color <- edgeColors
  igraph::E(net)$arrow.size <- 0.2
  igraph::V(net)$color <- nodeColors

  # Orient the edges, then apply the requested vertex size
  net <- plot.setOrientation(summary, net)
  igraph::V(net)$size <- nodeSizes

  net
}
Oops, something went wrong.