Skip to content

Commit

Permalink
version 0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Nadir Sella authored and cran-robot committed Oct 9, 2017
0 parents commit f6b8ec3
Show file tree
Hide file tree
Showing 61 changed files with 9,455 additions and 0 deletions.
25 changes: 25 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Type: Package
Title: Multivariate Information Inductive Causation
Version: 0.1
Date: 2017-10-09
Package: miic
Description: We report an information-theoretic method which learns a large class of causal or non-causal graphical models from purely observational data, while including the effects of unobserved latent variables, commonly found in many datasets. Starting from a complete graph, the method iteratively removes dispensable edges, by uncovering significant information contributions from indirect paths, and assesses edge-specific confidences from randomization of available data. The remaining edges are then oriented based on the signature of causality in observational data. This approach can be applied on a wide range of datasets and provide new biological insights on regulatory networks from single cell expression data, genomic alterations during tumor development and co-evolving residues in protein structures. For more information you can refer to: Verny et al. Plos Comput Biol. (2017) <doi:10.1371/journal.pcbi.1005662>.
Authors@R: c(person("Nadir", "Sella", role = c("aut","cre"), email = "nadir.sella@curie.fr"),
person("Louis", "Verny",role = "aut"),
person("Severine", "Affeldt", role = "aut"),
person("Hervé", "Isambert", role = c("aut"), email = "Herve.Isambert@curie.fr"))
Maintainer: Nadir Sella <nadir.sella@curie.fr>
Imports: MASS,igraph, methods, plotrix, bnlearn, Rcpp, ppcor
License: GPL (>= 2)
NeedsCompilation: yes
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
LinkingTo: Rcpp
Packaged: 2017-10-09 14:35:54 UTC; nadir
Author: Nadir Sella [aut, cre],
Louis Verny [aut],
Severine Affeldt [aut],
Hervé Isambert [aut]
Repository: CRAN
Date/Publication: 2017-10-09 15:54:34 UTC
60 changes: 60 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
a6fdbc256cb2630ced8bbd4c7c218f6b *DESCRIPTION
b50205a65608e6b31f7140a07576af20 *NAMESPACE
8a83329490f3bc92cbb5a3aa459ab69c *R/data.R
b87cae829aa82bad8fd7a2a25309885b *R/evaluate_efn.R
3a37815bb41d603c82883dbcbcd99626 *R/gmPlot.lib.R
710b1febf091e82b3f33f6f7f8b7eae6 *R/gmStatistics.orient.lib.R
23a27aa69c6e7fdd30d8ead240e655a1 *R/gmStatistics.skeleton.lib.R
0a6be7d93839500f11bc1991d97c78d1 *R/gmSummary.R
c19120d1c18e25e86db5767dcfcc8baf *R/gmSummary.lib.R
a394543dff5a97048508c9181e922912 *R/miic.R
f338965e7fa825c25f4f266d335dfd55 *R/miic.orient.R
f6cd4f12a2bed4020d51f6390ee3c7f5 *R/miic.plot.R
5697c881c566cab959170e4c18c067e3 *R/miic.skeleton.R
1db813647f8d46f894e6aa7c8640155e *R/miic.utils.R
95897d5733d3844b947af2488d8d2ed4 *R/shared.utils.lib.R
6e20ed7b7261c755396ccd6a2fb5b13e *R/write.cytoscape.R
56f5fb69772a96249d878df39f9bd695 *R/write.style.R
6ad2e090cca5b97d79d5fec0158dd22a *data/cosmicCancer.rda
c45e16b9419f8c315e51e7a333a47620 *data/cosmicCancer_stateOrder.rda
49c1015d0983b3acc95d9ed2ef9d56cc *data/datalist
5fbb6445f81a441b194ceab0928a8afc *data/hematoData.rda
eccda37fcc3f7d01fbfa3b5a4c5c6424 *data/ohno.rda
f27116874bf4e6c19e8d178eb33cac92 *data/ohno_stateOrder.rda
e9304c35e64bb0d689e9337ee62f3885 *man/cosmicCancer.Rd
ee82115615692931b2533d7f9b01c67c *man/cosmicCancer_stateOrder.Rd
6936b8176557a4dfc94e9fd1e3df4551 *man/hematoData.Rd
7f43f6ac4c416548d6329f5b251470a7 *man/miic.Rd
641f80420a52817ce17ece6671e66a15 *man/miic.evaluate.effn.Rd
843ed9f299b77a3102519ab320a7d22c *man/miic.plot.Rd
dd2b3833e4079d797dfb278008da3639 *man/miic.write.network.cytoscape.Rd
d0a7e312ede8df92a1240855c6853240 *man/miic.write.style.cytoscape.Rd
6edc3db98f08bb12a0d416417e6d344e *man/ohno.Rd
7817c89c2c9177e6f5cbdeab9742ee56 *man/ohno_stateOrder.Rd
9dfda68c998b80c1539be1ea141f1c6f *src/Makevars
b0e0a870b963f94fb3d0e33504c91b65 *src/computeEnsInformation.cpp
fd6cb900f21d3eade279ebcd8e7dbba8 *src/computeEnsInformation.h
a6d7806d77368b2f2765c98c61c856e0 *src/computeInfo.cpp
b92099fde178adad20c48b3d1b826928 *src/computeInfo.h
9d3d5c5dd59800f6cc38a46581a875d1 *src/computeInfo_interface.cpp
5cce2bdc080024287e6f2dcadd9f9fdc *src/computeInfo_interface.h
e0825921467980d53160f447963cee2d *src/confidenceCut.cpp
254a0cc0d3a35cfc7d8751ec67021a78 *src/confidenceCut.h
ea8c2e36af98501e6c6a4d554efd2fe1 *src/crosscorrelation.cpp
6f386fbfa1601b35da8b8f9921c8a600 *src/memory.h
0d7b6ebdba5c16c9354e95da5b7466f1 *src/miic_init.c
53af7dedc80e724265ec834e5e6dff96 *src/orientationProbability.cpp
df81b15d577e2284c88218618f86c029 *src/orientationProbability.h
15774d652f8b40cbda603e776f757c16 *src/probaOrientation.cpp
bf4e298c2e147cfa6e17d239c9d3a381 *src/probaOrientation.h
2a3d543c6b5132ac56366927a7b38f68 *src/probaOrientation_interface.cpp
0631912416f071d2dff6da62f0c1fc77 *src/probaOrientation_interface.h
3c958989b52b7268b3b4e137251ac817 *src/skeleton.cpp
f10078f15d83c1321742cfad6aa75f6e *src/skeleton.h
b174ec1055fb1cdcc13742af1041f9e3 *src/skeletonInitialization.cpp
aad43dd4310bfbc15eca725c08f4db17 *src/skeletonInitialization.h
99e7531c85e5e01872e2c86f91b277bb *src/skeletonIteration.cpp
28a37bc6efab65d5b2d965a9b897f80f *src/skeletonIteration.h
73800446d485674606d7151c9f78fcd4 *src/structure.h
ffb184f3990522d7948fd3632d3c29e1 *src/utilities.cpp
21e78659556c299a5e80d952126585f9 *src/utilities.h
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Generated by roxygen2: do not edit by hand

export(miic)
export(miic.evaluate.effn)
export(miic.plot)
export(miic.write.network.cytoscape)
export(miic.write.style.cytoscape)
useDynLib(miic)
103 changes: 103 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#' Early blood development: single cell binary gene expression data
#'
#' Binarized expression data of 33 transcription factors involved
#' in early differentiation of primitive erythroid and endothelial
#' cells (3934 cells).
#'
#' @docType data
#' @name hematoData
#' @usage data(hematoData)
#' @format A data.frame object.
#'
#' @keywords datasets
#'
#' @references Moignard et al. (2015) Nat Biotechnol 33(3):269-76
#' (\href{https://www.ncbi.nlm.nih.gov/pubmed/25664528}{PubMed link})
#'
#' @keywords data
NULL



#' Genomic and ploidy alterations in breast tumors
#'
#' The dataset contains 807 samples without predisposing Brca1/2 germline mutations
#' and includes 204 somatic mutations (from whole exome sequencing) and expression
#' level information for 91 genes.
#'
#' @docType data
#' @name cosmicCancer
#' @usage data(cosmicCancer)
#'
#' @format A data.frame object.
#'
#' @keywords datasets
#'
#' @references Forbes SA, Beare D, Gunasekaran P, Leung K, Bindal N, et al. (2015)
#' Nucleic Acids Res 43:D805–D811. (\href{https://www.ncbi.nlm.nih.gov/pubmed/25355519}{PubMed link})
#'
#' @keywords data
NULL


#' Genomic and ploidy alterations in breast tumors
#'
#' The dataset contains 807 samples without predisposing Brca1/2 germline mutations
#' and includes 204 somatic mutations (from whole exome sequencing) and expression
#' level information for 91 genes, category order file.
#'
#' @docType data
#' @name cosmicCancer_stateOrder
#' @usage data(cosmicCancer_stateOrder)
#'
#' @format A data.frame object.
#'
#' @keywords datasets
#'
#' @references Forbes SA, Beare D, Gunasekaran P, Leung K, Bindal N, et al. (2015)
#' Nucleic Acids Res 43:D805–D811. (\href{https://www.ncbi.nlm.nih.gov/pubmed/25355519}{PubMed link})
#'
#' @keywords data
NULL





#' Tetraploidization in vertebrate evolution
#'
#' 20,415 protein-coding genes in the human genome from Ensembl (v70) and information on the
#' retention of duplicates originating either from the two whole genome duplications at
#' the onset of vertebrates (‘ohnolog’) or from subsequent small scale duplications (‘SSD’)
#' as well as copy number variants (‘CNV’).
#'
#' @docType data
#' @name ohno
#' @usage data(ohno)
#'
#' @format A data.frame object.
#'
#' @keywords datasets
#'
#' @references Verny et al., PLoS Comp. Bio. 2017.
#'
#' @keywords data
NULL


#' Tetraploidization in vertebrate evolution
#'
#' 20,415 protein-coding genes in the human genome from Ensembl (v70) and information on the
#' retention of duplicates originating either from the two whole genome duplications at
#' the onset of vertebrates (‘ohnolog’) or from subsequent small scale duplications (‘SSD’)
#' as well as copy number variants (‘CNV’), category order.
#'
#' @docType data
#' @usage data(ohno_stateOrder)
#' @format A data.frame object.
#' @keywords datasets
#' @name ohno_stateOrder
#' @references Verny et al., PLoS Comp. Bio. 2017.
#'
#' @keywords data
NULL
41 changes: 41 additions & 0 deletions R/evaluate_efn.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Evaluate the effective number of samples
#'
#' @description This function evaluates the effective number of samples in a
#' dataset.
#'
#' @param inputData [a data frame]
#' A data frame that contains the observational data. Each
#' column corresponds to one variable and each row is a sample that gives the
#' values for all the observed variables. The column names correspond to the
#' names of the observed variables. Data must be discrete-like.
#' @param plot [a boolean value] Whether the autocorrelation plot has to be
#' drawn. It is drawn only if all values of the correlation vector are
#' positive.
#' @return A list containing the autocorrelation decay, the effective number
#' of samples, and the result of an exponentiality test with alpha = 0.05
#' (element \code{exponential_decay}).
#' @export
#' @useDynLib miic
miic.evaluate.effn <- function(inputData = NULL, plot = TRUE) {
  #### Check the input arguments
  if (is.null(inputData)) {
    stop("The input data file is required")
  }
  # Fail with an explicit message instead of continuing with `res` undefined.
  if (!requireNamespace("Rcpp", quietly = TRUE)) {
    stop("Package 'Rcpp' is required to evaluate the effective number of samples")
  }

  # The native routine receives the column names followed by the data values
  # flattened row by row, all as character strings.
  inData <- c(
    colnames(inputData),
    as.vector(as.character(t(as.matrix(inputData))))
  )
  res <- .Call(
    "evaluateEffn", inData, ncol(inputData), nrow(inputData),
    PACKAGE = "miic"
  )

  # The exponential fit (and the log-scale plot) are attempted only when every
  # correlation value is strictly positive; an NA counts as "not positive".
  correlation <- res$correlation
  all_positive <- !anyNA(correlation) && all(correlation > 0)

  if (all_positive) {
    fit1 <- MASS::fitdistr(correlation, "exponential")
    pval <- stats::ks.test(correlation, "pexp", fit1$estimate)$p.value
    # The exponential hypothesis is rejected at the 5% level.
    res$exponential_decay <- pval >= 0.05

    if (plot) {
      graphics::plot(
        correlation,
        type = "l", log = "y",
        ylab = "Autocorrelation with lag", xlab = "n"
      )
      graphics::title("Autocorrelation between n distant samples")
    }
  } else {
    res$exponential_decay <- FALSE
  }
  res
}
149 changes: 149 additions & 0 deletions R/gmPlot.lib.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Prepare an edge summary for plotting: reset its row names and drop the rows
# whose "type" is one of "TN", "N" or "FN".
#
# mySummary: edge summary table with a "type" column.
# Returns the filtered summary.
plot.loadSummary <- function(mySummary) {
  # Discard any existing row names
  rownames(mySummary) <- NULL

  # Edge types that are not drawn
  ignored_types <- c("TN", "N", "FN")
  is_ignored <- mySummary[, "type"] %in% ignored_types

  # Only subset when there is something to remove
  if (any(is_ignored)) {
    mySummary <- mySummary[!is_ignored, ]
  }

  mySummary
}

# Build an igraph object with the default plotting attributes.
#
# mySummary:  edge summary table; columns "x" and "y" hold the names of the
#             two end nodes of each edge.
# myAllGenes: vector of all node names; its order defines the vertex ids.
#
# Returns a directed igraph graph with one vertex per entry of myAllGenes and
# one edge per row of mySummary. Every edge starts with arrow.mode 0.
# Stops with an explicit error if an edge refers to a name that is not in
# myAllGenes.
plot.createDefaultGraph <- function(mySummary, myAllGenes) {
  #### Replace names by numbers to create the graph
  node_x <- as.character(mySummary[, "x"])
  node_y <- as.character(mySummary[, "y"])
  idx_x <- match(node_x, myAllGenes)
  idx_y <- match(node_y, myAllGenes)

  unknown_nodes <- unique(c(node_x[is.na(idx_x)], node_y[is.na(idx_y)]))
  if (length(unknown_nodes) > 0) {
    stop(
      "Edge endpoints not found in the node list: ",
      paste(unknown_nodes, collapse = ", "),
      call. = FALSE
    )
  }

  # Interleave as x1, y1, x2, y2, ... which is the edge-vector layout
  edge_vector <- as.vector(rbind(idx_x, idx_y))

  #### Create a directed igraph with all the nodes
  inf.graph <- igraph::make_graph(
    edge_vector,
    n = length(myAllGenes), directed = TRUE
  )

  #### Set the vertices options
  igraph::V(inf.graph)$label <- myAllGenes
  igraph::V(inf.graph)$shape <- "circle"
  igraph::V(inf.graph)$color <- "lightblue"
  igraph::V(inf.graph)$label.family <- "Helvetica"
  igraph::V(inf.graph)$label.cex <- 0.6
  igraph::V(inf.graph)$size <- 10

  #### Set the general edges options
  igraph::E(inf.graph)$arrow.size <- 0.5
  igraph::E(inf.graph)$arrow.width <- 3
  igraph::E(inf.graph)$width <- 3
  igraph::E(inf.graph)$curved <- FALSE
  igraph::E(inf.graph)$color <- "red2"
  igraph::E(inf.graph)$lty <- "solid"
  igraph::E(inf.graph)$arrow.mode <- 0
  inf.graph
}

# Set the arrow.mode edge attribute of a graph from the edge summary.
#
# mySummary: edge summary table with columns "infOrt", "type" and "trueOrt";
#            row i is applied to edge i of inf.graph.
# inf.graph: igraph graph whose edges are to be oriented.
#
# Edges are processed in three passes (forward, backward, bidirectional); a
# later pass overrides an earlier one for the same edge.
# Returns the modified graph.
plot.setOrientation <- function(mySummary, inf.graph) {
  inferred_ort <- mySummary[, "infOrt"]
  is_false_negative <- mySummary[, "type"] == "FN"
  true_ort <- mySummary[, "trueOrt"]

  #### Forward oriented: inferred 2 or 4, or FN edge whose true orientation is 2
  forward_edges <- which(
    (inferred_ort %in% c(2, 4)) | (is_false_negative & true_ort == 2)
  )
  if (length(forward_edges) > 0) {
    igraph::E(inf.graph)[forward_edges]$arrow.mode <- 2
  }

  #### Backward oriented: inferred -2 or -4, or FN edge whose true orientation is -2
  backward_edges <- which(
    (inferred_ort %in% c(-2, -4)) | (is_false_negative & true_ort == (-2))
  )
  if (length(backward_edges) > 0) {
    igraph::E(inf.graph)[backward_edges]$arrow.mode <- 1
  }

  #### Bidirectional: inferred 6
  bidirectional_edges <- which(inferred_ort == 6)
  if (length(bidirectional_edges) > 0) {
    igraph::E(inf.graph)[bidirectional_edges]$arrow.mode <- 3
  }

  inf.graph
}

# Tell whether edge1 equals edge2 taken in reverse order. Both edges are
# collapsed to comma-separated strings before being compared.
littlefunc <- function(edge1, edge2) {
  forward_key <- paste(edge1, collapse = ",")
  reversed_key <- paste(rev(edge2), collapse = ",")
  forward_key == reversed_key
}

# ---- Edge colors for graphs whose edges are colored by partial correlation
#
# summary:  edge summary table with columns "sign" and "partial_correlation".
# features: not used by this function; kept for interface compatibility.
#
# Returns a character vector with one color per row of summary:
#   * sign "+"            -> a color from the red gradient,
#   * any other sign      -> a color from the blue gradient,
#   * NA sign or NA value -> "grey88".
# The position in the 100-step gradient is round(|partial_correlation| * 100),
# clamped to [1, 100]. An empty summary yields an empty vector.
pCor.edgeCol <- function(summary, features) {
  # Define the color gradients
  blue_gradient <- grDevices::rainbow(100, start = 3 / 6, end = 4 / 6)
  red_gradient <- rev(grDevices::rainbow(100, start = 0, end = 0.16))

  edge_sign <- as.character(summary[, "sign"])
  pcor <- as.numeric(summary[, "partial_correlation"])

  # Gradient index of every edge; stays NA where the partial correlation is NA
  col_index <- round(abs(pcor) * 100)
  col_index <- pmin(pmax(col_index, 1), 100)

  # Edges without a usable sign or value keep the neutral color
  edge_colors <- rep("grey88", length(edge_sign))
  usable <- !is.na(edge_sign) & !is.na(col_index)
  is_positive <- usable & edge_sign == "+"
  is_negative <- usable & !is_positive

  edge_colors[is_positive] <- red_gradient[col_index[is_positive]]
  edge_colors[is_negative] <- blue_gradient[col_index[is_negative]]

  edge_colors
}

# ---- Edge colors for graphs whose edges are colored by confidence
#
# summary:  edge summary table with columns "sign" and "log_confidence".
# features: not used by this function; kept for interface compatibility.
#
# Returns a character vector with one color per row of summary:
#   * sign "+" or NA sign -> a color from the red gradient,
#   * any other sign      -> a color from the blue gradient,
#   * NA log_confidence   -> "grey88".
# The position in the 100-step gradient is round(log_confidence), clamped to
# [1, 100]. An empty summary yields an empty vector.
conf.edgeCol <- function(summary, features) {
  # Define the color gradients
  blue_gradient <- grDevices::rainbow(100, start = 3 / 6, end = 4 / 6)
  red_gradient <- rev(grDevices::rainbow(100, start = 0, end = 0.16))
  max_conf <- 100 # highest gradient index
  min_conf <- 1 # lowest gradient index

  edge_sign <- as.character(summary[, "sign"])
  log_conf <- as.numeric(summary[, "log_confidence"])

  # Gradient index of every edge; stays NA where the confidence is NA
  col_index <- round(log_conf)
  col_index <- pmin(pmax(col_index, min_conf), max_conf)

  # Red is the default (it is also used when the sign is NA); edges with a
  # known sign other than "+" switch to blue
  edge_colors <- red_gradient[col_index]
  use_blue <- !is.na(edge_sign) & edge_sign != "+"
  edge_colors[use_blue] <- blue_gradient[col_index[use_blue]]

  # Edges without a confidence value get the neutral color
  edge_colors[is.na(col_index)] <- "grey88"

  edge_colors
}

# ---- Build a graph object from a summary, edge colors and optional node styling
#
# summary:    edge summary table, passed to plot.createDefaultGraph and
#             plot.setOrientation.
# features:   node names, passed to plot.createDefaultGraph.
# edgeColors: value assigned to the edge "color" attribute.
# nodeSizes:  value assigned to the vertex "size" attribute (default 10).
# nodeColors: value assigned to the vertex "color" attribute
#             (default "lightblue").
#
# Returns the styled and oriented graph.
modif.Graph <- function(summary, features, edgeColors, nodeSizes = 10, nodeColors = "lightblue") {
  styled_graph <- plot.createDefaultGraph(summary, features)

  # Override the default edge and vertex appearance
  igraph::E(styled_graph)$color <- edgeColors
  igraph::E(styled_graph)$arrow.size <- 0.2
  igraph::V(styled_graph)$color <- nodeColors

  # Orient the edges, then apply the requested node sizes
  styled_graph <- plot.setOrientation(summary, styled_graph)
  igraph::V(styled_graph)$size <- nodeSizes

  styled_graph
}

0 comments on commit f6b8ec3

Please sign in to comment.