Skip to content

Commit

Permalink
added full cluster init
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinFXP committed Jan 28, 2019
1 parent 65dbe2d commit a5b1af7
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 73 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mnem
Type: Package
Title: Mixture Nested Effects Models
Version: 0.99.42
Version: 0.99.43
Author: Martin Pirkl
Maintainer: Martin Pirkl <martin.pirkl@bsse.ethz.ch>
Description: Mixture Nested Effects Models (mnem) is an extension of Nested Effects Models and allows for the analysis of single cell perturbation data provided by methods like Perturb-Seq (Dixit et al., 2016) or Crop-Seq (Datlinger et al., 2017). In those experiments each of many cells is perturbed by a knock-down of a specific gene, i.e. several cells are perturbed by a knock-down of gene A, several by a knock-down of gene B, ... and so forth. The observed read-out has to be multi-trait and in the case of the Perturb-/Crop-Seq gene are expression profiles for each cell. mnem uses a mixture model to simultaneously cluster the cell population into k clusters and and infer k networks causally linking the perturbed genes for each cluster. The mixture components are inferred via an expectation maximization algorithm.
Expand Down
147 changes: 78 additions & 69 deletions R/mnems.r
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,24 @@ fitacc <- function(x, y, strict = FALSE, unique = TRUE,
score <- 0
while(length(x) > 0 & length(y) > 0) {
couple <- numeric(2)
best <- 0
best <- -Inf
for (i in seq_len(length(x))) {
for (j in seq_len(length(y))) {
A <- mytc(x[[i]]$phi)
B <- mytc(y[[j]])
tmp <- (n*(n-1) - sum(abs(A - B)))/(n*(n-1))
comp <- tmp > best
comp <- tmp >= best
comp2 <- TRUE
if (type %in% "sens") {
tp <- sum(A == 1 & B == 1)
fn <- sum(A == 0 & B == 1)
tmp <- tp/(tp+fn)
comp2 <- tmp > best
comp2 <- tmp >= best
} else if (type %in% "spec") {
tn <- sum(A == 0 & B == 0)
fp <- sum(A == 1 & B == 0)
tmp <- tn/(tn+fp)
comp2 <- tmp > best
comp2 <- tmp >= best
}
if (comp & comp2) {
couple <- c(i,j)
Expand Down Expand Up @@ -114,7 +114,7 @@ fitacc <- function(x, y, strict = FALSE, unique = TRUE,
} else {
xmax <- numeric(xn)
ymax <- numeric(yn)
best <- 0
best <- -Inf
for (i in seq_len(xn)) {
for (j in seq_len(yn)) {
A <- mytc(x[[i]]$phi)
Expand Down Expand Up @@ -200,7 +200,7 @@ plotConvergence <- function(x, col = NULL, type = "b",
" (",
round(maxllcount/runs*100, 2),
"%) run(s) with maximum log odds at ",
convergence, " accuracy\n",
convergence, " distance\n",
minlen, " (", round(minlen/runs*100, 2),
"%) run(s) started in local optimum"),
...)
Expand Down Expand Up @@ -954,7 +954,8 @@ mnemk <- function(D, ks = seq_len(5), man = FALSE, degree = 4, logtype = 2,
#' "cluster" (each S-gene is clustered and the different S-gene clustered
#' differently combined for several starts),
#' "cluster2" (clustNEM is used to infer reasonable phis, which are then
#' used as a start for one EM run) or "networks" (initialize with random phis)
#' used as a start for one EM run), "cluster3" (global clustering as a start),
#' or "networks" (initialize with random phis)
#' @param p initial probabilities as a k (components) times l (cells) matrix
#' @param k number of components
#' @param kmax maximum number of components when k=NULL is inferred
Expand Down Expand Up @@ -1023,7 +1024,7 @@ mnem <- function(D, inference = "em", search = "greedy", phi = NULL,
parallel = NULL, reduce = FALSE, runs = 1, starts = 3,
type = "random",
p = NULL, k = NULL, kmax = 10, verbose = FALSE,
max_iter = 100, parallel2 = NULL, converged = 10^-1,
max_iter = 100, parallel2 = NULL, converged = -Inf,
redSpace = NULL, affinity = 0, evolution = FALSE, lambda = 1,
subtopoX = NULL, ratio = TRUE, logtype = 2,
domean = TRUE, modulesize = 5, compress = FALSE,
Expand Down Expand Up @@ -1081,9 +1082,6 @@ mnem <- function(D, inference = "em", search = "greedy", phi = NULL,
if (is.null(mw)) {
mw <- rep(1, k)/k
}
if (is.null(subtopoX)) {
subtopoX <- estimateSubtopo(data)
}
if (!is.null(k)) {
if (type %in% "networks" & is.null(phi)) {
meanet <- mynem(D = data, search = search, start = phi,
Expand All @@ -1102,14 +1100,25 @@ mnem <- function(D, inference = "em", search = "greedy", phi = NULL,
if (type %in% "cluster") {
probscl <- initps(data, ks, k, starts = starts)
init <- NULL
} else if (type %in% "cluster2") {
init <- clustNEM(data, k = k, starts = starts, search = search,
method = method, parallel = parallel,
} else if (type %in% c("cluster2", "cluster3")) {
nem <- TRUE
if (type %in% "cluster3") { nem <- FALSE }
init <- clustNEM(data, nem = nem, k = k, starts = starts,
search = search, method = method,
parallel = parallel,
runs = runs, verbose = verbose, ratio = ratio,
domean = domean, modulesize = modulesize,
Rho = Rho, logtype = logtype, modified = TRUE)
mw <- init$mw
init <- list(A = unlist(init$comp, recursive = FALSE))
if (type %in% "cluster2") {
mw <- init$mw
init <- list(A = unlist(init$comp, recursive = FALSE))
} else {
p <- matrix(0, k, ncol(data))
for (i in seq_len(k)) {
p[i, which(init$cluster == i)] <- 1
}
init <- NULL
}
starts <- 1
} else {
probscl <- 0
Expand Down Expand Up @@ -1151,22 +1160,17 @@ mnem <- function(D, inference = "em", search = "greedy", phi = NULL,
res <- list()
res[[1]] <- list()
res[[1]]$adj <- limits[[1]]$res[[1]]$adj
res[[1]]$subtopo <- limits[[1]]$res[[1]]$subtopo
n <- ncol(res[[1]]$adj)
mw <- 1
probs0 <- list()
probs0$probs <- matrix(-Inf, k, ncol(data))
probs <- matrix(0, k, ncol(data))
probs <- getProbs(probs, k, data, res, method, n, affinity,
converged, subtopoX, ratio, mw = mw, fpfn = fpfn,
Rho = Rho)
if (getLL(probs$probs, logtype = logtype, mw = mw, data = data) >
getLL(probs0$probs, logtype = logtype, mw = mw, data = data)) {
probs0 <- probs
}
subtopoX <- probs0$subtopoX
probs <- probs0$probs
limits[[1]]$ll <- getLL(probs, logtype = logtype, mw = mw, data = data)
limits[[1]]$probs <- probs
subtopoX <- probs$subtopoX
limits[[1]]$ll <- probs$ll
limits[[1]]$probs <- probs$probs
limits[[1]]$mw <- 1
} else {
if (inference %in% "em") {
if (!is.null(parallel)) {
Expand Down Expand Up @@ -1232,8 +1236,6 @@ mnem <- function(D, inference = "em", search = "greedy", phi = NULL,
}
} else {
k <- nrow(p)
pdata <- data
p <- p*t(t(p)*apply(abs(data), 2, sum))
probs <- log(p)/log(logtype)
}
}
Expand Down Expand Up @@ -2061,6 +2063,7 @@ Mixture weight: ", round(x$mw[i], 3)*100, "%", sep = "")
#' @param cluster given clustering has to correspond to the columns of data
#' @param starts number of random starts for the kmeans algorithm
#' @param logtype logarithm type of the data
#' @param nem if FALSE only clusters the data
#' @param getprobspars list of parameters for the getProbs function
#' @param getaffinitypars list of parameters for the getAffinity function
#' @param ... additional arguments for standard nem function
Expand All @@ -2080,7 +2083,8 @@ Mixture weight: ", round(x$mw[i], 3)*100, "%", sep = "")
#' data <- data + rnorm(length(data), 0, 1)
#' resulst <- clustNEM(data, k = 2:3)
clustNEM <- function(data, k = 2:10, cluster = NULL, starts = 1, logtype = 2,
getprobspars = list(), getaffinitypars = list(), ...) {
nem = TRUE, getprobspars = list(),
getaffinitypars = list(), ...) {
data <- modData(data)
if (is.null(cluster)) {
smax <- 0
Expand All @@ -2105,48 +2109,53 @@ clustNEM <- function(data, k = 2:10, cluster = NULL, starts = 1, logtype = 2,
K <- max(cluster)
}
res <- list()
for (i in seq_len(K)) {
if (sum(Kres$cluster == i) > 1 &
length(unique(colnames(data[, which(Kres$cluster == i)]))) > 1) {
res[[i]] <- mynem(data[, which(Kres$cluster == i)], ...)
} else {
res[[i]] <- list()
res[[i]]$adj <- matrix(1, 1, 1)
rownames(res[[i]]$adj) <- colnames(res[[i]]$adj) <-
unique(colnames(data[, which(Kres$cluster == i), drop = FALSE]))
}
}
res$comp <- list()
res$mw <- numeric(K)
Sgenes <- getSgeneN(data)
for (i in seq_len(K)) {
res$comp[[i]] <- list()
tmp <- res[[i]]$adj
if (nrow(tmp) < Sgenes) {
smiss <- unique(colnames(data)[-which(colnames(data)
%in% colnames(tmp))])
tmp <-
rbind(cbind(tmp, matrix(0, nrow = nrow(tmp),
ncol = length(smiss))),
matrix(0, nrow = length(smiss),
ncol =ncol(tmp) + length(smiss)))
colnames(tmp)[(dim(res[[i]]$adj)[1]+1):nrow(tmp)] <-
rownames(tmp)[(dim(res[[i]]$adj)[1]+1):nrow(tmp)] <- smiss
tmp <- tmp[order(rownames(tmp)), order(colnames(tmp))]
}
res$comp[[i]]$phi <- tmp
res$mw[i] <- sum(Kres$cluster == i)/ncol(data)
res[[i]]$adj <- tmp
if (nem) {
for (i in seq_len(K)) {
if (sum(Kres$cluster == i) > 1 &
length(unique(
colnames(data[,which(Kres$cluster == i)]))) > 1) {
res[[i]] <- mynem(data[, which(Kres$cluster == i)], ...)
} else {
res[[i]] <- list()
res[[i]]$adj <- matrix(1, 1, 1)
rownames(res[[i]]$adj) <- colnames(res[[i]]$adj) <-
unique(colnames(data[, which(Kres$cluster == i),
drop = FALSE]))
}
}
res$comp <- list()
res$mw <- numeric(K)
Sgenes <- getSgeneN(data)
for (i in seq_len(K)) {
res$comp[[i]] <- list()
tmp <- res[[i]]$adj
if (nrow(tmp) < Sgenes) {
smiss <- unique(colnames(data)[-which(colnames(data)
%in% colnames(tmp))])
tmp <-
rbind(cbind(tmp, matrix(0, nrow = nrow(tmp),
ncol = length(smiss))),
matrix(0, nrow = length(smiss),
ncol =ncol(tmp) + length(smiss)))
colnames(tmp)[(dim(res[[i]]$adj)[1]+1):nrow(tmp)] <-
rownames(tmp)[(dim(res[[i]]$adj)[1]+1):nrow(tmp)] <- smiss
tmp <- tmp[order(rownames(tmp)), order(colnames(tmp))]
}
res$comp[[i]]$phi <- tmp
res$mw[i] <- sum(Kres$cluster == i)/ncol(data)
res[[i]]$adj <- tmp
}
n <- getSgeneN(data)
probs <- matrix(0, K, ncol(data))
probs <- do.call(getProbs, c(list(probs=probs, k=K, data=data,
res=res, n=n, mw=res$mw),
getprobspars))
colnames(probs$probs) <- colnames(data)
res$ll <- probs$ll
res$probs <- probs$probs
res$mw <- probs$mw
}
n <- getSgeneN(data)
probs <- matrix(0, K, ncol(data))
probs <- do.call(getProbs, c(list(probs=probs, k=K, data=data,
res=res, n=n, mw=res$mw), getprobspars))
colnames(probs$probs) <- colnames(data)
res$ll <- probs$ll
res$probs <- probs$probs
res$cluster <- Kres$cluster
res$mw <- probs$mw
return(res)
}
#' Simulate data.
Expand Down
4 changes: 3 additions & 1 deletion man/clustNEM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/mnem.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a5b1af7

Please sign in to comment.