From 0a73c1d7d4af10a286357569bc28fc9b8b803217 Mon Sep 17 00:00:00 2001
From: MartinFXP <martin.pirkl@bsse.ethz.ch>
Date: Fri, 31 Aug 2018 16:32:13 +0200
Subject: [PATCH] added discrete data mode; no bump yet

---
 NAMESPACE          |   1 +
 R/mnems_low.r      | 153 +++++++++++++++++++++++++++++++++++----------
 vignettes/mnem.Rmd |   4 +-
 3 files changed, 122 insertions(+), 36 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 7847464..e3a97cb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -21,6 +21,7 @@ import(snowfall)
 import(stats)
 import(stats4)
 import(tsne)
+importFrom(cluster,silhouette)
 importFrom(flexclust,dist2)
 importFrom(graphics,layout)
 importFrom(graphics,par)
diff --git a/R/mnems_low.r b/R/mnems_low.r
index 7beb098..106adb0 100644
--- a/R/mnems_low.r
+++ b/R/mnems_low.r
@@ -99,23 +99,6 @@ calcEvopen <- function(res) {
     return(evopen)
 }
 #' @noRd
-#' @importFrom flexclust dist2
-llrScore <- function(data, adj, weights = NULL, ratio = TRUE) {
-    if (is.null(weights)) {
-        weights <- rep(1, ncol(data))
-    }
-    if (ratio) {
-        score <- data%*%(adj*weights)
-    } else {
-        if (max(data) == 1) {
-            score <- -dist2(data, t(adj)*weights)
-        } else {
-            score <- -dist2(data, t((adj*mean(data))*weights))
-        }
-    }
-    return(score)
-}
-#' @noRd
 modAdj <- function(adj, D) {
     Sgenes <- naturalsort(unique(colnames(D)))
     SgeneN <- getSgeneN(D)
@@ -229,6 +212,7 @@ modData <- function(D) {
     return(D)
 }
 #' @noRd
+#' @importFrom cluster silhouette
 learnk <- function(data, kmax = 10, verbose = FALSE) {
     ks <- numeric(length(unique(colnames(data))))
     lab <- list()
@@ -400,7 +384,8 @@ annotAdj <- function(adj, data) {
 nemEst <- function(data, maxiter = 100, start = "null",
                    sumf = mean, alpha = 1, cut = 0,
                    kernel = "cosim", monoton = FALSE,
-                   useCut = TRUE, useF = FALSE, ...) {
+                   useCut = TRUE, useF = FALSE, method = "llr",
+                   weights = rep(1, ncol(data)), ...) {
     if (sum(duplicated(colnames(data)) == TRUE) > 0) {
         data2 <- data[, -which(duplicated(colnames(data)) == TRUE)]
         for (j in unique(colnames(data))) {
@@ -470,14 +455,33 @@ nemEst <- function(data, maxiter = 100, start = "null",
     stop <- FALSE
     while(!stop & iter < maxiter) {
         iter <- iter + 1
-        P <- R%*%cbind(phi, 0)
+        if (method %in% "llr") {
+            P <- t(t(R)*weights)%*%cbind(phi, 0)
+        }
+        if (method %in% "disc") {
+            lltype <- "max"
+            if (lltype %in% "max") { llfun <- max }
+            if (lltype %in% "marg") { llfun <- sum }
+            fp <- fn <- 0.1
+            P <- ((1-fn)^(t(t(R)*weights)%*%phi))*
+                (fn^((1-t(t(R)*weights))%*%phi))*
+                (fp^(t(t(R)*weights)%*%(1-phi)))*
+                ((1-fp)^((1-t(t(R)*weights))%*%(1-phi)))                                     
+            P <- cbind(P, 0.5^nrow(R))
+        }
         P[, grep("_", colnames(phi))] <- min(P)
         subtopo <- as.numeric(gsub(
             ncol(phi)+1, 0, apply(P, 1,function(x) return(which.max(x)))))
         theta <- t(R)*0
         theta[cbind(subtopo, seq_len(ncol(theta)))] <- 1
         Oold <- O
-        ll <- sum(diag(theta%*%P))
+        if (method %in% "llr") {
+            ll <- theta%*%P
+            ll <- sum(diag(ll))
+        }
+        if (method %in% "disc") {
+            ll <- sum(log(apply(P, 1, llfun)))
+        }
         if (ll %in% lls | all(phi == phibest)) {
             stop <- TRUE
         }
@@ -497,15 +501,37 @@ nemEst <- function(data, maxiter = 100, start = "null",
         nozeros <- nozeros[which(nozeros[, 1] %in% nogenes), ]
         theta[nozeros] <- 1
         theta[grep("_", colnames(phi)), ] <- 0
-        if (useF) {
-            O <- t(R)%*%t(phi%*%theta)
-        } else {
-            O <- t(R)%*%t(theta)
+        if (method %in% "llr") {
+            if (useF) {
+                O <- (t(R)*weights)%*%t(phi%*%theta)
+            } else {
+                O <- (t(R)*weights)%*%t(theta)
+            }
+        }
+        if (method %in% "disc") {
+            if (useF) {
+                O <- (log(1-fn)*((t(R)*weights)%*%t(phi%*%theta)))+
+                    (log(fn)*((1-(t(R)*weights))%*%t(phi%*%theta)))+
+                    (log(fp)*((t(R)*weights)%*%t(1-phi%*%theta)))+
+                    (log(1-fp)*((1-(t(R)*weights))%*%t(1-phi%*%theta)))
+            } else {
+                O <- (log(1-fn)*((t(R)*weights)%*%t(theta)))+
+                    (log(fn)*((1-(t(R)*weights))%*%t(theta)))+
+                    (log(fp)*((t(R)*weights)%*%t(1-theta)))+
+                    (log(1-fp)*((1-(t(R)*weights))%*%t(1-theta)))
+            }
         }
         if (useCut) {
-            cutoff <- cut*max(abs(O))
-            phi[which(O > cutoff & E == 1)] <- 1
-            phi[which(O <= cutoff | E == 0)] <- 0
+            if (method %in% "llr") {
+                cutoff <- cut*max(abs(O))
+                phi[which(O > cutoff & E == 1)] <- 1
+                phi[which(O <= cutoff | E == 0)] <- 0
+            }
+            if (method %in% "disc") {
+                cutoff <- mean(O[which(E == 1)]) # cut*max(abs(O))
+                phi[which(O > cutoff & E == 1)] <- 1
+                phi[which(O <= cutoff | E == 0)] <- 0
+            }
         } else {
             O <- O*E
             supertopo <- as.numeric(gsub(
@@ -516,13 +542,28 @@ nemEst <- function(data, maxiter = 100, start = "null",
         }
     }
     phibest <- transitive.closure(phibest, mat = TRUE)
-    P <- R%*%cbind(phibest, 0)
+    if (method %in% "llr") {
+        P <- t(t(R)*weights)%*%cbind(phibest, 0)
+    }
+    if (method %in% "disc") {
+        P <- ((1-fn)^(t(t(R)*weights)%*%phibest))*
+            (fn^((1-t(t(R)*weights))%*%phibest))*
+            (fp^(t(t(R)*weights)%*%(1-phibest)))*
+            ((1-fp)^((1-t(t(R)*weights))%*%(1-phibest)))                                     
+        P <- cbind(P, 0.5^ncol(R))
+    }
     P[, grep("_", colnames(phibest))] <- min(P)
     subtopo <- as.numeric(gsub(
         ncol(phibest)+1, 0, apply(P, 1, function(x) return(which.max(x)))))
     thetabest <- t(R)*0
     thetabest[cbind(subtopo, seq_len(ncol(thetabest)))] <- 1
-    llbest <- sum(diag(thetabest%*%P))
+    if (method %in% "llr") {
+        llbest <- thetabest%*%P
+        llbest <- sum(diag(llbest))
+    }
+    if (method %in% "disc") {
+        llbest <- sum(log(apply(P, 1, llfun)))
+    }
     nem <- list(phi = phibest, theta = thetabest, iter = iter,
                 ll = llbest, lls = lls, num = numbest, C = Cz,
                 O = Obest, E = E0)
@@ -649,6 +690,7 @@ mynem <- function(D, search = "greedy", start = NULL, method = "llr",
     get.deletions <- getFromNamespace("get.deletions", "nem")
     get.insertions <- getFromNamespace("get.insertions", "nem")
     get.reversions <- getFromNamespace("get.reversions", "nem")
+    if (method %in% "disc") { domean = FALSE }
     if ("modules" %in% search) {
         if (length(search) > 1) {
             search <- search[-which(search %in% "modules")]
@@ -716,7 +758,7 @@ mynem <- function(D, search = "greedy", start = NULL, method = "llr",
     if (search %in% "greedy") {
         for (iter in seq_len(runs)) {
             if (iter > 1) {
-                better <-matrix(sample(c(0,1),nrow(better)*ncol(better),
+                better <- matrix(sample(c(0,1),nrow(better)*ncol(better),
                                        replace = TRUE),
                                 nrow(better),
                                 ncol(better))
@@ -817,7 +859,7 @@ mynem <- function(D, search = "greedy", start = NULL, method = "llr",
         } else {
             Dw <- D
         }
-        tmp <- nemEst(Dw, start = start2, ...)
+        tmp <- nemEst(Dw, start = start2, method = method, ...)
         better <- tmp$phi
         oldscore <- tmp$ll
         allscores <- tmp$lls
@@ -897,25 +939,68 @@ graph2adj <- function(gR) {
     return(adj.matrix)
 }
 #' @noRd
+#' @importFrom flexclust dist2
+llrScore <- function(data, adj, weights = NULL, ratio = TRUE) {
+    if (is.null(weights)) {
+        weights <- rep(1, ncol(data))
+    }
+    if (ratio) {
+        score <- data%*%(adj*weights)
+    } else {
+        if (max(data) == 1) {
+            score <- -dist2(data, t(adj)*weights)
+        } else {
+            score <- -dist2(data, t((adj*mean(data))*weights))
+        }
+    }
+    return(score)
+}
+#' @noRd
+discScore <- function(data, adj, weights = NULL) {
+    fp <- fn <- 0.1
+    tp <- tn <- 0.9
+    if (is.null(weights)) {
+        weights <- rep(1, ncol(data))
+        score <- (tp^(data%*%adj))*
+            (fn^((1-data)%*%adj))*
+            (fp^(data%*%(1-adj)))*
+            (tn^((1-data)%*%(1-adj)))
+    } else {
+        fp <- t(matrix(fp/(weights - 2*fp*(1-weights)), ncol(data), nrow(data)))
+        fn <- t(matrix(fn/(weights - 2*fn*(1-weights)), ncol(data), nrow(data)))
+        tp <- t(matrix(tp/(weights - 2*tp*(1-weights)), ncol(data), nrow(data)))
+        tn <- t(matrix(tn/(weights - 2*tn*(1-weights)), ncol(data), nrow(data)))
+        score <- (tp^(data%*%adj))*
+            (fn^((1-data)%*%adj))*
+            (fp^(data%*%(1-adj)))*
+            (tn^((1-data)%*%(1-adj)))
+    }
+    score <- cbind(score, 0.5^nrow(data))
+    return(score)
+}
+#' @noRd
 scoreAdj <- function(D, adj, method = "llr", weights = NULL,
                      trans.close = TRUE, subtopo = NULL,
                      prior = NULL, ratio = TRUE) {
     adj <- transitive.closure(adj, mat = TRUE)
     adj1 <- cbind(adj[colnames(D), ], "0" = 0)
-    
     if (method %in% "llr") {
+        ll <- "max"
         score <- llrScore(D, adj1, weights = weights, ratio = ratio)
     }
+    if (method %in% "disc") {
+        ll <- "marg"
+        score <- discScore(D, adj, weights = weights)
+    }
     if (is.null(subtopo)) {
         subtopo <- apply(score, 1, which.max)
     }
     subweights <- score
-    ll <- "max"
     if (ll %in% "max") {
         score <- sum(score[cbind(seq_len(nrow(score)), subtopo)])
     }
     if (ll %in% "marg") {
-        score <- log(sum(apply(exp(score), 1, sum))/length(score))
+        score <- sum(log(apply(score, 1, sum)))
     }
     if (!is.null(prior)) {
         prior <- transitive.reduction(prior)
diff --git a/vignettes/mnem.Rmd b/vignettes/mnem.Rmd
index a3018a1..02c148c 100644
--- a/vignettes/mnem.Rmd
+++ b/vignettes/mnem.Rmd
@@ -625,7 +625,7 @@ Datlinger, P., Rendeiro, A., Schmidl, C., Krausgruber, T., Traxler, P.,
 Klughammer, J., Schuster, L. C., Kuchler, A., Alpar, D., and
 Bock, C. (2017).
 Pooled crispr screening with single-cell transcriptome readout.  
-Nature Methods, 14, 297 EP ???.
+Nature Methods, 14, 297-301.
 
 Dixit, A., Parnas, O., Li, B., Chen, J., Fulco, C. P., Jerby-Arnon, L.,
 Marjanovic, N. D., Dionne, D., Burks, T., Raychowdhury, R., Adamson,
@@ -633,7 +633,7 @@ B., Norman, T. M., Lander, E. S., Weissman, J. S., Friedman, N., and
 Regev, A. (2016).  
 Perturb-seq: Dissecting molecular circuits with scalable single-cell rna
 profiling of pooled genetic screens.  
-Cell, 167(7), 1853???1866.e17.
+Cell, 167(7), 1853-1866.e17.
 
 # Data Generation