version 0.0.8

cran · Mar 15, 2024 · 421859f · 421859f
commit 421859f
Show file tree

Hide file tree

Showing 16 changed files with 820 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,25 @@
+Package: cpmr
+Title: Connectome Predictive Modelling in R
+Version: 0.0.8
+Authors@R: 
+    person("Liang", "Zhang", , "psychelzh@outlook.com", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0001-9041-1150"))
+Description: Connectome Predictive Modelling (CPM) (Shen et al. (2017)
+    <doi:10.1038/nprot.2016.178>) is a method to predict individual
+    differences in behaviour from brain functional connectivity. 'cpmr'
+    provides a simple yet efficient implementation of this method.
+License: MIT + file LICENSE
+URL: https://github.com/psychelzh/cpmr,
+        https://psychelzh.github.io/cpmr/
+BugReports: https://github.com/psychelzh/cpmr/issues
+Imports: Rfast, stats
+Suggests: testthat (>= 3.0.0), withr
+Config/testthat/edition: 3
+Encoding: UTF-8
+RoxygenNote: 7.3.1
+NeedsCompilation: no
+Packaged: 2024-03-14 00:58:04 UTC; lenovo
+Author: Liang Zhang [aut, cre] (<https://orcid.org/0000-0001-9041-1150>)
+Maintainer: Liang Zhang <psychelzh@outlook.com>
+Repository: CRAN
+Date/Publication: 2024-03-14 20:20:05 UTC
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2024
+COPYRIGHT HOLDER: cpmr authors
diff --git a/MD5 b/MD5
@@ -0,0 +1,15 @@
+ee3550681e90446d0330308333e63309 *DESCRIPTION
+5bca3576e06ebe9ba7f96d7e134a615d *LICENSE
+d95c844b575a0c89f7c28238f169f44e *NAMESPACE
+21d1c68beb8504d38dcd847f15a36359 *NEWS.md
+b513d6ec75272ec02295b4ff9e914af8 *R/constants.R
+c93cbc040c31b9f09b006173bf38799a *R/cpm.R
+13f2bf36fd59da3c8c2a8db4814177b5 *R/print.cpm.R
+ac41285f8024074df29e3a65b53bbcb5 *R/utils.R
+4057520e0136995d940911c21fdb4f0d *R/zzz.R
+fa89fbd9a4d30ec4d7688fd63dbf0e20 *README.md
+af4c07196f2ef389e1dc7da18bac7b60 *man/cpm.Rd
+bf7c67f196b2969cdc5f2384bf4c7188 *man/figures/README-example-1.png
+103f6c11436dc1b484544dcacff0c1b5 *tests/testthat.R
+a9d9ca777733c29a29a6915ae3d24575 *tests/testthat/_snaps/cpm.md
+fcd91c763cda1ebe10dafea41f865b25 *tests/testthat/test-cpm.R
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,5 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(print,cpm)
+export(cpm)
+import(Rfast)
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,33 @@
+# cpmr 0.0.8
+
+* Added `return_edges` argument to optionally set how to return edges in the output.
+
+# cpmr 0.0.7
+
+* Convert back to older version of confounds treating.
+
+# cpmr 0.0.6
+
+* Ensure confounds regression are now only used in feature selection.
+
+# cpmr 0.0.5
+
+* Fixed confounds treatment. Now confounds are used in feature selection but not in model fitting.
+
+# cpmr 0.0.4
+
+* Ensure sparsity threshold method work as expect.
+* Some other improvements in code quality.
+
+# cpmr 0.0.3
+
+* Keep observation names in the output.
+* Check if observation names match between neural data and behavioral data.
+
+# cpmr 0.0.2
+
+* Added support for confounding variables.
+
+# cpmr 0.0.1
+
+* Initial commit to r-universe.
diff --git a/R/constants.R b/R/constants.R
@@ -0,0 +1,2 @@
+networks <- c("pos", "neg")
+includes <- c("both", networks)
diff --git a/R/cpm.R b/R/cpm.R
@@ -0,0 +1,235 @@
+#' Perform Connectome-based Predictive Modeling (CPM)
+#'
+#' The connectome-based predictive modeling (CPM) is a data-driven approach to
+#' predict individual behavior from brain connectivity data. Originally proposed
+#' by Shen et al. (2017), the CPM has been widely used in various studies. This
+#' function implements the CPM algorithm and provides a convenient interface to
+#' use it.
+#'
+#' @examples
+#' conmat <- matrix(rnorm(100 * 100), nrow = 100)
+#' behav <- rnorm(100)
+#' cpm(conmat, behav)
+#' # use different threshold method and level
+#' cpm(conmat, behav, thresh_method = "sparsity", thresh_level = 0.05)
+#' # use a 10-fold cross-validation
+#' cpm(conmat, behav, kfolds = 10)
+#' @param conmat A matrix of connectome data. Observations in row, edges in
+#'   column (assumed that duplicated edges are removed).
+#' @param behav A numeric vector contains behavior data. Length must equal to
+#'   number of observations in `conmat`.
+#' @param ... For future extension. Currently ignored.
+#' @param confounds A matrix of confounding variables. Observations in row,
+#'   variables in column. If `NULL`, no confounding variables are used.
+#' @param thresh_method,thresh_level The threshold method and level used in edge
+#'   selection. If method is set to be `"alpha"`, the edge selection is based on
+#'   the critical value of correlation coefficient. If method is set to be
+#'   `"sparsity"`, the edge selection is based on the quantile of correlation
+#'   coefficient, thus network sparsity is controlled.
+#' @param kfolds Folds number of cross-validation. If `NULL`, it will be set to
+#'   be equal to the number of observations, i.e., leave-one-subject-out.
+#' @param bias_correct Logical value indicating if the connectome data should be
+#'   bias-corrected. If `TRUE`, the connectome data will be centered and scaled
+#'   to have unit variance based on the training data before model fitting and
+#'   prediction. See Rapuano et al. (2020) for more details.
+#' @param return_edges A character string indicating the return value of the
+#'   selected edges. If `"none"`, no edges are returned. If `"sum"`, the sum of
+#'   selected edges across folds is returned. If `"all"`, the selected edges for
+#'   each fold is returned, which is a 3D array and memory-consuming.
+#' @return A list with the following components:
+#'
+#'   \item{folds}{The corresponding fold for each observation when used as test
+#'     group in cross-validation.}
+#'
+#'   \item{real}{The real behavior data. This is the same as the input `behav`
+#'     if `confounds` is `NULL`, otherwise it is the residual of `behav` after
+#'     regressing out `confounds`.}
+#'
+#'   \item{pred}{The predicted behavior data, with each column corresponding to
+#'     a model, i.e., both edges, positive edges, negative edges, and the row
+#'     names corresponding to the observation names (the same as those of
+#'     `behav`).}
+#'
+#'   \item{edges}{The selected edges, if `return_edges` is not `"none"`. If
+#'     `return_edges` is `"sum"`, it is a matrix with rows corresponding to
+#'     edges and columns corresponding to networks. If `return_edges` is
+#'     `"all"`, it is a 3D array with dimensions corresponding to folds, edges,
+#'     and networks.}
+#' @references
+#'
+#' Shen, X., Finn, E. S., Scheinost, D., Rosenberg, M. D., Chun, M. M.,
+#' Papademetris, X., & Constable, R. T. (2017). Using connectome-based
+#' predictive modeling to predict individual behavior from brain connectivity.
+#' Nature Protocols, 12(3), 506–518. https://doi.org/10.1038/nprot.2016.178
+#'
+#' Rapuano, K. M., Rosenberg, M. D., Maza, M. T., Dennis, N. J., Dorji, M.,
+#' Greene, A. S., Horien, C., Scheinost, D., Todd Constable, R., & Casey, B. J.
+#' (2020). Behavioral and brain signatures of substance use vulnerability in
+#' childhood. Developmental Cognitive Neuroscience, 46, 100878.
+#' https://doi.org/10.1016/j.dcn.2020.100878
+#' @export
+cpm <- function(conmat, behav, ...,
+                confounds = NULL,
+                thresh_method = c("alpha", "sparsity"),
+                thresh_level = 0.01,
+                kfolds = NULL,
+                bias_correct = TRUE,
+                return_edges = c("none", "sum", "all")) {
+  call <- match.call()
+  thresh_method <- match.arg(thresh_method)
+  return_edges <- match.arg(return_edges)
+  # check if rownames of conmat and names of behav match if both are not NULL
+  if (!is.null(rownames(conmat)) && !is.null(names(behav))) {
+    # nocov start
+    stopifnot(
+      "Row names of `conmat` and names of `behav` do not match." =
+        identical(rownames(conmat), names(behav))
+    )
+    # nocov end
+  }
+  if (!is.null(confounds)) {
+    # nocov start
+    if (!is.null(rownames(confounds))) {
+      if (!is.null(rownames(conmat))) {
+        stopifnot(
+          "Row names of `conmat` and names of `confounds` do not match." =
+            identical(rownames(conmat), rownames(confounds))
+        )
+      }
+      if (!is.null(names(behav))) {
+        stopifnot(
+          "Names of `behav` and names of `confounds` do not match." =
+            identical(names(behav), rownames(confounds))
+        )
+      }
+    }
+    # nocov end
+    conmat <- regress_counfounds(conmat, confounds)
+    behav <- regress_counfounds(behav, confounds)
+  }
+  # default to leave-one-subject-out
+  if (is.null(kfolds)) kfolds <- length(behav)
+  folds <- crossv_kfold(length(behav), kfolds)
+  # pre-allocation
+  edges <- switch(return_edges,
+    all = array(
+      dim = c(kfolds, dim(conmat)[2], length(networks)),
+      dimnames = list(NULL, NULL, networks)
+    ),
+    sum = array(
+      0,
+      dim = c(dim(conmat)[2], length(networks)),
+      dimnames = list(NULL, networks)
+    )
+  )
+  pred <- matrix(
+    nrow = length(behav),
+    ncol = length(includes),
+    dimnames = list(names(behav), includes)
+  )
+  for (fold in seq_len(kfolds)) {
+    rows_train <- folds != fold
+    conmat_train <- conmat[rows_train, , drop = FALSE]
+    behav_train <- behav[rows_train]
+    cur_edges <- select_edges(
+      conmat_train, behav_train,
+      thresh_method, thresh_level
+    )
+    conmat_test <- conmat[!rows_train, , drop = FALSE]
+    cur_pred <- predict_cpm(
+      conmat_train, behav_train, conmat_test,
+      cur_edges, bias_correct
+    )
+    pred[!rows_train, ] <- cur_pred
+    if (return_edges == "all") {
+      edges[fold, , ] <- cur_edges
+    } else if (return_edges == "sum") {
+      edges <- edges + cur_edges
+    }
+  }
+  structure(
+    list(
+      folds = folds,
+      real = behav,
+      pred = pred,
+      edges = edges,
+      call = call
+    ),
+    class = "cpm"
+  )
+}
+
+# helper functions
+select_edges <- function(conmat, behav, method, level) {
+  r_mat <- stats::cor(conmat, behav)
+  r_crit <- switch(method,
+    alpha = {
+      thresh <- critical_r(nrow(conmat), level)
+      c(-thresh, thresh)
+    },
+    sparsity = {
+      k <- round(level * length(r_mat))
+      thresh <- c(
+        nth(r_mat, k),
+        nth(r_mat, k, descending = TRUE)
+      )
+      if (thresh[[1]] > 0 || thresh[[2]] < 0) {
+        warning("Not enough positive or negative correlation values.") # nocov
+      }
+      thresh
+    },
+    stop("Invalid threshold method.")
+  )
+  matrix(
+    c(r_mat >= r_crit[2], r_mat <= r_crit[1]),
+    ncol = 2,
+    dimnames = list(NULL, networks)
+  )
+}
+
+predict_cpm <- function(conmat, behav, conmat_new, edges, bias_correct) {
+  if (bias_correct) {
+    center <- colmeans(conmat)
+    scale <- colVars(conmat, std = TRUE)
+    conmat <- fscale(conmat, center, scale)
+    conmat_new <- fscale(conmat_new, center, scale)
+  }
+  allocate_predictors <- function(nrow) {
+    matrix(
+      1,
+      nrow = nrow, ncol = length(networks) + 1,
+      dimnames = list(NULL, c("(Intercept)", networks))
+    )
+  }
+  x <- allocate_predictors(dim(conmat)[1])
+  x_new <- allocate_predictors(dim(conmat_new)[1])
+  for (network in networks) {
+    x[, network] <- rowsums(
+      conmat[, edges[, network], drop = FALSE]
+    )
+    x_new[, network] <- rowsums(
+      conmat_new[, edges[, network], drop = FALSE]
+    )
+  }
+  pred <- matrix(
+    nrow = dim(conmat_new)[1],
+    ncol = length(includes),
+    dimnames = list(NULL, includes)
+  )
+  for (include in includes) {
+    if (include == "both") {
+      cur_x <- x
+      cur_x_new <- x_new
+    } else {
+      cur_x <- x[, c("(Intercept)", include)]
+      cur_x_new <- x_new[, c("(Intercept)", include)]
+    }
+    model <- stats::.lm.fit(cur_x, behav)
+    pred[, include] <- cur_x_new %*% model$coefficients
+  }
+  pred
+}
+
+regress_counfounds <- function(resp, confounds) {
+  stats::.lm.fit(cbind(1, confounds), resp)$residuals
+}
diff --git a/R/print.cpm.R b/R/print.cpm.R
@@ -0,0 +1,10 @@
+#' @export
+print.cpm <- function(x, ...) {
+  cv <- if (length(unique(x$folds)) == length(x$real)) {
+    "leave-one-out"
+  } else {
+    sprintf("%d-fold", length(unique(x$folds)))
+  }
+  cat(sprintf("CPM results based on %s cross validation.\n", cv))
+  invisible(x)
+}
diff --git a/R/utils.R b/R/utils.R
@@ -0,0 +1,13 @@
+critical_r <- function(n, alpha) {
+  df <- n - 2
+  ct <- stats::qt(alpha / 2, df, lower.tail = FALSE)
+  sqrt((ct^2) / ((ct^2) + df))
+}
+
+crossv_kfold <- function(n, k) {
+  sample(cut(seq_len(n), breaks = k, labels = FALSE))
+}
+
+fscale <- function(x, center, scale) {
+  eachrow(eachrow(x, center, "-"), scale, "/")
+}
diff --git a/R/zzz.R b/R/zzz.R
@@ -0,0 +1,2 @@
+#' @import Rfast
+NULL