version 0.1.0

cran · Aug 15, 2023 · 0438d61 · 0438d61
commit 0438d61
Show file tree

Hide file tree

Showing 6 changed files with 296 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,20 @@
+Package: kerDAA
+Type: Package
+Title: New Kernel-Based Test for Differential Association Analysis
+Version: 0.1.0
+Authors@R: c(person("Hoseung", "Song", role = c("aut", "cre"),
+                     email = "hsong3@fredhutch.org"),
+             person("Michael", "C. Wu", role = "aut"))
+Author: Hoseung Song [aut, cre],
+  Michael C. Wu [aut]
+Imports: mvtnorm
+Maintainer: Hoseung Song <hsong3@fredhutch.org>
+Description: A new practical method to evaluate whether relationships between two sets of high-dimensional variables are different or not across two conditions.
+    Song, H. and Wu, M.C. (2023) 
+    <arXiv:2307.15268>.
+License: GPL (>= 2)
+Encoding: UTF-8
+NeedsCompilation: no
+Packaged: 2023-08-12 21:52:41 UTC; hsong3
+Repository: CRAN
+Date/Publication: 2023-08-15 10:00:05 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,5 @@
+04e369af571a500babe397c777470023 *DESCRIPTION
+8c65cbabb1e0695a67ad7978ec90c378 *NAMESPACE
+f4d127428d04633b3d3d57dc7ad8f511 *R/kerDAA.R
+8d59f4c39553ffd8b9a751da638c4b44 *man/kerDAA-package.Rd
+2dcd738d5e7dafb34d8d607551c13971 *man/kerdaa.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,2 @@
+export(kerdaa)
+importFrom("stats", "dist", "median", "pnorm", "pcauchy")
diff --git a/R/kerDAA.R b/R/kerDAA.R
@@ -0,0 +1,151 @@
+kerdaa = function(X1, Y1, X2, Y2, perm=0) {
+  X = rbind(X1, X2)
+  Y = rbind(Y1, Y2)
+
+  m = nrow(X1)
+  n = nrow(X2)
+  N = m + n
+
+  Dx = dist(X)^2
+  sigma = median(Dx)
+  if (sigma == 0) {
+    sigma = sigma + 0.1
+  }
+  KX_g = exp(-as.matrix(Dx)/sigma/2)
+
+  Dy = dist(Y)^2
+  sigma = median(Dy)
+  if (sigma == 0) {
+    sigma = sigma + 0.1
+  }
+  KY_g = exp(-as.matrix(Dy)/sigma/2)
+
+  KX_l = X%*%t(X)
+  KY_l = Y%*%t(Y)
+
+  I.m = diag(1,m)
+  I.1 = rep(1,m)
+  H1 = I.m-I.1%*%t(I.1)/m
+
+  I.n = diag(1,n)
+  I.1 = rep(1,n)
+  H2 = I.n-I.1%*%t(I.1)/n
+
+  H = rbind( cbind(H1, matrix(0, m, n)), cbind(matrix(0,n,m), H2) )
+
+  Kx_g = H%*%KX_g%*%H
+  Ky_g = H%*%KY_g%*%H
+
+  Kx_l = H%*%KX_l%*%H
+  Ky_l = H%*%KY_l%*%H
+
+  K_g = Kx_g*Ky_g
+  diag(K_g) = 0
+
+  K_l = Kx_l*Ky_l
+  diag(K_l) = 0
+
+  K1_g = sum(K_g[1:m,1:m])/m/(m-1)
+  K2_g = sum(K_g[(m+1):N,(m+1):N])/n/(n-1)
+
+  K1_l = sum(K_l[1:m,1:m])/m/(m-1)
+  K2_l = sum(K_l[(m+1):N,(m+1):N])/n/(n-1)
+
+  p1 = m*(m-1)/N/(N-1)
+  p2 = p1*(m-2)/(N-2)
+  p3 = p2*(m-3)/(N-3)
+
+  q1 = n*(n-1)/N/(N-1)
+  q2 = q1*(n-2)/(N-2)
+  q3 = q2*(n-3)/(N-3)
+
+  mu_g = sum(K_g)/N/(N-1)
+
+  A_g = sum(K_g^2)
+  B_g = sum(rowSums(K_g)^2) - A_g
+  C_g = sum(K_g)^2 - 2*A_g - 4*B_g
+
+  var_K1_g = (2*A_g*p1 + 4*B_g*p2 + C_g*p3)/m/m/(m-1)/(m-1) - mu_g^2
+  var_K2_g = (2*A_g*q1 + 4*B_g*q2 + C_g*q3)/n/n/(n-1)/(n-1) - mu_g^2
+  cov_g = C_g/N/(N-1)/(N-2)/(N-3) - mu_g^2
+
+  var_g = var_K1_g + var_K2_g - 2*cov_g
+
+  stat_g = (K1_g - K2_g)/sqrt(var_g)
+
+  mu_l = sum(K_l)/N/(N-1)
+
+  A_l = sum(K_l^2)
+  B_l = sum(rowSums(K_l)^2) - A_l
+  C_l = sum(K_l)^2 - 2*A_l - 4*B_l
+
+  var_K1_l = (2*A_l*p1 + 4*B_l*p2 + C_l*p3)/m/m/(m-1)/(m-1) - mu_l^2
+  var_K2_l = (2*A_l*q1 + 4*B_l*q2 + C_l*q3)/n/n/(n-1)/(n-1) - mu_l^2
+  cov_l = C_l/N/(N-1)/(N-2)/(N-3) - mu_l^2
+
+  var_l = var_K1_l + var_K2_l - 2*cov_l
+
+  stat_l = (K1_l - K2_l)/sqrt(var_l)
+
+  pval_g = 2*pnorm(-abs(stat_g))
+  pval_l = 2*pnorm(-abs(stat_l))
+
+  x = c(pval_g, pval_l)
+  cauchy.t = sum(tan((0.5-pmin(x,0.99))*pi))/length(x)
+  pval_o = 1 - pcauchy(cauchy.t)
+
+  res = list()
+  res$stat_g = stat_g
+  res$stat_l = stat_l
+  res$pval = pval_o
+
+
+  if (perm>0) {
+    temp1 = temp2 = rep(0, perm)
+    for (i in 1:perm) {
+      id = sample(N, replace = FALSE)
+      K_g_i = K_g[id,id]
+      K_l_i = K_l[id,id]
+
+      K1_g_i = sum(K_g_i[1:m,1:m])/m/(m-1)
+      K2_g_i = sum(K_g_i[(m+1):N,(m+1):N])/n/(n-1)
+
+      K1_l_i = sum(K_l_i[1:m,1:m])/m/(m-1)
+      K2_l_i = sum(K_l_i[(m+1):N,(m+1):N])/n/(n-1)
+
+      stat_g_i = (K1_g_i - K2_g_i)/sqrt(var_g)
+      stat_l_i = (K1_l_i - K2_l_i)/sqrt(var_l)
+
+      temp1[i] = stat_g_i
+      temp2[i] = stat_l_i
+    }
+    pval_g_perm = 2*length(which(temp1>=abs(stat_g)))/perm
+    pval_l_perm = 2*length(which(temp2>=abs(stat_l)))/perm
+  }
+
+  x = c(pval_g_perm, pval_l_perm)
+  cauchy.t = sum(tan((0.5-pmin(x,0.99))*pi))/length(x)
+  pval_o_perm = 1 - pcauchy(cauchy.t)
+
+  res$pval_perm = pval_o_perm
+
+
+  return(res)
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/man/kerDAA-package.Rd b/man/kerDAA-package.Rd
@@ -0,0 +1,55 @@
+\name{kerDAA}
+\alias{kerDAA}
+\title{New kernel-based test for differential association analysis}
+\description{This package can be used to determine whether two high-dimensional samples have similar dependence relationships across two conditions.}
+\author{
+Hoseung Song and Michael C. Wu
+
+Maintainer: Hoseung Song (hsong3@fredhutch.org)
+}
+\references{
+Song, H. and Wu, M.C. (2023). Multivariate differential association analysis. arXiv:2307.15268
+}
+
+\seealso{
+  \code{\link{kerdaa}}
+}
+\examples{
+
+# Dimension of variables.
+d = 100
+
+# The first covariance matrix
+SIG = matrix(0, d, d)
+for (i in 1:d) {
+  for (j in 1:d) {
+    SIG[i,j] = 0.4^(abs(i-j))
+  }
+}
+
+# The second covariance matrix
+SIG1 = matrix(0, d, d)
+for (i in 1:d) {
+  for (j in 1:d) {
+    SIG1[i,j] = (0.4+0.5)^(abs(i-j))
+  }
+}
+
+set.seed(500)
+
+# We use 'rmvnorm' in 'mvtnorm' package to generate multivariate normally distributed samples
+require(mvtnorm)
+Z = rmvnorm(100, mean = rep(0,100), sigma = SIG)
+X1 = Z[,1:50]
+Y1 = Z[,51:100]
+
+Z = rmvnorm(100, mean = rep(0,100), sigma = SIG1)
+X2 = Z[,1:50]
+Y2 = Z[,51:100]
+
+a = kerdaa(X1, Y1, X2, Y2, perm=1000)
+# output results based on the permutation and the asymptotic results
+# the test statistic values can be found in a$stat_g and a$stat_l
+# p-values can be found in a$pval and a$pval_perm
+
+}
diff --git a/man/kerdaa.Rd b/man/kerdaa.Rd
@@ -0,0 +1,63 @@
+\name{kerdaa}
+\alias{kerdaa}
+\title{New kernel-based test for differential association analysis}
+\description{This function provides the kernel-based differential association test.}
+\usage{
+kerdaa(X1, Y1, X2, Y2, perm=0)
+}
+\arguments{
+  \item{X1}{The first multivariate data in the first condition.}
+  \item{Y1}{The second multivariate data in the first condition.}
+  \item{X2}{The first multivariate data in the second condition.}
+  \item{Y2}{The second multivariate data in the second condition.}
+  \item{perm}{The number of permutations performed to calculate the p-value of the test. The default value is 0, which means the permutation is not performed and only approximated p-value based on the asymptotic theory is provided. Doing permutation could be time consuming, so be cautious if you want to set this value to be larger than 10,000.}
+}
+
+\value{
+  Returns a list with test statistic values and p-values of the test. See below for more details.
+  \item{stat_g}{The value of the test statistic using the Gaussian kernel.}
+  \item{stat_l}{The value of the test statistic using the linear kernel.}
+  \item{pval}{The omnibus p-value using the approximated p-values of the test statistic based on asymptotic theory.}
+  \item{pval_perm}{The omnibus p-value using the permutation p-values of the test statistic when argument `perm' is positive.}
+}
+\seealso{
+  \code{\link{kerDAA}}
+}
+\examples{
+
+# Dimension of variables.
+d = 100
+
+# The first covariance matrix
+SIG = matrix(0, d, d)
+for (i in 1:d) {
+  for (j in 1:d) {
+    SIG[i,j] = 0.4^(abs(i-j))
+  }
+}
+
+# The second covariance matrix
+SIG1 = matrix(0, d, d)
+for (i in 1:d) {
+  for (j in 1:d) {
+    SIG1[i,j] = (0.4+0.5)^(abs(i-j))
+  }
+}
+
+set.seed(500)
+# We use 'rmvnorm' in 'mvtnorm' package to generate multivariate normally distributed samples
+require(mvtnorm)
+Z = rmvnorm(100, mean = rep(0,100), sigma = SIG)
+X1 = Z[,1:50]
+Y1 = Z[,51:100]
+
+Z = rmvnorm(100, mean = rep(0,100), sigma = SIG1)
+X2 = Z[,1:50]
+Y2 = Z[,51:100]
+
+a = kerdaa(X1, Y1, X2, Y2, perm=1000)
+# output results based on the permutation and the asymptotic results
+# the test statistic values can be found in a$stat_g and a$stat_l
+# p-values can be found in a$pval and a$pval_perm
+}
+