Skip to content

Commit

Permalink
version 0.2.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ytzhong authored and cran-robot committed Nov 7, 2022
1 parent af3961d commit 7acfd8c
Show file tree
Hide file tree
Showing 10 changed files with 531 additions and 145 deletions.
19 changes: 11 additions & 8 deletions DESCRIPTION
@@ -1,19 +1,22 @@
Package: RsqMed
Title: Total Mediation Effect Size Measure (R-Squared Measure) under
Moderate or High-Dimensional Mediator Settings
Version: 0.1.7.1
Authors@R: person("Tianzhong", "Yang", email = "yang3704@umn.edu", role = c("aut", "cre"))
Description: An implementation of calculating the R-squared measure as a total mediation effect size measure and its confidence interval for moderate- or high-dimensional mediator model. It gives an option to filter out non-mediators using variable selection method. This R package is directly related to the paper "Estimation of mediation effect for high-dimensional omics mediators with application to the Framingham Heart Study" with <doi:10.1101/774877>.
Version: 0.2.1.0
Authors@R:
person(given = "Tianzhong",
family = "Yang",
role = c("aut", "cre"),
email = "yang3704@umn.edu")
Description: An implementation of calculating the R-squared measure as a total mediation effect size measure and its confidence interval for moderate- or high-dimensional mediator model. It gives an option to filter out non-mediators using variable selection method. The original R package is directly related to the paper "Estimation of mediation effect for high-dimensional omics mediators with application to the Framingham Heart Study" with <doi:10.1101/774877>. The new version contains a choice of using cross-fitting, which is computationally faster.
License: GPL-3
Depends: R (>= 3.5.0)
Imports: SIS (>= 0.8), GMMAT (>= 1.1.0)
Suggests: MASS, stats, knitr
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.1
RoxygenNote: 7.1.1
NeedsCompilation: no
Packaged: 2020-01-23 18:29:31 UTC; tyang
Packaged: 2022-11-07 20:10:02 UTC; yang
Author: Tianzhong Yang [aut, cre]
Maintainer: Tianzhong Yang <yang3704@umn.edu>
Repository: CRAN
Date/Publication: 2020-02-02 16:00:02 UTC
Date/Publication: 2022-11-07 20:40:02 UTC
16 changes: 9 additions & 7 deletions MD5
@@ -1,9 +1,11 @@
42c7dd034007983102eb76d722e53d72 *DESCRIPTION
4770a72c3071c9afc96e8b8d3a90c9ab *NAMESPACE
1b1e794c102705cf347670e70fad2daf *R/RsqMed.R
64d28dc4aa8ce864a7eddba31408ed88 *R/RsqMed_sd.R
b54914896ce5e7ee7bfd1cd3f9ffa11f *DESCRIPTION
49e37e75b792ecb23d970d387792660f *NAMESPACE
d264eb8a64e5c30367bb327305294904 *R/CrossFittedR2.R
2541c2204aca8e1aed7e9a58a969ac8c *R/RsqMed.R
f027adf1f305f41c0eacd1cdb7868736 *R/RsqMed_sd.R
ddce269a1e3d054cae349621c198dd52 *data/datalist
58e7ed111f1afe1ba1b257fbfdeedeb4 *data/example.rda
b1d8b8fe4a035cf3a8f05f57d4facabe *man/CI.Rsq.measure.Rd
e42d2ef97d1548d354807b6d817a184b *man/Rsq.measure.Rd
0ab5693af6f37d494ecbcefac26663e0 *data/example.rda
20caf3bc3ed80e4b72809a1c1ed12328 *man/CF_Rsq.measure.Rd
bb4866770c0e3aa4ef31473d211be831 *man/CI.Rsq.measure.Rd
405678ab3ee9fa822778a49722679d02 *man/Rsq.measure.Rd
fd499adc04aba5a36eeb15d1a1454e25 *man/example.Rd
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1,4 +1,5 @@
# Generated by roxygen2: do not edit by hand

export(CF_Rsq.measure)
export(CI.Rsq.measure)
export(Rsq.measure)
244 changes: 244 additions & 0 deletions R/CrossFittedR2.R
@@ -0,0 +1,244 @@
#' Function to calculate the Rsq function as a total effect size measure for mediation effect using cross-fitted estimation
#'
#' @param Y vector of the outcome of interest; outcome has to follow a Gaussian distribution.
#' @param M matrix of putative mediators
#' @param Covar covariates matrix
#' @param X vector of the independent variable of interest, e.g. environmental variable
#' @param iter.max Maximum number of iterations used in iSIS, default = 3 (details see the SIS package).
#' @param nsis Number of predictors recruited by iSIS, default = NULL
#' @param first.half TRUE: split sample into two halves by the order in the dataset. FALSE: randomly split samples into halves, default = TRUE.
#' @param seed Random seed used for sample splitting, default = 2022.
#' @param tune Method for tuning the regularization parameter of the penalized likelihood subproblems and of the final model selected by (i)SIS. Options include tune = 'bic' and tune = 'aic'.
#' @param penalty The penalty to be applied in the regularized likelihood subproblems. 'MCP', and 'lasso' are provided. 'MCP' is recommended.
#' @return Output Vector consisting of Rsq mediated(Rsq.mediated), Lower confidence bound constructed by the asymptotic variance (CI_asym_low), Upper confidence bound constructed by the asymptotic variance (CI_asym_up), Lower confidence bound constructed by the conservative variance (CI_cons_low), Upper confidence bound constructed by the conservative variance (CI_cons_up), number of selected mediators in subsample 1 (pab1), number of selected mediators in subsample 2 (pab2), and the Rsq that used to calculate the Rsq measure: variance of outcome explained by mediator (Rsq.YM), variance of outcome explained by the independent variable (Rsq.YX), and variance of outcome explained by mediator and independent variable (Rsq.YMX); Sample Size in analysis (Sample Size)
#' @return Name of selected mediators in subsample 1 (select1)
#' @return Name of selected mediators in subsample 2 (select2)
#' @export
#' @examples{
#'\dontrun{
#' data(example)
#' attach(example)
#'CF_Rsq.measure(Y=Y, M=M, X=X, tune = "bic", penalty = "MCP")
#'}
#'}

CF_Rsq.measure <- function(Y, M, Covar = NULL, X, iter.max = 3, nsis = NULL, first.half = TRUE, seed = 2022,
tune = c("aic", "bic"), penalty = c("MCP", "lasso")){


if (length(Y)!=nrow(M)) stop('Sample sizes do not match.')
if (length(Y)!=length(X)) stop('Sample sizes do not match.')
if (nrow(M)!=length(X)) stop('Sample sizes do not match.')
if (!is.null(Covar)) {Covar <- as.matrix(Covar)
if (nrow(Covar)!=length(X)) stop('Sample sizes do not match.')}
if (length(unique(Y)) < 3) message('Warning: Current algorithm can only deal with continuous outcome.')
if (is.vector(M)) stop('This algorithm does not support single mediator model.')
if (is.null(colnames(M))) {
print("Column name of Mediators are not specified. Naming the M by its position in the dataset. ")
colnames(M) <- paste0('M', 1:ncol(M))
}
if (sum(is.na(Y)) + sum(is.na(M)) + sum(is.na(Covar)) + sum(is.na(X)) > 0) stop('Algorithm cannot deal with missing data! Impute or subset the data first. ')

# Whether use the first half or random sample split for variable selection
if (first.half == TRUE) {
idx1 <- 1:(nrow(M) * 1/2)
} else {
set.seed(seed)
idx1 <- sample(1:nrow(M), ceiling(nrow(M)/2), replace = FALSE)
}

if (max(idx1) < 20){stop("Please specifiy a larger dataset.")}
if (is.null(tune)){stop("Please specifiy the tuning parameter.")}

d <- ncol(M)
n <- nrow(M)

# ---- iSIS variable selection ----

# Scale M matrix
M_res_1 <- apply(M, 2, scale)[idx1, ]
M_res_2 <- apply(M, 2, scale)[-idx1, ]

# Regress covariates and X out
if (!is.null(Covar)){
tdat_1 <- data.frame(y = Y[idx1],
envir = scale(X[idx1]),
cov = Covar[idx1, ])
f_1 <- stats::lm(y ~ ., data = tdat_1)
} else{
tdat_1 <- data.frame(y = Y[idx1],
envir = scale(X[idx1]))
f_1 <- stats::lm(y ~ ., data = tdat_1)
}
Y_res_1 <- stats::residuals(f_1)

if (!is.null(Covar)){
tdat_2 <- data.frame(y = Y[-idx1],
envir = scale(X[-idx1]),
cov = Covar[-idx1, ])
f_2 <- stats::lm(y ~ ., data = tdat_2)
} else{
tdat_2 <- data.frame(y = Y[-idx1],
envir = scale(X[-idx1]))
f_2 <- stats::lm(y ~ ., data = tdat_2)
}
Y_res_2 <- stats::residuals(f_2)

# Regress Covariates out
if (!is.null(Covar)){
tdat_3 <- data.frame(y = Y[idx1],
cov = Covar[idx1, ])
f_3 <- stats::lm(y ~ ., data = tdat_3)
Y_res_3 <- stats::residuals(f_3)
} else{
Y_res_3 <- Y[idx1]
}

if (!is.null(Covar)){
tdat_4 <- data.frame(y = Y[-idx1],
cov = Covar[-idx1, ])
f_4 <- stats::lm(y ~ ., data = tdat_4)
Y_res_4 <- stats::residuals(f_4)
} else{
Y_res_4 <- Y[-idx1]
}

# Scale the independent variable
X_res_1 <- as.numeric(scale(X[idx1]))
X_res_2 <- as.numeric(scale(X[-idx1]))

# ---- iSIS ----
model1 <- invisible(SIS::SIS(x = M_res_1, y = Y_res_1,
family = 'gaussian', tune = tune, seed = 1234,
penalty = penalty,
nsis = nsis,
iter.max = iter.max))
pab_1 <- length(model1$ix)
m1 <- model1$ix

# ------ Estimation Procedure ------
if (length(m1) > 0){
ols1.YW <- stats::lm(Y_res_4 ~ cbind(M_res_2, X_res_2)[, c(m1, d + 1)]) # Y ~ M + X
err_yw1 <- ols1.YW$residuals

ols1.YZ <- stats::lm(Y_res_4 ~ M_res_2[, m1]) # Y ~ M
err_yz1 <- ols1.YZ$residuals

ols1.YX <- stats::lm(Y_res_4 ~ X_res_2) # Y ~ X
err_yx1 <- ols1.YX$residuals

RYX_1 <- summary(ols1.YX)$adj.r.squared
V.YW1 <- sum(ols1.YW$residuals^2)/ols1.YW$df.residual
V.YZ1 <- sum(ols1.YZ$residuals^2)/ols1.YZ$df.residual
V.YX1 <- sum(ols1.YX$residuals^2)/ols1.YX$df.residual

err_y1 <- Y_res_4 - mean(Y_res_4)

v_yx1 <- mean(err_yx1^2)
v_yw1 <- mean(err_yw1^2)
v_yz1 <- mean(err_yz1^2)
v_y1 <- mean(err_y1^2)

err1 <- cbind(err_yx1^2, err_yz1^2, err_yw1^2, err_y1^2)
A1 <- stats::cov(err1)

}else{
paste0("There is no mediators selected in the 1st half")
}

# ------ Subset 2 Estimation ------

# ------ iSIS
model2 <- invisible(SIS::SIS(x = M_res_2, y = Y_res_2,
family = 'gaussian', tune = tune, seed = 1234,
penalty = penalty,
nsis = nsis,
iter.max = iter.max))
pab_2 <- length(model2$ix)
m2 <- model2$ix


# -----Estimation Process
if (length(m2) > 0) {
ols2.YW <- stats::lm(Y_res_3 ~ cbind(M_res_1, X_res_1)[, c(m2, d + 1)])
err_yw2 <- ols2.YW$residuals

ols2.YZ <- stats::lm(Y_res_3 ~ M_res_1[, m2])
err_yz2 <- ols2.YZ$residuals

ols2.YX <- stats::lm(Y_res_3 ~ X_res_1)
err_yx2 <- ols2.YX$residuals

RYX_2 <- summary(ols2.YX)$adj.r.squared
V.YW2 <- sum(ols2.YW$residuals^2)/ols2.YW$df.residual
V.YZ2 <- sum(ols2.YZ$residuals^2)/ols2.YZ$df.residual
V.YX2 <- sum(ols2.YX$residuals^2)/ols2.YX$df.residual

err_y2 <- Y_res_3 - mean(Y_res_3)

v_yx2 <- mean(err_yx2^2)
v_yw2 <- mean(err_yw2^2)
v_yz2 <- mean(err_yz2^2)
v_y2 <- mean(err_y2^2)

err2 <- cbind(err_yx2^2, err_yz2^2, err_yw2^2, err_y2^2)
A2 <- stats::cov(err2)

}else{
paste0("There is no mediators selected in the 2nd half")
}

if(length(m1) > 0 & length(m2) > 0){
A <- 0.5 * (A1 + A2)
v_yw <- 0.5 * (v_yw1 + v_yw2)
v_yz <- 0.5 * (v_yz1 + v_yz2)
v_yx <- 0.5 * (v_yx1 + v_yx2)
v_y <- stats::var(c(Y_res_3, Y_res_4))

a <- c(-1/v_y, -1/v_y, 1/v_y, (v_yx + v_yz - v_yw)/v_y^2)
v <- t(a) %*% A %*% a

V.YW <- 0.5 * (V.YW1 + V.YW2)
V.YZ <- 0.5 * (V.YZ1 + V.YZ2)

# Compute the R2
Rsq.mediated <- 1.0 - (v_yx + v_yz - v_yw) / v_y
CI_width_asym <- stats::qnorm(0.975) * sqrt(v) / sqrt(n)
v_asym <- sqrt(v) / sqrt(n)


# V.YX <- 0.5 * (V.YX1 + V.YX2) # 1
ols.YX <- stats::lm(c(Y_res_3, Y_res_4) ~ c(X_res_1, X_res_2))
V.YX <- sum(ols.YX$residuals^2)/ols.YX$df.residual
RYX_12 <- summary(ols.YX)$adj.r.squared
V.Y <- stats::var(c(Y_res_3, Y_res_4))
} else{
stop("No mediators selected.")
}

output <- c(Rsq.mediated = Rsq.mediated,
CI_asym_low = Rsq.mediated - CI_width_asym, CI_asym_up = Rsq.mediated + CI_width_asym,
pab1 = round(pab_1, 0), pab2 = round(pab_2, 0),
Rsq.YX = RYX_12, Rsq.YMX = 1 - V.YW/V.Y, Rsq.YM = 1 - V.YZ/V.Y,
SampleSize = n)

Med1 <- M[-idx1, m1, drop = F]
Med2 <- M[idx1, m2, drop = F]

if (is.null(colnames(Med1)) | is.null(colnames(Med2))){
select1 <- "Column name of Mediators are not specified."
select2 <- "Column name of Mediators are not specified."
} else{
select1 <- colnames(Med1)
select2 <- colnames(Med2)
}

return(list(output = output,
select1 = select1,
select2 = select2))

}




0 comments on commit 7acfd8c

Please sign in to comment.