Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
af3961d
commit 7acfd8c
Showing
10 changed files
with
531 additions
and
145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,22 @@ | ||
Package: RsqMed | ||
Title: Total Mediation Effect Size Measure (R-Squared Measure) under | ||
Moderate or High-Dimensional Mediator Settings | ||
Version: 0.1.7.1 | ||
Authors@R: person("Tianzhong", "Yang", email = "yang3704@umn.edu", role = c("aut", "cre")) | ||
Description: An implementation of calculating the R-squared measure as a total mediation effect size measure and its confidence interval for moderate- or high-dimensional mediator model. It gives an option to filter out non-mediators using variable selection method. This R package is directly related to the paper "Estimation of mediation effect for high-dimensional omics mediators with application to the Framingham Heart Study" with <doi:10.1101/774877>. | ||
Version: 0.2.1.0 | ||
Authors@R: | ||
person(given = "Tianzhong", | ||
family = "Yang", | ||
role = c("aut", "cre"), | ||
email = "yang3704@umn.edu") | ||
Description: An implementation of calculating the R-squared measure as a total mediation effect size measure and its confidence interval for moderate- or high-dimensional mediator model. It gives an option to filter out non-mediators using variable selection method. The original R package is directly related to the paper "Estimation of mediation effect for high-dimensional omics mediators with application to the Framingham Heart Study" with <doi:10.1101/774877>. The new version contains a choice of using cross-fitting, which is computationally faster. | ||
License: GPL-3 | ||
Depends: R (>= 3.5.0) | ||
Imports: SIS (>= 0.8), GMMAT (>= 1.1.0) | ||
Suggests: MASS, stats, knitr | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 6.1.1 | ||
RoxygenNote: 7.1.1 | ||
NeedsCompilation: no | ||
Packaged: 2020-01-23 18:29:31 UTC; tyang | ||
Packaged: 2022-11-07 20:10:02 UTC; yang | ||
Author: Tianzhong Yang [aut, cre] | ||
Maintainer: Tianzhong Yang <yang3704@umn.edu> | ||
Repository: CRAN | ||
Date/Publication: 2020-02-02 16:00:02 UTC | ||
Date/Publication: 2022-11-07 20:40:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
42c7dd034007983102eb76d722e53d72 *DESCRIPTION | ||
4770a72c3071c9afc96e8b8d3a90c9ab *NAMESPACE | ||
1b1e794c102705cf347670e70fad2daf *R/RsqMed.R | ||
64d28dc4aa8ce864a7eddba31408ed88 *R/RsqMed_sd.R | ||
b54914896ce5e7ee7bfd1cd3f9ffa11f *DESCRIPTION | ||
49e37e75b792ecb23d970d387792660f *NAMESPACE | ||
d264eb8a64e5c30367bb327305294904 *R/CrossFittedR2.R | ||
2541c2204aca8e1aed7e9a58a969ac8c *R/RsqMed.R | ||
f027adf1f305f41c0eacd1cdb7868736 *R/RsqMed_sd.R | ||
ddce269a1e3d054cae349621c198dd52 *data/datalist | ||
58e7ed111f1afe1ba1b257fbfdeedeb4 *data/example.rda | ||
b1d8b8fe4a035cf3a8f05f57d4facabe *man/CI.Rsq.measure.Rd | ||
e42d2ef97d1548d354807b6d817a184b *man/Rsq.measure.Rd | ||
0ab5693af6f37d494ecbcefac26663e0 *data/example.rda | ||
20caf3bc3ed80e4b72809a1c1ed12328 *man/CF_Rsq.measure.Rd | ||
bb4866770c0e3aa4ef31473d211be831 *man/CI.Rsq.measure.Rd | ||
405678ab3ee9fa822778a49722679d02 *man/Rsq.measure.Rd | ||
fd499adc04aba5a36eeb15d1a1454e25 *man/example.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(CF_Rsq.measure) | ||
export(CI.Rsq.measure) | ||
export(Rsq.measure) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
#' Function to calculate the Rsq function as a total effect size measure for mediation effect using cross-fitted estimation | ||
#' | ||
#' @param Y vector of the outcome of interest; outcome has to follow a Gaussian distribution. | ||
#' @param M matrix of putative mediators | ||
#' @param Covar covariates matrix | ||
#' @param X vector of the independent variable of interest, e.g. environmental variable | ||
#' @param iter.max Maximum number of iterations used in iSIS, default = 3 (details see the SIS package). | ||
#' @param nsis Number of predictors recruited by iSIS, default = NULL | ||
#' @param first.half TRUE: split sample into two halves by the order in the dataset. FALSE: randomly split samples into halves, default = TRUE. | ||
#' @param seed Random seed used for sample splitting, default = 2022. | ||
#' @param tune Method for tuning the regularization parameter of the penalized likelihood subproblems and of the final model selected by (i)SIS. Options include tune = 'bic' and tune = 'aic'. | ||
#' @param penalty The penalty to be applied in the regularized likelihood subproblems. 'MCP', and 'lasso' are provided. 'MCP' is recommended. | ||
#' @return Output Vector consisting of Rsq mediated(Rsq.mediated), Lower confidence bound constructed by the asymptotic variance (CI_asym_low), Upper confidence bound constructed by the asymptotic variance (CI_asym_up), Lower confidence bound constructed by the conservative variance (CI_cons_low), Upper confidence bound constructed by the conservative variance (CI_cons_up), number of selected mediators in subsample 1 (pab1), number of selected mediators in subsample 2 (pab2), and the Rsq that used to calculate the Rsq measure: variance of outcome explained by mediator (Rsq.YM), variance of outcome explained by the independent variable (Rsq.YX), and variance of outcome explained by mediator and independent variable (Rsq.YMX); Sample Size in analysis (Sample Size) | ||
#' @return Name of selected mediators in subsample 1 (select1) | ||
#' @return Name of selected mediators in subsample 2 (select2) | ||
#' @export | ||
#' @examples{ | ||
#'\dontrun{ | ||
#' data(example) | ||
#' attach(example) | ||
#'CF_Rsq.measure(Y=Y, M=M, X=X, tune = "bic", penalty = "MCP") | ||
#'} | ||
#'} | ||
|
||
CF_Rsq.measure <- function(Y, M, Covar = NULL, X, iter.max = 3, nsis = NULL, first.half = TRUE, seed = 2022, | ||
tune = c("aic", "bic"), penalty = c("MCP", "lasso")){ | ||
|
||
|
||
if (length(Y)!=nrow(M)) stop('Sample sizes do not match.') | ||
if (length(Y)!=length(X)) stop('Sample sizes do not match.') | ||
if (nrow(M)!=length(X)) stop('Sample sizes do not match.') | ||
if (!is.null(Covar)) {Covar <- as.matrix(Covar) | ||
if (nrow(Covar)!=length(X)) stop('Sample sizes do not match.')} | ||
if (length(unique(Y)) < 3) message('Warning: Current algorithm can only deal with continuous outcome.') | ||
if (is.vector(M)) stop('This algorithm does not support single mediator model.') | ||
if (is.null(colnames(M))) { | ||
print("Column name of Mediators are not specified. Naming the M by its position in the dataset. ") | ||
colnames(M) <- paste0('M', 1:ncol(M)) | ||
} | ||
if (sum(is.na(Y)) + sum(is.na(M)) + sum(is.na(Covar)) + sum(is.na(X)) > 0) stop('Algorithm cannot deal with missing data! Impute or subset the data first. ') | ||
|
||
# Whether use the first half or random sample split for variable selection | ||
if (first.half == TRUE) { | ||
idx1 <- 1:(nrow(M) * 1/2) | ||
} else { | ||
set.seed(seed) | ||
idx1 <- sample(1:nrow(M), ceiling(nrow(M)/2), replace = FALSE) | ||
} | ||
|
||
if (max(idx1) < 20){stop("Please specifiy a larger dataset.")} | ||
if (is.null(tune)){stop("Please specifiy the tuning parameter.")} | ||
|
||
d <- ncol(M) | ||
n <- nrow(M) | ||
|
||
# ---- iSIS variable selection ---- | ||
|
||
# Scale M matrix | ||
M_res_1 <- apply(M, 2, scale)[idx1, ] | ||
M_res_2 <- apply(M, 2, scale)[-idx1, ] | ||
|
||
# Regress covariates and X out | ||
if (!is.null(Covar)){ | ||
tdat_1 <- data.frame(y = Y[idx1], | ||
envir = scale(X[idx1]), | ||
cov = Covar[idx1, ]) | ||
f_1 <- stats::lm(y ~ ., data = tdat_1) | ||
} else{ | ||
tdat_1 <- data.frame(y = Y[idx1], | ||
envir = scale(X[idx1])) | ||
f_1 <- stats::lm(y ~ ., data = tdat_1) | ||
} | ||
Y_res_1 <- stats::residuals(f_1) | ||
|
||
if (!is.null(Covar)){ | ||
tdat_2 <- data.frame(y = Y[-idx1], | ||
envir = scale(X[-idx1]), | ||
cov = Covar[-idx1, ]) | ||
f_2 <- stats::lm(y ~ ., data = tdat_2) | ||
} else{ | ||
tdat_2 <- data.frame(y = Y[-idx1], | ||
envir = scale(X[-idx1])) | ||
f_2 <- stats::lm(y ~ ., data = tdat_2) | ||
} | ||
Y_res_2 <- stats::residuals(f_2) | ||
|
||
# Regress Covariates out | ||
if (!is.null(Covar)){ | ||
tdat_3 <- data.frame(y = Y[idx1], | ||
cov = Covar[idx1, ]) | ||
f_3 <- stats::lm(y ~ ., data = tdat_3) | ||
Y_res_3 <- stats::residuals(f_3) | ||
} else{ | ||
Y_res_3 <- Y[idx1] | ||
} | ||
|
||
if (!is.null(Covar)){ | ||
tdat_4 <- data.frame(y = Y[-idx1], | ||
cov = Covar[-idx1, ]) | ||
f_4 <- stats::lm(y ~ ., data = tdat_4) | ||
Y_res_4 <- stats::residuals(f_4) | ||
} else{ | ||
Y_res_4 <- Y[-idx1] | ||
} | ||
|
||
# Scale the independent variable | ||
X_res_1 <- as.numeric(scale(X[idx1])) | ||
X_res_2 <- as.numeric(scale(X[-idx1])) | ||
|
||
# ---- iSIS ---- | ||
model1 <- invisible(SIS::SIS(x = M_res_1, y = Y_res_1, | ||
family = 'gaussian', tune = tune, seed = 1234, | ||
penalty = penalty, | ||
nsis = nsis, | ||
iter.max = iter.max)) | ||
pab_1 <- length(model1$ix) | ||
m1 <- model1$ix | ||
|
||
# ------ Estimation Procedure ------ | ||
if (length(m1) > 0){ | ||
ols1.YW <- stats::lm(Y_res_4 ~ cbind(M_res_2, X_res_2)[, c(m1, d + 1)]) # Y ~ M + X | ||
err_yw1 <- ols1.YW$residuals | ||
|
||
ols1.YZ <- stats::lm(Y_res_4 ~ M_res_2[, m1]) # Y ~ M | ||
err_yz1 <- ols1.YZ$residuals | ||
|
||
ols1.YX <- stats::lm(Y_res_4 ~ X_res_2) # Y ~ X | ||
err_yx1 <- ols1.YX$residuals | ||
|
||
RYX_1 <- summary(ols1.YX)$adj.r.squared | ||
V.YW1 <- sum(ols1.YW$residuals^2)/ols1.YW$df.residual | ||
V.YZ1 <- sum(ols1.YZ$residuals^2)/ols1.YZ$df.residual | ||
V.YX1 <- sum(ols1.YX$residuals^2)/ols1.YX$df.residual | ||
|
||
err_y1 <- Y_res_4 - mean(Y_res_4) | ||
|
||
v_yx1 <- mean(err_yx1^2) | ||
v_yw1 <- mean(err_yw1^2) | ||
v_yz1 <- mean(err_yz1^2) | ||
v_y1 <- mean(err_y1^2) | ||
|
||
err1 <- cbind(err_yx1^2, err_yz1^2, err_yw1^2, err_y1^2) | ||
A1 <- stats::cov(err1) | ||
|
||
}else{ | ||
paste0("There is no mediators selected in the 1st half") | ||
} | ||
|
||
# ------ Subset 2 Estimation ------ | ||
|
||
# ------ iSIS | ||
model2 <- invisible(SIS::SIS(x = M_res_2, y = Y_res_2, | ||
family = 'gaussian', tune = tune, seed = 1234, | ||
penalty = penalty, | ||
nsis = nsis, | ||
iter.max = iter.max)) | ||
pab_2 <- length(model2$ix) | ||
m2 <- model2$ix | ||
|
||
|
||
# -----Estimation Process | ||
if (length(m2) > 0) { | ||
ols2.YW <- stats::lm(Y_res_3 ~ cbind(M_res_1, X_res_1)[, c(m2, d + 1)]) | ||
err_yw2 <- ols2.YW$residuals | ||
|
||
ols2.YZ <- stats::lm(Y_res_3 ~ M_res_1[, m2]) | ||
err_yz2 <- ols2.YZ$residuals | ||
|
||
ols2.YX <- stats::lm(Y_res_3 ~ X_res_1) | ||
err_yx2 <- ols2.YX$residuals | ||
|
||
RYX_2 <- summary(ols2.YX)$adj.r.squared | ||
V.YW2 <- sum(ols2.YW$residuals^2)/ols2.YW$df.residual | ||
V.YZ2 <- sum(ols2.YZ$residuals^2)/ols2.YZ$df.residual | ||
V.YX2 <- sum(ols2.YX$residuals^2)/ols2.YX$df.residual | ||
|
||
err_y2 <- Y_res_3 - mean(Y_res_3) | ||
|
||
v_yx2 <- mean(err_yx2^2) | ||
v_yw2 <- mean(err_yw2^2) | ||
v_yz2 <- mean(err_yz2^2) | ||
v_y2 <- mean(err_y2^2) | ||
|
||
err2 <- cbind(err_yx2^2, err_yz2^2, err_yw2^2, err_y2^2) | ||
A2 <- stats::cov(err2) | ||
|
||
}else{ | ||
paste0("There is no mediators selected in the 2nd half") | ||
} | ||
|
||
if(length(m1) > 0 & length(m2) > 0){ | ||
A <- 0.5 * (A1 + A2) | ||
v_yw <- 0.5 * (v_yw1 + v_yw2) | ||
v_yz <- 0.5 * (v_yz1 + v_yz2) | ||
v_yx <- 0.5 * (v_yx1 + v_yx2) | ||
v_y <- stats::var(c(Y_res_3, Y_res_4)) | ||
|
||
a <- c(-1/v_y, -1/v_y, 1/v_y, (v_yx + v_yz - v_yw)/v_y^2) | ||
v <- t(a) %*% A %*% a | ||
|
||
V.YW <- 0.5 * (V.YW1 + V.YW2) | ||
V.YZ <- 0.5 * (V.YZ1 + V.YZ2) | ||
|
||
# Compute the R2 | ||
Rsq.mediated <- 1.0 - (v_yx + v_yz - v_yw) / v_y | ||
CI_width_asym <- stats::qnorm(0.975) * sqrt(v) / sqrt(n) | ||
v_asym <- sqrt(v) / sqrt(n) | ||
|
||
|
||
# V.YX <- 0.5 * (V.YX1 + V.YX2) # 1 | ||
ols.YX <- stats::lm(c(Y_res_3, Y_res_4) ~ c(X_res_1, X_res_2)) | ||
V.YX <- sum(ols.YX$residuals^2)/ols.YX$df.residual | ||
RYX_12 <- summary(ols.YX)$adj.r.squared | ||
V.Y <- stats::var(c(Y_res_3, Y_res_4)) | ||
} else{ | ||
stop("No mediators selected.") | ||
} | ||
|
||
output <- c(Rsq.mediated = Rsq.mediated, | ||
CI_asym_low = Rsq.mediated - CI_width_asym, CI_asym_up = Rsq.mediated + CI_width_asym, | ||
pab1 = round(pab_1, 0), pab2 = round(pab_2, 0), | ||
Rsq.YX = RYX_12, Rsq.YMX = 1 - V.YW/V.Y, Rsq.YM = 1 - V.YZ/V.Y, | ||
SampleSize = n) | ||
|
||
Med1 <- M[-idx1, m1, drop = F] | ||
Med2 <- M[idx1, m2, drop = F] | ||
|
||
if (is.null(colnames(Med1)) | is.null(colnames(Med2))){ | ||
select1 <- "Column name of Mediators are not specified." | ||
select2 <- "Column name of Mediators are not specified." | ||
} else{ | ||
select1 <- colnames(Med1) | ||
select2 <- colnames(Med2) | ||
} | ||
|
||
return(list(output = output, | ||
select1 = select1, | ||
select2 = select2)) | ||
|
||
} | ||
|
||
|
||
|
||
|
Oops, something went wrong.