-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 73834a2
Showing
47 changed files
with
2,888 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Package: drape | ||
Title: Doubly Robust Average Partial Effects | ||
Version: 0.0.1 | ||
Authors@R: c( | ||
person("Harvey", "Klyne", role = c("aut", "cre", "cph"), email = "hklyne@g.harvard.edu")) | ||
Description: Doubly robust average partial effect estimation. This implementation contains methods for adding additional smoothness to plug-in regression procedures and for estimating score functions using smoothing splines. Details of the method can be found in Harvey Klyne and Rajen D. Shah (2023) <arXiv:2308.09207>. | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
BugReports: https://github.com/harveyklyne/drape/issues | ||
Suggests: DoubleML, glmnet, graphics, hdi, knitr, Matrix, mlr3, | ||
paradox, partykit, rjson, rmarkdown, testthat (>= 3.0.0), | ||
xgboost | ||
VignetteBuilder: knitr | ||
Config/testthat/edition: 3 | ||
RoxygenNote: 7.1.2 | ||
NeedsCompilation: no | ||
Packaged: 2023-09-17 00:36:38 UTC; Harvey | ||
Author: Harvey Klyne [aut, cre, cph] | ||
Maintainer: Harvey Klyne <hklyne@g.harvard.edu> | ||
Repository: CRAN | ||
Date/Publication: 2023-09-18 13:30:04 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: drape authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
279745416be714cba51dca4ed767255a *DESCRIPTION | ||
833f66fc5437a9bfb6a440f0321a7bac *LICENSE | ||
1875c5f9aad88810e4a8e6ebf8e47c69 *NAMESPACE | ||
ec0019438fbd8a7effb88075153e5a73 *NEWS.md | ||
1d9a29ee062e3d7cb7263f259adf8e29 *R/average_partial_effect.R | ||
d2ccdab3f3958c865c23219560d67f00 *R/other_methods.R | ||
3c195beb43319c7cd0a7a4341bc1aed8 *R/regression_functions.R | ||
4550369a81a8bc4c0e38c663a3e8aee9 *R/resmooth.R | ||
3a86cafbbde47f34f337335ee043d232 *R/score.R | ||
7e8c75904a7730345ce58a30f5548b80 *R/simulation_settings.R | ||
3204bb6b4e5c69af5fb0a80f00a8721d *README.md | ||
731de9963fb32f455f89f48a43053693 *build/vignette.rds | ||
0f2835c57f623fd7c44755d8bc72a72d *inst/doc/drape.R | ||
e7dccb040c2078202ca075cdaef6c63c *inst/doc/drape.Rmd | ||
4f57b1c556616e9b95ba4ed826aeb46c *inst/doc/drape.html | ||
9ff4dfd7b52ef17fbc731d451a68954b *man/MC_sums.Rd | ||
bc1e645361eef0debeefa38202388ee8 *man/basis_poly.Rd | ||
74bf167f2265e75fa7ef72021624956a *man/compare.Rd | ||
6a61cdd60c5b8b22c31467694ca5cea3 *man/compare_evaluate.Rd | ||
6bd754eb69f0ca8df05b17348d186fd8 *man/compare_lm.Rd | ||
996803563357e45c6f162d0432febc93 *man/compare_partially_linear.Rd | ||
0f7738bb5d74bcc3aed7834f0f3508a0 *man/compare_rothenhausler.Rd | ||
ebccdae8ba18ac48cdf97028592a713d *man/cv_resmooth.Rd | ||
db6cddfb12784d55512d7bc90c94f828 *man/cv_spline_score.Rd | ||
c28c754d75ef271763eb7f2d03aeffa7 *man/drape.Rd | ||
5c43ee3f2909633a269ef48d63bb5ebb *man/fit_lasso_poly.Rd | ||
3ff0540aab203589c3d153ad5cd63523 *man/fit_xgboost.Rd | ||
998f1ef1a21d301940bd41ea3048c1b5 *man/mixture_score.Rd | ||
f4690811424df26f5a1050307077afd2 *man/new_X.Rd | ||
54d09c567ca1cf2086a211ae3b0bb9f6 *man/ng_pseudo_response.Rd | ||
b6cbad85983066ac042b8c8418a0e779 *man/partially_linear.Rd | ||
1e84ba25c5578081923de7cadf4f479b *man/resmooth.Rd | ||
796c8a4549c9275060a98449c69bdf23 *man/rmixture.Rd | ||
bda62e8e65f1c0f41a33cdc6441fedf9 *man/rothenhausler_basis.Rd | ||
83c375fae1383784a5b720f4199c5246 *man/rothenhausler_yu.Rd | ||
a7665e0b1a0ddfc0a64f6a9dfc75c9c7 *man/simulate_data.Rd | ||
decdc0cad86696290903f3bca2c2075f *man/sort_bin.Rd | ||
4a18660032e8b7affe736715089bc87d *man/spline_score.Rd | ||
562211694a4908da8496def73b498c17 *man/z_correlated_normal.Rd | ||
c8a3c33282b990b98157faa30ae2de8f *tests/testthat.R | ||
4e24568fe403fe249e261ff6eb6865ca *tests/testthat/test_average_partial_effect.R | ||
e7ce5a471977b9ae1885b95b5a320605 *tests/testthat/test_regression_functions.R | ||
39585d5403ab73201616833e88f8716b *tests/testthat/test_resmooth.R | ||
f403ff692a9a199730e6bfa0670ea6d6 *tests/testthat/test_score.R | ||
043060cb8531a08de0f0d102ae1e8d95 *tests/testthat/test_simulation_settings.R | ||
e7dccb040c2078202ca075cdaef6c63c *vignettes/drape.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(basis_poly) | ||
export(cv_resmooth) | ||
export(cv_spline_score) | ||
export(drape) | ||
export(ng_pseudo_response) | ||
export(resmooth) | ||
export(simulate_data) | ||
export(spline_score) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# drape 0.0.1 | ||
|
||
* Added a `NEWS.md` file to track changes to the package. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
drape <- function(y, x, z, | ||
response_regression, | ||
predictor_regression, | ||
resmooth_bw = NULL, | ||
spline_df = NULL, | ||
nfolds = 5L, | ||
foldid = NULL, | ||
verbose = FALSE) { | ||
#' Estimate the doubly-robust average partial effect estimate of X on Y, in the presence of Z. | ||
#' | ||
#' @param y vector of responses. | ||
#' @param x vector of the predictor of interest. | ||
#' @param z matrix of additional predictors. | ||
#' @param response_regression function which takes input data of the form | ||
#' (X,y), where X=cbind(x,z), and returns a prediction function f:X -> y and optionally a | ||
#' similar derivative estimation function (in this case no resmoothing is done). | ||
#' @param predictor_regression function which takes input data of the form | ||
#' (z,x), and returns a prediction function m:z -> x. | ||
#' @param resmooth_bw optional numeric to be used as resmoothing bandwidth, | ||
#' otherwise chosen via cross-validation. Only used if | ||
#' response_regression doesn't predict derivatives. | ||
#' @param spline_df optional double, a smoothing parameter for the | ||
#' unconditional spline score estimator, corresponding to the effective degrees | ||
#' of freedom for a smoothing spline. If NULL, chosen via | ||
#' cross-validation. | ||
#' @param nfolds integer, number of sample-splits. If set to | ||
#' one, then all data is used for both training and evaluation. | ||
#' @param foldid optional vector with components in 1:nfolds indicating the folds in which | ||
#' each observation fell. Overwrites nfolds. | ||
#' @param verbose boolean controlling level of information outputted. | ||
#' | ||
#' @return list containing the average partial effect estimate and the | ||
#' corresponding standard error estimate. If verbose=TRUE, additionally contains | ||
#' variables used in computations. | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' set.seed(0) | ||
#' data <- simulate_data(200, "normal", "plm") | ||
#' response_regression <- function(X,y){ | ||
#' df <- data.frame(y,X) | ||
#' colnames(df) <- c("y", paste0("X", 1:10)) | ||
#' lm1 <- stats::lm(y~X1+sin(X2), data=df) | ||
#' fit <- function(newX){ | ||
#' newdf <- data.frame(newX) | ||
#' colnames(newdf) <- paste0("X", 1:10) | ||
#' return(as.vector(stats::predict(lm1, newdata=newdf)))} | ||
#' return(list("fit"=fit)) | ||
#' } | ||
#' predictor_regression <- function(z,x){ | ||
#' df <- data.frame(x,z) | ||
#' colnames(df) <- c("x", paste0("Z", 1:9)) | ||
#' lm1 <- stats::lm(x~Z1+Z2, data=df) | ||
#' fit <- function(newz){ | ||
#' newdf <- data.frame(newz) | ||
#' colnames(newdf) <- paste0("Z", 1:9) | ||
#' return(as.vector(stats::predict(lm1, newdata=newdf)))} | ||
#' return(list("fit"=fit)) | ||
#' } | ||
#' drape(data$y, data$x, data$z, response_regression, predictor_regression, nfolds=2) | ||
|
||
if(!is.vector(y)){ | ||
warning("y must be a vector. Updating y <- as.vector(y).") | ||
y <- as.vector(y) | ||
} | ||
|
||
# Converting (x,z) to matrix form for compatibility. | ||
X <- cbind(x,z) | ||
d <- 1 | ||
|
||
n <- length(y) | ||
|
||
if(dim(X)[1]!=n){stop("Must have length(y) == dim(X)[1].")} | ||
|
||
if (is.null(foldid)){ | ||
foldid <- sample(rep(seq(nfolds), length.out=n)) | ||
} else{ nfolds <- max(foldid) } | ||
|
||
if(nfolds < 1){ | ||
warning("nfolds must be a positive integer. Setting nfolds=1.") | ||
nfolds <- 1L | ||
} | ||
if(round(nfolds)!=nfolds){ | ||
warning("nfolds must be an integer. Rounding.") | ||
nfolds <- round(nfolds) | ||
} | ||
foldid <- sample(rep(seq(nfolds), length.out=n)) | ||
|
||
if (verbose){out_all <- list("folds" = foldid)} | ||
|
||
|
||
# Train regression procedures | ||
|
||
regression_list <- list() | ||
predictor_list <- list() | ||
|
||
for (fold in seq(nfolds)){ | ||
|
||
te <- (foldid == fold) | ||
if (nfolds > 1){tr <- !te} else{tr <- te} | ||
train <- list("X" = matrix(X[tr,], ncol=ncol(X)), "y" = y[tr]) | ||
|
||
regression_list[[fold]] <- response_regression(train$X, train$y) | ||
predictor_list[[fold]] <- predictor_regression(train$X[,-d], train$X[,d]) | ||
|
||
} | ||
|
||
# See if regression method produces usable derivative estimates on all folds | ||
do_resmooth <- !all(sapply(regression_list, function(x){"deriv_fit" %in% names(x)})) | ||
|
||
# Choose resmoothing bandwidth | ||
if (do_resmooth & is.null(resmooth_bw)){ | ||
cv_resmooth_output <- cv_resmooth(X=X, | ||
y=y, | ||
d=1, | ||
regression=regression_list, | ||
tol=2, | ||
prefit=TRUE, | ||
foldid=foldid) | ||
resmooth_bw <- cv_resmooth_output$bw_opt[1] | ||
if (verbose) {out_all["cv_resmooth_output"] = list(cv_resmooth_output)} | ||
} | ||
|
||
# Choose spline df (using in-sample residuals from whole data set) | ||
if (is.null(spline_df)){ | ||
m_fit <- predictor_regression(X[,-d], X[,d])$fit | ||
|
||
# Variance estimation | ||
resid <- X[,d] - m_fit(X[,-d]) | ||
tree <- partykit::ctree(resid^2 ~ ., | ||
data=data.frame("resid"=resid, "z"=X[,-d])) | ||
sigma <- sqrt(unname(stats::predict(tree, | ||
newdata = data.frame("z"=X[,-d])))) | ||
eps <- resid / sigma | ||
cv_spline_score_output <- cv_spline_score(x=eps) | ||
spline_df <- cv_spline_score_output$df_1se # Or df_min | ||
|
||
if (verbose) {out_all["cv_spline_score_output"] = list(cv_spline_score_output)} | ||
} | ||
|
||
# Evaluate f, df, and score | ||
f_pred <- rep(NA, n) | ||
df_pred <- rep(NA, n) | ||
score_pred <- rep(NA, n) | ||
for (fold in seq(nfolds)){ | ||
|
||
if (verbose) {out_fold = list()} | ||
|
||
te <- (foldid == fold) | ||
if (nfolds > 1){tr <- !te} else{tr <- te} | ||
train <- list("X" = matrix(X[tr,], ncol=ncol(X)), "y" = y[tr]) | ||
test <- list("X" = matrix(X[te,], ncol=ncol(X)), "y" = y[te]) | ||
|
||
|
||
# # # f and df evaluation | ||
f_fit <- regression_list[[fold]]$fit | ||
|
||
if ("deriv_fit" %in% names(regression_list[[fold]])) { | ||
f_pred[te] <- f_fit(test$X) | ||
df_fit <- regression_list[[fold]]$deriv_fit | ||
df_pred[te] <- df_fit(test$X) | ||
} else{ | ||
smooth_list <- resmooth(fit = f_fit, | ||
X = test$X, | ||
d = d, | ||
bw = resmooth_bw) | ||
if (verbose){out_fold["smooth_list"] <- list(smooth_list)} | ||
f_pred[te] <- smooth_list$pred[[1]] | ||
df_pred[te] <- smooth_list$deriv[[1]] | ||
} | ||
|
||
# # # Score evaluation | ||
m_fit <- predictor_list[[fold]]$fit | ||
|
||
# Variance estimation | ||
resid_train <- train$X[,d] - m_fit(train$X[,-d]) | ||
tree <- partykit::ctree(resid_train^2 ~ ., | ||
data=data.frame("resid_train"=resid_train, | ||
"z"=train$X[,-d])) | ||
if (verbose){out_fold["tree"] <- list(tree)} | ||
sigma_train <- sqrt(unname(stats::predict(tree, | ||
newdata = data.frame("z"=train$X[,-d])))) | ||
|
||
resid_test <- test$X[,1] - m_fit(test$X[,-1]) | ||
sigma_test <- sqrt(unname(stats::predict(tree, | ||
newdata = data.frame("z"=test$X[,-d])))) | ||
|
||
# If any variance estimates are close to zero, assume homogeneous (more stable) | ||
if (any(sigma_test < 0.01)){ | ||
warning(paste0("Minimal heterogeneous variance equals ", | ||
round(min(sigma_test),4), ", reducing to homogeneous | ||
case for stability.")) | ||
sigma_train <- rep(sqrt(mean(resid_train^2)), length(train$y)) | ||
sigma_test <- rep(sqrt(mean(resid_test^2)), length(test$y)) | ||
} | ||
|
||
# Score estimation | ||
eps_train <- resid_train / sigma_train | ||
rho <- spline_score(eps_train, df=spline_df)$rho | ||
if (verbose){out_fold["rho"] <- list(rho)} | ||
|
||
eps_test <- resid_test / sigma_test | ||
rho_test <- rho(eps_test) | ||
|
||
# If any score estimates are large, assume Gaussian (more stable) | ||
if (any(abs(rho_test) > 10)){ | ||
warning(paste0("Maximal absolute spline score prediction equals ", | ||
round(max(abs(rho_test)),2), ", reducing to Gaussian | ||
case for stability.")) | ||
rho_test <- - eps_test | ||
} | ||
|
||
score_pred[te] <- rho_test/sigma_test # rho_P(x,z)=1/sigma(z) * rho_\varepsilon( (x-m(z))/sigma(z) ) | ||
|
||
if (verbose){ | ||
str <- paste0("fold", fold) | ||
out_all[[str]] <- out_fold | ||
} | ||
|
||
} # End of fold | ||
|
||
evaluations <- df_pred - score_pred * (y - f_pred) | ||
est <- mean(evaluations) | ||
se <- stats::sd(evaluations) / sqrt(n) | ||
|
||
out <- list("est" = est, | ||
"se" = se) | ||
|
||
if (verbose) { | ||
out[["computations"]] <- out_all | ||
out[["f_pred"]] <- f_pred | ||
out[["df_deriv"]] <- df_pred | ||
out[["score_pred"]] <- score_pred | ||
out[["evaluations"]] <- evaluations | ||
} | ||
|
||
return(out) | ||
} |
Oops, something went wrong.