diff --git a/.github/workflows/render-readme.yaml b/.github/workflows/render-readme.yaml index 280436e..e274d4e 100644 --- a/.github/workflows/render-readme.yaml +++ b/.github/workflows/render-readme.yaml @@ -11,8 +11,10 @@ jobs: runs-on: macOS-latest steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - name: Install rmarkdown run: Rscript -e 'install.packages(c("rmarkdown", "badger"))' - name: Render README diff --git a/NAMESPACE b/NAMESPACE index d0dbdc1..53ecdf3 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(boot.matching) export(boot.matchit) export(boot.rpart) export(boot.strata) +export(boot.weighting) export(getPSAbootMethods) export(matrixplot) export(psa.strata) @@ -40,14 +41,17 @@ importFrom(party,where) importFrom(psych,describe) importFrom(psych,describeBy) importFrom(stats,as.formula) +importFrom(stats,binomial) importFrom(stats,cor) importFrom(stats,density) importFrom(stats,fitted) importFrom(stats,glm) +importFrom(stats,lm) importFrom(stats,predict) importFrom(stats,qnorm) importFrom(stats,qt) importFrom(stats,quantile) +importFrom(stats,quasibinomial) importFrom(stats,sd) importFrom(stats,t.test) importFrom(stats,update.formula) diff --git a/NEWS.md b/NEWS.md index 3e51120..c828763 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # PSAboot 1.3.7 +* Added propensity score weighting. * Fixed a bug where the balance table wasn't combined correctly if the covariates were not specified in the correct order. # PSAboot 1.3.6 diff --git a/Poster/Poster.R b/Poster/Poster.R index e77cac9..5ddfebd 100644 --- a/Poster/Poster.R +++ b/Poster/Poster.R @@ -74,7 +74,7 @@ matrixplot(tutoring.boot) dev.off() tutoring.balance <- balance(tutoring.boot) -summary(tutoring.balance) +print(tutoring.balance) plot(tutoring.balance) + theme(legend.position = 'bottom') ggsave('Poster/balance.pdf', width = 12, height = 8, units = 'in', dpi = 100) diff --git a/R/PSAboot.R b/R/PSAboot.R index 7858003..ab850a1 100755 --- a/R/PSAboot.R +++ b/R/PSAboot.R @@ -21,7 +21,8 @@ getPSAbootMethods <- function() { 'ctree' = boot.ctree, 'rpart' = boot.rpart, 'Matching' = boot.matching, - 'MatchIt' = boot.matchit)) + 'MatchIt' = boot.matchit, + 'Weighting' = boot.weighting)) invisible(methods) } diff --git a/R/boot.weighting.R b/R/boot.weighting.R new file mode 100644 index 0000000..f991559 --- /dev/null +++ b/R/boot.weighting.R @@ -0,0 +1,72 @@ +#' Calculates propensity score weights. +#' +#' @param treatment a logical vector for treatment status. +#' @param ps numeric vector of propensity scores +#' @param estimand character string indicating which estimand to be used. Possible +#' values are +#' ATE (average treatment effect), +#' ATT (average treatment effect for the treated), +#' ATC (average treatement effect for the controls), +#' ATM (Average Treatment Effect Among the Evenly Matchable), +#' ATO (Average Treatment Effect Among the Overlap Populatio) +calculate_ps_weights <- function(treatment, ps, estimand = 'ATE') { + # TODO: this is a copy of a function from the psa package. Use that once it is on CRAN + weights <- NA + if(estimand == 'ATE') { + weights <- (treatment / ps) + ((1 - treatment) / (1 - ps)) + } else if(estimand == 'ATT') { + weights <- ((ps * treatment) / ps) + ((ps * (1 - treatment)) / (1 - ps)) + } else if(estimand == 'ATC') { + weights <- (((1 - ps) * treatment) / ps) + (((1 - ps) * (1 - treatment)) / (1 - ps)) + } else if(estimand == 'ATM') { + weights <- pmin(ps, 1 - ps) / (treatment * ps + (1 - treatment) * (1 - ps)) + } else if(estimand == 'ATO') { + weights <- (1 - ps) * treatment + ps * (1 - treatment) + } else { + stop(paste0('Invalid estimand specified: ', estimand)) + } + return(weights) +} + +#' Propensity score weighting implementation for bootstrapping. +#' +#' @inherit boot.strata return params +#' @param estimand which treatment effect to estimate. Values can be ATE, ATT, +#' ATC, or ATM. +#' @importFrom stats quasibinomial binomial lm +#' @export +boot.weighting <- function(Tr, Y, X, X.trans, formu, estimand = 'ATE', ...) { + formu.treat <- update.formula(formu, 'treat ~ .') + df <- cbind(treat = Tr, X) + ps <- fitted(glm(formu.treat, + data = df, + family = binomial(link = 'logit'))) + + weights <- calculate_ps_weights(treatment = Tr, + ps = ps, + estimand = estimand) + + X.trans.scaled <- lapply(X.trans, scale) |> as.data.frame() + X.trans.scaled$treat <- Tr + lm_balance <- glm(treat ~ ., + data = X.trans.scaled, + family = quasibinomial(link = 'logit'), + weights = weights) |> summary() + + te_lm <- lm(formula = Y ~ treat, + data = data.frame(Y = Y, treat = Tr), + weights = weights) |> summary() + + se <- te_lm$coefficients[2,]['Std. Error'] |> unname() + te <- te_lm$coefficients[2,]['Estimate'] |> unname() + + return(list( + summary = c(estimate = te, + ci.min = te - 1.96 * se, + ci.max = te + 1.96 * se, + se.wtd = se, + approx.t = unname(te_lm$coefficients[2,]['t value'])), + details = te_lm, + balance = lm_balance$coefficients[,'Estimate'][-1] + )) +} diff --git a/README.Rmd b/README.Rmd index 5182d24..9684fda 100755 --- a/README.Rmd +++ b/README.Rmd @@ -5,7 +5,7 @@ editor_options: --- -# An R Package for Bootstrapping Propensity Score Analysis +# Bootstrapping Propensity Score Analysis [![R-CMD-check](https://github.com/jbryer/PSAboot/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jbryer/PSAboot/actions/workflows/R-CMD-check.yaml) @@ -14,6 +14,9 @@ editor_options: [![CRAN Status](https://badges.cranchecks.info/flavor/release/PSAboot.svg)](https://cran.r-project.org/web/checks/check_results_PSAboot.html) +Package website: https://jbryer.github.io/PSAboot/ +Poster: https://github.com/jbryer/PSAboot/blob/master/Poster/PSAboot_Poster.pdf + As the popularity of propensity score methods for estimating causal effects in observational studies increase, the choices researchers have for which methods to use has also increased. Estimated treatment effects may be sensitive to choice of method. One approach to test the sensitivity of method choice is to test the null hypothesis more than once using more than one method (Rosenbaum, 2012). With the wide availability of high power computers resampling methods such as bootstrapping (Efron, 1979) have become popular for providing more estimates of the sampling distribution. This paper introduces the `PSAboot` R package that provides functions for bootstrapping propensity score methods. It deviates from traditional bootstrapping methods by allowing for different sampling specifications for treatment and control groups, mainly to ensure the ratio of treatment-to-control observations are consistent. This approach can also be used in situations where there is imbalance between the number of treatment and control observations by allowing for up and/or down sampling. Lastly, by estimating balance statistics and treatment effects for each bootstrap sample we can compare the distributions across multiple propensity score methods to examine the relative performance of these methods. diff --git a/README.md b/README.md index 7defc39..5d005d9 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,38 @@ -# An R Package for Bootstrapping Propensity Score Analysis +# Bootstrapping Propensity Score Analysis [![R-CMD-check](https://github.com/jbryer/PSAboot/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jbryer/PSAboot/actions/workflows/R-CMD-check.yaml) -[![](https://img.shields.io/badge/devel%20version-1.3.6-blue.svg)](https://github.com/jbryer/PSAboot) +[![](https://img.shields.io/badge/devel%20version-1.3.7-blue.svg)](https://github.com/jbryer/PSAboot) [![](https://www.r-pkg.org/badges/version/PSAboot)](https://cran.r-project.org/package=PSAboot) [![CRAN Status](https://badges.cranchecks.info/flavor/release/PSAboot.svg)](https://cran.r-project.org/web/checks/check_results_PSAboot.html) +Package website: +Poster: + + As the popularity of propensity score methods for estimating causal effects in observational studies increase, the choices researchers have -for which methods to use has also increased. Rosenbaum (2012) suggested -that there are benefits for testing the null hypothesis more than once -in observational studies. With the wide availability of high power -computers resampling methods such as bootstrapping (Efron, 1979) have -become popular for providing more stable estimates of the sampling -distribution. This paper introduces the `PSAboot` package for R that -provides functions for bootstrapping propensity score methods. It -deviates from traditional bootstrapping methods by allowing for -different sampling specifications for treatment and control groups, -mainly to ensure the ratio of treatment-to-control observations are -maintained. Additionally, this framework will provide estimates using -multiple methods for each bootstrap sample. Two examples are discussed: -the classic National Work Demonstration and PSID (Lalonde, 1986) study -and a study on tutoring effects on student grades. +for which methods to use has also increased. Estimated treatment effects +may be sensitive to choice of method. One approach to test the +sensitivity of method choice is to test the null hypothesis more than +once using more than one method (Rosenbaum, 2012). With the wide +availability of high power computers resampling methods such as +bootstrapping (Efron, 1979) have become popular for providing more +estimates of the sampling distribution. This paper introduces the +`PSAboot` R package that provides functions for bootstrapping propensity +score methods. It deviates from traditional bootstrapping methods by +allowing for different sampling specifications for treatment and control +groups, mainly to ensure the ratio of treatment-to-control observations +are consistent. This approach can also be used in situations where there +is imbalance between the number of treatment and control observations by +allowing for up and/or down sampling. Lastly, by estimating balance +statistics and treatment effects for each bootstrap sample we can +compare the distributions across multiple propensity score methods to +examine the relative performance of these methods. ## Installation diff --git a/man/boot.weighting.Rd b/man/boot.weighting.Rd new file mode 100644 index 0000000..3f17413 --- /dev/null +++ b/man/boot.weighting.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/boot.weighting.R +\name{boot.weighting} +\alias{boot.weighting} +\title{Propensity score weighting implementation for bootstrapping.} +\usage{ +boot.weighting(Tr, Y, X, X.trans, formu, estimand = "ATE", ...) +} +\arguments{ +\item{Tr}{vector indicating treatment assignment.} + +\item{Y}{vector of outcome.} + +\item{X}{matrix or data frame of covariates.} + +\item{X.trans}{a data frame of \code{X} with factors recoded. See \code{\link{cv.trans.psa}}} + +\item{formu}{the formula to use to estimate propensity scores. Note that the +dependent varaible (i.e. treatment varaible) name will be updated using +the \code{Tr} vector.} + +\item{estimand}{which treatment effect to estimate. Values can be ATE, ATT, +ATC, or ATM.} + +\item{...}{other parameters passed from \code{\link{PSAboot}}} +} +\value{ +a list with three elements: + \describe{ + \item{\code{summary}}{a named numeric vector (with at minimum \code{estimate}, + \code{ci.min}, and \code{ci.max} but other values allowed)} + \item{\code{balance}}{a named numeric vector with one element per + covariate listed in \code{X.trans} representing a balance statistic + (usually standardized effect size after adjustment)} + \item{\code{details}}{an arbitrary object that contains the full results of the + analysis} + } +} +\description{ +Propensity score weighting implementation for bootstrapping. +} diff --git a/man/calculate_ps_weights.Rd b/man/calculate_ps_weights.Rd new file mode 100644 index 0000000..33d89e4 --- /dev/null +++ b/man/calculate_ps_weights.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/boot.weighting.R +\name{calculate_ps_weights} +\alias{calculate_ps_weights} +\title{Calculates propensity score weights.} +\usage{ +calculate_ps_weights(treatment, ps, estimand = "ATE") +} +\arguments{ +\item{treatment}{a logical vector for treatment status.} + +\item{ps}{numeric vector of propensity scores} + +\item{estimand}{character string indicating which estimand to be used. Possible +values are +ATE (average treatment effect), +ATT (average treatment effect for the treated), +ATC (average treatement effect for the controls), +ATM (Average Treatment Effect Among the Evenly Matchable), +ATO (Average Treatment Effect Among the Overlap Populatio)} +} +\description{ +Calculates propensity score weights. +} diff --git a/vignettes/PSAboot.Rmd b/vignettes/PSAboot.Rmd index af24c1a..0d60047 100755 --- a/vignettes/PSAboot.Rmd +++ b/vignettes/PSAboot.Rmd @@ -243,7 +243,7 @@ hist(tutoring.boot) boxplot(tutoring.boot) ``` -```{r tutoring.matrixplot} +```{r tutoring.matrixplot, fig.width = 12, fig.height = 12} matrixplot(tutoring.boot) ```