diff --git a/.Rbuildignore b/.Rbuildignore index f524394..5e9b462 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ PSAboot Poster.graffle build.R ^CRAN-SUBMISSION$ README.Rmd +Poster/ diff --git a/DESCRIPTION b/DESCRIPTION index 6fee134..204d743 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: PSAboot Title: Bootstrapping for Propensity Score Analysis -Version: 1.3.6 -Date: 2023-03-21 +Version: 1.3.7 +Date: 2023-04-15 Authors@R: person("Jason", "Bryer", , "jason@bryer.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2454-0402")) diff --git a/NEWS.md b/NEWS.md index ef49c72..3e51120 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# PSAboot 1.3.7 + +* Fixed a bug where the balance table wasn't combined correctly if the covariates were not specified in the correct order. + # PSAboot 1.3.6 * Update to re-release to CRAN. diff --git a/Poster/PSAboot_Poster.graffle b/Poster/PSAboot_Poster.graffle new file mode 100644 index 0000000..1b7d4ac Binary files /dev/null and b/Poster/PSAboot_Poster.graffle differ diff --git a/Poster/PSAboot_Poster.pdf b/Poster/PSAboot_Poster.pdf new file mode 100644 index 0000000..2159657 Binary files /dev/null and b/Poster/PSAboot_Poster.pdf differ diff --git a/Poster/Poster.R b/Poster/Poster.R new file mode 100644 index 0000000..e77cac9 --- /dev/null +++ b/Poster/Poster.R @@ -0,0 +1,88 @@ +# This script creates the figures and output used for the PSAboot poster located +# at https://github.com/jbryer/PSAboot/Poster + +library(PSAboot) # install.packages('PSAboot') +library(psa) # remotes::install_github('jbryer/psa') + +data(tutoring, package = 'TriMatch') + +#' Propensity score weighting implementation for bootstrapping. +#' +#' @inherit boot.strata return params +#' @param estimand which treatment effect to estimate. Values can be ATE, ATT, +#' ATC, or ATM. +#' @export +boot.weighting <- function(Tr, Y, X, X.trans, formu, estimand = 'ATE', ...) { + formu.treat <- update.formula(formu, 'treat ~ .') + df <- cbind(treat = Tr, X) + ps <- fitted(glm(formu.treat, + data = df, + family = binomial(link = 'logit'))) + + weights <- psa::calculate_ps_weights(treatment = Tr, + ps = ps, + estimand = estimand) + + X.trans.scaled <- lapply(X.trans, scale) |> as.data.frame() + X.trans.scaled$treat <- Tr + lm_balance <- glm(treat ~ ., + data = X.trans.scaled, + family = quasibinomial(link = 'logit'), + weights = weights) |> summary() + + te_lm <- lm(formula = Y ~ treat, + data = data.frame(Y = Y, treat = Tr), + weights = weights) |> summary() + + se <- te_lm$coefficients[2,]['Std. Error'] |> unname() + te <- te_lm$coefficients[2,]['Estimate'] |> unname() + + return(list( + summary = c(estimate = te, + ci.min = te - 1.96 * se, + ci.max = te + 1.96 * se, + se.wtd = se, + approx.t = unname(te_lm$coefficients[2,]['t value'])), + details = te_lm, + balance = lm_balance$coefficients[,'Estimate'][-1] + )) +} + +tutoring.formu <- treat2 ~ Gender + Ethnicity + Military + ESL + + EdMother + EdFather + Age + Employment + Income + Transfer + GPA + +tutoring$treat2 <- tutoring$treat != "Control" +covariates <- tutoring[,c("Gender", "Ethnicity", "Military", "ESL", + "EdMother", "EdFather", "Age", "Employment", + "Income", "Transfer", "GPA")] +tutoring.boot <- PSAboot(Tr = tutoring$treat2, + Y = tutoring$Grade, + X = covariates, + # formu = tutoring.formu, + methods = c(getPSAbootMethods(), 'weighting' = boot.weighting), + seed = 2112, + parallel = FALSE) +ls(tutoring.boot) + +summary(tutoring.boot) + +plot(tutoring.boot) + xlab('Average Treatment Effect')# + theme_minimal() +ggsave('Poster/treatment_effects.pdf', width = 12, height = 8, units = 'in', dpi = 100) + +pdf('Poster/matrixplot.pdf', width = 12, height = 12) +matrixplot(tutoring.boot) +dev.off() + +tutoring.balance <- balance(tutoring.boot) +summary(tutoring.balance) + +plot(tutoring.balance) + theme(legend.position = 'bottom') +ggsave('Poster/balance.pdf', width = 12, height = 8, units = 'in', dpi = 100) + +boxplot(tutoring.balance) +ggsave('Poster/balance_boxplot.pdf', width = 12, height = 8, units = 'in', dpi = 100) + +boxplot(tutoring.boot) +ggsave('Poster/boxplot.pdf', width = 12, height = 4, units = 'in', dpi = 100) + + diff --git a/Poster/balance.pdf b/Poster/balance.pdf new file mode 100644 index 0000000..270b205 Binary files /dev/null and b/Poster/balance.pdf differ diff --git a/Poster/balance_boxplot.pdf b/Poster/balance_boxplot.pdf new file mode 100644 index 0000000..e17e96a Binary files /dev/null and b/Poster/balance_boxplot.pdf differ diff --git a/Poster/boxplot.pdf b/Poster/boxplot.pdf new file mode 100644 index 0000000..5355710 Binary files /dev/null and b/Poster/boxplot.pdf differ diff --git a/Poster/matrixplot.pdf b/Poster/matrixplot.pdf new file mode 100644 index 0000000..6ffeb20 Binary files /dev/null and b/Poster/matrixplot.pdf differ diff --git a/Poster/qr_code.pdf b/Poster/qr_code.pdf new file mode 100644 index 0000000..b927d4e Binary files /dev/null and b/Poster/qr_code.pdf differ diff --git a/Poster/qr_code.png b/Poster/qr_code.png new file mode 100644 index 0000000..77171bb Binary files /dev/null and b/Poster/qr_code.png differ diff --git a/Poster/treatment_effects.pdf b/Poster/treatment_effects.pdf new file mode 100644 index 0000000..f8ab9bf Binary files /dev/null and b/Poster/treatment_effects.pdf differ diff --git a/R/PSAboot.R b/R/PSAboot.R index e4106b3..7858003 100755 --- a/R/PSAboot.R +++ b/R/PSAboot.R @@ -87,7 +87,7 @@ getPSAbootMethods <- function() { PSAboot <- function(Tr, Y, X, M = 100, formu = as.formula(paste0('treat ~ ', paste0(names(X), collapse=' + '))), control.ratio = 5, - control.sample.size = min(control.ratio*min(table(Tr)), max(table(Tr))), + control.sample.size = min(control.ratio * min(table(Tr)), max(table(Tr))), control.replace = TRUE, treated.sample.size = min(table(Tr)), treated.replace = TRUE, @@ -141,11 +141,11 @@ PSAboot <- function(Tr, Y, X, M = 100, complete.details[[paste0('details.', n)]] <- r$details complete.details[[paste0('balance.', n)]] <- r$balance complete.summary <- rbind(complete.summary, data.frame( - method=n, - estimate=unname(r$summary['estimate']), - ci.min=unname(r$summary['ci.min']), - ci.max=unname(r$summary['ci.max']), - stringsAsFactors=FALSE)) + method = n, + estimate = unname(r$summary['estimate']), + ci.min = unname(r$summary['ci.min']), + ci.max = unname(r$summary['ci.max']), + stringsAsFactors = FALSE)) } pb <- txtProgressBar(1,M,style=3) @@ -171,11 +171,11 @@ PSAboot <- function(Tr, Y, X, M = 100, result[[paste0('details.', n)]] <- r$details result[[paste0('balance.', n)]] <- r$balance result[['summary']] <- rbind(result[['summary']], data.frame( - Method=n, - estimate=unname(r$summary['estimate']), - ci.min=unname(r$summary['ci.min']), - ci.max=unname(r$summary['ci.max']), - stringsAsFactors=FALSE)) + Method = n, + estimate = unname(r$summary['estimate']), + ci.min = unname(r$summary['ci.min']), + ci.max = unname(r$summary['ci.max']), + stringsAsFactors = FALSE)) }, error=function(e) { warning(paste0('Error occurred during iteration ', i, ' for ', n, ' method: ', e)) @@ -207,15 +207,15 @@ PSAboot <- function(Tr, Y, X, M = 100, cols] <- as.numeric((sum[j,cols])) } } - r <- list(pooled.summary=summary, - pooled.details=tmp, - complete.summary=complete.summary, - complete.details=complete.details, - Y=Y, Tr=Tr, X=X, M=M, seed=seed, - control.sample.size=control.sample.size, - treated.sample.size=treated.sample.size, - control.replace=control.replace, - treated.replace=treated.replace) + r <- list(pooled.summary = summary, + pooled.details = tmp, + complete.summary = complete.summary, + complete.details = complete.details, + Y = Y, Tr = Tr, X = X, M = M, seed = seed, + control.sample.size = control.sample.size, + treated.sample.size = treated.sample.size, + control.replace = control.replace, + treated.replace = treated.replace) class(r) <- "PSAboot" return(r) } diff --git a/R/balance.R b/R/balance.R index 0048ca9..c5882bf 100755 --- a/R/balance.R +++ b/R/balance.R @@ -49,10 +49,10 @@ balance <- function(psaboot, na.rm = TRUE, pool.fun = mean) { index.balance <- which(substr(names(psaboot$complete.details), 1, 8) == 'balance.') index.names <- substr(names(psaboot$complete.details)[index.balance], 9, max(nchar(names(psaboot$complete.details)))) - bal <- psaboot$complete.details[[ index.balance[1] ]] + bal_names <- names(psaboot$complete.details[[ index.balance[1] ]]) bal <- c() for(i in index.balance) { - bal <- rbind(bal, psaboot$complete.details[[ i ]]) + bal <- rbind(bal, psaboot$complete.details[[ i ]][bal_names]) } bal <- abs(bal) dimnames(bal)[[1]] <- index.names diff --git a/R/balance.boxplot.R b/R/balance.boxplot.R index 3bc932e..5eae98e 100755 --- a/R/balance.boxplot.R +++ b/R/balance.boxplot.R @@ -31,7 +31,15 @@ boxplot.PSAboot.balance <- function(x, method <- names(x$balances)[i] tmp <- as.data.frame(x$balances[[i]]) tmp$Method <- method - combined <- rbind(combined, tmp) + if(nrow(combined) == 0) { + combined <- tmp + } else { + missing_vars <- names(combined)[!names(combined) %in% names(tmp)] + for(i in missing_vars) { + tmp[,i] <- NA + } + combined <- rbind(combined, tmp[,names(combined)]) + } } tmp <- reshape2::melt(combined, id='Method') tmp2 <- as.data.frame(x$unadjusted) diff --git a/README.Rmd b/README.Rmd index b45f97f..5182d24 100755 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,7 @@ editor_options: -As the popularity of propensity score methods for estimating causal effects in observational studies increase, the choices researchers have for which methods to use has also increased. Rosenbaum (2012) suggested that there are benefits for testing the null hypothesis more than once in observational studies. With the wide availability of high power computers resampling methods such as bootstrapping (Efron, 1979) have become popular for providing more stable estimates of the sampling distribution. This paper introduces the `PSAboot` package for R that provides functions for bootstrapping propensity score methods. It deviates from traditional bootstrapping methods by allowing for different sampling specifications for treatment and control groups, mainly to ensure the ratio of treatment-to-control observations are maintained. Additionally, this framework will provide estimates using multiple methods for each bootstrap sample. Two examples are discussed: the classic National Work Demonstration and PSID (Lalonde, 1986) study and a study on tutoring effects on student grades. +As the popularity of propensity score methods for estimating causal effects in observational studies increase, the choices researchers have for which methods to use has also increased. Estimated treatment effects may be sensitive to choice of method. One approach to test the sensitivity of method choice is to test the null hypothesis more than once using more than one method (Rosenbaum, 2012). With the wide availability of high power computers resampling methods such as bootstrapping (Efron, 1979) have become popular for providing more estimates of the sampling distribution. This paper introduces the `PSAboot` R package that provides functions for bootstrapping propensity score methods. It deviates from traditional bootstrapping methods by allowing for different sampling specifications for treatment and control groups, mainly to ensure the ratio of treatment-to-control observations are consistent. This approach can also be used in situations where there is imbalance between the number of treatment and control observations by allowing for up and/or down sampling. Lastly, by estimating balance statistics and treatment effects for each bootstrap sample we can compare the distributions across multiple propensity score methods to examine the relative performance of these methods. ## Installation diff --git a/vignettes/PSAboot.Rmd b/vignettes/PSAboot.Rmd index 75055b4..af24c1a 100755 --- a/vignettes/PSAboot.Rmd +++ b/vignettes/PSAboot.Rmd @@ -29,7 +29,13 @@ vignette: > ```{r setup, echo=FALSE, results='hide', message=FALSE, warning=FALSE} require(knitr) -opts_chunk$set(comment = '') +opts_chunk$set(fig.width = 12, + fig.height = 8, + fig.align = 'center', + out.width = '100%', + echo = TRUE, + warning = FALSE, + message = FALSE) require(PSAboot) ``` @@ -118,15 +124,16 @@ The `PSAboot` function returns an object of class `PSAboot`. The following S3 me The `lalonde` (Lalonde, 1986) has become the *de defacto* teaching dataset in PSA since Dehejia and Wahba's (1999) re-examination of the National Supported Work Demonstration (NSW) and the Current Population Survey (CPS). -The `lalonde` data set is included in the `MatchIt` package. The crosstab shows that there are 429 control units and 185 treatment units. +The `lalonde` data set is included in the `Matching` package. The contingency table shows that there are 429 control units and 185 treatment units. ```{r lalonde.load} -data(lalonde, package='MatchIt') +data(lalonde, package='Matching') table(lalonde$treat) ``` ```{r lalonde.boot, cache=FALSE} -lalonde.formu <- treat ~ age + I(age^2) + educ + I(educ^2) + race + - married + nodegree + re74 + I(re74^2) + re75 + I(re75^2) +lalonde.formu <- treat~age + I(age^2) + educ + I(educ^2) + black + + hisp + married + nodegr + re74 + I(re74^2) + re75 + I(re75^2) + + u74 + u75 boot.lalonde <- PSAboot(Tr = lalonde$treat, Y = lalonde$re78, X = lalonde,