-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 9b56813
Showing
58 changed files
with
5,972 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
Package: RDHonest | ||
Title: Honest Inference in Regression Discontinuity Designs | ||
Version: 1.0.0 | ||
Authors@R: | ||
c(person(given = "Michal", | ||
family = "Kolesár", | ||
role = c("aut", "cre", "cph"), | ||
email = "kolesarmi@googlemail.com", | ||
comment = c(ORCID = "0000-0002-2482-7796")), | ||
person(given= "Tim", | ||
family = "Armstrong", | ||
role = "ctb", | ||
email = "timothy.armstrong@usc.edu")) | ||
Description: Honest and nearly-optimal confidence intervals in fuzzy and sharp | ||
regression discontinuity designs and for inference at a point based on local | ||
linear regression. The implementation is based on Armstrong and Kolesár (2018) | ||
<doi:10.3982/ECTA14434>, and Kolesár and Rothe (2018) | ||
<doi:10.1257/aer.20160945>. Supports covariates, clustering, and weighting. | ||
Depends: R (>= 4.3.0) | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
Imports: stats, Formula, withr | ||
Suggests: spelling, ggplot2, testthat, knitr, rmarkdown, formatR | ||
Config/testthat/edition: 3 | ||
Language: en-US | ||
URL: https://github.com/kolesarm/RDHonest | ||
BugReports: https://github.com/kolesarm/RDHonest/issues | ||
RoxygenNote: 7.3.1 | ||
VignetteBuilder: knitr | ||
NeedsCompilation: no | ||
Packaged: 2024-03-22 00:10:30 UTC; kolesarm | ||
Author: Michal Kolesár [aut, cre, cph] | ||
(<https://orcid.org/0000-0002-2482-7796>), | ||
Tim Armstrong [ctb] | ||
Maintainer: Michal Kolesár <kolesarmi@googlemail.com> | ||
Repository: CRAN | ||
Date/Publication: 2024-03-22 20:00:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
d2925759b43b6f25bf33220e954072a2 *DESCRIPTION | ||
b13372954d71b647d12b8606730d8c53 *NAMESPACE | ||
bd3c733f79f2e99c092844ecd2ad62c8 *NEWS.md | ||
015afbcdfffb9c5d6012b9d66cafda12 *R/Cbound.R | ||
a24e04f58c717f6c1c0a48305c919df3 *R/NPRfunctions.R | ||
b31d30125c5785c24442eb047da32ffd *R/RDHonest.R | ||
6ff45e367363e17b2c02c3691fd803d6 *R/RD_bme.R | ||
bb99dc6d13229fed5c9955a993d99104 *R/RD_opt.R | ||
6a41d0f0e0ab7d7d1607c706a3d40b1c *R/checks.R | ||
b03c1413bf83b493b028bc25c77172b7 *R/cvb.R | ||
f038ccf47cacdbe82ca04040f364c90b *R/documentation.R | ||
e890fdac65a0b1f139bb1229adbf49a8 *R/kernels.R | ||
d6701934e90df5b7cfcf9ddab81edcc0 *R/plots.R | ||
b4e942a0de189d436c015ec07a0fba28 *R/prelim_var.R | ||
2a6a0ad2ce8fdf41853ab58e14419b89 *R/sysdata.rda | ||
ff4c677d1e41a2fdc44263746a2a3de2 *R/utils.R | ||
7080893e02c49cd296d4424b9be55069 *build/partial.rdb | ||
5e0b3c3f604090d949b3a6c74a504f5b *build/vignette.rds | ||
6b3ec335cab7402aa786e91d6f9d6bfa *data/cghs.rda | ||
f0567c81b282df2910c4d7cd1e2dcf96 *data/headst.rda | ||
6838b6a056a61ff55a572c088cf00fd9 *data/lee08.rda | ||
8a8e79d9d25075f1ebad7329cc94f8ee *data/rcp.rda | ||
49125a52ec7640835ee69823066547e5 *data/rebp.rda | ||
a2c3f46c3ffee8040ff755211f87866a *inst/WORDLIST | ||
c42de70ef3bd200609204f1e3d0d908d *inst/doc/RDHonest.R | ||
7a5b45ae730db8773be4a30fb8ae05e3 *inst/doc/RDHonest.Rmd | ||
1f9a45641ed03d9659a916d9d3dc6168 *inst/doc/RDHonest.pdf | ||
85daf08fe9d4a72362964e1cabc6e27b *man/CVb.Rd | ||
8bf4b8167fde615012193992f7631b9a *man/RDHonest.Rd | ||
a53f6886201ccdbf6cb87147856ec1a1 *man/RDHonestBME.Rd | ||
ae60c195e1565df43c543981d91fe9cf *man/RDScatter.Rd | ||
f4a0db0b7307c86f9563fb58f99e9499 *man/RDSmoothnessBound.Rd | ||
843a0cb71fa8bac7e74c2b471a42258f *man/RDTEfficiencyBound.Rd | ||
fe719141f4c4e0a55af182867f9e1bcc *man/cghs.Rd | ||
eca050783faa7c37237742a877f5c167 *man/headst.Rd | ||
3b30d3b653d61da288e883466882c2d9 *man/lee08.Rd | ||
80303d476b9263f6b3d97671cad1fb95 *man/rcp.Rd | ||
56fcffd527d36a7acb4c257060738e23 *man/rebp.Rd | ||
cfd29e708c87a72ad33acf76570706ba *tests/spelling.R | ||
8a0c187fd1f5bc4d456349a41111eb80 *tests/testthat.R | ||
b5007d937f77bb59056b74a7b683bf3c *tests/testthat/test_cbound.R | ||
782beaff44c5394b665c532cd49e4918 *tests/testthat/test_clustering.R | ||
2eb40308400e1fc4c936aa830d64d9c7 *tests/testthat/test_covariates.R | ||
f43ad1d6d638150586538058e560264d *tests/testthat/test_cv.R | ||
d5b1f49e836e4cce16de56ebccc75d83 *tests/testthat/test_frd.R | ||
cda007da6de166093c5618e254342a8e *tests/testthat/test_interface.R | ||
a1097b9d88e33505ca51cc0fc477a1d3 *tests/testthat/test_kernels.R | ||
7d3cfc260fdbe50a3f0b9de88318afbb *tests/testthat/test_lpp.R | ||
79a1ef15cd2d987e94bdf8c1f12bf714 *tests/testthat/test_npr.R | ||
3c30605df610be90035c23645d2ea7fe *tests/testthat/test_rd.R | ||
a2f53e20545716f7bc3f32917aada1c5 *tests/testthat/test_weights.R | ||
7a5b45ae730db8773be4a30fb8ae05e3 *vignettes/RDHonest.Rmd | ||
7478e9ef79105fe2dee22f16146ac142 *vignettes/auto/RDHonest.el | ||
56563d7354840f54f2b5c9da89509755 *vignettes/auto/mk_Rpackage_template.el | ||
24bf16aa0bac0fd47f4c071c5b74a479 *vignettes/auto/np-testing-library.el | ||
df413b8a601ffb22f468a9237fdfdf42 *vignettes/np-testing-library.bib | ||
9bdf483a6bb9aec73d32a926b889030d *vignettes/vignette_head.tex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(print,RDResults) | ||
export(CVb) | ||
export(RDHonest) | ||
export(RDHonestBME) | ||
export(RDScatter) | ||
export(RDSmoothnessBound) | ||
export(RDTEfficiencyBound) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# RDHonest 1.0.0 | ||
|
||
## New Features | ||
|
||
- The function `RDHonest` computes estimates and confidence intervals for the | ||
regression discontinuity (RD) parameter in sharp and fuzzy designs. It | ||
supports covariates, clustering, and weighting. Confidence intervals are | ||
honest (or bias-aware), with critical values computed using the `CVb` | ||
function. Worst-case bias of the estimator is computed under either the Taylor | ||
or Hölder smoothness class. | ||
- `RDHonestBME` computes confidence intervals in sharp RD designs with discrete | ||
covariates under the assumption assumption that the conditional mean lies in | ||
the "bounded misspecification error" class of functions, as considered in | ||
[Kolesár and Rothe (2018)](https://doi.org/10.1257/aer.20160945). | ||
- Support for plotting the data is provided by the function `RDScatter` | ||
- The function `RDSmoothnessBound` computes a lower bound on the smoothness | ||
constant `M`, used as a parameter by `RDHonest` to calculate the worst-case | ||
bias of the estimator | ||
- The function `RDTEfficiencyBound` calculates efficiency of minimax one-sided | ||
CIs at constant functions, or efficiency of two-sided fixed-length CIs at | ||
constant functions under second-order Taylor smoothness class. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#' Lower bound on smoothness constant M in sharp RD designs | ||
#' | ||
#' Estimate a lower bound on the smoothness constant M and provide a lower | ||
#' confidence interval for it, using method described in supplement to | ||
#' Kolesár and Rothe (2018). | ||
#' | ||
#' @param object An object of class \code{"RDResults"}, typically a result of a | ||
#' call to \code{\link{RDHonest}}. | ||
#' @param s Number of support points that curvature estimates should average | ||
#' over. | ||
#' @param sclass Smoothness class, either \code{"T"} for Taylor or \code{"H"} | ||
#' for Hölder class. | ||
#' @param alpha determines confidence level \code{1-alpha}. | ||
#' @param separate If \code{TRUE}, report estimates separately for data above | ||
#' and below cutoff. If \code{FALSE}, report pooled estimates. | ||
#' @param multiple If \code{TRUE}, use multiple curvature estimates. If | ||
#' \code{FALSE}, only use a single curvature estimate using observations | ||
#' closest to the cutoff. | ||
#' @return Returns a data frame wit the following columns: | ||
#' | ||
#' \describe{ | ||
|
||
#' \item{\code{estimate}}{Point estimate for lower bounds for M. } | ||
#' | ||
#' \item{\code{conf.low}}{Lower endpoint for a one-sided confidence interval | ||
#' for M} | ||
#' | ||
#' } | ||
#' | ||
#' The data frame has a single row if \code{separate==FALSE}; otherwise it has | ||
#' two rows, corresponding to smoothness bound estimates and confidence | ||
#' intervals below and above the cutoff, respectively. | ||
#' @references{ | ||
#' | ||
#' \cite{Michal Kolesár and Christoph Rothe. Inference in regression | ||
#' discontinuity designs with a discrete running variable. | ||
#' American Economic Review, 108(8):2277—-2304, | ||
#' August 2018. \doi{10.1257/aer.20160945}} | ||
#' | ||
#' } | ||
#' @examples | ||
#' ## Subset data to increase speed | ||
#' r <- RDHonest(log(earnings)~yearat14, data=cghs, | ||
#' subset=abs(yearat14-1947)<10, | ||
#' cutoff=1947, M=0.04, h=3) | ||
#' RDSmoothnessBound(r, s=2) | ||
#' @export | ||
RDSmoothnessBound <- function(object, s, separate=FALSE, multiple=TRUE, | ||
alpha=0.05, sclass="H") { | ||
d <- PrelimVar(object$data, se.initial="EHW") | ||
|
||
## Curvature estimate based on jth set of three points closest to zero | ||
Dk <- function(Y, X, xu, s2, j) { | ||
I1 <- X >= xu[3*j*s-3*s+1] & X <= xu[3*j*s-2*s] | ||
I2 <- X >= xu[3*j*s-2*s+1] & X <= xu[3*j*s-s] | ||
I3 <- X >= xu[3*j*s- s+1] & X <= xu[3*j*s] | ||
|
||
lam <- (mean(X[I3])-mean(X[I2])) / (mean(X[I3])-mean(X[I1])) | ||
if (sclass=="T") { | ||
den <- (1-lam)*mean(X[I3]^2) + lam*mean(X[I1]^2) + mean(X[I2]^2) | ||
} else { | ||
den <- (1-lam)*mean(X[I3]^2) + lam*mean(X[I1]^2) - mean(X[I2]^2) | ||
} | ||
## Delta is lower bound on M by lemma S2 in Kolesar and Rothe | ||
Del <- 2 * (lam*mean(Y[I1]) + (1-lam) * mean(Y[I3])-mean(Y[I2])) / den | ||
## Variance of Delta | ||
VD <- 4 * (lam^2*mean(s2[I1])/sum(I1) + (1-lam)^2*mean(s2[I3]) / | ||
sum(I3) + mean(s2[I2])/sum(I2)) / den^2 | ||
c(Del, sqrt(VD), mean(Y[I1]), mean(Y[I2]), mean(Y[I3]), range(X[I1]), | ||
range(X[I2]), range(X[I3])) | ||
} | ||
|
||
xp <- unique(d$X[d$p]) | ||
xm <- sort(unique(abs(d$X[d$m]))) | ||
|
||
Dpj <- function(j) Dk(d$Y[d$p], d$X[d$p], xp, d$sigma2[d$p], j) | ||
Dmj <- function(j) Dk(d$Y[d$m], abs(d$X[d$m]), xm, d$sigma2[d$m], j) | ||
|
||
Sp <- floor(length(xp) / (3*s)) | ||
Sm <- floor(length(xm) / (3*s)) | ||
if (min(Sp, Sm) == 0) | ||
stop("Value of s is too big") | ||
|
||
if (!multiple) { | ||
Sp <- Sm <- 1 | ||
} | ||
Dp <- vapply(seq_len(Sp), Dpj, numeric(11)) | ||
Dm <- vapply(seq_len(Sm), Dmj, numeric(11)) | ||
|
||
## Critical value | ||
cv <- function(M, Z, sd, alpha) { | ||
if (ncol(Z)==1) { | ||
CVb(M/sd, alpha=alpha) | ||
} else { | ||
S <- Z+M*outer(rep(1, nrow(Z)), 1/sd) | ||
maxS <- abs(S[cbind(seq_len(nrow(Z)), max.col(S))]) | ||
unname(stats::quantile(maxS, 1-alpha)) | ||
} | ||
} | ||
|
||
hatM <- function(D) { | ||
ts <- abs(D[1, ]/D[2, ]) # sup_t statistic | ||
maxt <- D[, which.max(ts)] | ||
|
||
Z <- matrix(stats::rnorm(10000*ncol(D)), ncol=ncol(D)) | ||
## median unbiased point estimate and lower CI | ||
hatm <- lower <- 0 | ||
if (max(ts) > cv(0, Z, D[2, ], 1/2)) { | ||
hatm <- FindZero(function(m) max(ts)-cv(m, Z, D[2, ], 1/2), | ||
negative=FALSE) | ||
} | ||
if (max(ts) >= cv(0, Z, D[2, ], alpha)) { | ||
lower <- FindZero(function(m) max(ts)-cv(m, Z, D[2, ], alpha), | ||
negative=FALSE) | ||
} | ||
list(estimate=hatm, conf.low=lower, | ||
diagnostics=c(Delta=maxt[1], sdDelta=maxt[2], | ||
y1=maxt[3], y2=maxt[4], y3=maxt[5], | ||
I1=maxt[6:7], I2=maxt[8:9], I3=maxt[10:11])) | ||
} | ||
|
||
if (separate) { | ||
withr::with_seed(42, po <- hatM(Dp)) | ||
withr::with_seed(42, ne <- hatM(Dm)) | ||
ret <- data.frame(rbind("Below cutoff"=unlist(ne[1:2]), | ||
"Above cutoff"=unlist(po[1:2]))) | ||
} else { | ||
ret <- withr::with_seed(42, | ||
data.frame((hatM(cbind(Dm, Dp))[1:2]))) | ||
rownames(ret) <- c("Pooled") | ||
} | ||
ret | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
## Nonparametric Regression | ||
## | ||
## Calculate fuzzy or sharp RD estimate, or estimate of a conditional mean at a | ||
## point (depending on the class of \code{d}), and its variance using local | ||
## polynomial regression of order \code{order}. | ||
NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) { | ||
if (!is.function(kern)) | ||
kern <- EqKern(kern, boundary=FALSE, order=0) | ||
## Keep only positive kernel weights | ||
X <- drop(d$X) | ||
W <- if (h<=0) 0*X else kern(X/h)*d$w # kernel weights | ||
## if (!is.null(d$sigma2)) d$sigma2 <- as.matrix(d$sigma2) | ||
Z <- outer(X, 0:order, "^") | ||
nZ <- c("(Intercept)", colnames(d$X), paste0("I(", colnames(d$X), "^", | ||
seq_len(order), ")")[-1]) | ||
colnames(Z) <- nZ[seq_len(order+1)] | ||
if (!inherits(d, "IP")) { | ||
ZZ <- (X>=0)*Z | ||
colnames(ZZ) <- c(paste0("I(", colnames(d$X), ">0)"), | ||
paste0(paste0("I(", colnames(d$X), ">0):"), | ||
colnames(Z))[-1]) | ||
Z <- cbind(ZZ, Z, d$covs) | ||
} | ||
r0 <- stats::lm.wfit(x=Z, y=d$Y, w=W) | ||
class(r0) <- c(if (ncol(d$Y)>1) "mlm", "lm") | ||
if (any(is.na(r0$coefficients))) { | ||
return(list(estimate=0, se=NA, est_w=W*0, sigma2=NA*d$Y, eff.obs=0, | ||
fs=NA, lm=r0)) | ||
} | ||
wgt <- W*0 | ||
ok <- W!=0 | ||
wgt[ok] <- solve(qr.R(r0$qr), t(sqrt(W[W>0])*qr.Q(r0$qr)))[1, ] | ||
## To compute effective observations, rescale against uniform kernel | ||
Wu <- d$w * (abs(X)<=h) | ||
q_u <- qr(sqrt(Wu) * Z) | ||
wgt_u <- solve(qr.R(q_u), t(sqrt(Wu) * qr.Q(q_u)))[1, ] | ||
eff.obs <- sum(Wu)*sum(wgt_u^2/d$w)/sum(wgt^2/d$w) | ||
|
||
ny <- NCOL(r0$residuals) | ||
## Squared residuals, allowing for multivariate Y | ||
HC <- function(r) r[, rep(seq_len(ny), each=ny)] * r[, rep(seq_len(ny), ny)] | ||
|
||
## For RD, compute variance separately on either side of cutoff | ||
## TODO: better implement | ||
NN <- function(X) { | ||
res <- matrix(0, nrow=length(X), ncol=ny^2) | ||
res[ok] <- | ||
if (!inherits(d, "IP")) | ||
rbind(as.matrix(sigmaNN(X[d$m & ok], d$Y[d$m & ok, ], J, | ||
d$w[d$m & ok])), | ||
as.matrix(sigmaNN(X[d$p & ok], d$Y[d$p & ok, ], J, | ||
d$w[d$p & ok]))) | ||
else | ||
sigmaNN(X[ok], d$Y[ok, ], J, d$w[ok]) | ||
res | ||
} | ||
|
||
hsigma2 <- switch(se.method, nn=NN(X), EHW=HC(as.matrix(r0$residuals)), | ||
supplied.var=as.matrix(d$sigma2)) | ||
|
||
## Variance | ||
if (is.null(d$clusterid)) { | ||
V <- colSums(as.matrix(wgt^2 * hsigma2)) | ||
} else if (se.method == "supplied.var") { | ||
V <- colSums(as.matrix(wgt^2 * hsigma2))+ | ||
d$rho * (sum(tapply(wgt, d$clusterid, sum)^2)-sum(wgt^2)) | ||
} else { | ||
us <- apply(as.matrix(wgt*r0$residuals)[ok, , drop=FALSE], 2, | ||
function(x) tapply(x, d$clusterid[ok], sum)) | ||
V <- as.vector(crossprod(us)) | ||
} | ||
ret <- list(estimate=r0$coefficients[1], se=sqrt(V[1]), est_w=wgt, | ||
sigma2=hsigma2, eff.obs=eff.obs, fs=NA, lm=r0) | ||
|
||
if (inherits(d, "FRD")) { | ||
ret$fs <- r0$coefficients[1, 2] | ||
ret$estimate <- r0$coefficients[1, 1]/r0$coefficients[1, 2] | ||
names(ret$estimate) <- colnames(r0$coefficients)[2] | ||
ret$se <- sqrt(sum(c(1, -ret$estimate, -ret$estimate, | ||
ret$estimate^2) * V) / ret$fs^2) | ||
} | ||
ret | ||
} | ||
|
||
|
||
## Rule of thumb for choosing M | ||
## | ||
## Use global quartic regression to estimate a bound on the second derivative | ||
## for inference under under second order Hölder class. For RD, use a separate | ||
## regression on either side of the cutoff | ||
MROT <- function(d) { | ||
if (inherits(d, "SRD")) { | ||
max(MROT(data.frame(Y=d$Y[d$p], X=d$X[d$p], w=d$w[d$p])), | ||
MROT(data.frame(Y=d$Y[d$m], X=d$X[d$m], w=d$w[d$m]))) | ||
} else if (inherits(d, "FRD")) { | ||
c(MY=max(MROT(data.frame(Y=d$Y[d$p, 1], X=d$X[d$p], w=d$w[d$p])), | ||
MROT(data.frame(Y=d$Y[d$m, 1], X=d$X[d$m], w=d$w[d$m]))), | ||
MD=max(MROT(data.frame(Y=d$Y[d$p, 2], X=d$X[d$p], w=d$w[d$p])), | ||
MROT(data.frame(Y=d$Y[d$m, 2], X=d$X[d$m], w=d$w[d$m])))) | ||
} else { | ||
## STEP 1: Estimate global polynomial regression | ||
r1 <- unname(stats::lm.wfit(y=d$Y, x=outer(drop(d$X), 0:4, "^"), | ||
w=d$w)$coefficients) | ||
if (length(unique(d$X))<4 || any(is.na(r1))) | ||
stop(paste0("Insufficient unique values of the running", | ||
" variable to compute rule of thumb for M.")) | ||
f2 <- function(x) abs(2*r1[3]+6*x*r1[4]+12*x^2*r1[5]) | ||
## maximum occurs either at endpoints, or else at the extremum, | ||
## -r1[4]/(4*r1[5]), if the extremum is in the support | ||
f2e <- if (abs(r1[5])<=1e-10) Inf else -r1[4] / (4*r1[5]) | ||
M <- max(f2(min(d$X)), f2(max(d$X))) | ||
if (min(d$X) < f2e && max(d$X) > f2e) M <- max(f2(f2e), M) | ||
|
||
M | ||
} | ||
} |
Oops, something went wrong.