Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
kolesarm authored and cran-robot committed Mar 23, 2024
0 parents commit 9b56813
Show file tree
Hide file tree
Showing 58 changed files with 5,972 additions and 0 deletions.
38 changes: 38 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
Package: RDHonest
Title: Honest Inference in Regression Discontinuity Designs
Version: 1.0.0
Authors@R:
c(person(given = "Michal",
family = "Kolesár",
role = c("aut", "cre", "cph"),
email = "kolesarmi@googlemail.com",
comment = c(ORCID = "0000-0002-2482-7796")),
person(given= "Tim",
family = "Armstrong",
role = "ctb",
email = "timothy.armstrong@usc.edu"))
Description: Honest and nearly-optimal confidence intervals in fuzzy and sharp
regression discontinuity designs and for inference at a point based on local
linear regression. The implementation is based on Armstrong and Kolesár (2018)
<doi:10.3982/ECTA14434>, and Kolesár and Rothe (2018)
<doi:10.1257/aer.20160945>. Supports covariates, clustering, and weighting.
Depends: R (>= 4.3.0)
License: GPL-3
Encoding: UTF-8
LazyData: true
Imports: stats, Formula, withr
Suggests: spelling, ggplot2, testthat, knitr, rmarkdown, formatR
Config/testthat/edition: 3
Language: en-US
URL: https://github.com/kolesarm/RDHonest
BugReports: https://github.com/kolesarm/RDHonest/issues
RoxygenNote: 7.3.1
VignetteBuilder: knitr
NeedsCompilation: no
Packaged: 2024-03-22 00:10:30 UTC; kolesarm
Author: Michal Kolesár [aut, cre, cph]
(<https://orcid.org/0000-0002-2482-7796>),
Tim Armstrong [ctb]
Maintainer: Michal Kolesár <kolesarmi@googlemail.com>
Repository: CRAN
Date/Publication: 2024-03-22 20:00:02 UTC
57 changes: 57 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
d2925759b43b6f25bf33220e954072a2 *DESCRIPTION
b13372954d71b647d12b8606730d8c53 *NAMESPACE
bd3c733f79f2e99c092844ecd2ad62c8 *NEWS.md
015afbcdfffb9c5d6012b9d66cafda12 *R/Cbound.R
a24e04f58c717f6c1c0a48305c919df3 *R/NPRfunctions.R
b31d30125c5785c24442eb047da32ffd *R/RDHonest.R
6ff45e367363e17b2c02c3691fd803d6 *R/RD_bme.R
bb99dc6d13229fed5c9955a993d99104 *R/RD_opt.R
6a41d0f0e0ab7d7d1607c706a3d40b1c *R/checks.R
b03c1413bf83b493b028bc25c77172b7 *R/cvb.R
f038ccf47cacdbe82ca04040f364c90b *R/documentation.R
e890fdac65a0b1f139bb1229adbf49a8 *R/kernels.R
d6701934e90df5b7cfcf9ddab81edcc0 *R/plots.R
b4e942a0de189d436c015ec07a0fba28 *R/prelim_var.R
2a6a0ad2ce8fdf41853ab58e14419b89 *R/sysdata.rda
ff4c677d1e41a2fdc44263746a2a3de2 *R/utils.R
7080893e02c49cd296d4424b9be55069 *build/partial.rdb
5e0b3c3f604090d949b3a6c74a504f5b *build/vignette.rds
6b3ec335cab7402aa786e91d6f9d6bfa *data/cghs.rda
f0567c81b282df2910c4d7cd1e2dcf96 *data/headst.rda
6838b6a056a61ff55a572c088cf00fd9 *data/lee08.rda
8a8e79d9d25075f1ebad7329cc94f8ee *data/rcp.rda
49125a52ec7640835ee69823066547e5 *data/rebp.rda
a2c3f46c3ffee8040ff755211f87866a *inst/WORDLIST
c42de70ef3bd200609204f1e3d0d908d *inst/doc/RDHonest.R
7a5b45ae730db8773be4a30fb8ae05e3 *inst/doc/RDHonest.Rmd
1f9a45641ed03d9659a916d9d3dc6168 *inst/doc/RDHonest.pdf
85daf08fe9d4a72362964e1cabc6e27b *man/CVb.Rd
8bf4b8167fde615012193992f7631b9a *man/RDHonest.Rd
a53f6886201ccdbf6cb87147856ec1a1 *man/RDHonestBME.Rd
ae60c195e1565df43c543981d91fe9cf *man/RDScatter.Rd
f4a0db0b7307c86f9563fb58f99e9499 *man/RDSmoothnessBound.Rd
843a0cb71fa8bac7e74c2b471a42258f *man/RDTEfficiencyBound.Rd
fe719141f4c4e0a55af182867f9e1bcc *man/cghs.Rd
eca050783faa7c37237742a877f5c167 *man/headst.Rd
3b30d3b653d61da288e883466882c2d9 *man/lee08.Rd
80303d476b9263f6b3d97671cad1fb95 *man/rcp.Rd
56fcffd527d36a7acb4c257060738e23 *man/rebp.Rd
cfd29e708c87a72ad33acf76570706ba *tests/spelling.R
8a0c187fd1f5bc4d456349a41111eb80 *tests/testthat.R
b5007d937f77bb59056b74a7b683bf3c *tests/testthat/test_cbound.R
782beaff44c5394b665c532cd49e4918 *tests/testthat/test_clustering.R
2eb40308400e1fc4c936aa830d64d9c7 *tests/testthat/test_covariates.R
f43ad1d6d638150586538058e560264d *tests/testthat/test_cv.R
d5b1f49e836e4cce16de56ebccc75d83 *tests/testthat/test_frd.R
cda007da6de166093c5618e254342a8e *tests/testthat/test_interface.R
a1097b9d88e33505ca51cc0fc477a1d3 *tests/testthat/test_kernels.R
7d3cfc260fdbe50a3f0b9de88318afbb *tests/testthat/test_lpp.R
79a1ef15cd2d987e94bdf8c1f12bf714 *tests/testthat/test_npr.R
3c30605df610be90035c23645d2ea7fe *tests/testthat/test_rd.R
a2f53e20545716f7bc3f32917aada1c5 *tests/testthat/test_weights.R
7a5b45ae730db8773be4a30fb8ae05e3 *vignettes/RDHonest.Rmd
7478e9ef79105fe2dee22f16146ac142 *vignettes/auto/RDHonest.el
56563d7354840f54f2b5c9da89509755 *vignettes/auto/mk_Rpackage_template.el
24bf16aa0bac0fd47f4c071c5b74a479 *vignettes/auto/np-testing-library.el
df413b8a601ffb22f468a9237fdfdf42 *vignettes/np-testing-library.bib
9bdf483a6bb9aec73d32a926b889030d *vignettes/vignette_head.tex
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(print,RDResults)
export(CVb)
export(RDHonest)
export(RDHonestBME)
export(RDScatter)
export(RDSmoothnessBound)
export(RDTEfficiencyBound)
21 changes: 21 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# RDHonest 1.0.0

## New Features

- The function `RDHonest` computes estimates and confidence intervals for the
regression discontinuity (RD) parameter in sharp and fuzzy designs. It
supports covariates, clustering, and weighting. Confidence intervals are
honest (or bias-aware), with critical values computed using the `CVb`
function. Worst-case bias of the estimator is computed under either the Taylor
or Hölder smoothness class.
- `RDHonestBME` computes confidence intervals in sharp RD designs with discrete
  covariates under the assumption that the conditional mean lies in
the "bounded misspecification error" class of functions, as considered in
[Kolesár and Rothe (2018)](https://doi.org/10.1257/aer.20160945).
- Support for plotting the data is provided by the function `RDScatter`.
- The function `RDSmoothnessBound` computes a lower bound on the smoothness
constant `M`, used as a parameter by `RDHonest` to calculate the worst-case
  bias of the estimator.
- The function `RDTEfficiencyBound` calculates efficiency of minimax one-sided
CIs at constant functions, or efficiency of two-sided fixed-length CIs at
constant functions under second-order Taylor smoothness class.
133 changes: 133 additions & 0 deletions R/Cbound.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' Lower bound on smoothness constant M in sharp RD designs
#'
#' Estimate a lower bound on the smoothness constant M and provide a lower
#' confidence interval for it, using method described in supplement to
#' Kolesár and Rothe (2018).
#'
#' @param object An object of class \code{"RDResults"}, typically a result of a
#'     call to \code{\link{RDHonest}}.
#' @param s Number of support points that curvature estimates should average
#'     over.
#' @param sclass Smoothness class, either \code{"T"} for Taylor or \code{"H"}
#'     for Hölder class.
#' @param alpha determines confidence level \code{1-alpha}.
#' @param separate If \code{TRUE}, report estimates separately for data above
#'     and below cutoff. If \code{FALSE}, report pooled estimates.
#' @param multiple If \code{TRUE}, use multiple curvature estimates. If
#'     \code{FALSE}, only use a single curvature estimate using observations
#'     closest to the cutoff.
#' @return Returns a data frame with the following columns:
#'
#' \describe{
#'
#' \item{\code{estimate}}{Point estimate for lower bounds for M. }
#'
#' \item{\code{conf.low}}{Lower endpoint for a one-sided confidence interval
#' for M}
#'
#' }
#'
#' The data frame has a single row if \code{separate==FALSE}; otherwise it has
#' two rows, corresponding to smoothness bound estimates and confidence
#' intervals below and above the cutoff, respectively.
#' @references{
#'
#' \cite{Michal Kolesár and Christoph Rothe. Inference in regression
#' discontinuity designs with a discrete running variable.
#' American Economic Review, 108(8):2277—-2304,
#' August 2018. \doi{10.1257/aer.20160945}}
#'
#' }
#' @examples
#' ## Subset data to increase speed
#' r <- RDHonest(log(earnings)~yearat14, data=cghs,
#'               subset=abs(yearat14-1947)<10,
#'               cutoff=1947, M=0.04, h=3)
#' RDSmoothnessBound(r, s=2)
#' @export
RDSmoothnessBound <- function(object, s, separate=FALSE, multiple=TRUE,
                              alpha=0.05, sclass="H") {
    ## Preliminary (EHW) variance estimates; d$sigma2 feeds the variance of
    ## the curvature estimates below
    d <- PrelimVar(object$data, se.initial="EHW")

    ## Curvature estimate based on jth set of three points closest to zero
    Dk <- function(Y, X, xu, s2, j) {
        ## Partition the jth group of 3*s support points (xu is sorted,
        ## distances from cutoff) into three consecutive cells of s points
        I1 <- X >= xu[3*j*s-3*s+1] & X <= xu[3*j*s-2*s]
        I2 <- X >= xu[3*j*s-2*s+1] & X <= xu[3*j*s-s]
        I3 <- X >= xu[3*j*s- s+1] & X <= xu[3*j*s]

        ## lam chosen so lam*mean(X[I1]) + (1-lam)*mean(X[I3]) = mean(X[I2]),
        ## i.e. the middle cell is a convex combination of the outer cells
        lam <- (mean(X[I3])-mean(X[I2])) / (mean(X[I3])-mean(X[I1]))
        ## Denominator differs by smoothness class (Taylor vs Hölder)
        if (sclass=="T") {
            den <- (1-lam)*mean(X[I3]^2) + lam*mean(X[I1]^2) + mean(X[I2]^2)
        } else {
            den <- (1-lam)*mean(X[I3]^2) + lam*mean(X[I1]^2) - mean(X[I2]^2)
        }
        ## Delta is lower bound on M by lemma S2 in Kolesar and Rothe
        Del <- 2 * (lam*mean(Y[I1]) + (1-lam) * mean(Y[I3])-mean(Y[I2])) / den
        ## Variance of Delta
        VD <- 4 * (lam^2*mean(s2[I1])/sum(I1) + (1-lam)^2*mean(s2[I3]) /
                       sum(I3) + mean(s2[I2])/sum(I2)) / den^2
        ## Return estimate, sd, cell means of Y, and cell ranges of X
        ## (length 11; the ranges serve as diagnostics)
        c(Del, sqrt(VD), mean(Y[I1]), mean(Y[I2]), mean(Y[I3]), range(X[I1]),
          range(X[I2]), range(X[I3]))
    }

    ## Support points above (xp) and below (xm, in absolute value) the cutoff
    xp <- unique(d$X[d$p])
    xm <- sort(unique(abs(d$X[d$m])))

    Dpj <- function(j) Dk(d$Y[d$p], d$X[d$p], xp, d$sigma2[d$p], j)
    Dmj <- function(j) Dk(d$Y[d$m], abs(d$X[d$m]), xm, d$sigma2[d$m], j)

    ## Number of complete groups of 3*s support points on each side
    Sp <- floor(length(xp) / (3*s))
    Sm <- floor(length(xm) / (3*s))
    if (min(Sp, Sm) == 0)
        stop("Value of s is too big")

    ## With multiple=FALSE, use only the group closest to the cutoff
    if (!multiple) {
        Sp <- Sm <- 1
    }
    Dp <- vapply(seq_len(Sp), Dpj, numeric(11))
    Dm <- vapply(seq_len(Sm), Dmj, numeric(11))

    ## Critical value
    cv <- function(M, Z, sd, alpha) {
        if (ncol(Z)==1) {
            ## Single estimate: use CVb directly
            CVb(M/sd, alpha=alpha)
        } else {
            ## Multiple estimates: simulate quantile of the max statistic
            S <- Z+M*outer(rep(1, nrow(Z)), 1/sd)
            maxS <- abs(S[cbind(seq_len(nrow(Z)), max.col(S))])
            unname(stats::quantile(maxS, 1-alpha))
        }
    }

    ## Invert the max-t statistic to get a point estimate and lower CI for M
    hatM <- function(D) {
        ts <- abs(D[1, ]/D[2, ]) # sup_t statistic
        maxt <- D[, which.max(ts)]

        ## Simulation draws for the critical-value function cv()
        Z <- matrix(stats::rnorm(10000*ncol(D)), ncol=ncol(D))
        ## median unbiased point estimate and lower CI
        hatm <- lower <- 0
        if (max(ts) > cv(0, Z, D[2, ], 1/2)) {
            hatm <- FindZero(function(m) max(ts)-cv(m, Z, D[2, ], 1/2),
                             negative=FALSE)
        }
        if (max(ts) >= cv(0, Z, D[2, ], alpha)) {
            lower <- FindZero(function(m) max(ts)-cv(m, Z, D[2, ], alpha),
                              negative=FALSE)
        }
        list(estimate=hatm, conf.low=lower,
             diagnostics=c(Delta=maxt[1], sdDelta=maxt[2],
                           y1=maxt[3], y2=maxt[4], y3=maxt[5],
                           I1=maxt[6:7], I2=maxt[8:9], I3=maxt[10:11]))
    }

    ## Fix the seed so the simulated critical values are reproducible
    if (separate) {
        withr::with_seed(42, po <- hatM(Dp))
        withr::with_seed(42, ne <- hatM(Dm))
        ret <- data.frame(rbind("Below cutoff"=unlist(ne[1:2]),
                                "Above cutoff"=unlist(po[1:2])))
    } else {
        ret <- withr::with_seed(42,
                                data.frame((hatM(cbind(Dm, Dp))[1:2])))
        rownames(ret) <- c("Pooled")
    }
    ret
}
116 changes: 116 additions & 0 deletions R/NPRfunctions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
## Nonparametric Regression
##
## Calculate fuzzy or sharp RD estimate, or estimate of a conditional mean at a
## point (depending on the class of \code{d}), and its variance using local
## polynomial regression of order \code{order}.
##
## d          Data object; its class ("IP", "FRD", or other) selects the
##            design. Fields used: X, Y, w, covs, p, m, sigma2, clusterid, rho.
## h          Bandwidth; h<=0 yields all-zero kernel weights.
## kern       Kernel name passed to EqKern, or a kernel function itself.
## order      Order of the local polynomial.
## se.method  "nn" (nearest-neighbor), "EHW", or "supplied.var".
## J          Number of nearest neighbors for se.method="nn".
##
## Returns a list: estimate, se, est_w (estimation weights), sigma2
## (variance estimates), eff.obs, fs (first stage, FRD only), lm (fit).
NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
    ## Resolve kernel name to a kernel function if needed
    if (!is.function(kern))
        kern <- EqKern(kern, boundary=FALSE, order=0)
    ## Keep only positive kernel weights
    X <- drop(d$X)
    W <- if (h<=0) 0*X else kern(X/h)*d$w # kernel weights
    ## if (!is.null(d$sigma2)) d$sigma2 <- as.matrix(d$sigma2)
    ## Polynomial design matrix 1, X, ..., X^order, with readable names
    Z <- outer(X, 0:order, "^")
    nZ <- c("(Intercept)", colnames(d$X), paste0("I(", colnames(d$X), "^",
                                                 seq_len(order), ")")[-1])
    colnames(Z) <- nZ[seq_len(order+1)]
    if (!inherits(d, "IP")) {
        ## RD case: interact the polynomial with the above-cutoff indicator
        ## so the first coefficient is the jump at the cutoff; append any
        ## covariates
        ZZ <- (X>=0)*Z
        colnames(ZZ) <- c(paste0("I(", colnames(d$X), ">0)"),
                          paste0(paste0("I(", colnames(d$X), ">0):"),
                                 colnames(Z))[-1])
        Z <- cbind(ZZ, Z, d$covs)
    }
    r0 <- stats::lm.wfit(x=Z, y=d$Y, w=W)
    class(r0) <- c(if (ncol(d$Y)>1) "mlm", "lm")
    ## Rank-deficient fit: signal failure with a degenerate result
    if (any(is.na(r0$coefficients))) {
        return(list(estimate=0, se=NA, est_w=W*0, sigma2=NA*d$Y, eff.obs=0,
                    fs=NA, lm=r0))
    }
    ## Recover the linear estimation weights for the first coefficient from
    ## the QR decomposition of the weighted fit
    wgt <- W*0
    ok <- W!=0
    wgt[ok] <- solve(qr.R(r0$qr), t(sqrt(W[W>0])*qr.Q(r0$qr)))[1, ]
    ## To compute effective observations, rescale against uniform kernel
    Wu <- d$w * (abs(X)<=h)
    q_u <- qr(sqrt(Wu) * Z)
    wgt_u <- solve(qr.R(q_u), t(sqrt(Wu) * qr.Q(q_u)))[1, ]
    eff.obs <- sum(Wu)*sum(wgt_u^2/d$w)/sum(wgt^2/d$w)

    ny <- NCOL(r0$residuals)
    ## Squared residuals, allowing for multivariate Y
    HC <- function(r) r[, rep(seq_len(ny), each=ny)] * r[, rep(seq_len(ny), ny)]

    ## For RD, compute variance separately on either side of cutoff
    ## TODO: better implement
    NN <- function(X) {
        res <- matrix(0, nrow=length(X), ncol=ny^2)
        res[ok] <-
            if (!inherits(d, "IP"))
                rbind(as.matrix(sigmaNN(X[d$m & ok], d$Y[d$m & ok, ], J,
                                        d$w[d$m & ok])),
                      as.matrix(sigmaNN(X[d$p & ok], d$Y[d$p & ok, ], J,
                                        d$w[d$p & ok])))
            else
                sigmaNN(X[ok], d$Y[ok, ], J, d$w[ok])
        res
    }

    ## Conditional variance estimates according to se.method
    hsigma2 <- switch(se.method, nn=NN(X), EHW=HC(as.matrix(r0$residuals)),
                      supplied.var=as.matrix(d$sigma2))

    ## Variance
    if (is.null(d$clusterid)) {
        V <- colSums(as.matrix(wgt^2 * hsigma2))
    } else if (se.method == "supplied.var") {
        ## Supplied variance plus a cluster correction using d$rho
        V <- colSums(as.matrix(wgt^2 * hsigma2))+
            d$rho * (sum(tapply(wgt, d$clusterid, sum)^2)-sum(wgt^2))
    } else {
        ## Cluster-robust variance: sum weighted residuals within clusters
        us <- apply(as.matrix(wgt*r0$residuals)[ok, , drop=FALSE], 2,
                    function(x) tapply(x, d$clusterid[ok], sum))
        V <- as.vector(crossprod(us))
    }
    ret <- list(estimate=r0$coefficients[1], se=sqrt(V[1]), est_w=wgt,
                sigma2=hsigma2, eff.obs=eff.obs, fs=NA, lm=r0)

    if (inherits(d, "FRD")) {
        ## Fuzzy RD: ratio of reduced-form to first-stage jump, with
        ## delta-method standard error
        ret$fs <- r0$coefficients[1, 2]
        ret$estimate <- r0$coefficients[1, 1]/r0$coefficients[1, 2]
        names(ret$estimate) <- colnames(r0$coefficients)[2]
        ret$se <- sqrt(sum(c(1, -ret$estimate, -ret$estimate,
                             ret$estimate^2) * V) / ret$fs^2)
    }
    ret
}


## Rule of thumb for choosing M
##
## Bound the second derivative via a global quartic regression, for inference
## under a second-order Hölder class. For RD designs, run a separate
## regression on each side of the cutoff and take the larger bound.
MROT <- function(d) {
    if (inherits(d, "SRD")) {
        ## Sharp RD: maximum of the one-sided bounds above/below the cutoff
        above <- MROT(data.frame(Y=d$Y[d$p], X=d$X[d$p], w=d$w[d$p]))
        below <- MROT(data.frame(Y=d$Y[d$m], X=d$X[d$m], w=d$w[d$m]))
        max(above, below)
    } else if (inherits(d, "FRD")) {
        ## Fuzzy RD: separate bounds for outcome (column 1) and treatment
        ## (column 2), each maximized over the two sides of the cutoff
        side_max <- function(col) {
            max(MROT(data.frame(Y=d$Y[d$p, col], X=d$X[d$p], w=d$w[d$p])),
                MROT(data.frame(Y=d$Y[d$m, col], X=d$X[d$m], w=d$w[d$m])))
        }
        c(MY=side_max(1), MD=side_max(2))
    } else {
        ## STEP 1: Estimate global polynomial regression
        basis <- outer(drop(d$X), 0:4, "^")
        r1 <- unname(stats::lm.wfit(y=d$Y, x=basis, w=d$w)$coefficients)
        if (length(unique(d$X)) < 4 || any(is.na(r1)))
            stop(paste0("Insufficient unique values of the running",
                        " variable to compute rule of thumb for M."))
        ## Absolute second derivative of the fitted quartic
        f2 <- function(x) abs(2*r1[3] + 6*x*r1[4] + 12*x^2*r1[5])
        ## Maximum occurs either at the endpoints of the support, or at the
        ## interior extremum -r1[4]/(4*r1[5]) when the quartic term is nonzero
        crit <- if (abs(r1[5]) <= 1e-10) Inf else -r1[4] / (4*r1[5])
        lo <- min(d$X)
        hi <- max(d$X)
        M <- max(f2(lo), f2(hi))
        if (lo < crit && hi > crit) M <- max(M, f2(crit))

        M
    }
}

0 comments on commit 9b56813

Please sign in to comment.