-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 425ffff
Showing
13 changed files
with
608 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
Package: perccal | ||
Type: Package | ||
Title: Implementing Double Bootstrap Linear Regression Confidence | ||
Intervals Using the 'perc-cal' Method | ||
Version: 1.0 | ||
Date: 2016-06-14 | ||
Authors@R: c( person("Daniel", "McCarthy", role=c("aut","cre","ctb"), | ||
email="danielmc@wharton.upenn.edu")) | ||
Maintainer: Daniel McCarthy <danielmc@wharton.upenn.edu> | ||
URL: http://www.danielminhmccarthy.com | ||
Description: Contains functions which allow the user to compute confidence intervals quickly using the double bootstrap-based percentile calibrated ('perc-cal') method for linear regression coefficients. 'perccal_interval()' is the primary user-facing function within this package. | ||
License: GPL-3 | ||
Imports: Rcpp (>= 0.11.5) | ||
LinkingTo: Rcpp, RcppArmadillo, RcppEigen | ||
NeedsCompilation: yes | ||
Packaged: 2016-06-14 17:32:40 UTC; danielmc | ||
Author: Daniel McCarthy [aut, cre, ctb] | ||
Repository: CRAN | ||
Date/Publication: 2016-06-14 20:34:32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
7137aa586945f221d985b0edff0f7123 *DESCRIPTION | ||
6ebd1c9a850db60126cfd6eea1bd0590 *NAMESPACE | ||
00986ec0fd6d81ef72f0ce7c715687b4 *R/RcppExports.R | ||
1c05abeb19c8d7d18e49e094e7fa3f34 *R/functions-code.R | ||
344a5a9282cf7f9e633a3514bb0ff603 *man/Cdboot_multi.Rd | ||
5923466962ecbb1553db9fbc28b0ee53 *man/Cquantile.Rd | ||
eab124c5da47cba00b908229d360fad6 *man/perccal-package.Rd | ||
d1c1e0a9d3ea030ca2bd0f1354c1c445 *man/perccal_interval.Rd | ||
1fda748862ea4c31684cf2e6a89bc93f *man/sample_rcpp.Rd | ||
899e256fbab14d837ea44d49b1c974b9 *src/Makevars | ||
fa318843a746835da902d47bc8eb2970 *src/RcppExports.cpp | ||
309ea63b5fe6fc65d4eea4484214fb06 *src/dboot_multi.cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
useDynLib(perccal) | ||
importFrom(Rcpp, evalCpp) | ||
importFrom("stats", "coefficients") | ||
importFrom("stats", "quantile") | ||
importFrom("stats", "lm") | ||
exportPattern("^[[:alpha:]]+") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# This file was generated by Rcpp::compileAttributes | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
sample_rcpp <- function(N, nsamp) { | ||
.Call('perccal_sample_rcpp', PACKAGE = 'perccal', N, nsamp) | ||
} | ||
|
||
Cquantile <- function(xx, p) { | ||
.Call('perccal_Cquantile', PACKAGE = 'perccal', xx, p) | ||
} | ||
|
||
Cdboot_multi <- function(xxyy, lgridlo, lgridhi, B, B2, G) { | ||
.Call('perccal_Cdboot_multi', PACKAGE = 'perccal', xxyy, lgridlo, lgridhi, B, B2, G) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
################################################## | ||
### Percentile-Calibrated ('perc-cal') Confidence Interval Functions | ||
|
||
library(RcppEigen) | ||
library(Rcpp) | ||
library(RcppArmadillo) | ||
|
||
##' Calculates Percentile-Calibrated Linear Regression Confidence Intervals | ||
##' | ||
##' This is the main function of the package. It takes as inputs the | ||
##' predictor/response matrix appended together, which can be either | ||
##' a data frame or a matrix, along with the desired coverage and other | ||
##' settings, and outputs marginal confidence intervals for each of the | ||
##' predictors, including the intercept. | ||
##' @name perccal_interval | ||
##' @param Xy [n by (p+1)] matrix: X in columns 1 to p, y in column p+1. | ||
##' X is the design matrix, and is assumed to not include a vector of one's. | ||
##' @param alpha Target coverage desired. | ||
##' @param G Number of grid points to evaluate calibrated percentile method | ||
##' on each side over. | ||
##' @param B Number of 1st stage bootstrap samples. | ||
##' @param B2 Number of 2nd stage double bootstrap samples. | ||
##' @return Return a (p+1)x2 matrix containing confidence intervals for all | ||
##' regression coefficients, estimated via the perc-cal method. | ||
##' @examples | ||
##' set.seed(1234) | ||
##' n = 32 | ||
##' B = 500 | ||
##' B2 = 500 | ||
##' G=20 | ||
##' x1=rnorm(n) | ||
##' x2=rnorm(n) | ||
##' eps=rnorm(n) | ||
##' y = x1 + 2*x2 + eps | ||
##' Xy = cbind(x1,x2,y) | ||
##' alpha = .025 | ||
##' perccal_interval(Xy, alpha, G, B, B2) | ||
perccal_interval = function(Xy, alpha, G = 20, B = 999, B2 = 999){ | ||
# Inputs: | ||
# Xy: [n by (p+1)] matrix: X in columns 1 to p, y in column p+1 | ||
#(please do not include the intercept) | ||
# alpha: Target coverage desired. | ||
# G : # of grid points to evaluate calibrated percentile method on each side. | ||
# B : # of 1st stage bootstrap samples | ||
# B2 : # of 2nd stage bootstrap samples | ||
# Output: | ||
# Confidence interval for all regression coefficients, estimated via perc-cal method. | ||
|
||
# Initialize all the matrices and vectors to be used below. | ||
p = ncol(Xy)-1 | ||
theta.hat.boot = matrix(NA,nrow=p+1,ncol=B) | ||
theta.hat.dboot = matrix(NA,nrow=p+1,ncol=B2) | ||
theta.qtl.lgrid.lo = array(NA, dim=c(p+1,B,G)) | ||
theta.qtl.lgrid.hi = array(NA, dim=c(p+1,B,G)) | ||
perc.cal.ints = matrix(NA,nrow=p+1,ncol=2) | ||
|
||
l.grid.lo = seq(.0001, alpha*1.4, length.out=G) | ||
l.grid.hi = 1-l.grid.lo | ||
|
||
# Start main code | ||
X=as.matrix(Xy[,1:p]) | ||
Xm = cbind(1,X) | ||
y=Xy[,p+1] | ||
regr_output = coefficients(summary(lm(y~X))) | ||
prednames = rownames(regr_output) | ||
theta.hat = as.numeric(regr_output[,"Estimate"]) | ||
|
||
boot_output = Cdboot_multi(as.matrix(Xy), l.grid.lo, l.grid.hi, B, B2, G) | ||
|
||
for(k in 1:(p+1)){ | ||
theta.hat.boot = boot_output$theta_hat_boot[,k] | ||
theta.qtl.lgrid.lo = boot_output$theta_qtl_lgrid_lo[k,1][[1]][,,1] | ||
theta.qtl.lgrid.hi = boot_output$theta_qtl_lgrid_hi[k,1][[1]][,,1] | ||
|
||
# perc-cal method | ||
log.lgrid.lo = theta.qtl.lgrid.lo < theta.hat[k] | ||
log.lgrid.hi = theta.qtl.lgrid.hi > theta.hat[k] | ||
percs.lo = 1-colSums(log.lgrid.lo)/B | ||
percs.hi = 1-colSums(log.lgrid.hi)/B | ||
lo.log = (percs.lo[1:(G-1)]<alpha)*(percs.lo[2:G]>=alpha) | ||
hi.log = (percs.hi[1:(G-1)]<alpha)*(percs.hi[2:G]>=alpha) | ||
if(sum(lo.log)>0){l.alpha.lo = l.grid.lo[lo.log*(1:(G-1))]} | ||
if(sum(lo.log)<=0){l.alpha.lo = .0001} | ||
if(sum(hi.log)>0){l.alpha.hi = l.grid.hi[hi.log*(1:(G-1))]} | ||
if(sum(hi.log)<=0){l.alpha.hi = 1-.0001} | ||
perc.cal.ints[k,] = quantile(theta.hat.boot,c(l.alpha.lo, l.alpha.hi)) | ||
} | ||
|
||
colnames(perc.cal.ints) = c(paste("lower ",round(alpha,3),sep=""),paste("upper ",1-round(alpha,3),sep="")) | ||
rownames(perc.cal.ints) = prednames | ||
perc.cal.ints | ||
} | ||
|
||
|
||
|
||
|
||
|
||
##' Fast computation of quantiles | ||
##' | ||
##' Helper function which takes as input a vector and obtains quantiles for it. | ||
##' Number of quantiles may be greater than one. | ||
##' @name Cquantile | ||
##' @param xx Numeric vector we are obtaining quantiles for. | ||
##' @param p Numeric vector of quantiles. | ||
##' @return Numeric vector containing quantiles, possibly greater than one. | ||
Cquantile <- function(xx, p) { | ||
.Call('perccal_Cquantile', PACKAGE = 'perccal', xx, p) | ||
} | ||
|
||
|
||
##' Sample from [1,2,...,N] with replacement nsamp times in Rcpp. | ||
##' | ||
##' Helper function which samples from [1,2,...,N] with replacement | ||
##' nsamp times in Rcpp. | ||
##' @name sample_rcpp | ||
##' @param N Largest integer to sample from. | ||
##' @param nsamp number of samples from [1,2,...,N] with replacement to obtain. | ||
##' @return samps nsamp-length vector of samples from [1,2,...,N] with replacement to obtain. | ||
sample_rcpp <- function(N, nsamp) { | ||
.Call('perccal_sample_rcpp', PACKAGE = 'perccal', N, nsamp) | ||
} | ||
|
||
|
||
|
||
##' Fast computation of internal double bootstrap calculations | ||
##' | ||
##' This is the workhorse function of the package, speeding up computations | ||
##' within double bootstrap routine. | ||
##' @name Cdboot_multi | ||
##' @param xxyy (n by p+1) matrix for X (design matrix) and response vector y. | ||
##' @param lgridlo Lower quantile values of double bootstrap distribution to | ||
##' obtain. | ||
##' @param lgridhi Upper quantile values of double bootstrap distribution to | ||
##' obtain. | ||
##' @param B Number of 1st stage bootstrap samples. | ||
##' @param B2 Number of 2nd stage double bootstrap samples. | ||
##' @param G Calculate quantile-based empirical coverage at this many grid | ||
##' points | ||
##' @return theta_hat_boot first-level bootstrap estimates of all slope coefficients | ||
##' @return theta_qtl_lgrid_lo (p+1 by B by G by 1) matrix for lower quantiles at | ||
##' all grid points for all predictors over all bootstrap samples. | ||
##' @return theta_qtl_lgrid_hi (p+1 by B by G by 1) matrix for upper quantiles at | ||
##' all grid points for all predictors over all bootstrap samples. | ||
Cdboot_multi <- function(xxyy, lgridlo, lgridhi, B, B2, G) { | ||
.Call('perccal_Cdboot_multi', PACKAGE = 'perccal', xxyy, lgridlo, lgridhi, B, B2, G) | ||
} | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/functions-code.R | ||
\name{Cdboot_multi} | ||
\alias{Cdboot_multi} | ||
\title{Fast computation of internal double bootstrap calculations} | ||
\usage{ | ||
Cdboot_multi(xxyy, lgridlo, lgridhi, B, B2, G) | ||
} | ||
\arguments{ | ||
\item{xxyy}{(n by p+1) matrix for X (design matrix) and response vector y.} | ||
|
||
\item{lgridlo}{Lower quantile values of double bootstrap distribution to | ||
obtain.} | ||
|
||
\item{lgridhi}{Upper quantile values of double bootstrap distribution to | ||
obtain.} | ||
|
||
\item{B}{Number of 1st stage bootstrap samples.} | ||
|
||
\item{B2}{Number of 2nd stage double bootstrap samples.} | ||
|
||
\item{G}{Calculate quantile-based empirical coverage at this many grid | ||
points} | ||
} | ||
\value{ | ||
theta_hat_boot first-level bootstrap estimates of all slope coefficients | ||
|
||
theta_qtl_lgrid_lo (p+1 by B by G by 1) matrix for lower quantiles at | ||
all grid points for all predictors over all bootstrap samples. | ||
|
||
theta_qtl_lgrid_hi (p+1 by B by G by 1) matrix for upper quantiles at | ||
all grid points for all predictors over all bootstrap samples. | ||
} | ||
\description{ | ||
This is the workhorse function of the package, speeding up computations | ||
within double bootstrap routine. | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/functions-code.R | ||
\name{Cquantile} | ||
\alias{Cquantile} | ||
\title{Fast computation of quantiles} | ||
\usage{ | ||
Cquantile(xx, p) | ||
} | ||
\arguments{ | ||
\item{xx}{Numeric vector we are obtaining quantiles for.} | ||
|
||
\item{p}{Numeric vector of quantiles.} | ||
} | ||
\value{ | ||
Numeric vector containing quantiles, possibly greater than one. | ||
} | ||
\description{ | ||
Helper function which takes as input a vector and obtains quantiles for it. | ||
Number of quantiles may be greater than one. | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
\name{perccal-package} | ||
\alias{perccal-package} | ||
\alias{perccal} | ||
\docType{package} | ||
\title{ | ||
Computing Confidence Intervals Via Double Bootstrap 'perc-cal' Method | ||
} | ||
\description{ | ||
|
||
Contains functions which allow the user to compute confidence intervals | ||
quickly using the double bootstrap-based percentile calibrated | ||
('perc-cal') method for linear regression coefficients. | ||
|
||
} | ||
\details{ | ||
\tabular{ll}{ | ||
Package: \tab perccal\cr | ||
Type: \tab Package\cr | ||
Version: \tab 1.0\cr | ||
Date: \tab 2016-06-14\cr | ||
License: \tab GPL-3\cr | ||
} | ||
|
||
Contains functions which allow users to compute confidence intervals | ||
quickly using the double bootstrap-based percentile calibrated | ||
('perc-cal') method for linear regression coefficients. | ||
|
||
The help of Justin Bleich is strongly acknowledged. | ||
|
||
} | ||
\author{ | ||
Daniel McCarthy | ||
|
||
Maintainer: Daniel McCarthy <danielmc@wharton.upenn.edu> | ||
} | ||
\references{ | ||
|
||
Efron, Bradley; Tibshirani, Robert J. \dQuote{An Introduction to the Bootstrap} | ||
1994. Book. Publisher: CRC Press. | ||
|
||
McCarthy, Daniel; Zhang, Kai; Berk, Richard; Brown, Lawrence; Buja, Andreas; | ||
George, Edward; Zhao, Linda. \dQuote{Calibrated Percentile Double Bootstrap | ||
For Robust Linear Regression Inference} 2016. Available on arXiv: | ||
https://arxiv.org/abs/1511.00273 | ||
|
||
} | ||
|
||
\keyword{ package } | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/functions-code.R | ||
\name{perccal_interval} | ||
\alias{perccal_interval} | ||
\title{Calculates Percentile-Calibrated Linear Regression Confidence Intervals} | ||
\usage{ | ||
perccal_interval(Xy, alpha, G = 20, B = 999, B2 = 999) | ||
} | ||
\arguments{ | ||
\item{Xy}{[n by (p+1)] matrix: X in columns 1 to p, y in column p+1. | ||
X is the design matrix, and is assumed to not include a vector of one's.} | ||
\item{alpha}{Target coverage desired.} | ||
\item{G}{Number of grid points to evaluate calibrated percentile method | ||
on each side over.} | ||
\item{B}{Number of 1st stage bootstrap samples.} | ||
\item{B2}{Number of 2nd stage double bootstrap samples.} | ||
} | ||
\value{ | ||
Return a (p+1)x2 matrix containing confidence intervals for all | ||
regression coefficients, estimated via the perc-cal method. | ||
} | ||
\description{ | ||
This is the main function of the package. It takes as inputs the | ||
predictor/response matrix appended together, which can be either | ||
a data frame or a matrix, along with the desired coverage and other | ||
settings, and outputs marginal confidence intervals for each of the | ||
predictors, including the intercept. | ||
} | ||
\examples{ | ||
set.seed(1234) | ||
n = 32 | ||
B = 500 | ||
B2 = 500 | ||
G=20 | ||
x1=rnorm(n) | ||
x2=rnorm(n) | ||
eps=rnorm(n) | ||
y = x1 + 2*x2 + eps | ||
Xy = cbind(x1,x2,y) | ||
alpha = .025 | ||
perccal_interval(Xy, alpha, G, B, B2) | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/functions-code.R | ||
\name{sample_rcpp} | ||
\alias{sample_rcpp} | ||
\title{Sample from [1,2,...,N] with replacement nsamp times in Rcpp.} | ||
\usage{ | ||
sample_rcpp(N, nsamp) | ||
} | ||
\arguments{ | ||
\item{N}{Largest integer to sample from.} | ||
|
||
\item{nsamp}{number of samples from [1,2,...,N] with replacement to obtain.} | ||
} | ||
\value{ | ||
samps nsamp-length vector of samples from [1,2,...,N] with replacement to obtain. | ||
} | ||
\description{ | ||
Helper function which samples from [1,2,...,N] with replacement | ||
nsamp times in Rcpp. | ||
} | ||
|
Oops, something went wrong.