Skip to content

Commit

Permalink
version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel McCarthy authored and cran-robot committed Jun 14, 2016
0 parents commit 425ffff
Show file tree
Hide file tree
Showing 13 changed files with 608 additions and 0 deletions.
19 changes: 19 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,19 @@
Package: perccal
Type: Package
Title: Implementing Double Bootstrap Linear Regression Confidence
Intervals Using the 'perc-cal' Method
Version: 1.0
Date: 2016-06-14
Authors@R: c( person("Daniel", "McCarthy", role=c("aut","cre","ctb"),
email="danielmc@wharton.upenn.edu"))
Maintainer: Daniel McCarthy <danielmc@wharton.upenn.edu>
URL: http://www.danielminhmccarthy.com
Description: Contains functions which allow the user to compute confidence intervals quickly using the double bootstrap-based percentile calibrated ('perc-cal') method for linear regression coefficients. 'perccal_interval()' is the primary user-facing function within this package.
License: GPL-3
Imports: Rcpp (>= 0.11.5)
LinkingTo: Rcpp, RcppArmadillo, RcppEigen
NeedsCompilation: yes
Packaged: 2016-06-14 17:32:40 UTC; danielmc
Author: Daniel McCarthy [aut, cre, ctb]
Repository: CRAN
Date/Publication: 2016-06-14 20:34:32
12 changes: 12 additions & 0 deletions MD5
@@ -0,0 +1,12 @@
7137aa586945f221d985b0edff0f7123 *DESCRIPTION
6ebd1c9a850db60126cfd6eea1bd0590 *NAMESPACE
00986ec0fd6d81ef72f0ce7c715687b4 *R/RcppExports.R
1c05abeb19c8d7d18e49e094e7fa3f34 *R/functions-code.R
344a5a9282cf7f9e633a3514bb0ff603 *man/Cdboot_multi.Rd
5923466962ecbb1553db9fbc28b0ee53 *man/Cquantile.Rd
eab124c5da47cba00b908229d360fad6 *man/perccal-package.Rd
d1c1e0a9d3ea030ca2bd0f1354c1c445 *man/perccal_interval.Rd
1fda748862ea4c31684cf2e6a89bc93f *man/sample_rcpp.Rd
899e256fbab14d837ea44d49b1c974b9 *src/Makevars
fa318843a746835da902d47bc8eb2970 *src/RcppExports.cpp
309ea63b5fe6fc65d4eea4484214fb06 *src/dboot_multi.cpp
6 changes: 6 additions & 0 deletions NAMESPACE
@@ -0,0 +1,6 @@
useDynLib(perccal)
importFrom(Rcpp, evalCpp)
importFrom("stats", "coefficients")
importFrom("stats", "quantile")
importFrom("stats", "lm")
exportPattern("^[[:alpha:]]+")
15 changes: 15 additions & 0 deletions R/RcppExports.R
@@ -0,0 +1,15 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

sample_rcpp <- function(N, nsamp) {
.Call('perccal_sample_rcpp', PACKAGE = 'perccal', N, nsamp)
}

Cquantile <- function(xx, p) {
.Call('perccal_Cquantile', PACKAGE = 'perccal', xx, p)
}

Cdboot_multi <- function(xxyy, lgridlo, lgridhi, B, B2, G) {
.Call('perccal_Cdboot_multi', PACKAGE = 'perccal', xxyy, lgridlo, lgridhi, B, B2, G)
}

158 changes: 158 additions & 0 deletions R/functions-code.R
@@ -0,0 +1,158 @@
##################################################
### Percentile-Calibrated ('perc-cal') Confidence Interval Functions

library(RcppEigen)
library(Rcpp)
library(RcppArmadillo)

##' Calculates Percentile-Calibrated Linear Regression Confidence Intervals
##'
##' This is the main function of the package. It takes as inputs the
##' predictor/response matrix appended together, which can be either
##' a data frame or a matrix, along with the desired coverage and other
##' settings, and outputs marginal confidence intervals for each of the
##' predictors, including the intercept.
##' @name perccal_interval
##' @param Xy [n by (p+1)] matrix: X in columns 1 to p, y in column p+1.
##' X is the design matrix, and is assumed to not include a vector of one's.
##' @param alpha Target coverage desired.
##' @param G Number of grid points to evaluate calibrated percentile method
##' on each side over.
##' @param B Number of 1st stage bootstrap samples.
##' @param B2 Number of 2nd stage double bootstrap samples.
##' @return Return a (p+1)x2 matrix containing confidence intervals for all
##' regression coefficients, estimated via the perc-cal method.
##' @examples
##' set.seed(1234)
##' n = 32
##' B = 500
##' B2 = 500
##' G=20
##' x1=rnorm(n)
##' x2=rnorm(n)
##' eps=rnorm(n)
##' y = x1 + 2*x2 + eps
##' Xy = cbind(x1,x2,y)
##' alpha = .025
##' perccal_interval(Xy, alpha, G, B, B2)
perccal_interval = function(Xy, alpha, G = 20, B = 999, B2 = 999){
# Inputs:
# Xy: [n by (p+1)] matrix: X in columns 1 to p, y in column p+1
#(please do not include the intercept)
# alpha: Target coverage desired.
# G : # of grid points to evaluate calibrated percentile method on each side.
# B : # of 1st stage bootstrap samples
# B2 : # of 2nd stage bootstrap samples
# Output:
# Confidence interval for all regression coefficients, estimated via perc-cal method.

# Initialize all the matrices and vectors to be used below.
p = ncol(Xy)-1
theta.hat.boot = matrix(NA,nrow=p+1,ncol=B)
theta.hat.dboot = matrix(NA,nrow=p+1,ncol=B2)
theta.qtl.lgrid.lo = array(NA, dim=c(p+1,B,G))
theta.qtl.lgrid.hi = array(NA, dim=c(p+1,B,G))
perc.cal.ints = matrix(NA,nrow=p+1,ncol=2)

l.grid.lo = seq(.0001, alpha*1.4, length.out=G)
l.grid.hi = 1-l.grid.lo

# Start main code
X=as.matrix(Xy[,1:p])
Xm = cbind(1,X)
y=Xy[,p+1]
regr_output = coefficients(summary(lm(y~X)))
prednames = rownames(regr_output)
theta.hat = as.numeric(regr_output[,"Estimate"])

boot_output = Cdboot_multi(as.matrix(Xy), l.grid.lo, l.grid.hi, B, B2, G)

for(k in 1:(p+1)){
theta.hat.boot = boot_output$theta_hat_boot[,k]
theta.qtl.lgrid.lo = boot_output$theta_qtl_lgrid_lo[k,1][[1]][,,1]
theta.qtl.lgrid.hi = boot_output$theta_qtl_lgrid_hi[k,1][[1]][,,1]

# perc-cal method
log.lgrid.lo = theta.qtl.lgrid.lo < theta.hat[k]
log.lgrid.hi = theta.qtl.lgrid.hi > theta.hat[k]
percs.lo = 1-colSums(log.lgrid.lo)/B
percs.hi = 1-colSums(log.lgrid.hi)/B
lo.log = (percs.lo[1:(G-1)]<alpha)*(percs.lo[2:G]>=alpha)
hi.log = (percs.hi[1:(G-1)]<alpha)*(percs.hi[2:G]>=alpha)
if(sum(lo.log)>0){l.alpha.lo = l.grid.lo[lo.log*(1:(G-1))]}
if(sum(lo.log)<=0){l.alpha.lo = .0001}
if(sum(hi.log)>0){l.alpha.hi = l.grid.hi[hi.log*(1:(G-1))]}
if(sum(hi.log)<=0){l.alpha.hi = 1-.0001}
perc.cal.ints[k,] = quantile(theta.hat.boot,c(l.alpha.lo, l.alpha.hi))
}

colnames(perc.cal.ints) = c(paste("lower ",round(alpha,3),sep=""),paste("upper ",1-round(alpha,3),sep=""))
rownames(perc.cal.ints) = prednames
perc.cal.ints
}





##' Fast computation of quantiles
##'
##' Helper function which takes as input a vector and obtains quantiles for it.
##' Number of quantiles may be greater than one.
##' @name Cquantile
##' @param xx Numeric vector we are obtaining quantiles for.
##' @param p Numeric vector of quantiles.
##' @return Numeric vector containing quantiles, possibly greater than one.
Cquantile <- function(xx, p) {
.Call('perccal_Cquantile', PACKAGE = 'perccal', xx, p)
}


##' Sample from [1,2,...,N] with replacement nsamp times in Rcpp.
##'
##' Helper function which samples from [1,2,...,N] with replacement
##' nsamp times in Rcpp.
##' @name sample_rcpp
##' @param N Largest integer to sample from.
##' @param nsamp number of samples from [1,2,...,N] with replacement to obtain.
##' @return samps nsamp-length vector of samples from [1,2,...,N] with replacement to obtain.
sample_rcpp <- function(N, nsamp) {
.Call('perccal_sample_rcpp', PACKAGE = 'perccal', N, nsamp)
}



##' Fast computation of internal double bootstrap calculations
##'
##' This is the workhorse function of the package, speeding up computations
##' within double bootstrap routine.
##' @name Cdboot_multi
##' @param xxyy (n by p+1) matrix for X (design matrix) and response vector y.
##' @param lgridlo Lower quantile values of double bootstrap distribution to
##' obtain.
##' @param lgridhi Upper quantile values of double bootstrap distribution to
##' obtain.
##' @param B Number of 1st stage bootstrap samples.
##' @param B2 Number of 2nd stage double bootstrap samples.
##' @param G Calculate quantile-based empirical coverage at this many grid
##' points
##' @return theta_hat_boot first-level bootstrap estimates of all slope coefficients
##' @return theta_qtl_lgrid_lo (p+1 by B by G by 1) matrix for lower quantiles at
##' all grid points for all predictors over all bootstrap samples.
##' @return theta_qtl_lgrid_hi (p+1 by B by G by 1) matrix for upper quantiles at
##' all grid points for all predictors over all bootstrap samples.
Cdboot_multi <- function(xxyy, lgridlo, lgridhi, B, B2, G) {
.Call('perccal_Cdboot_multi', PACKAGE = 'perccal', xxyy, lgridlo, lgridhi, B, B2, G)
}












38 changes: 38 additions & 0 deletions man/Cdboot_multi.Rd
@@ -0,0 +1,38 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/functions-code.R
\name{Cdboot_multi}
\alias{Cdboot_multi}
\title{Fast computation of internal double bootstrap calculations}
\usage{
Cdboot_multi(xxyy, lgridlo, lgridhi, B, B2, G)
}
\arguments{
\item{xxyy}{(n by p+1) matrix for X (design matrix) and response vector y.}

\item{lgridlo}{Lower quantile values of double bootstrap distribution to
obtain.}

\item{lgridhi}{Upper quantile values of double bootstrap distribution to
obtain.}

\item{B}{Number of 1st stage bootstrap samples.}

\item{B2}{Number of 2nd stage double bootstrap samples.}

\item{G}{Calculate quantile-based empirical coverage at this many grid
points}
}
\value{
theta_hat_boot first-level bootstrap estimates of all slope coefficients

theta_qtl_lgrid_lo (p+1 by B by G by 1) matrix for lower quantiles at
all grid points for all predictors over all bootstrap samples.

theta_qtl_lgrid_hi (p+1 by B by G by 1) matrix for upper quantiles at
all grid points for all predictors over all bootstrap samples.
}
\description{
This is the workhorse function of the package, speeding up computations
within double bootstrap routine.
}

21 changes: 21 additions & 0 deletions man/Cquantile.Rd
@@ -0,0 +1,21 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/functions-code.R
\name{Cquantile}
\alias{Cquantile}
\title{Fast computation of quantiles}
\usage{
Cquantile(xx, p)
}
\arguments{
\item{xx}{Numeric vector we are obtaining quantiles for.}

\item{p}{Numeric vector of quantiles.}
}
\value{
Numeric vector containing quantiles, possibly greater than one.
}
\description{
Helper function which takes as input a vector and obtains quantiles for it.
Number of quantiles may be greater than one.
}

49 changes: 49 additions & 0 deletions man/perccal-package.Rd
@@ -0,0 +1,49 @@
\name{perccal-package}
\alias{perccal-package}
\alias{perccal}
\docType{package}
\title{
Computing Confidence Intervals Via Double Bootstrap 'perc-cal' Method
}
\description{

Contains functions which allow the user to compute confidence intervals
quickly using the double bootstrap-based percentile calibrated
('perc-cal') method for linear regression coefficients.

}
\details{
\tabular{ll}{
Package: \tab perccal\cr
Type: \tab Package\cr
Version: \tab 1.0\cr
Date: \tab 2016-06-14\cr
License: \tab GPL-3\cr
}

Contains functions which allow users to compute confidence intervals
quickly using the double bootstrap-based percentile calibrated
('perc-cal') method for linear regression coefficients.

The help of Justin Bleich is strongly acknowledged.

}
\author{
Daniel McCarthy

Maintainer: Daniel McCarthy <danielmc@wharton.upenn.edu>
}
\references{

Efron, Bradley; Tibshirani, Robert J. \dQuote{An Introduction to the Bootstrap}
1994. Book. Publisher: CRC Press.

McCarthy, Daniel; Zhang, Kai; Berk, Richard; Brown, Lawrence; Buja, Andreas;
George, Edward; Zhao, Linda. \dQuote{Calibrated Percentile Double Bootstrap
For Robust Linear Regression Inference} 2016. Available on arXiv:
https://arxiv.org/abs/1511.00273

}

\keyword{ package }

47 changes: 47 additions & 0 deletions man/perccal_interval.Rd
@@ -0,0 +1,47 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/functions-code.R
\name{perccal_interval}
\alias{perccal_interval}
\title{Calculates Percentile-Calibrated Linear Regression Confidence Intervals}
\usage{
perccal_interval(Xy, alpha, G = 20, B = 999, B2 = 999)
}
\arguments{
\item{Xy}{[n by (p+1)] matrix: X in columns 1 to p, y in column p+1.
X is the design matrix, and is assumed to not include a vector of one's.}
\item{alpha}{Target coverage desired.}
\item{G}{Number of grid points to evaluate calibrated percentile method
on each side over.}
\item{B}{Number of 1st stage bootstrap samples.}
\item{B2}{Number of 2nd stage double bootstrap samples.}
}
\value{
Return a (p+1)x2 matrix containing confidence intervals for all
regression coefficients, estimated via the perc-cal method.
}
\description{
This is the main function of the package. It takes as inputs the
predictor/response matrix appended together, which can be either
a data frame or a matrix, along with the desired coverage and other
settings, and outputs marginal confidence intervals for each of the
predictors, including the intercept.
}
\examples{
set.seed(1234)
n = 32
B = 500
B2 = 500
G=20
x1=rnorm(n)
x2=rnorm(n)
eps=rnorm(n)
y = x1 + 2*x2 + eps
Xy = cbind(x1,x2,y)
alpha = .025
perccal_interval(Xy, alpha, G, B, B2)
}
21 changes: 21 additions & 0 deletions man/sample_rcpp.Rd
@@ -0,0 +1,21 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/functions-code.R
\name{sample_rcpp}
\alias{sample_rcpp}
\title{Sample from [1,2,...,N] with replacement nsamp times in Rcpp.}
\usage{
sample_rcpp(N, nsamp)
}
\arguments{
\item{N}{Largest integer to sample from.}

\item{nsamp}{number of samples from [1,2,...,N] with replacement to obtain.}
}
\value{
samps nsamp-length vector of samples from [1,2,...,N] with replacement to obtain.
}
\description{
Helper function which samples from [1,2,...,N] with replacement
nsamp times in Rcpp.
}

0 comments on commit 425ffff

Please sign in to comment.