Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit eb91609
Showing
56 changed files
with
2,815 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
Package: RCBR | ||
Title: Random Coefficient Binary Response Estimation | ||
Description: Nonparametric maximum likelihood estimation methods | ||
for random coefficient binary response models and some related | ||
functionality for sequential processing of hyperplane arrangements. | ||
See J. Gu and R. Koenker (2020) <DOI:10.1080/01621459.2020.1802284>. | ||
Version: 0.5.9 | ||
Authors@R: c( | ||
person( "Roger", "Koenker", email= "rkoenker@uiuc.edu", role = c("aut", "cre")), | ||
person("Jiaying", "Gu", email = "jiaying.gu@utoronto.ca", role = "aut") | ||
) | ||
Maintainer: Roger Koenker <rkoenker@uiuc.edu> | ||
Depends: R (>= 2.10), Matrix | ||
Suggests: knitr, digest | ||
Imports: methods, Rmosek, REBayes, orthopolynom, Formula, mvtnorm | ||
LazyData: TRUE | ||
SystemRequirements: MOSEK (http://www.mosek.com) and MOSEK License for | ||
use of Rmosek, | ||
License: GPL (>= 2) | ||
URL: https://www.r-project.org | ||
NeedsCompilation: no | ||
Encoding: UTF-8 | ||
Author: Roger Koenker [aut, cre], | ||
Jiaying Gu [aut] | ||
RoxygenNote: 7.1.1 | ||
Packaged: 2020-11-11 11:25:06 UTC; roger | ||
Repository: CRAN | ||
Date/Publication: 2020-11-16 10:10:05 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
6463f928a270c1854f74e7e74f0fc3cd *DESCRIPTION | ||
c097f7d691d1d6191eab46f6a442aa5c *NAMESPACE | ||
2962f1d4e92124f713cb7a9920754590 *R/GH.R | ||
0f2e389e4333069910eef986c61a3d46 *R/GK.control.R | ||
b8f33220bd1cf807ab7386d3f68cdf8b *R/Horowitz93.R | ||
2ea1d73c5e15a8dcf1cc594728842c2c *R/KW.control.R | ||
11bc6dfbed399d5a412adcd89a9e35a0 *R/KWDual.R | ||
75b48c6bdc30bae92dfbef23331ccb95 *R/NICER.R | ||
f845b032ee4ebb5e484c55670417edc3 *R/NICERd.R | ||
936b23212dbc0792ddacf6988342fa8f *R/bounds.KW2.R | ||
1822ced17224dbb59fe4d390a3ecc5ae *R/logLik.GK.R | ||
282a791bc6131e110347d7c5f649b8f8 *R/logLik.KW1.R | ||
71e07ae13e7b568a02b05527f0408e96 *R/neighbours.R | ||
f9dc32dccca9ddc2c9ec454b05224666 *R/plot.GK.R | ||
df3e03849cd776f7afa95d1e0f8d8760 *R/plot.KW2.R | ||
da95ef02fdb10ffee52406ba9b287fa1 *R/polycount.R | ||
a49cbe63fa3ce2703666580100e6da6f *R/polyzone.R | ||
c9a74646bcdc601ac2a757e66451127c *R/prcbr.R | ||
30a6dcb3d763d6cc89e1afef4b6df5f8 *R/predict.GK.R | ||
a06884f4f99bfdd976bba01e819a839f *R/predict.KW2.R | ||
d52e87845eef1d984859641ebbe4be65 *R/rcbr.R | ||
2dea5b8f37a0c06ab38e853a202be018 *R/rcbr.fit.GK.R | ||
72239ceeb3bc726d23130c8bb17f841a *R/rcbr.fit.KW1.R | ||
4eb4018c0ffebcf404358866c4729f44 *R/rcbr.fit.KW2.R | ||
e0815632758add8b2409788f66f7121e *R/rcbr.fit.R | ||
35326241aa1f0239afc99fcf44dc2cde *R/witness.R | ||
1dc0cc29fd3354a3d6c0feedde8cad34 *data/Horowitz93.rda | ||
47996ff93c4b502f2b3b3b9a9a762b15 *data/Horowitz93.txt.gz | ||
0c581f570087874bbb619c73af6fe803 *demo/00Index | ||
d610a7469196c5d7e040e88a92d33169 *demo/nonu.R | ||
31f04885824a9615450b11eb3879041f *demo/testGH.R | ||
89ff29ac88536748cfc94105e8ccb44e *inst/ChangeLog | ||
34f20c23a623c088ccb1c89975cdbe1d *man/GH.Rd | ||
3ce707b4601849fd944a69051a5fa4be *man/GH.se.Rd | ||
bd072fb131540dc1ad206174e4a228d9 *man/GK.control.Rd | ||
2e5fc0c9993e0f4a121394e6871dccbb *man/Horowitz93.Rd | ||
aaa93c51428a7e5646497697394e73ef *man/KW.control.Rd | ||
69844a7d7eb99698bbc840cc26b19b18 *man/KWDual.Rd | ||
6fc4dd32fe75537d5cdc6c9fef83b638 *man/NICER.Rd | ||
4f0ceb8d034a64850c2346ad3dd29adb *man/NICERd.Rd | ||
fb6ac0992530a52b704c6352c46ffeb8 *man/bounds.KW2.Rd | ||
b692cfde4c4b6c73256cec8bf5d220a6 *man/neighbours.Rd | ||
e0766123f08b999842f5a8b844cef5c5 *man/plot.GK.Rd | ||
8c338990d2229c23cb7b156516157d99 *man/plot.KW2.Rd | ||
11a92c530e63f9a1e7bade3a064e4aef *man/polycount.Rd | ||
dd3982fbedc1c2c1d737d210a832745d *man/polyzone.Rd | ||
7dc3c7cf7e93224e94530dce98a4c099 *man/prcbr.Rd | ||
df325c3e51880e0a5b9aec91c316dba8 *man/predict.GK.Rd | ||
9a73cf5c3c3a3f2eedb61b5367249d23 *man/predict.KW2.Rd | ||
ad29487d494e9ec6aa841666cd746706 *man/rcbr.Rd | ||
79ed94be6cb1c3e074dddab15dd9d32c *man/rcbr.fit.GK.Rd | ||
98c1807a34b75986c915cba4ee92247e *man/rcbr.fit.KW1.Rd | ||
883d1d5e34a6471ccc7e92ec24788e23 *man/rcbr.fit.KW2.Rd | ||
68e34fdf0bf276340104cabc5d8999ed *man/rcbr.fit.Rd | ||
b4f53f68d3c1d7033f9eb47e23657ddc *man/witness.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(plot,GK) | ||
S3method(plot,KW2) | ||
S3method(predict,GK) | ||
S3method(predict,KW2) | ||
export(GH) | ||
export(GH.se) | ||
export(GK.control) | ||
export(KW.control) | ||
export(KWDual) | ||
export(NICER) | ||
export(NICERd) | ||
export(bounds.KW2) | ||
export(neighbours) | ||
export(polycount) | ||
export(polyzone) | ||
export(prcbr) | ||
export(rcbr) | ||
export(rcbr.fit) | ||
export(rcbr.fit.GK) | ||
export(rcbr.fit.KW1) | ||
export(rcbr.fit.KW2) | ||
export(witness) | ||
import(Matrix) | ||
importFrom(Formula,Formula) | ||
importFrom(REBayes,Cosslett) | ||
importFrom(grDevices,heat.colors) | ||
importFrom(graphics,contour) | ||
importFrom(methods,as) | ||
importFrom(methods,new) | ||
importFrom(stats,logLik) | ||
importFrom(stats,model.matrix) | ||
importFrom(stats,model.response) | ||
importFrom(stats,optim) | ||
importFrom(stats,sd) | ||
importFrom(stats,stepfun) | ||
importFrom(stats,terms) | ||
importFrom(stats,var) | ||
importFrom(utils,combn) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#' Current Status Linear Regression | ||
#' | ||
#' Groeneboom and Hendrickx semiparametric binary response estimator (scalar case) | ||
#' score estimator based on NPMLE avoids any smoothing | ||
#' proposed by Groneboom and Hendrickx (2018). | ||
#' | ||
#' @param b parameter vector (fix last entry as a known number, usually 1 or -1, for normalization) | ||
#' @param X design matrix | ||
#' @param y binary response vector | ||
#' @param eps trimming tolerance parameter | ||
#' @return A list with components: | ||
#' \itemize{ | ||
#' \item evaluation of a score function at parameter value | ||
#' \item estimated standard error | ||
#' \item sindex single index linear predictor | ||
#' } | ||
#' @references | ||
#' Groeneboom, P. and K. Hendrickx (2018) Current Status Linear Regression, | ||
#' Annals of Statistics, 46, 1415-1444, | ||
#' @importFrom REBayes Cosslett | ||
#' @export | ||
GH <- function(b, X, y, eps = 0.001){ | ||
# input b is a vector with the last entry fixed (normalization) | ||
sindex = as.numeric(X%*%b) | ||
o = order(sindex) | ||
fstar <- Cosslett(sindex, y, v = sort(sindex)) | ||
Fhat = cumsum(fstar$y) | ||
crossprod(X[o,1], (y[o] - Fhat)*(1*(Fhat >= eps & Fhat <= 1-eps)))/nrow(X) | ||
} | ||
#' Current Status Linear Regression Standard Errors | ||
#' | ||
#' Groeneboom and Hendrickx semiparametric binary response estimator (scalar case) | ||
#' score estimator based on NPMLE avoids any smoothing | ||
#' proposed by Groneboom and Hendrickx (2018). | ||
#' | ||
#' @param bstar parameter vector (fix last entry as a known number, usually 1 or -1, for normalization) | ||
#' @param X design matrix | ||
#' @param y binary response vector | ||
#' @param hc kernel bandwidth (used for the standard error estimation) | ||
#' @param eps trimming tolerance parameter | ||
#' @return A list with components: | ||
#' \itemize{ | ||
#' \item evaluation of a score function at parameter value | ||
#' \item estimated standard error | ||
#' \item sindex single index linear predictor | ||
#' } | ||
#' @references | ||
#' Groeneboom, P. and K. Hendrickx (2018) Current Status Linear Regression, | ||
#' Annals of Statistics, 46, 1415-1444, | ||
#' @importFrom stats sd | ||
#' @importFrom stats var | ||
#' @importFrom REBayes Cosslett | ||
#' @export | ||
GH.se <- function(bstar, X, y, eps = 0.001, hc = 2){ | ||
# covariance involves a density function, approximated by kernel smoothed estimates. | ||
# input b is a vector with the last entry fixed (normalization) | ||
p = ncol(X) | ||
if (p > 2) stop("only scalar case allowed") | ||
dX = X[,1]-mean(X[,1]) | ||
sindex = as.numeric(X%*%bstar) | ||
o = order(sindex) | ||
n = length(y) | ||
h = hc * sd(sindex) * n^(-1/7) | ||
K <- function(u){ | ||
(1*(abs(u)<=1))*35*(1-u^2)^3/32 | ||
} | ||
fstar <- Cosslett(sindex, y, v = sort(sindex)) | ||
Fhat <- cumsum(fstar$y) | ||
f <- rep(NA, n) | ||
for (i in 1:n){ | ||
Kz <- K((sindex[i] - sort(sindex))/h)/h | ||
f[i] <- crossprod(Kz, fstar$y) | ||
} | ||
fhat = f[o] | ||
A = mean(fhat*(dX[o]^2)*(1*(Fhat >= eps & Fhat <= 1-eps))) | ||
#B = sum(Fhat[-trim] * (1-Fhat[-trim]) * (X[o,2][-trim]-mean(X[-trim,2]))^2)/nrow(X) | ||
B = var(X[o,1] * (y[o] - Fhat)*(1*(Fhat >= eps & Fhat <= 1-eps))) | ||
sqrt(B/(A^2)/nrow(X)) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#' Control parameters for Gautier-Kitamura bivariate random coefficient binary response | ||
#' | ||
#' These parameters can be passed via the \code{...} argument of the \code{rcbr} function. | ||
#' defaults as suggested in Gautier and Kitamura matlab code | ||
#' | ||
#' @param n the sample size | ||
#' @param u grid values for intercept coordinate | ||
#' @param v grid values for slope coordinate | ||
#' @param T Truncation parameter for numerator must grow "sufficiently slowly with n" | ||
#' @param TX Truncation parameter for denomerator must grow "sufficiently slowly with n" | ||
#' @param Mn Trimming parameter "chosen to go to 0 slowly with n" | ||
#' | ||
#' @return updated list | ||
#' @export | ||
GK.control <- function(n, u = -20:20/10, v = -20:20/10, T = 3, TX = 10, Mn = 1/log(n)^2) { | ||
list(u = u, v = v, T = T, TX = TX, Mn = Mn, tol = .Machine$double.eps^0.5) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' Horowitz (1993) Modal Choice Data | ||
#' | ||
#' Modal choice data for journey to work in the Washington DC area | ||
#' from the late 1960's. The variables are: | ||
#' * `DCOST`: difference in cost of car versus transit (transit - car) | ||
#' * `CARS`: number of cars at home | ||
#' * `DOVTT`: difference in out of vehicle time (transit - car) | ||
#' * `DIVTT`: difference in in vehicle time (transit - car) | ||
#' * `DEPEND`: coded 1 if by car, 0 if by mass transit | ||
#' | ||
#' @format A data frame with 842 observations on 5 variables: | ||
#' @source \url{https://www.gams.com/latest/gamslib_ml/libhtml/gamslib_mws.html} | ||
#' @references Horowitz, J L, (1993) Semiparametric estimation of a work-trip mode choice model. | ||
#' Journal of Econometrics, 58, 49-70. | ||
#' @keywords datasets | ||
"Horowitz93" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#' Control parameters for NPMLE of bivariate random coefficient binary response | ||
#' | ||
#' These parameters can be passed via the \code{...} argument of the \code{rcbr} function. | ||
#' The first three arguments are only relevant if full cell enumeration is employed for | ||
#' bivariate version of the NPMLE. | ||
#' | ||
#' @param uv matrix of evaluation points for potential mass points | ||
#' @param u grid of evaluation points for potential mass points | ||
#' @param v grid of evaluation points for potential mass points | ||
#' @param initial initial point for cell enumeration algorithm | ||
#' @param epsbound controls how close witness points can be to vertices of a cell | ||
#' @param epstol zero tolerance for witness solutions | ||
#' @param presolve controls whether Mosek does a presolve of the LP | ||
#' @param verb controls verbosity of Mosek solver 0 implies it is quiet | ||
#' | ||
#' @return updated list | ||
#' @export | ||
KW.control <- function(uv = NULL, u = NULL, v = NULL, initial = c(0,0), | ||
epsbound = 1, epstol = 1e-07, presolve = 1, verb = 0) | ||
list(uv = uv, u = u, v = v, initial = initial, epsbound = epsbound, | ||
epstol = epstol, presolve = presolve, verb = verb) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#' Dual optimization for Kiefer-Wolfowitz problems | ||
#' | ||
#' Interface function for calls to optimizer from various REBayes functions | ||
#' There is currently only one option for the optimization that based on Mosek. | ||
#' It relies on the \pkg{Rmosek} interface to R see installation instructions in | ||
#' the Readme file in the inst directory of this package. This version of the function | ||
#' is intended to work with versions of Mosek after 7.0. A more experimental option | ||
#' employing the \pkg{pogs} package available from \url{https://github.com/foges/pogs} | ||
#' and employing an ADMM (Alternating Direction Method of Multipliers) approach has | ||
#' been deprecated, those interested could try installing version 1.4 of REBayes, and | ||
#' following the instructions provided there. | ||
#' | ||
#' @param A Linear constraint matrix | ||
#' @param d constraint vector | ||
#' @param w weights for \code{x} should sum to one. | ||
#' @param ... other parameters passed to control optimization: These may | ||
#' include \code{rtol} the relative tolerance for dual gap convergence criterion, | ||
#' \code{verb} to control verbosity desired from mosek, \code{verb = 0} is quiet, | ||
#' \code{verb = 5} produces a fairly detailed iteration log, | ||
#' \code{control} is a control list consisting of sublists \code{iparam}, | ||
#' \code{dparam}, and \code{sparam}, containing elements of various mosek | ||
#' control parameters. See the Rmosek and Mosek manuals for further details. | ||
#' A prime example is \code{rtol} which should eventually be deprecated and | ||
#' folded into \code{control}, but will persist for a while for compatibility | ||
#' reasons. The default for \code{rtol} is 1e-6, but in some cases it is | ||
#' desirable to tighten this, say to 1e-10. Another example that motivated the introduction of | ||
#' \code{control} would be \code{control = list(iparam = list(num_threads = | ||
#' 1))}, which forces Mosek to use a single threaded process. The default | ||
#' allows Mosek to uses multiple threads (cores) if available, which is | ||
#' generally desirable, but may have unintended (undesirable) consequences when running | ||
#' simulations on clusters. | ||
#' @return Returns a list with components: \item{f}{dual solution vector, the | ||
#' mixing density} \item{g}{primal solution vector, the mixture density | ||
#' evaluated at the data points} \item{logLik}{log likelihood} | ||
#' \item{status}{return status from Mosek}. Mosek termination messages are | ||
#' treated as warnings from an R perspective since solutions producing, for example, | ||
#' MSK_RES_TRM_STALL: The optimizer is terminated due to slow progress, may still | ||
#' provide a satisfactory solution, especially when the return status variable is | ||
#' "optimal". | ||
#' @author R. Koenker | ||
#' @references | ||
#' Koenker, R and I. Mizera, (2013) ``Convex Optimization, Shape Constraints, | ||
#' Compound Decisions, and Empirical Bayes Rules,'' \emph{JASA}, 109, 674--685. | ||
#' | ||
#' Mosek Aps (2015) Users Guide to the R-to-Mosek Optimization Interface, | ||
#' \url{https://docs.mosek.com/8.1/rmosek/index.html}. | ||
#' | ||
#' Koenker, R. and J. Gu, (2017) REBayes: An {R} Package for Empirical Bayes Mixture Methods, | ||
#' \emph{Journal of Statistical Software}, 82, 1--26. | ||
#' @keywords nonparametrics | ||
#' @importFrom methods as new | ||
#' @import Matrix | ||
#' @export | ||
KWDual <- function(A, d, w, ...){ | ||
# Dual Kiefer-Wolfowitz MLE for Mixture Problems | ||
# | ||
# This version implements a class of density estimators solving: | ||
# | ||
# min_x {F(x) := sum -log (x_i)} s.t. A' x <= d, 0 <= x, | ||
# | ||
# | ||
# where e.g. A = phi(outer(Y,g,"fun")), with Y data and g a grid on the support of Y, | ||
# and "fun" is some function representing the dependence of the base distribution. | ||
# | ||
#------------------------------------------------------------------------------------- | ||
# | ||
# Roger Koenker | ||
# | ||
# First version:24 Feb 2012 | ||
# Revised: 10 Jun 2015 # Simplified signature | ||
# Revised: 2 Jul 2015 # Added pogs method | ||
# Revised 30 Jan 2019 # Removed pogs method, added Mosek V9 option | ||
# Revised 27 Sep 2019 # changed stop to warning for stalled mosek | ||
|
||
n <- nrow(A) | ||
m <- ncol(A) | ||
A <- t(A) | ||
A <- Matrix::Matrix(A, sparse = TRUE) | ||
|
||
dots <- list(...) | ||
|
||
# Default mosek method | ||
rtol <- ifelse(length(dots$rtol), dots$rtol, 1e-6) | ||
verb <- ifelse(length(dots$verb), dots$verb, 0) | ||
if(length(dots$control)) control <- dots$control | ||
else control <- NULL | ||
|
||
if(utils::packageVersion("Rmosek") < 9){ | ||
P <- list(sense = "min") | ||
P$c <- rep(0, n) | ||
P$A <- A | ||
P$bc <- rbind(rep(0,m),d) | ||
P$bx <- rbind(rep(0,n),rep(Inf,n)) | ||
opro <- matrix ( list (), nrow =5, ncol = n) | ||
rownames ( opro ) <- c(" type ","j","f","g","h") | ||
|
||
opro[1,] <- as.list(rep('log',n)) | ||
opro[2,] <- as.list(1:n) | ||
opro[3,] <- as.list(-w) | ||
opro[4,] <- as.list(rep(1,n)) | ||
opro[5,] <- as.list(rep(0,n)) | ||
P$scopt<- list(opro = opro) | ||
P$dparam$intpnt_nl_tol_rel_gap <- rtol | ||
} | ||
else { #Mosek Version => 9 | ||
P <- list(sense = "min") | ||
A0 <- Matrix::Matrix(0, m, n) | ||
P$c <- c(rep(0,n), -w) | ||
P$A <- cbind(A, A0) | ||
P$bc <- rbind(rep(0, m), d) | ||
P$bx <- rbind(c(rep(0, n), rep(-Inf,n)), rep(Inf, 2*n)) | ||
P$F <- Matrix::sparseMatrix(c(seq(1,3*n, by = 3), seq(3, 3*n, by = 3)), | ||
c(1:n, (n+1):(2*n)), x = rep(1,2*n)) | ||
P$g <- rep(c(0,1,0), n) | ||
P$cones <- matrix(list("PEXP", 3, NULL), nrow = 3, ncol = n) | ||
rownames(P$cones) <- c("type", "dim", "conepar") | ||
P$dparam$intpnt_co_tol_rel_gap <- rtol | ||
} | ||
if(length(control)){ | ||
P$iparam <- control$iparam | ||
P$dparam <- control$dparam | ||
P$sparam <- control$sparam | ||
} | ||
z <- Rmosek::mosek(P, opts = list(verbose = verb)) | ||
status <- z$sol$itr$solsta | ||
if (status != "OPTIMAL") | ||
warning(paste("Solution status = ", status)) | ||
f <- z$sol$itr$suc | ||
if(min(f) < -rtol) | ||
warning("estimated mixing distribution has some negative values: consider reducing rtol") | ||
else f[f < 0] <- 0 | ||
g <- as.vector(t(A) %*% (f * d)) | ||
list(f = f, g = g, status = status) | ||
} |
Oops, something went wrong.