Skip to content

Commit

Permalink
version 0.5.9
Browse files Browse the repository at this point in the history
  • Loading branch information
rudjer authored and cran-robot committed Nov 16, 2020
0 parents commit eb91609
Show file tree
Hide file tree
Showing 56 changed files with 2,815 additions and 0 deletions.
28 changes: 28 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,28 @@
Package: RCBR
Title: Random Coefficient Binary Response Estimation
Description: Nonparametric maximum likelihood estimation methods
for random coefficient binary response models and some related
functionality for sequential processing of hyperplane arrangements.
See J. Gu and R. Koenker (2020) <DOI:10.1080/01621459.2020.1802284>.
Version: 0.5.9
Authors@R: c(
person( "Roger", "Koenker", email= "rkoenker@uiuc.edu", role = c("aut", "cre")),
person("Jiaying", "Gu", email = "jiaying.gu@utoronto.ca", role = "aut")
)
Maintainer: Roger Koenker <rkoenker@uiuc.edu>
Depends: R (>= 2.10), Matrix
Suggests: knitr, digest
Imports: methods, Rmosek, REBayes, orthopolynom, Formula, mvtnorm
LazyData: TRUE
SystemRequirements: MOSEK (http://www.mosek.com) and MOSEK License for
use of Rmosek,
License: GPL (>= 2)
URL: https://www.r-project.org
NeedsCompilation: no
Encoding: UTF-8
Author: Roger Koenker [aut, cre],
Jiaying Gu [aut]
RoxygenNote: 7.1.1
Packaged: 2020-11-11 11:25:06 UTC; roger
Repository: CRAN
Date/Publication: 2020-11-16 10:10:05 UTC
55 changes: 55 additions & 0 deletions MD5
@@ -0,0 +1,55 @@
6463f928a270c1854f74e7e74f0fc3cd *DESCRIPTION
c097f7d691d1d6191eab46f6a442aa5c *NAMESPACE
2962f1d4e92124f713cb7a9920754590 *R/GH.R
0f2e389e4333069910eef986c61a3d46 *R/GK.control.R
b8f33220bd1cf807ab7386d3f68cdf8b *R/Horowitz93.R
2ea1d73c5e15a8dcf1cc594728842c2c *R/KW.control.R
11bc6dfbed399d5a412adcd89a9e35a0 *R/KWDual.R
75b48c6bdc30bae92dfbef23331ccb95 *R/NICER.R
f845b032ee4ebb5e484c55670417edc3 *R/NICERd.R
936b23212dbc0792ddacf6988342fa8f *R/bounds.KW2.R
1822ced17224dbb59fe4d390a3ecc5ae *R/logLik.GK.R
282a791bc6131e110347d7c5f649b8f8 *R/logLik.KW1.R
71e07ae13e7b568a02b05527f0408e96 *R/neighbours.R
f9dc32dccca9ddc2c9ec454b05224666 *R/plot.GK.R
df3e03849cd776f7afa95d1e0f8d8760 *R/plot.KW2.R
da95ef02fdb10ffee52406ba9b287fa1 *R/polycount.R
a49cbe63fa3ce2703666580100e6da6f *R/polyzone.R
c9a74646bcdc601ac2a757e66451127c *R/prcbr.R
30a6dcb3d763d6cc89e1afef4b6df5f8 *R/predict.GK.R
a06884f4f99bfdd976bba01e819a839f *R/predict.KW2.R
d52e87845eef1d984859641ebbe4be65 *R/rcbr.R
2dea5b8f37a0c06ab38e853a202be018 *R/rcbr.fit.GK.R
72239ceeb3bc726d23130c8bb17f841a *R/rcbr.fit.KW1.R
4eb4018c0ffebcf404358866c4729f44 *R/rcbr.fit.KW2.R
e0815632758add8b2409788f66f7121e *R/rcbr.fit.R
35326241aa1f0239afc99fcf44dc2cde *R/witness.R
1dc0cc29fd3354a3d6c0feedde8cad34 *data/Horowitz93.rda
47996ff93c4b502f2b3b3b9a9a762b15 *data/Horowitz93.txt.gz
0c581f570087874bbb619c73af6fe803 *demo/00Index
d610a7469196c5d7e040e88a92d33169 *demo/nonu.R
31f04885824a9615450b11eb3879041f *demo/testGH.R
89ff29ac88536748cfc94105e8ccb44e *inst/ChangeLog
34f20c23a623c088ccb1c89975cdbe1d *man/GH.Rd
3ce707b4601849fd944a69051a5fa4be *man/GH.se.Rd
bd072fb131540dc1ad206174e4a228d9 *man/GK.control.Rd
2e5fc0c9993e0f4a121394e6871dccbb *man/Horowitz93.Rd
aaa93c51428a7e5646497697394e73ef *man/KW.control.Rd
69844a7d7eb99698bbc840cc26b19b18 *man/KWDual.Rd
6fc4dd32fe75537d5cdc6c9fef83b638 *man/NICER.Rd
4f0ceb8d034a64850c2346ad3dd29adb *man/NICERd.Rd
fb6ac0992530a52b704c6352c46ffeb8 *man/bounds.KW2.Rd
b692cfde4c4b6c73256cec8bf5d220a6 *man/neighbours.Rd
e0766123f08b999842f5a8b844cef5c5 *man/plot.GK.Rd
8c338990d2229c23cb7b156516157d99 *man/plot.KW2.Rd
11a92c530e63f9a1e7bade3a064e4aef *man/polycount.Rd
dd3982fbedc1c2c1d737d210a832745d *man/polyzone.Rd
7dc3c7cf7e93224e94530dce98a4c099 *man/prcbr.Rd
df325c3e51880e0a5b9aec91c316dba8 *man/predict.GK.Rd
9a73cf5c3c3a3f2eedb61b5367249d23 *man/predict.KW2.Rd
ad29487d494e9ec6aa841666cd746706 *man/rcbr.Rd
79ed94be6cb1c3e074dddab15dd9d32c *man/rcbr.fit.GK.Rd
98c1807a34b75986c915cba4ee92247e *man/rcbr.fit.KW1.Rd
883d1d5e34a6471ccc7e92ec24788e23 *man/rcbr.fit.KW2.Rd
68e34fdf0bf276340104cabc5d8999ed *man/rcbr.fit.Rd
b4f53f68d3c1d7033f9eb47e23657ddc *man/witness.Rd
40 changes: 40 additions & 0 deletions NAMESPACE
@@ -0,0 +1,40 @@
# Generated by roxygen2: do not edit by hand

S3method(plot,GK)
S3method(plot,KW2)
S3method(predict,GK)
S3method(predict,KW2)
export(GH)
export(GH.se)
export(GK.control)
export(KW.control)
export(KWDual)
export(NICER)
export(NICERd)
export(bounds.KW2)
export(neighbours)
export(polycount)
export(polyzone)
export(prcbr)
export(rcbr)
export(rcbr.fit)
export(rcbr.fit.GK)
export(rcbr.fit.KW1)
export(rcbr.fit.KW2)
export(witness)
import(Matrix)
importFrom(Formula,Formula)
importFrom(REBayes,Cosslett)
importFrom(grDevices,heat.colors)
importFrom(graphics,contour)
importFrom(methods,as)
importFrom(methods,new)
importFrom(stats,logLik)
importFrom(stats,model.matrix)
importFrom(stats,model.response)
importFrom(stats,optim)
importFrom(stats,sd)
importFrom(stats,stepfun)
importFrom(stats,terms)
importFrom(stats,var)
importFrom(utils,combn)
80 changes: 80 additions & 0 deletions R/GH.R
@@ -0,0 +1,80 @@
#' Current Status Linear Regression
#'
#' Groeneboom and Hendrickx semiparametric binary response estimator (scalar case)
#' score estimator based on NPMLE avoids any smoothing
#' proposed by Groneboom and Hendrickx (2018).
#'
#' @param b parameter vector (fix last entry as a known number, usually 1 or -1, for normalization)
#' @param X design matrix
#' @param y binary response vector
#' @param eps trimming tolerance parameter
#' @return A list with components:
#' \itemize{
#' \item evaluation of a score function at parameter value
#' \item estimated standard error
#' \item sindex single index linear predictor
#' }
#' @references
#' Groeneboom, P. and K. Hendrickx (2018) Current Status Linear Regression,
#' Annals of Statistics, 46, 1415-1444,
#' @importFrom REBayes Cosslett
#' @export
GH <- function(b, X, y, eps = 0.001){
# input b is a vector with the last entry fixed (normalization)
sindex = as.numeric(X%*%b)
o = order(sindex)
fstar <- Cosslett(sindex, y, v = sort(sindex))
Fhat = cumsum(fstar$y)
crossprod(X[o,1], (y[o] - Fhat)*(1*(Fhat >= eps & Fhat <= 1-eps)))/nrow(X)
}
#' Current Status Linear Regression Standard Errors
#'
#' Groeneboom and Hendrickx semiparametric binary response estimator (scalar case)
#' score estimator based on NPMLE avoids any smoothing
#' proposed by Groneboom and Hendrickx (2018).
#'
#' @param bstar parameter vector (fix last entry as a known number, usually 1 or -1, for normalization)
#' @param X design matrix
#' @param y binary response vector
#' @param hc kernel bandwidth (used for the standard error estimation)
#' @param eps trimming tolerance parameter
#' @return A list with components:
#' \itemize{
#' \item evaluation of a score function at parameter value
#' \item estimated standard error
#' \item sindex single index linear predictor
#' }
#' @references
#' Groeneboom, P. and K. Hendrickx (2018) Current Status Linear Regression,
#' Annals of Statistics, 46, 1415-1444,
#' @importFrom stats sd
#' @importFrom stats var
#' @importFrom REBayes Cosslett
#' @export
GH.se <- function(bstar, X, y, eps = 0.001, hc = 2){
# covariance involves a density function, approximated by kernel smoothed estimates.
# input b is a vector with the last entry fixed (normalization)
p = ncol(X)
if (p > 2) stop("only scalar case allowed")
dX = X[,1]-mean(X[,1])
sindex = as.numeric(X%*%bstar)
o = order(sindex)
n = length(y)
h = hc * sd(sindex) * n^(-1/7)
K <- function(u){
(1*(abs(u)<=1))*35*(1-u^2)^3/32
}
fstar <- Cosslett(sindex, y, v = sort(sindex))
Fhat <- cumsum(fstar$y)
f <- rep(NA, n)
for (i in 1:n){
Kz <- K((sindex[i] - sort(sindex))/h)/h
f[i] <- crossprod(Kz, fstar$y)
}
fhat = f[o]
A = mean(fhat*(dX[o]^2)*(1*(Fhat >= eps & Fhat <= 1-eps)))
#B = sum(Fhat[-trim] * (1-Fhat[-trim]) * (X[o,2][-trim]-mean(X[-trim,2]))^2)/nrow(X)
B = var(X[o,1] * (y[o] - Fhat)*(1*(Fhat >= eps & Fhat <= 1-eps)))
sqrt(B/(A^2)/nrow(X))
}

17 changes: 17 additions & 0 deletions R/GK.control.R
@@ -0,0 +1,17 @@
#' Control parameters for Gautier-Kitamura bivariate random coefficient binary response
#'
#' These parameters can be passed via the \code{...} argument of the \code{rcbr} function.
#' defaults as suggested in Gautier and Kitamura matlab code
#'
#' @param n the sample size
#' @param u grid values for intercept coordinate
#' @param v grid values for slope coordinate
#' @param T Truncation parameter for numerator must grow "sufficiently slowly with n"
#' @param TX Truncation parameter for denomerator must grow "sufficiently slowly with n"
#' @param Mn Trimming parameter "chosen to go to 0 slowly with n"
#'
#' @return updated list
#' @export
GK.control <- function(n, u = -20:20/10, v = -20:20/10, T = 3, TX = 10, Mn = 1/log(n)^2) {
list(u = u, v = v, T = T, TX = TX, Mn = Mn, tol = .Machine$double.eps^0.5)
}
16 changes: 16 additions & 0 deletions R/Horowitz93.R
@@ -0,0 +1,16 @@
#' Horowitz (1993) Modal Choice Data
#'
#' Modal choice data for journey to work in the Washington DC area
#' from the late 1960's. The variables are:
#' * `DCOST`: difference in cost of car versus transit (transit - car)
#' * `CARS`: number of cars at home
#' * `DOVTT`: difference in out of vehicle time (transit - car)
#' * `DIVTT`: difference in in vehicle time (transit - car)
#' * `DEPEND`: coded 1 if by car, 0 if by mass transit
#'
#' @format A data frame with 842 observations on 5 variables:
#' @source \url{https://www.gams.com/latest/gamslib_ml/libhtml/gamslib_mws.html}
#' @references Horowitz, J L, (1993) Semiparametric estimation of a work-trip mode choice model.
#' Journal of Econometrics, 58, 49-70.
#' @keywords datasets
"Horowitz93"
21 changes: 21 additions & 0 deletions R/KW.control.R
@@ -0,0 +1,21 @@
#' Control parameters for NPMLE of bivariate random coefficient binary response
#'
#' These parameters can be passed via the \code{...} argument of the \code{rcbr} function.
#' The first three arguments are only relevant if full cell enumeration is employed for
#' bivariate version of the NPMLE.
#'
#' @param uv matrix of evaluation points for potential mass points
#' @param u grid of evaluation points for potential mass points
#' @param v grid of evaluation points for potential mass points
#' @param initial initial point for cell enumeration algorithm
#' @param epsbound controls how close witness points can be to vertices of a cell
#' @param epstol zero tolerance for witness solutions
#' @param presolve controls whether Mosek does a presolve of the LP
#' @param verb controls verbosity of Mosek solver 0 implies it is quiet
#'
#' @return updated list
#' @export
KW.control <- function(uv = NULL, u = NULL, v = NULL, initial = c(0,0),
epsbound = 1, epstol = 1e-07, presolve = 1, verb = 0)
list(uv = uv, u = u, v = v, initial = initial, epsbound = epsbound,
epstol = epstol, presolve = presolve, verb = verb)
134 changes: 134 additions & 0 deletions R/KWDual.R
@@ -0,0 +1,134 @@
#' Dual optimization for Kiefer-Wolfowitz problems
#'
#' Interface function for calls to optimizer from various REBayes functions
#' There is currently only one option for the optimization that based on Mosek.
#' It relies on the \pkg{Rmosek} interface to R see installation instructions in
#' the Readme file in the inst directory of this package. This version of the function
#' is intended to work with versions of Mosek after 7.0. A more experimental option
#' employing the \pkg{pogs} package available from \url{https://github.com/foges/pogs}
#' and employing an ADMM (Alternating Direction Method of Multipliers) approach has
#' been deprecated, those interested could try installing version 1.4 of REBayes, and
#' following the instructions provided there.
#'
#' @param A Linear constraint matrix
#' @param d constraint vector
#' @param w weights for \code{x} should sum to one.
#' @param ... other parameters passed to control optimization: These may
#' include \code{rtol} the relative tolerance for dual gap convergence criterion,
#' \code{verb} to control verbosity desired from mosek, \code{verb = 0} is quiet,
#' \code{verb = 5} produces a fairly detailed iteration log,
#' \code{control} is a control list consisting of sublists \code{iparam},
#' \code{dparam}, and \code{sparam}, containing elements of various mosek
#' control parameters. See the Rmosek and Mosek manuals for further details.
#' A prime example is \code{rtol} which should eventually be deprecated and
#' folded into \code{control}, but will persist for a while for compatibility
#' reasons. The default for \code{rtol} is 1e-6, but in some cases it is
#' desirable to tighten this, say to 1e-10. Another example that motivated the introduction of
#' \code{control} would be \code{control = list(iparam = list(num_threads =
#' 1))}, which forces Mosek to use a single threaded process. The default
#' allows Mosek to uses multiple threads (cores) if available, which is
#' generally desirable, but may have unintended (undesirable) consequences when running
#' simulations on clusters.
#' @return Returns a list with components: \item{f}{dual solution vector, the
#' mixing density} \item{g}{primal solution vector, the mixture density
#' evaluated at the data points} \item{logLik}{log likelihood}
#' \item{status}{return status from Mosek}. Mosek termination messages are
#' treated as warnings from an R perspective since solutions producing, for example,
#' MSK_RES_TRM_STALL: The optimizer is terminated due to slow progress, may still
#' provide a satisfactory solution, especially when the return status variable is
#' "optimal".
#' @author R. Koenker
#' @references
#' Koenker, R and I. Mizera, (2013) ``Convex Optimization, Shape Constraints,
#' Compound Decisions, and Empirical Bayes Rules,'' \emph{JASA}, 109, 674--685.
#'
#' Mosek Aps (2015) Users Guide to the R-to-Mosek Optimization Interface,
#' \url{https://docs.mosek.com/8.1/rmosek/index.html}.
#'
#' Koenker, R. and J. Gu, (2017) REBayes: An {R} Package for Empirical Bayes Mixture Methods,
#' \emph{Journal of Statistical Software}, 82, 1--26.
#' @keywords nonparametrics
#' @importFrom methods as new
#' @import Matrix
#' @export
KWDual <- function(A, d, w, ...){
# Dual Kiefer-Wolfowitz MLE for Mixture Problems
#
# This version implements a class of density estimators solving:
#
# min_x {F(x) := sum -log (x_i)} s.t. A' x <= d, 0 <= x,
#
#
# where e.g. A = phi(outer(Y,g,"fun")), with Y data and g a grid on the support of Y,
# and "fun" is some function representing the dependence of the base distribution.
#
#-------------------------------------------------------------------------------------
#
# Roger Koenker
#
# First version:24 Feb 2012
# Revised: 10 Jun 2015 # Simplified signature
# Revised: 2 Jul 2015 # Added pogs method
# Revised 30 Jan 2019 # Removed pogs method, added Mosek V9 option
# Revised 27 Sep 2019 # changed stop to warning for stalled mosek

n <- nrow(A)
m <- ncol(A)
A <- t(A)
A <- Matrix::Matrix(A, sparse = TRUE)

dots <- list(...)

# Default mosek method
rtol <- ifelse(length(dots$rtol), dots$rtol, 1e-6)
verb <- ifelse(length(dots$verb), dots$verb, 0)
if(length(dots$control)) control <- dots$control
else control <- NULL

if(utils::packageVersion("Rmosek") < 9){
P <- list(sense = "min")
P$c <- rep(0, n)
P$A <- A
P$bc <- rbind(rep(0,m),d)
P$bx <- rbind(rep(0,n),rep(Inf,n))
opro <- matrix ( list (), nrow =5, ncol = n)
rownames ( opro ) <- c(" type ","j","f","g","h")

opro[1,] <- as.list(rep('log',n))
opro[2,] <- as.list(1:n)
opro[3,] <- as.list(-w)
opro[4,] <- as.list(rep(1,n))
opro[5,] <- as.list(rep(0,n))
P$scopt<- list(opro = opro)
P$dparam$intpnt_nl_tol_rel_gap <- rtol
}
else { #Mosek Version => 9
P <- list(sense = "min")
A0 <- Matrix::Matrix(0, m, n)
P$c <- c(rep(0,n), -w)
P$A <- cbind(A, A0)
P$bc <- rbind(rep(0, m), d)
P$bx <- rbind(c(rep(0, n), rep(-Inf,n)), rep(Inf, 2*n))
P$F <- Matrix::sparseMatrix(c(seq(1,3*n, by = 3), seq(3, 3*n, by = 3)),
c(1:n, (n+1):(2*n)), x = rep(1,2*n))
P$g <- rep(c(0,1,0), n)
P$cones <- matrix(list("PEXP", 3, NULL), nrow = 3, ncol = n)
rownames(P$cones) <- c("type", "dim", "conepar")
P$dparam$intpnt_co_tol_rel_gap <- rtol
}
if(length(control)){
P$iparam <- control$iparam
P$dparam <- control$dparam
P$sparam <- control$sparam
}
z <- Rmosek::mosek(P, opts = list(verbose = verb))
status <- z$sol$itr$solsta
if (status != "OPTIMAL")
warning(paste("Solution status = ", status))
f <- z$sol$itr$suc
if(min(f) < -rtol)
warning("estimated mixing distribution has some negative values: consider reducing rtol")
else f[f < 0] <- 0
g <- as.vector(t(A) %*% (f * d))
list(f = f, g = g, status = status)
}

0 comments on commit eb91609

Please sign in to comment.