-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit aa964d1
Showing
17 changed files
with
1,785 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Package: jointCalib | ||
Type: Package | ||
Title: A Joint Calibration of Totals and Quantiles | ||
Version: 0.1.0 | ||
Authors@R: | ||
c(person(given = "Maciej", | ||
family = "Beręsewicz", | ||
role = c("aut", "cre"), | ||
email = "maciej.beresewicz@ue.poznan.pl", | ||
comment = c(ORCID = "0000-0002-8281-4301"))) | ||
Description: A small package containing functions to perform a joint calibration of totals and quantiles. The calibration for totals is based on Deville and Särndal (1992) <doi:10.1080/01621459.1992.10475217>, the calibration for quantiles is based on Harms and Duchesne (2006) <https://www150.statcan.gc.ca/n1/en/catalogue/12-001-X20060019255>. The package uses standard calibration via the 'survey', 'sampling' or 'laeken' packages. In addition, entropy balancing via the 'ebal' package and empirical likelihood based on codes from Wu (2005) <https://www150.statcan.gc.ca/n1/pub/12-001-x/2005002/article/9051-eng.pdf> can be used. See the paper by Beręsewicz and Szymkowiak (2023) for details <arXiv:2308.13281>. | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
RdMacros: mathjaxr | ||
Depends: R (>= 3.5.0) | ||
URL: https://github.com/ncn-foreigners/jointCalib, | ||
https://ncn-foreigners.github.io/jointCalib/ | ||
BugReports: https://github.com/ncn-foreigners/jointCalib/issues | ||
RoxygenNote: 7.2.3 | ||
Imports: laeken, sampling, mathjaxr, survey, MASS, ebal | ||
NeedsCompilation: no | ||
Packaged: 2023-09-06 11:40:07 UTC; berenz | ||
Author: Maciej Beręsewicz [aut, cre] (<https://orcid.org/0000-0002-8281-4301>) | ||
Maintainer: Maciej Beręsewicz <maciej.beresewicz@ue.poznan.pl> | ||
Repository: CRAN | ||
Date/Publication: 2023-09-07 08:30:06 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
fbbe99afadea6d9109830a8920fb1968 *DESCRIPTION | ||
b197971c633ff14b3071fbf4d88145db *NAMESPACE | ||
549d963935e251710f36b5a72d3283f0 *R/calib_el.R | ||
81c4bd6b4fc0f032de105a8a5763c859 *R/controls.R | ||
627598dc89c32650bdd2488825976542 *R/joint_calib.R | ||
9242b8b778c5b48495aa337d77a2ff95 *R/joint_calib_create_matrix.R | ||
e7a2d4e989792d5cc42a4fcb8cc85914 *R/methods.R | ||
a7252e11d3b0670b014236e06a3f5ef7 *README.md | ||
99ef76ec5b9583fb2bb39fe77755dbf9 *build/jointCalib.pdf | ||
62ca2f2beebe8ef91134883c48a4316e *build/stage23.rdb | ||
2f673b77995fc83d38014a93d9b41e0c *inst/tinytest/test_create_matrix.R | ||
5e6abf714c16b892d9402fe761e42de8 *inst/tinytest/test_joint_calib.R | ||
6b7b01cf24dfc7b3b35d5dcdd8e278de *man/calib_el.Rd | ||
e71a8b79de9c07fba9c3cdbbafccd646 *man/control_calib.Rd | ||
713ccc40a6ef169421c3390a5f2d297c *man/joint_calib.Rd | ||
1d8fa83949efb8df2981ae1ee6bebdd9 *man/joint_calib_create_matrix.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(print,jointCalib) | ||
export(calib_el) | ||
export(control_calib) | ||
export(joint_calib) | ||
export(joint_calib_create_matrix) | ||
import(mathjaxr) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#' @import mathjaxr | ||
NULL | ||
#' @title An internal function for calibration of weights using empirical likelihood method | ||
#' | ||
#' @author Maciej Beręsewicz based on Wu (2005) and Zhang, Han and Wu (2022) | ||
#' | ||
#' \loadmathjax | ||
#' @description | ||
#' \code{calib_el} performs calibration using empirical likelihood (EL) method. The function is taken from Wu (2005), if algorithm has problem with convergence codes from Zhang, Han and Wu (2022) using \code{constrOptim} is used. | ||
#' | ||
#' In (pseudo) EL the following (pseudo) EL function is maximized | ||
#' | ||
#' \mjsdeqn{\sum_{i \in r} d_i\log(p_i),} | ||
#' | ||
#' under the following constraint | ||
#' | ||
#' \mjsdeqn{\sum_{i \in r} p_i = 1,} | ||
#' | ||
#' with constraints on quantiles (with notation as in Harms and Duchesne (2006)) | ||
#' | ||
#' \mjsdeqn{\sum_{i \in r} p_i(a_{i} - \alpha/N) = 0,} | ||
#' | ||
#' where \mjseqn{a_{i}} is created using \code{joint_calib_create_matrix} function, and possibly means | ||
#' | ||
#' \mjsdeqn{\sum_{i \in r} p_i(x_{i} - \mu_{x}) = 0,} | ||
#' | ||
#' where \mjseqn{\mu_{x}} is known population mean of X. For simplicity of notation we assume only one quantile and one mean is known. This can be generalized to multiple quantiles and means. | ||
#' | ||
#' @param X matrix of variables for calibration of quantiles and totals (first column should be intercept), | ||
#' @param d initial d-weights for calibration (e.g. design-weights), | ||
#' @param totals vector of totals (where 1 element is the population size), | ||
#' @param maxit a numeric value giving the maximum number of iterations, | ||
#' @param tol the desired accuracy for the iterative procedure, | ||
#' @param eps the desired accuracy for computing the Moore-Penrose generalized inverse (see [MASS::ginv()]), | ||
#' @param ... arguments passed to [stats::optim] via [stats::constrOptim]. | ||
#' | ||
#' @references | ||
#' Wu, C. (2005). Algorithms and R codes for the pseudo empirical likelihood method in survey sampling. Survey Methodology, 31(2), 239 (code is taken from \url{https://sas.uwaterloo.ca/~cbwu/Rcodes/LagrangeM2.txt}). | ||
#' | ||
#' Zhang, S., Han, P., and Wu, C. (2023) Calibration Techniques Encompassing Survey Sampling, Missing Data Analysis and Causal Inference. International Statistical Review, 91: 165–192. https://doi.org/10.1111/insr.12518 (code is taken from Supplementary Materials). | ||
#' | ||
#' @return Returns a vector of empirical likelihood g-weights | ||
#' | ||
#' @examples | ||
#' ## generate data based on Haziza and Lesage (2016) | ||
#' set.seed(123) | ||
#' N <- 1000 | ||
#' x <- runif(N, 0, 80) | ||
#' y <- exp(-0.1 + 0.1*x) + rnorm(N, 0, 300) | ||
#' p <- rbinom(N, 1, prob = exp(-0.2 - 0.014*x)) | ||
#' totals_known <- c(N=N, x=sum(x)) | ||
#' df <- data.frame(x, y, p) | ||
#' df_resp <- df[df$p == 1, ] | ||
#' df_resp$d <- N/nrow(df_resp) | ||
#' res <- calib_el(X = model.matrix(~x, df_resp), | ||
#' d = df_resp$d, | ||
#' totals = totals_known) | ||
#' data.frame(known = totals_known, estimated=colSums(res*df_resp$d*model.matrix(~x, df_resp))) | ||
#' | ||
#' @export | ||
calib_el <- function(X, d, totals, maxit=50, tol=1e-8, eps = .Machine$double.eps, ...) { | ||
n_col <- NCOL(X[, -1]) | ||
n_row <- NROW(X) | ||
N <- totals[1] | ||
mu <- totals[-1]/N | ||
U <- X[, -1] - rep(1, n_row) %*% t(mu) | ||
lambda <- numeric(n_col) | ||
dif <- 1 | ||
k <- 0 | ||
while (dif > tol & k <= maxit) { | ||
p <- (1 + U %*% lambda) | ||
D1 <- t(U) %*% as.vector(d / p) | ||
DD <- -t(U) %*% (as.vector(d / p^2) * U) | ||
D2 <- try(MASS::ginv(DD, tol = eps) %*% D1) | ||
if (class(D2)[1] == "try-error") break | ||
dif <- max(abs(D2)) | ||
################################################ | ||
## here we use the same stoping rule as in sampling::calib | ||
## tr <- crossprod(X, d/p) | ||
## dif <- max(abs(tr - totals)/totals) | ||
################################################ | ||
rule <- 1 | ||
while (rule > 0) { | ||
rule <- 0 | ||
if (min(1 + t(lambda - D2) %*% t(U)) <= 0) | ||
rule <- rule + 1 | ||
if (rule > 0) | ||
D2 <- D2 / 2 | ||
} | ||
lambda <- lambda - D2 | ||
|
||
k <- k + 1 | ||
} | ||
if (k >= maxit | class(D2)[1] == "try-error") { | ||
|
||
message("Maximum iteration exceeded or MASS::ginv resulted in error. Changing to stats::constrOptim.") | ||
|
||
ll_fun <- function(d, rho, g_hat) { | ||
-sum(d*log(1 + g_hat %*% rho)) | ||
} | ||
ll_grad <- function(d, rho, g_hat) { | ||
-colSums(d*g_hat / c(1 + g_hat %*% rho)) | ||
} | ||
|
||
rho_hat <- | ||
stats::constrOptim( | ||
theta = rep(0, n_col), | ||
f = ll_fun, | ||
grad = ll_grad, | ||
method = "BFGS", | ||
ui = U, | ||
ci = rep(1 / n_row - 1, n_row), | ||
g_hat = U, | ||
d = d, | ||
control = list(maxit = maxit, ...) | ||
) | ||
lambda <- rho_hat$par | ||
} | ||
|
||
gweight <- as.numeric(1/(1 + U %*% lambda)) | ||
return(gweight) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#' @title control parameters | ||
#' @author Maciej Beręsewicz | ||
#' | ||
#' @description | ||
#' \code{control_calib} is function that contains control parameters for \code{joint_calib_create_matrix} | ||
#' | ||
#' @param interpolation type of interpolation: \code{logit} or \code{linear}, | ||
#' @param logit_const constant for \code{logit} interpolation, | ||
#' @param survey_sparse whether to use sparse matrices via \code{Matrix} package in [survey::grake()] (currently not supported), | ||
#' @param ebal_constraint_tolerance This is the tolerance level used by ebalance to decide if the moments in the reweighted data are equal to the target moments (see [ebal::ebalance()]), | ||
#' @param ebal_print_level Controls the level of printing: 0 (normal printing), 2 (detailed), and 3 (very detailed) (see [ebal::ebalance()]). | ||
#' | ||
#' @return a list with parameters | ||
#' | ||
#' @export | ||
control_calib <- function(interpolation = c("logit", "linear"), | ||
logit_const = -1000, | ||
survey_sparse = FALSE, | ||
ebal_constraint_tolerance=1, | ||
ebal_print_level=0) { | ||
|
||
if (missing(interpolation)) interpolation <- "logit" | ||
|
||
return(list(interpolation = interpolation, | ||
logit_const = logit_const, | ||
survey_sparse = survey_sparse, | ||
ebal_constraint_tolerance = ebal_constraint_tolerance, | ||
ebal_print_level = ebal_print_level)) | ||
} |
Oops, something went wrong.