Skip to content

Commit

Permalink
Merge pull request #61 from ipums/ipums-website-bug
Browse files Browse the repository at this point in the history
Fix `ipums_website()` bugs
  • Loading branch information
robe2037 committed Oct 12, 2023
2 parents 187c89b + 78628bb commit dcd298b
Show file tree
Hide file tree
Showing 8 changed files with 581 additions and 223 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ S3method(ipums_var_info,default)
S3method(ipums_var_info,ipums_ddi)
S3method(ipums_var_info,list)
S3method(ipums_var_label,default)
S3method(ipums_website,default)
S3method(ipums_website,character)
S3method(ipums_website,ipums_ddi)
S3method(print,ipums_ddi)
S3method(print,ipums_extract)
Expand Down
23 changes: 8 additions & 15 deletions R/api_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,14 @@
#' @examples
#' ipums_data_collections()
ipums_data_collections <- function() {
tibble::tribble(
~collection_name, ~collection_type, ~code_for_api, ~api_support,
"IPUMS USA", "microdata", "usa", TRUE,
"IPUMS CPS", "microdata", "cps", TRUE,
"IPUMS International", "microdata", "ipumsi", TRUE,
"IPUMS NHGIS", "aggregate data", "nhgis", TRUE,
"IPUMS IHGIS", "aggregate data", "ihgis", FALSE,
"IPUMS AHTUS", "microdata", "ahtus", FALSE,
"IPUMS MTUS", "microdata", "mtus", FALSE,
"IPUMS ATUS", "microdata", "atus", FALSE,
"IPUMS DHS", "microdata", "dhs", FALSE,
"IPUMS Higher Ed", "microdata", "highered", FALSE,
"IPUMS MEPS", "microdata", "meps", FALSE,
"IPUMS NHIS", "microdata", "nhis", FALSE,
"IPUMS PMA", "microdata", "pma", FALSE
purrr::map_dfr(
proj_config(),
~ tibble::tibble(
collection_name = .x$proj_name,
collection_type = .x$collection_type,
code_for_api = .x$code_for_api,
api_support = .x$api_support
)
)
}

Expand Down
329 changes: 249 additions & 80 deletions R/proj_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,99 +26,268 @@
# NHGIS Ex: https://data2.nhgis.org/main (can't get to specific variable...)

# Project specific configurations ------
proj_config <- list()

proj_config[["IPUMS-USA"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://usa.ipums.org/usa-action/variables/", var)
}
)

proj_config[["IPUMS-CPS"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://cps.ipums.org/cps-action/variables/", var)
}
)

proj_config[["IPUMS-INTERNATIONAL"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0(
"https://international.ipums.org/international-action/variables/",
var
#' List IPUMS projects with relevant metadata
#'
#' @description
#' Consolidate general information about all current IPUMS projects, including
#' project names, collection types, API codes, API support, and website URLs.
#'
#' If new IPUMS projects are added, supported by the API, or get updated
#' URLs, these paramteres can be adjusted here.
#'
#' For use in `ipums_data_collections()`, `ipums_website()`, and `ipums_view()`
#'
#' @noRd
proj_config <- function() {
list(
new_proj_config(
"IPUMS USA",
url_name = "usa",
collection_type = "microdata",
api_support = TRUE
),
new_proj_config(
"IPUMS CPS",
url_name = "cps",
collection_type = "microdata",
api_support = TRUE
),
new_proj_config(
"IPUMS International",
url_name = "international",
collection_type = "microdata",
code_for_api = "ipumsi",
api_support = TRUE
),
new_proj_config(
"IPUMS NHGIS",
url_name = "nhgis",
collection_type = "aggregate data",
api_support = TRUE,
has_var_url = FALSE,
home_url = "https://nhgis.org/",
var_url = function(var = NULL) {
"https://data2.nhgis.org/main/"
}
),
new_proj_config(
"IPUMS IHGIS",
url_name = "ihgis",
collection_type = "aggregate data",
has_var_url = FALSE,
home_url = "https://ihgis.ipums.org/",
var_url = function(var = NULL) {
"https://data.ihgis.ipums.org/main"
}
),
new_proj_config(
"IPUMS ATUS",
url_name = "atus",
collection_type = "microdata",
home_url = "https://www.atusdata.org/atus/",
var_url = function(var = "group") {
get_var_url("atusdata", "atus", var = var, ipums_domain = FALSE)
}
),
new_proj_config(
"IPUMS AHTUS",
url_name = "ahtus",
collection_type = "microdata",
home_url = "https://www.ahtusdata.org/ahtus/",
var_url = function(var = "group") {
get_var_url("ahtusdata", "ahtus", var = var, ipums_domain = FALSE)
}
),
new_proj_config(
"IPUMS MTUS",
url_name = "mtus",
collection_type = "microdata",
home_url = "https://www.mtusdata.org/mtus/",
var_url = function(var = "group") {
get_var_url("mtusdata", "mtus", var = var, ipums_domain = FALSE)
}
),
new_proj_config(
"IPUMS DHS",
url_name = "idhs",
collection_type = "microdata",
code_for_api = "dhs",
home_url = "https://www.idhsdata.org/",
var_url = function(var = "group") {
get_var_url("idhsdata", "idhs", var = var, ipums_domain = FALSE)
}
),
new_proj_config(
"IPUMS PMA",
url_name = "pma",
collection_type = "microdata"
),
new_proj_config(
"IPUMS MICS",
url_name = "mics",
collection_type = "microdata"
),
new_proj_config(
"IPUMS NHIS",
url_name = "nhis",
collection_type = "microdata"
),
new_proj_config(
"IPUMS MEPS",
url_name = "meps",
collection_type = "microdata"
),
new_proj_config(
"IPUMS Higher Ed",
url_name = "highered",
collection_type = "microdata"
)
}
)
)
}

# Currently no DDI's for DHS so it is not supported
proj_config[["IPUMS-DHS"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://www.idhsdata.org/idhs-action/variables/", var)
}
)
default_config <- function() {
new_proj_config(
proj_name = "IPUMS",
url_name = NULL,
collection_type = NULL,
code_for_api = NULL,
api_support = NULL,
has_var_url = FALSE,
home_url = "https://www.ipums.org",
var_url = function(var = NULL) {
"https://www.ipums.org"
}
)
}

proj_config[["NHGIS"]] <- list(
var_url = FALSE,
url_function = function(var) {
paste0("https://data2.nhgis.org/main")
}
)
#' Specify the configuration for a new IPUMS project
#'
#' @param proj_name Name of the IPUMS project. Should generally be consistent
#' with the names found in the DDI files for that project, if any.
#' @param url_name Name of the project as used in that project's website URL.
#' For instance, IPUMS International uses `"international"` in its URLs.
#' @param collection_type Either `"microdata"` or `"aggregate data"` indicating
#' the type of data this collection provides.
#' @param code_for_api The name of the project used when interacting with the
#' IPUMS API (for collections that are supported by the API). For instance,
#' `"ipumsi"` is used when submitting extract requests to the API for IPUMS
#' International.
#'
#' For collections that are not yet supported by the API, a similar placeholder
#' value is used.
#' @param api_support Logical indicating whether the collection is supported by
#' the IPUMS API.
#' @param has_var_url Logical indicating whether the collection has
#' variable-specific URLs.
#' @param home_url URL for the project's homepage. If `NULL`, is generated
#' from the provided `url_name`, with the form
#' `"https://{url_name}.ipums.org"`
#' @param var_url Function of `var` that returns a variable-specific URL for
#' the project. For projects that use standard IPUMS URL constructions, use
#' `get_var_url()`. For projects that do not use standard
#' URL constructions, you can write your own function that returns an
#' appropriate URL.
#'
#' @noRd
new_proj_config <- function(proj_name,
url_name,
collection_type,
code_for_api = url_name,
api_support = FALSE,
has_var_url = TRUE,
home_url = NULL,
var_url = NULL) {
list(
proj_name = proj_name,
url_name = url_name,
collection_type = collection_type,
code_for_api = code_for_api,
api_support = api_support,
has_var_url = has_var_url,
home_url = home_url %||% paste0("https://", url_name, ".ipums.org/"),
var_url = var_url %||% function(var = NULL) {
get_var_url(url_name, var = var)
}
)
}

proj_config[["ATUS-X"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://atus.ipums.org/atus-action/variables/", var)
}
)
# Get the configuration for a specified IPUMS project
get_proj_config <- function(proj, default_if_missing = TRUE, verbose = TRUE) {
proj <- get_proj_name(proj)

proj_config[["AHTUS-X"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://ahtus.ipums.org/ahtus-action/variables/", var)
}
)
config <- purrr::flatten(
purrr::keep(
proj_config(),
~ tolower(.x$proj_name) == tolower(proj)
)
)

proj_config[["MTUS-X"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://mtus.ipums.org/mtus-action/variables/", var)
if (rlang::is_empty(config)) {
if (!default_if_missing) {
rlang::abort(c(
"Project not found. Available projects:",
paste0("\"", ipums_data_collections()$collection_name, "\"")
))
} else {
if (verbose) {
rlang::warn("Project not found. Redirecting to IPUMS homepage.")
}
config <- default_config()
}
}
)

proj_config[["NHIS"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://ihis.ipums.org/ihis-action/variables/", var)
}
)
config
}

proj_config[["HIGHER ED"]] <- list(
var_url = TRUE,
url_function = function(var) {
paste0("https://highered.ipums.org/highered-action/variables/", var)
}
)
#' Construct a variable-specific URL for an IPUMS project
#'
#' @param domain_proj Project name as used in the domain of the URL. For
#' instance, IPUMS USA uses `"usa"` (for `"usa.ipums.org"`), while IPUMS ATUS
#' uses `"atusdata"`.
#' @param path_proj Project name as used in the path of the URL. For instance,
#' IPUMS USA uses `"usa"` (for `"/usa-action/variables"`). Defaults to
#' `domain_proj`.
#' @param var Variable to include in the URL. If `NULL`, uses `"group"`, which
#' navigates to the general variable-selection webpage.
#' @param ipums_domain Logical indicating whether to include `"ipums"` in the
#' domain name of the URL. For instance, IPUMS USA uses `"usa.ipums.org"`,
#' while IPUMS ATUS uses `"atusdata.org"`.
#'
#' @noRd
get_var_url <- function(domain_proj,
path_proj = domain_proj,
var = NULL,
ipums_domain = TRUE) {
var <- var %||% "group"

default_config <- list(
var_url = FALSE,
url_function = function(var) {
"https://www.ipums.org"
if (ipums_domain) {
ipums_path <- ".ipums.org/"
} else {
ipums_path <- ".org/"
}
)

get_proj_config <- function(proj) {
out <- proj_config[[toupper(proj)]] # Ignore case
if (is.null(out)) out <- default_config
out
paste0(
"https://", domain_proj, ipums_path,
path_proj, "-action/variables/", var
)
}

# TODO: These project names are somewhat out of date and should be reconciled
# with language in rest of package. Also we should probably support the
# lowercase collection names used in API here. This can be included when we fix
# ipums_website() to be platform agnostic and include tests
all_proj_names <- function() {
names(proj_config)
# Helper to ignore case in project names and allow use of API code names
# instead of full-length project names
get_proj_name <- function(proj) {
collections <- ipums_data_collections()
proj <- tolower(proj)

if (proj %in% collections$code_for_api) {
proj <- collections$collection_name[collections$code_for_api == proj]
} else {
# Included for compatibility with previous project names, which used
# hyphens. Current naming conventions do not use hyphens, though
# old DDI files (and current ones for IPUMS International) may still
# include hyphens.
proj <- fostr_replace_all(proj, "-", " ")
}

toupper(proj)
}
Loading

0 comments on commit dcd298b

Please sign in to comment.