/
ipumsr-package.R
187 lines (162 loc) · 4.71 KB
/
ipumsr-package.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# This file is part of the ipumsr R package created by IPUMS.
# For copyright and licensing information, see the NOTICE and LICENSE files
# in this project's top-level directory, and also on-line at:
# https://github.com/ipums/ipumsr
# Imports ------------------
# Package-level roxygen block. The "_PACKAGE" sentinel string below is the
# object these tags are attached to; the @importFrom/@import tags declare
# namespace imports that apply to the whole package.
# Note that `@import rlang` imports every rlang export, whereas the
# `@importFrom` tags import single objects only.
#' @keywords internal
#' @importFrom R6 R6Class
#' @importFrom utils packageVersion
#' @importFrom utils tail
#' @importFrom dplyr %>%
#' @import rlang
"_PACKAGE"
# Re-exports ---------------
# Each entry below pairs `@importFrom` with `@export` on a `pkg::fun`
# expression, so the object is imported into ipumsr and exported again under
# the same name.
# --- haven ---
# Import and reexport helpful label functions from haven
#' @importFrom haven as_factor
#' @export
haven::as_factor
#' @importFrom haven zap_labels
#' @export
haven::zap_labels
#' @importFrom haven is.labelled
#' @export
haven::is.labelled
# --- zeallot ---
# Import and reexport the `%<-%` operator from zeallot
#' @importFrom zeallot %<-%
#' @export
zeallot::`%<-%`
# --- readr import diagnostics ---
# Import and reexport `problems()` and `spec()` from readr
#' @importFrom readr problems
#' @export
readr::problems
#' @importFrom readr spec
#' @export
readr::spec
# ---- tidyselect select helpers ----
#' tidyselect selection language in ipumsr
#'
#' @description
#' Slightly modified implementation of tidyselect
#' [selection language][tidyselect::language] in ipumsr.
#'
#' ## Syntax
#' In general, the selection language in ipumsr operates the same as in
#' tidyselect.
#'
#' Where applicable, variables can be selected with:
#'
#' - A character vector of variable names (`c("var1", "var2")`)
#' - A bare vector of variable names (`c(var1, var2)`)
#' - A selection helper from tidyselect (`starts_with("var")`). See below for
#'   a list of helpers.
#'
#' ## Primary differences
#' - tidyselect selection is generally intended for use with column variables
#'   in data.frame-like objects. In contrast, ipumsr allows selection language
#'   syntax in other cases as well (for instance, when selecting files
#'   from within a .zip archive). ipumsr functions will indicate whether they
#'   support the selection language.
#' - Selection with [`where()`][tidyselect::where] is not consistently
#'   supported.
#'
#' ## Selection helpers (from tidyselect)
#'
#' - `var1:var10`: variables lying between `var1` on the left and `var10`
#'   on the right.
#' - `starts_with("a")`: names that start with `"a"`
#' - `ends_with("z")`: names that end with `"z"`
#' - `contains("b")`: names that contain `"b"`
#' - `matches("x.y")`: names that match regular expression `x.y`
#' - `num_range("x", 1:4)`: names following the pattern `x1, x2, ..., x4`
#' - `all_of(vars)`/`any_of(vars)`: matches names stored in the character vector
#'   `vars`. `all_of(vars)` will error if the variables aren't present;
#'   `any_of(vars)` will match just the variables that exist.
#' - `everything()`: all variables
#' - `last_col()`: furthest column to the right
#'
#' Operators for combining those selections:
#'
#' - `!selection`: only variables that don't match `selection`
#' - `selection1 & selection2`: only variables included in both `selection1`
#'   and `selection2`
#' - `selection1 | selection2`: all variables that match either `selection1` or
#'   `selection2`
#'
#' @name selection_language
#'
#' @keywords internal
#'
#' @examples
#' cps_file <- ipums_example("cps_00157.xml")
#'
#' # Load 3 variables by name
#' read_ipums_micro(
#'   cps_file,
#'   vars = c("YEAR", "MONTH", "PERNUM"),
#'   verbose = FALSE
#' )
#'
#' # "Bare" variables are supported
#' read_ipums_micro(
#'   cps_file,
#'   vars = c(YEAR, MONTH, PERNUM),
#'   verbose = FALSE
#' )
#'
#' # Standard tidyselect selectors are also supported
#' read_ipums_micro(cps_file, vars = starts_with("ASEC"), verbose = FALSE)
#'
#' # Selection methods can be combined
#' read_ipums_micro(
#'   cps_file,
#'   vars = c(YEAR, MONTH, contains("INC")),
#'   verbose = FALSE
#' )
#'
#' read_ipums_micro(
#'   cps_file,
#'   vars = starts_with("S") & ends_with("P"),
#'   verbose = FALSE
#' )
#'
#' # Other selection arguments also support this syntax.
#' # For instance, load a particular file based on a tidyselect match:
#' read_nhgis(
#'   ipums_example("nhgis0731_csv.zip"),
#'   file_select = contains("nominal_state"),
#'   verbose = FALSE
#' )
NULL
# Import and reexport the tidyselect selection helpers, so each one is
# exported from ipumsr under its original name.
# NOTE(review): `one_of()` is re-exported below but is not listed among the
# helpers in the `selection_language` topic; tidyselect documents it as
# superseded by `all_of()`/`any_of()` -- confirm whether this export is kept
# only for backward compatibility.
#' @importFrom tidyselect starts_with
#' @export
tidyselect::starts_with
#' @importFrom tidyselect ends_with
#' @export
tidyselect::ends_with
#' @importFrom tidyselect contains
#' @export
tidyselect::contains
#' @importFrom tidyselect matches
#' @export
tidyselect::matches
#' @importFrom tidyselect num_range
#' @export
tidyselect::num_range
#' @importFrom tidyselect one_of
#' @export
tidyselect::one_of
#' @importFrom tidyselect all_of
#' @export
tidyselect::all_of
#' @importFrom tidyselect any_of
#' @export
tidyselect::any_of
#' @importFrom tidyselect everything
#' @export
tidyselect::everything
#' @importFrom tidyselect last_col
#' @export
tidyselect::last_col
# --- Lifecycle ---
# Only `@importFrom` is used here (no `@export`), so `deprecated()` is imported
# into the package namespace but, unlike the entries above, not re-exported.
# NOTE(review): presumably used as a default-argument marker for deprecated
# parameters elsewhere in the package -- confirm against callers.
#' @importFrom lifecycle deprecated
lifecycle::deprecated