-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtype_convert.R
112 lines (95 loc) · 3.25 KB
/
type_convert.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#' Re-convert character columns in existing data frame
#'
#' This is useful if you need to do some manual munging - you can read the
#' columns in as character, clean it up with (e.g.) regular expressions and
#' then let readr take another stab at parsing it. The name is a homage to
#' the base [utils::type.convert()].
#'
#' @param df A data frame.
#' @param col_types One of `NULL`, a [cols()] specification, or
#' a string. See `vignette("readr")` for more details.
#'
#' If `NULL`, column types will be imputed using all rows.
#' @inheritParams guess_parser
#' @note `type_convert()` removes a 'spec' attribute,
#' because it likely modifies the column data types.
#' (see [spec()] for more information about column specifications).
#' @export
#' @examples
#' df <- data.frame(
#' x = as.character(runif(10)),
#' y = as.character(sample(10)),
#' stringsAsFactors = FALSE
#' )
#' str(df)
#' str(type_convert(df))
#'
#' df <- data.frame(x = c("NA", "10"), stringsAsFactors = FALSE)
#' str(type_convert(df))
type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE,
locale = default_locale(), guess_integer = FALSE) {
stopifnot(is.data.frame(df))
is_character <- vapply(df, is.character, logical(1))
if (!any(is_character)) {
warning("`type_convert()` only converts columns of type 'character'.\n- `df` has no columns of type 'character'", call. = FALSE)
}
char_cols <- df[is_character]
col_types <- keep_character_col_types(df, col_types)
guesses <- lapply(
char_cols,
guess_parser,
locale = locale,
na = na,
guess_integer = guess_integer
)
specs <- col_spec_standardise(
col_types = col_types,
col_names = names(char_cols),
guessed_types = guesses
)
## if (is.null(col_types) && !is_testing()) {
if (is.null(col_types)) {
show_cols_spec(specs)
}
df[is_character] <- lapply(seq_along(char_cols), function(i) {
type_convert_col(char_cols[[i]], specs$cols[[i]], which(is_character)[i],
locale_ = locale, na = na, trim_ws = trim_ws
)
})
attr(df, "spec") <- NULL
df
}
keep_character_col_types <- function(df, col_types) {
if (is.null(col_types)) {
return(col_types)
}
is_character <- vapply(df, is.character, logical(1))
if (is.character(col_types)) {
if (length(col_types) != 1) {
stop("`col_types` must be a single string.", call. = FALSE)
}
if (nchar(col_types) != length(df)) {
stop(
"`df` and `col_types` must have consistent lengths:\n",
" * `df` has length ", length(df), "\n",
" * `col_types` has length ", nchar(col_types),
call. = FALSE
)
}
idx <- which(is_character)
col_types <- paste(substring(col_types, idx, idx), collapse = "")
return(col_types)
}
char_cols <- names(df)[is_character]
col_types$cols <- col_types$cols[names(col_types$cols) %in% char_cols]
col_types
}
#' @rdname parse_guess
#' @param guess_integer If `TRUE`, guess integer types for whole numbers, if
#' `FALSE` guess numeric type for all numbers.
#' @export
guess_parser <- function(x, locale = default_locale(), guess_integer = FALSE, na = c("", "NA")) {
x[x %in% na] <- NA_character_
stopifnot(is.locale(locale))
collectorGuess(x, locale, guessInteger = guess_integer)
}