/
s3-data.frame.R
158 lines (134 loc) · 5.51 KB
/
s3-data.frame.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
constructors$data.frame <- new.env()
#' Constructive options for class 'data.frame'
#'
#' These options will be used on objects of class 'data.frame'.
#'
#' Depending on `constructor`, we construct the object as follows:
#' * `"data.frame"` (default): Wrap the column definitions in a `data.frame()` call. If some
#' columns are lists or data frames, we wrap the column definitions in `tibble::tibble()`.
#' then use `as.data.frame()`.
#' * `"read.table"` : We build the object using `read.table()` if possible, or fall
#' back to `data.frame()`.
#' * `"next"` : Use the constructor for the next supported class. Call `.class2()`
#' on the object to see in which order the methods will be tried.
#' * `"list"` : Use `list()` and treat the class as a regular attribute.
#'
#' @param constructor String. Name of the function used to construct the object, see Details section.
#' @inheritParams opts_atomic
#' @return An object of class <constructive_options/constructive_options_data.frame>
#' @export
opts_data.frame <- function(constructor = c("data.frame", "read.table", "next", "list"), ...) {
.cstr_combine_errors(
constructor <- .cstr_match_constructor(constructor, "data.frame"),
check_dots_empty()
)
.cstr_options("data.frame", constructor = constructor)
}
#' @export
.cstr_construct.data.frame <- function(x, ...) {
opts <- .cstr_fetch_opts("data.frame", ...)
if (is_corrupted_data.frame(x) || opts$constructor == "next") return(NextMethod())
constructor <- constructors$data.frame[[opts$constructor]]
constructor(x, ...)
}
is_corrupted_data.frame <- function(x) {
if (!is.list(x) || any(sapply(unclass(x), is.null))) return(TRUE)
attrs <- attributes(x)
if (!all(c("names", "class", "row.names") %in% names(attrs))) return(TRUE)
if (!is.character(attrs$names) || length(attrs$names) != length(x)) return(TRUE)
elements_and_row_names_all_have_same_length <-
length(unique(vapply(c(list(attrs$row.names), x), NROW, integer(1)))) == 1
if (!elements_and_row_names_all_have_same_length) return(TRUE)
# this might not really be corruption but data.frame() and read.table()
# can't create columns that don't have a as.data.frame method
# so we fall back on the next class constructor for those
methods_ <- gsub("^as.data.frame.(.*)?\\*?$", "\\1", methods("as.data.frame"))
has_method <- function(x) {
any(class(x) %in% methods_)
}
if (!all(sapply(x, has_method))) return(TRUE)
FALSE
}
constructors$data.frame$list <- function(x, ...) {
.cstr_construct.list(x, ...)
}
constructors$data.frame$read.table <- function(x, ...) {
# Fall back on data.frame constructor if relevant
if (!nrow(x)) {
return(constructors$data.frame$data.frame(x, ...))
}
rn <- attr(x, "row.names")
numeric_row_names_are_not_default <- is.numeric(rn) && !identical(rn, seq_len(nrow(x)))
if (numeric_row_names_are_not_default) {
return(constructors$data.frame$data.frame(x, ...))
}
some_cols_are_not_atomic_vectors <-
any(!vapply(x, function(x) is.atomic(x) && is.vector(x), logical(1)))
if (some_cols_are_not_atomic_vectors) {
return(constructors$data.frame$data.frame(x, ...))
}
some_cols_are_problematic_char <-
any(vapply(x, FUN.VALUE = logical(1), FUN = function(x) {
is.character(x) &&
!any(is.na(suppressWarnings(as.numeric(x)))) &&
!grepl("[\"']", x)
}))
if (some_cols_are_problematic_char) {
return(constructors$data.frame$data.frame(x, ...))
}
# fill a data frame with deparsed values
code_df <- x
code_df[] <- lapply(x, function(x) {
if (is.character(x)) sprintf("'%s'", x) else sapply(x, .cstr_construct)
})
dbl_cols <- sapply(x, is.double)
# make sure double values will be read as double by adding a dot at the end of integerish values
# and align them
code_df[dbl_cols] <- lapply(code_df[dbl_cols], function(col) align_numerics(sub("^(\\d+)$", "\\1.", col)))
# include headers and row names in the table
code_df <- rbind(names(x), code_df)
rn <- rownames(x)
if (is.character(attr(x, "row.names"))) {
code_df <- cbind(c("", sprintf("'%s'", rownames(x))), code_df)
}
code_df[] <- lapply(code_df, format, justify = "right")
# collapse table into code
code <- paste(
c("read.table(header = TRUE, text = \"", do.call(paste, code_df), "\")"),
collapse = if (list(...)$one_liner) "\\n" else "\n"
)
# repair
repair_attributes_data.frame(x, code, ...)
}
align_numerics <- function(x) {
dot_pos <- unlist(gregexpr(".", x, fixed = TRUE))
dot_pos[dot_pos == -1] <- NA
digits <- nchar(x) - dot_pos
digits[is.na(digits)] <- 0
paste0(x, strrep(" ", max(digits) - digits))
}
constructors$data.frame$data.frame <- function(x, ...) {
# Fall back on list constructor if relevant
df_has_list_cols <- any(sapply(x, function(col) is.list(col) && ! inherits(col, "AsIs")))
if (df_has_list_cols) return(.cstr_construct.list(x, ...))
args <- x
# include row.names arg only if necessary
rn <- attr(x, "row.names")
if (!identical(rn, seq_len(nrow(x)))) args <- c(args, list(row.names = rn))
# include check.names arg only if necessary
if (any(!is_syntactic(names(x)))) args <- c(args, list(check.names = FALSE))
# build code recursively
code <- .cstr_apply(args, fun = "data.frame", ...)
# repair
repair_attributes_data.frame(x, code, ...)
}
repair_attributes_data.frame <- function(x, code, ..., pipe = NULL) {
ignore <- "row.names"
if (identical(names(x), character())) ignore <- c(ignore, "names")
.cstr_repair_attributes(
x, code, ...,
pipe = pipe,
ignore = ignore,
idiomatic_class = "data.frame"
)
}