-
Notifications
You must be signed in to change notification settings - Fork 6
/
export.R
340 lines (300 loc) · 13.4 KB
/
export.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
## Export functions =======
#' Export data to R Data Archive (.rda) (deprecated)
#'
#' This function is deprecated. Please use \code{\link{iso_save}} instead to save collections of isofiles.
#'
#' @inheritParams iso_get_raw_data
#' @param filepath the path (folder and filename) to the export file. The correct file extension is automatically added if not already in the filename, i.e. filename can be provided with or without extension.
#' @family export functions
#' @return returns the iso_files object invisibly for use in pipelines
#' @export
iso_export_to_rda <- function(iso_files, filepath, quiet = default(quiet)) {
# throw deprecation warning
log_warning("'iso_export_to_rda' is deprecated and will call 'iso_save()'. Please call 'iso_save()' directly to avoid this warning.")
# call iso_save
iso_save(iso_files, filepath, quiet)
}
#' Export data to Excel
#'
#' This function exports the passed in iso_files to Excel. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate tabs within the excel file. Use the various \code{include_...} parameters to specify what information to include. Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in the excel export.
#'
#' @inheritParams iso_save
#' @inheritParams iso_get_all_data
#' @param include_method_info deprecated in favor of the more specific include_standards and include_resistors
#' @family export functions
#' @return returns the iso_files object invisibly for use in pipelines
#' @export
iso_export_to_excel <- function(
iso_files, filepath,
include_file_info = everything(), include_raw_data = everything(),
include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info),
include_vendor_data_table = everything(), include_problems = everything(),
with_explicit_units = FALSE,
include_method_info = everything(),
with_ratios = NULL,
quiet = default(quiet)) {
# safety checks
if(!iso_is_object(iso_files)) stop("can only export iso files or lists of iso files", call. = FALSE)
export_iso_files <- iso_as_file_list(iso_files)
filepath <- get_excel_export_filepath(export_iso_files, filepath)
# info message
if (!quiet) {
sprintf("Info: exporting data from %d iso_files into Excel '%s'", length(export_iso_files),
str_replace(filepath, "^\\.(/|\\\\)", "")) %>% message()
}
# include method info message
if (!missing(include_method_info)) {
warning("the 'include_method_info' parameter was deprecated in favor of the more specific 'include_resistors' and 'include_standards' parameters. Please use those directly instead in the future.", immediate. = TRUE, call. = FALSE)
}
# deprecated parameter
if (!missing(with_ratios)) {
warning("the 'with_ratios' parameter is deprecated, please use the column selection parameter 'include_standards' to explicitly include or exclude ratio columns", immediate. = TRUE, call. = FALSE)
}
# get all data
all_data <- iso_get_all_data(
export_iso_files,
include_file_info = !!enexpr(include_file_info),
include_raw_data = !!enexpr(include_raw_data),
include_standards = !!enexpr(include_standards),
include_resistors = !!enexpr(include_resistors),
include_vendor_data_table = !!enexpr(include_vendor_data_table),
include_problems = !!enexpr(include_problems),
with_explicit_units = with_explicit_units,
quiet = FALSE
)
# make excel workbook
wb <- createWorkbook()
# file info
if ("file_info" %in% names(all_data)) {
# note: collapse_list_columns takes care of nested vectors, they get concatenated with ', '
file_info <-
all_data %>% select(.data$file_id, .data$file_info) %>%
unnest(.data$file_info) %>%
collapse_list_columns()
add_excel_sheet(wb, "file info", file_info)
}
# raw data
if ("raw_data" %in% names(all_data)) {
raw_data <- all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data)
add_excel_sheet(wb, "raw data", raw_data)
}
# standards
if ("standards" %in% names(all_data)) {
standards <- all_data %>% select(.data$file_id, standards) %>% unnest(standards)
add_excel_sheet(wb, "standards", standards)
}
# resistors
if ("resistors" %in% names(all_data)) {
resistors <- all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors)
add_excel_sheet(wb, "resistors", resistors)
}
# vendor data table
if ("vendor_data_table" %in% names(all_data)) {
vendor_data <- all_data %>% select(.data$file_id, .data$vendor_data_table) %>%
unnest(.data$vendor_data_table) %>% iso_strip_units()
add_excel_sheet(wb, "vendor data table", vendor_data)
}
# problems
if ("problems" %in% names(all_data)) {
problems <- all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems)
add_excel_sheet(wb, "problems", problems)
}
saveWorkbook(wb, filepath, overwrite = TRUE)
return(invisible(iso_files))
}
# add an excel sheet to a workbook
# @param ... the data frames
# @param dbl_digits how many digits to export for dbls
# @param col_max_width maximum column width
add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = 75) {
# sheet
addWorksheet(wb, sheet_name)
hs <- createStyle(textDecoration = "bold") # header style
# data
sheet_data_sets <- list(...)
start_row <- 1L
for (sheet_data in sheet_data_sets) {
if (ncol(sheet_data) > 0) {
writeData(wb, sheet_name, sheet_data, startRow = start_row, headerStyle = hs)
int_cols <- which(purrr::map_lgl(sheet_data, is.integer))
dbl_cols <- setdiff(which(purrr::map_lgl(sheet_data, is.numeric)), int_cols)
if (dbl_digits < 1) {
int_cols <- c(int_cols, dbl_cols)
dbl_cols <- integer()
}
# integer column formatting
if (length(int_cols) > 0) {
openxlsx::addStyle(
wb, sheet_name, style = createStyle(numFmt = "0"),
rows = (start_row + 1L):(start_row + 1L + nrow(sheet_data)),
cols = int_cols, gridExpand = TRUE)
}
# double column formatting
if (length(dbl_cols) > 0) {
dbl_format <- paste0("0.", paste(rep("0", dbl_digits), collapse = ""))
openxlsx::addStyle(
wb, sheet_name, style = createStyle(numFmt = dbl_format),
rows = (start_row + 1L):(start_row + 1L + nrow(sheet_data)),
cols = dbl_cols, gridExpand = TRUE)
}
# new start row
start_row <- start_row + nrow(sheet_data) + 2L
}
}
# calculate header widths
header_widths <-
sheet_data_sets %>%
# account for bold width
purrr::map(~nchar(names(.x)))
max_n_cols <- purrr::map_int(header_widths, length) %>% max()
# calculate data widths
if (max_n_cols > 0) {
calculate_data_width <- function(x) {
if (is.integer(x)) x <- sprintf("%d", x)
else if (is.numeric(x)) x <- sprintf(paste0("%.", dbl_digits, "f"), x)
else x <- as.character(x)
return(max(c(0, nchar(x)), na.rm = TRUE))
}
data_widths <-
sheet_data_sets %>%
purrr::map(
~dplyr::summarise_all(.x, list(calculate_data_width)) %>%
unlist(use.names = FALSE)
)
max_widths <- purrr::map2(header_widths, data_widths , ~{
widths <- if (is.null(.y)) .x else pmax(.x, .y, 0)
widths <- pmin(col_max_width, widths)
c(widths, rep(0L, times = max_n_cols - length(widths)))
})
col_widths <- do.call(pmax, args = max_widths)
openxlsx::setColWidths(wb, sheet_name, cols = 1:length(col_widths), widths = col_widths)
}
}
#' Export to feather
#'
#' This function exports the passed in iso_files to the Python and R shared feather file format. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate feather files that are saved with the provided \code{filepath_prefix} as prefix. All are only exported if the corresponding \code{include_} parameter is set to \code{TRUE} and only for data types for which this type of data is available and was read (see \code{\link{iso_read_dual_inlet}}, \code{\link{iso_read_continuous_flow}} for details on read parameters). Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in feather output.
#'
#' @inheritParams iso_save
#' @inheritParams iso_export_to_excel
#' @param filepath_prefix what to use as the prefix for the feather file names (e.g. name of the data collection or current date)
#' @family export functions
#' @return returns the iso_files object invisibly for use in pipelines
#' @export
iso_export_to_feather <- function(
iso_files, filepath_prefix,
include_file_info = everything(), include_raw_data = everything(),
include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info),
include_vendor_data_table = everything(), include_problems = everything(),
with_explicit_units = FALSE,
include_method_info = everything(),
quiet = default(quiet)) {
# safety checks
if(!iso_is_object(iso_files)) stop("can only export iso files or lists of iso files", call. = FALSE)
export_iso_files <- iso_as_file_list(iso_files)
filepaths <- get_feather_export_filepaths(export_iso_files, filepath_prefix)
# include method info message
if (!missing(include_method_info)) {
warning("the 'include_method_info' parameter was deprecated in favor of the more specific 'include_resistors' and 'include_standards' parameters. Please use those directly instead in the future.", immediate. = TRUE, call. = FALSE)
}
# info
if (!quiet) {
sprintf("Info: exporting data from %d iso_files into %s files at '%s'", length(iso_as_file_list(iso_files)),
filepaths[['ext']], str_replace(filepaths[['base']], "^\\.(/|\\\\)", "")) %>% message()
}
# get all data
all_data <- iso_get_all_data(
export_iso_files,
include_file_info = !!enexpr(include_file_info),
include_raw_data = !!enexpr(include_raw_data),
include_standards = !!enexpr(include_standards),
include_resistors = !!enexpr(include_resistors),
include_vendor_data_table = !!enexpr(include_vendor_data_table),
include_problems = !!enexpr(include_problems),
with_explicit_units = with_explicit_units,
quiet = FALSE
)
# create feather files in temporary dir
# file info
if ("file_info" %in% names(all_data)) {
# note: collapse_list_columns takes care of nested vectors, they get concatenated with ', '
all_data %>% select(.data$file_id, .data$file_info) %>%
unnest(.data$file_info) %>%
collapse_list_columns() %>%
write_feather(filepaths[['file_info']])
}
# raw data
if ("raw_data" %in% names(all_data)) {
all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) %>%
write_feather(filepaths[['raw_data']])
}
# standards
if ("standards" %in% names(all_data)) {
all_data %>% select(.data$file_id, .data$standards) %>% unnest(.data$standards) %>%
write_feather(filepaths[['method_info_standards']])
}
# resistors
if ("resistors" %in% names(all_data)) {
all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) %>%
write_feather(filepaths[['method_info_resistors']])
}
# vendor data table
if ("vendor_data_table" %in% names(all_data)) {
all_data %>% select(.data$file_id, .data$vendor_data_table) %>%
unnest(.data$vendor_data_table) %>% iso_strip_units() %>%
write_feather(filepaths[['vendor_data_table']])
}
# problems
if ("problems" %in% names(all_data)) {
all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) %>%
write_feather(filepaths[['problems']])
}
return(invisible(iso_files))
}
# utility functions ====
# convenience function for export file paths (extension checks and addition)
get_export_filepath <- function(filepath, ext) {
# file name and folder
if (missing(filepath)) stop("no filepath provided", call. = FALSE)
filename <- basename(filepath)
folder <- dirname(filepath)
if (!file.exists(folder)) stop("the folder '", folder, "' does not exist", call. = FALSE)
if (!is.null(ext))
filename <- filename %>% str_replace(fixed(ext), "") %>% str_c(ext) # to make sure correct extension
return(file.path(folder, filename))
}
# excel export filephat
get_excel_export_filepath <- function(iso_files, filepath) {
if (iso_is_continuous_flow(iso_files))
ext <- ".cf.xlsx"
else if (iso_is_dual_inlet(iso_files))
ext <- ".di.xlsx"
else if (iso_is_scan(iso_files))
ext <- ".scan.xlsx"
else
stop("Excel export of this type of iso_files not yet supported", call. = FALSE)
return(get_export_filepath(filepath, ext))
}
# feather export filepath
get_feather_export_filepaths <- function(iso_files, filepath) {
if (iso_is_continuous_flow(iso_files))
ext <- ".cf.feather"
else if (iso_is_dual_inlet(iso_files))
ext <- ".di.feather"
else if (iso_is_scan(iso_files))
ext <- ".scan.feather"
else
stop("Feather export of this type of iso_files not yet supported", call. = FALSE)
filepath <- get_export_filepath(filepath, NULL)
return(
c(
base = filepath,
ext = ext,
raw_data = str_c(filepath, "_raw_data", ext),
file_info = str_c(filepath, "_file_info", ext),
method_info_standards = str_c(filepath, "_standards", ext),
method_info_resistors = str_c(filepath, "_resistors", ext),
vendor_data_table = str_c(filepath, "_vendor_data_table", ext),
problems = str_c(filepath, "_problems", ext)
)
)
}