`readr` and `vroom` as a replacement of `read.fwf2` #381

chainsawriot · 2023-09-20T09:31:16Z

Actually, haven depends on readr for 6 years now.

https://github.com/tidyverse/haven/blob/9b3b21b5e9b64867eb53818faa7e9a22480f347d/DESCRIPTION#L25

And readr depends on vroom. We don't need this code to parse fwf files as readr can do this much faster.

Actually, it also raises the question of whether we need datable::fread() and datatable::fwrite() anymore. But let's keep this discussion to fwf.

The text was updated successfully, but these errors were encountered:

chainsawriot · 2023-09-20T10:02:48Z

Just reuse this old code

rio/R/import_methods.R

Lines 85 to 126 in 067a0ae

    
           if (isTRUE(readr)) { 
        
               .check_pkg_availability("readr") 
        
               if (is.null(widths)) { 
        
                   if (!missing(col.names)) { 
        
                       widths <- readr::fwf_empty(file = file, col_names = col.names) 
        
                   } else { 
        
                       widths <- readr::fwf_empty(file = file) 
        
                   } 
        
                   readr::read_fwf(file = file, col_positions = widths, progress = progress, comment = comment, ...) 
        
               } else if (is.numeric(widths)) { 
        
                   if (any(widths < 0)) { 
        
                       if (!"col_types" %in% names(a)) { 
        
                           col_types <- rep("?", length(widths)) 
        
                           col_types[widths < 0] <- "?" 
        
                           col_types <- paste0(col_types, collapse = "") 
        
                       } 
        
                       if (!missing(col.names)) { 
        
                           widths <- readr::fwf_widths(abs(widths), col_names = col.names) 
        
                       } else { 
        
                           widths <- readr::fwf_widths(abs(widths)) 
        
                       } 
        
                       readr::read_fwf(file = file, col_positions = widths, 
        
                                       col_types = col_types, progress = progress, 
        
                                       comment = comment, ...) 
        
                   } else { 
        
                       if (!missing(col.names)) { 
        
                           widths <- readr::fwf_widths(abs(widths), col_names = col.names) 
        
                       } else { 
        
                           widths <- readr::fwf_widths(abs(widths)) 
        
                       } 
        
                       readr::read_fwf(file = file, col_positions = widths, progress = progress, comment = comment, ...) 
        
                   } 
        
               } else if (is.list(widths)) { 
        
                   if (!c("begin", "end") %in% names(widths)) { 
        
                       if (!missing(col.names)) { 
        
                           widths <- readr::fwf_widths(widths, col_names = col.names) 
        
                       } else { 
        
                           widths <- readr::fwf_widths(widths) 
        
                       } 
        
                   } 
        
                   readr::read_fwf(file = file, col_positions = widths, progress = progress, comment = comment, ...) 
        
               }

chainsawriot · 2023-09-26T08:29:32Z

This whole bunch of code is barely tested.

rio/R/export_methods.R

Lines 42 to 98 in 840d27b

    
           .export.rio_fwf <- function(file, x, verbose = getOption("verbose", FALSE), sep = "", row.names = FALSE, quote = FALSE, col.names = FALSE, digits = getOption("digits", 7), ...) { 
        
               dat <- lapply(x, function(col) { 
        
                   if (is.character(col)) { 
        
                       col <- as.numeric(as.factor(col)) 
        
                   } else if (is.factor(col)) { 
        
                       col <- as.integer(col) 
        
                   } 
        
                   if (is.integer(col)) { 
        
                       return(sprintf("%i", col)) 
        
                   } 
        
                   if (is.numeric(col)) { 
        
                       decimals <- strsplit(as.character(col), ".", fixed = TRUE) 
        
                       m1 <- max(nchar(unlist(lapply(decimals, `[`, 1))), na.rm = TRUE) 
        
                       decimals_2 <- unlist(lapply(decimals, `[`, 2)) 
        
                       decimals_2_nchar <- nchar(decimals_2[!is.na(decimals_2)]) 
        
                       if (length(decimals_2_nchar)) { 
        
                           m2 <- max(decimals_2_nchar, na.rm = TRUE) 
        
                       } else { 
        
                           m2 <- 0 
        
                       } 
        
                       if (!is.finite(m2)) { 
        
                           m2 <- digits 
        
                       } 
        
                       return(formatC(sprintf(fmt = paste0("%0.", m2, "f"), col), width = (m1 + m2 + 1))) 
        
                   } else if (is.logical(col)) { 
        
                       return(sprintf("%i", col)) 
        
                   } 
        
               }) 
        
               dat <- do.call(cbind, dat) 
        
               n <- nchar(dat[1, ]) + c(rep(nchar(sep), ncol(dat) - 1), 0) 
        
               col_classes <- vapply(x, class, character(1)) 
        
               col_classes[col_classes == "factor"] <- "integer" 
        
               dict <- cbind.data.frame( 
        
                   variable = names(n), 
        
                   class = col_classes, 
        
                   width = unname(n), 
        
                   columns = paste0(c(1, cumsum(n) + 1)[-length(n)], "-", cumsum(n)), 
        
                   stringsAsFactors = FALSE 
        
               ) 
        
               if (isTRUE(verbose)) { 
        
                   message("Columns:") 
        
                   message(paste0(utils::capture.output(dict), collapse = "\n")) 
        
                   if (sep == "") { 
        
                       message(paste0( 
        
                           "\nRead in with:\n", 
        
                           'import("', file, '",\n', 
        
                           "       widths = c(", paste0(n, collapse = ","), "),\n", 
        
                           '       col.names = c("', paste0(names(n), collapse = '","'), '"),\n', 
        
                           '       colClasses = c("', paste0(col_classes, collapse = '","'), '"))\n' 
        
                       ), domain = NA) 
        
                   } 
        
               } 
        
               .write_as_utf8(paste0("#", utils::capture.output(utils::write.csv(dict, row.names = FALSE, quote = FALSE))), file = file, sep = "\n") 
        
               .docall(utils::write.table, ..., 
        
                       args = list(x = dat, file = file, append = TRUE, row.names = row.names, sep = sep, quote = quote, 
        
                                   col.names = col.names)) 
        
           }

chainsawriot added a commit that referenced this issue Sep 26, 2023

Fix #381

8f09f4f

chainsawriot closed this as completed in 3c15739 Sep 27, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

`readr` and `vroom` as a replacement of `read.fwf2` #381

`readr` and `vroom` as a replacement of `read.fwf2` #381

chainsawriot commented Sep 20, 2023 •

edited

Loading

chainsawriot commented Sep 20, 2023

chainsawriot commented Sep 26, 2023 •

edited

Loading

readr and vroom as a replacement of read.fwf2 #381

readr and vroom as a replacement of read.fwf2 #381

Comments

chainsawriot commented Sep 20, 2023 • edited Loading

chainsawriot commented Sep 20, 2023

chainsawriot commented Sep 26, 2023 • edited Loading

`readr` and `vroom` as a replacement of `read.fwf2` #381

`readr` and `vroom` as a replacement of `read.fwf2` #381

chainsawriot commented Sep 20, 2023 •

edited

Loading

chainsawriot commented Sep 26, 2023 •

edited

Loading