From 8e2fa2d9ac12f9c0dfb9715e8b1d9dcd0467125a Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 25 Jun 2019 16:27:20 -0700
Subject: [PATCH 1/5] Some cleanup of pkgdown site prep and DESCRIPTION. Start
 on implementing readr::read_csv arguments

---
 r/DESCRIPTION           |  26 ++++----
 r/NEWS.md               |   3 +
 r/R/csv.R               | 143 ++++++++++++++++++++++++++++++----------
 r/_pkgdown.yml          |  13 +++-
 r/man/read_csv_arrow.Rd |  15 +++--
 5 files changed, 146 insertions(+), 54 deletions(-)
 create mode 100644 r/NEWS.md

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 45edda18a15d..d0009eced89e 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -3,10 +3,10 @@ Title: Integration to 'Apache' 'Arrow'
 Version: 0.13.0.9000
 Authors@R: c(
     person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2444-4226")),
-    person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")),
-    person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")),
     person("Jeroen", "Ooms", email = "jeroen@berkeley.edu", role = c("aut")),
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut")),
+    person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")),
+    person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")),
     person("Apache Arrow", email = "dev@arrow.apache.org", role = c("aut", "cph"))
   )
 Description: 'Apache' 'Arrow' <https://arrow.apache.org/> is a cross-language
@@ -24,27 +24,27 @@ SystemRequirements: C++11
 LinkingTo:
     Rcpp (>= 1.0.1)
 Imports:
-    utils,
-    Rcpp (>= 1.0.1),
-    rlang,
-    purrr,
     assertthat,
-    R6,
-    fs,
     bit64,
-    tidyselect
+    fs,
+    purrr,
+    R6,
+    Rcpp (>= 1.0.1),
+    rlang,
+    tidyselect,
+    utils
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.1
 Suggests:
-    tibble,
     covr,
+    hms,
+    lubridate,
     pkgdown,
     rmarkdown,
     roxygen2,
     testthat,
-    lubridate,
-    vctrs,
-    hms
+    tibble,
+    vctrs
 Collate:
     'enums.R'
     'R6.R'
diff --git a/r/NEWS.md b/r/NEWS.md
new file mode 100644
index 000000000000..bbad0aefcd12
--- /dev/null
+++ b/r/NEWS.md
@@ -0,0 +1,3 @@
+# arrow 0.13.0.9000
+
+Initial CRAN release of `arrow` package
diff --git a/r/R/csv.R b/r/R/csv.R
index 03a4b7de77dc..c494fa27ef98 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -15,6 +15,100 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' Read a CSV or other delimited file with Arrow
+#'
+#' Use arrow::csv::TableReader from [csv_table_reader()]
+#'
+#' @inheritParams csv_table_reader
+#'
+#' @param col_select [tidy selection specification][tidyselect::vars_select] of columns
+#' @param as_tibble Should the [arrow::Table][arrow__Table] be converted to a data frame.
+#'
+#' @return
+#' @export
+read_csv_arrow <- function(file,
+                           delim = ",",
+                           quote = '"',
+                           escape_double = TRUE,
+                           escape_backslash = FALSE,
+                           col_names = TRUE,
+                           # col_types = TRUE,
+                           col_select = NULL,
+                           # na = c("", "NA"),
+                           # quoted_na = TRUE,
+                           skip_empty_rows = TRUE,
+                           skip = 0L,
+                           parse_options = NULL,
+                           convert_options = NULL,
+                           read_options = csv_read_options(),
+                           as_tibble = TRUE) {
+
+  if (is.null(parse_options)) {
+    if (isTRUE(col_names)) {
+      # Add one row to skip, to match arrow's header_rows
+      skip <- skip + 1L
+    }
+    parse_options <- readr_to_csv_parse_options(
+      delim,
+      quote,
+      escape_double,
+      escape_backslash,
+      skip_empty_rows,
+      skip
+    )
+  }
+
+  if (is.null(convert_options)) {
+    # TODO:
+    # * na strings (needs wiring in csv_convert_options)
+    # * col_types (needs wiring in csv_convert_options). Note that we can't do
+    # col_types if col_names is strings because the column type specification
+    # requires a map of name: type, but the CSV reader doesn't handle user-
+    # provided names--they're renamed after the fact.
+    convert_options <- csv_convert_options()
+  }
+
+  reader <- csv_table_reader(
+    file,
+    read_options = read_options,
+    parse_options = parse_options,
+    convert_options = convert_options
+  )
+
+  tab <- reader$Read()$select(!!enquo(col_select))
+  if (is.character(col_names)) {
+    # TODO: Rename `tab`'s columns
+    # See https://github.com/apache/arrow/pull/4557
+  }
+
+  if (isTRUE(as_tibble)) {
+    tab <- as.data.frame(tab)
+  }
+
+  tab
+}
+
+readr_to_csv_parse_options <- function(delim = ",",
+                                       quote = '"',
+                                       escape_double = TRUE,
+                                       escape_backslash = FALSE,
+                                       skip_empty_rows = TRUE,
+                                       skip = 0L) {
+  # This function translates from the readr argument list to the arrow arg names
+  # TODO: validate inputs
+  csv_parse_options(
+    delimiter = delim,
+    quoting = nzchar(quote),
+    quote_char = quote,
+    double_quote = escape_double,
+    escaping = escape_backslash,
+    escape_char = '\\',
+    newlines_in_values = escape_backslash,
+    ignore_empty_lines = skip_empty_rows,
+    header_rows = skip
+  )
+}
+
 #' @include R6.R
 
 `arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
@@ -80,7 +174,22 @@ csv_parse_options <- function(
 #' @param check_utf8 Whether to check UTF8 validity of string columns
 #'
 #' @export
-csv_convert_options <- function(check_utf8 = TRUE){
+csv_convert_options <- function(check_utf8 = TRUE) {
+  # TODO: there are more conversion options available:
+  # // Whether to check UTF8 validity of string columns
+  # bool check_utf8 = true;
+  # // Optional per-column types (disabling type inference on those columns)
+  # std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
+  # // Recognized spellings for null values
+  # std::vector<std::string> null_values;
+  # // Recognized spellings for boolean values
+  # std::vector<std::string> true_values;
+  # std::vector<std::string> false_values;
+  # // Whether string / binary columns can have null values.
+  # // If true, then strings in "null_values" are considered null for string columns.
+  # // If false, then all strings are valid string values.
+  # bool strings_can_be_null = false;
+
   shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize(
     list(
       check_utf8 = check_utf8
@@ -167,35 +276,3 @@ csv_table_reader.default <- function(file,
 ){
   file
 }
-
-#' Read csv file into an arrow::Table
-#'
-#' Use arrow::csv::TableReader from [csv_table_reader()]
-#'
-#' @inheritParams csv_table_reader
-#'
-#' @param col_select [tidy selection specification][tidyselect::vars_select] of columns
-#' @param as_tibble Should the [arrow::Table][arrow__Table] be converted to a data frame.
-#'
-#' @export
-read_csv_arrow <- function(file,
-  read_options = csv_read_options(),
-  parse_options = csv_parse_options(),
-  convert_options = csv_convert_options(),
-  col_select = NULL,
-  as_tibble = TRUE
-  )
-{
-  reader <- csv_table_reader(file,
-    read_options = read_options,
-    parse_options = parse_options,
-    convert_options = convert_options)
-
-  tab <- reader$Read()$select(!!enquo(col_select))
-
-  if (isTRUE(as_tibble)) {
-    tab <- as.data.frame(tab)
-  }
-
-  tab
-}
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 69c02e0e4641..1eadc75c1900 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -39,17 +39,28 @@ navbar:
       text: Reference
       href: reference/index.html
 reference:
+- title: Installation helpers
+  contents:
+  - arrow_available
+  - install_arrow
 - title: Reading and writing files
   contents:
   - read_csv_arrow
+  - read_json_arrow
   - read_feather
   - read_parquet
   - write_arrow
   - write_feather
+  - write_parquet
   - csv_convert_options
   - csv_parse_options
   - csv_read_options
   - csv_table_reader
+  - json_parse_options
+  - json_read_options
+  - parquet_arrow_reader_properties
+  - json_table_reader
+  - parquet_file_reader
 - title: Arrow data containers
   contents:
   - buffer
@@ -92,6 +103,7 @@ reference:
   - arrow__io__RandomAccessFile
   - arrow__io__Readable
   - arrow__io__ReadableFile
+  - arrow__json__TableReader
   - arrow__ipc__Message
   - arrow__ipc__MessageReader
   - arrow__ipc__RecordBatchFileReader
@@ -117,7 +129,6 @@ reference:
   - RecordBatchFileWriter
   - RecordBatchStreamReader
   - RecordBatchStreamWriter
-  - threadpool
   - cast_options
   - compression_codec
   - default_memory_pool
diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_csv_arrow.Rd
index 47e5158488a7..42c9478d8994 100644
--- a/r/man/read_csv_arrow.Rd
+++ b/r/man/read_csv_arrow.Rd
@@ -2,23 +2,24 @@
 % Please edit documentation in R/csv.R
 \name{read_csv_arrow}
 \alias{read_csv_arrow}
-\title{Read csv file into an arrow::Table}
+\title{Read a CSV or other delimited file with Arrow}
 \usage{
-read_csv_arrow(file, read_options = csv_read_options(),
-  parse_options = csv_parse_options(),
-  convert_options = csv_convert_options(), col_select = NULL,
-  as_tibble = TRUE)
+read_csv_arrow(file, delim = ",", quote = "\\"",
+  escape_double = TRUE, escape_backslash = FALSE, col_names = TRUE,
+  col_select = NULL, skip_empty_rows = TRUE, skip = 0L,
+  parse_options = NULL, convert_options = NULL,
+  read_options = csv_read_options(), as_tibble = TRUE)
 }
 \arguments{
 \item{file}{file}
 
-\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
+\item{col_select}{\link[tidyselect:vars_select]{tidy selection specification} of columns}
 
 \item{parse_options}{see \code{\link[=csv_parse_options]{csv_parse_options()}}}
 
 \item{convert_options}{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
 
-\item{col_select}{\link[tidyselect:vars_select]{tidy selection specification} of columns}
+\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
 
 \item{as_tibble}{Should the \link[=arrow__Table]{arrow::Table} be converted to a data frame.}
 }

From fb75af1fa0732530e935e214d355d08d0b3a85aa Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 26 Jun 2019 12:59:24 -0700
Subject: [PATCH 2/5] More docs and tests for csv parse options; skip a few
 that aren't supported

---
 r/DESCRIPTION                     |   2 +-
 r/NEWS.md                         |   6 +-
 r/R/csv.R                         | 121 ++++++++++++++++++++----------
 r/man/arrow-package.Rd            |   1 -
 r/man/csv_parse_options.Rd        |   4 +-
 r/man/csv_table_reader.Rd         |  12 ++-
 r/man/read_csv_arrow.Rd           |  55 +++++++++++---
 r/tests/testthat/test-arrow-csv.R |  85 +++++++++++++++++++--
 8 files changed, 221 insertions(+), 65 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index d0009eced89e..47eccc8834f0 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -16,7 +16,7 @@ Description: 'Apache' 'Arrow' <https://arrow.apache.org/> is a cross-language
     package provides an interface to the Arrow C++ library.
 Depends: R (>= 3.1)
 License: Apache License (>= 2.0)
-URL: https://arrow.apache.org/docs/r/, https://github.com/apache/arrow/
+URL: https://github.com/apache/arrow/
 BugReports: https://issues.apache.org/jira/projects/ARROW/issues
 Encoding: UTF-8
 LazyData: true
diff --git a/r/NEWS.md b/r/NEWS.md
index bbad0aefcd12..123d1d6a73a7 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -1,3 +1,7 @@
 # arrow 0.13.0.9000
 
-Initial CRAN release of `arrow` package
+Initial CRAN release of the `arrow` package. Key features include:
+
+* Read and write support for various file formats, including Parquet, Feather/Arrow, CSV, and JSON.
+* API bindings to the C++ library for Arrow data types and objects, as well as mapping between Arrow types and R data types.
+* Tools for helping with C++ library configuration and installation.
diff --git a/r/R/csv.R b/r/R/csv.R
index c494fa27ef98..7d7913e97261 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -17,32 +17,66 @@
 
 #' Read a CSV or other delimited file with Arrow
 #'
-#' Use arrow::csv::TableReader from [csv_table_reader()]
+#' This function uses the Arrow C++ CSV reader to read into a `data.frame`.
+#' Arrow C++ options have been mapped to argument names that follow those of
+#' [readr::read_delim()], and `col_select` was inspired by [vroom::vroom()].
 #'
-#' @inheritParams csv_table_reader
+#' Note that not all `readr` options are currently implemented here. Please file
+#' an issue if you encounter one that `arrow` should support.
 #'
-#' @param col_select [tidy selection specification][tidyselect::vars_select] of columns
-#' @param as_tibble Should the [arrow::Table][arrow__Table] be converted to a data frame.
+#' If you need to control Arrow-specific reader parameters that don't have an
+#' equivalent in `readr::read_csv()`, you can either provide them in the
+#' `parse_options`, `convert_options`, or `read_options` arguments, or you can
+#' call [csv_table_reader()] directly for lower-level access.
 #'
-#' @return
+#' @param file A character path to a local file, or an Arrow input stream
+#' @param delim Single character used to separate fields within a record.
+#' @param quote Single character used to quote strings.
+#' @param escape_double Does the file escape quotes by doubling them?
+#' i.e. If this option is `TRUE`, the value `""""` represents
+#' a single quote, `\"`.
+#' @param escape_backslash Does the file use backslashes to escape special
+#' characters? This is more general than `escape_double` as backslashes
+#' can be used to escape the delimiter character, the quote character, or
+#' to add special characters like `\\n`.
+# #' @param col_names If `TRUE`, the first row of the input will be used as the
+# #' column names and will not be included in the data frame. Note that `FALSE`
+# #' is not currently supported, nor is specifying a character vector of column
+# #' names.
+#' @param col_select A [tidy selection specification][tidyselect::vars_select]
+#' of columns, as used in `dplyr::select()`.
+#' @param skip_empty_rows Should blank rows be ignored altogether? If
+#' `TRUE`, blank rows will not be represented at all. If `FALSE`, they will be
+#' filled with missings.
+# #' @param skip Number of lines to skip before reading data.
+#' @param parse_options see [csv_parse_options()]. If given, this overrides any
+#' parsing options provided in other arguments (e.g. `delim`, `quote`, etc.).
+#' @param convert_options see [csv_convert_options()]
+#' @param read_options see [csv_read_options()]
+#' @param as_tibble Should the function return a `data.frame` or an
+#' [arrow::Table][arrow__Table]?
+#'
+#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`.
 #' @export
 read_csv_arrow <- function(file,
                            delim = ",",
                            quote = '"',
                            escape_double = TRUE,
                            escape_backslash = FALSE,
-                           col_names = TRUE,
+                           # col_names = TRUE,
                            # col_types = TRUE,
                            col_select = NULL,
                            # na = c("", "NA"),
                            # quoted_na = TRUE,
                            skip_empty_rows = TRUE,
-                           skip = 0L,
+                           # skip = 0L,
                            parse_options = NULL,
                            convert_options = NULL,
                            read_options = csv_read_options(),
                            as_tibble = TRUE) {
 
+  col_names <- TRUE # Hardcoded pending fix
+  skip <- 0L # Hardcoded pending fix
   if (is.null(parse_options)) {
     if (isTRUE(col_names)) {
       # Add one row to skip, to match arrow's header_rows
@@ -88,27 +122,6 @@ read_csv_arrow <- function(file,
   tab
 }
 
-readr_to_csv_parse_options <- function(delim = ",",
-                                       quote = '"',
-                                       escape_double = TRUE,
-                                       escape_backslash = FALSE,
-                                       skip_empty_rows = TRUE,
-                                       skip = 0L) {
-  # This function translates from the readr argument list to the arrow arg names
-  # TODO: validate inputs
-  csv_parse_options(
-    delimiter = delim,
-    quoting = nzchar(quote),
-    quote_char = quote,
-    double_quote = escape_double,
-    escaping = escape_backslash,
-    escape_char = '\\',
-    newlines_in_values = escape_backslash,
-    ignore_empty_lines = skip_empty_rows,
-    header_rows = skip
-  )
-}
-
 #' @include R6.R
 
 `arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
@@ -135,7 +148,29 @@ csv_read_options <- function(block_size = 1048576L) {
   ))
 }
 
-#' Parsing options
+readr_to_csv_parse_options <- function(delim = ",",
+                                       quote = '"',
+                                       escape_double = TRUE,
+                                       escape_backslash = FALSE,
+                                       skip_empty_rows = TRUE,
+                                       skip = 0L) {
+  # This function translates from the readr argument list to the arrow arg names
+  # TODO: validate inputs
+  csv_parse_options(
+    delimiter = delim,
+    quoting = nzchar(quote),
+    quote_char = quote,
+    double_quote = escape_double,
+    escaping = escape_backslash,
+    escape_char = '\\',
+    newlines_in_values = escape_backslash,
+    ignore_empty_lines = skip_empty_rows,
+    header_rows = skip
+  )
+}
+
+#' CSV parsing options
+#'
 #'
 #' @param delimiter Field delimiter
 #' @param quoting Whether quoting is used
@@ -148,12 +183,16 @@ csv_read_options <- function(block_size = 1048576L) {
 #' @param header_rows Number of header rows to skip (including the first row containing column names)
 #'
 #' @export
-csv_parse_options <- function(
-  delimiter = ",", quoting = TRUE, quote_char = '"',
-  double_quote = TRUE, escaping = FALSE, escape_char = '\\',
-  newlines_in_values = FALSE, ignore_empty_lines = TRUE,
-  header_rows = 1L
-){
+csv_parse_options <- function(delimiter = ",",
+                              quoting = TRUE,
+                              quote_char = '"',
+                              double_quote = TRUE,
+                              escaping = FALSE,
+                              escape_char = '\\',
+                              newlines_in_values = FALSE,
+                              ignore_empty_lines = TRUE,
+                              header_rows = 1L) {
+
   shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize(
     list(
       delimiter = delimiter,
@@ -176,8 +215,6 @@ csv_parse_options <- function(
 #' @export
 csv_convert_options <- function(check_utf8 = TRUE) {
   # TODO: there are more conversion options available:
-  # // Whether to check UTF8 validity of string columns
-  # bool check_utf8 = true;
   # // Optional per-column types (disabling type inference on those columns)
   # std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
   # // Recognized spellings for null values
@@ -197,14 +234,20 @@ csv_convert_options <- function(check_utf8 = TRUE) {
   ))
 }
 
-#' CSV table reader
+#' Arrow CSV table reader
+#'
+#' These methods wrap the Arrow C++ CSV table reader.
+#' For an interface to the CSV reader that's more familiar for R users, see
+#' [read_csv_arrow()]
 #'
-#' @param file file
+#' @param file A character path to a local file, or an Arrow input stream
 #' @param read_options, see [csv_read_options()]
 #' @param parse_options, see [csv_parse_options()]
 #' @param convert_options, see [csv_convert_options()]
 #' @param ... additional parameters.
 #'
+#' @return An `arrow::csv::TableReader` R6 object. Call `$Read()` on it to get
+#' an Arrow Table.
 #' @export
 csv_table_reader <- function(file,
   read_options = csv_read_options(),
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index 1f4b5fbcd04e..c3da92dc00f4 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -15,7 +15,6 @@
 \seealso{
 Useful links:
 \itemize{
-  \item \url{https://arrow.apache.org/docs/r/}
   \item \url{https://github.com/apache/arrow/}
   \item Report bugs at \url{https://issues.apache.org/jira/projects/ARROW/issues}
 }
diff --git a/r/man/csv_parse_options.Rd b/r/man/csv_parse_options.Rd
index 7e6ab77d4395..ac9826232b43 100644
--- a/r/man/csv_parse_options.Rd
+++ b/r/man/csv_parse_options.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/csv.R
 \name{csv_parse_options}
 \alias{csv_parse_options}
-\title{Parsing options}
+\title{CSV parsing options}
 \usage{
 csv_parse_options(delimiter = ",", quoting = TRUE,
   quote_char = "\\"", double_quote = TRUE, escaping = FALSE,
@@ -29,5 +29,5 @@ csv_parse_options(delimiter = ",", quoting = TRUE,
 \item{header_rows}{Number of header rows to skip (including the first row containing column names)}
 }
 \description{
-Parsing options
+CSV parsing options
 }
diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd
index 029cd0b5923c..862aefbcd7ac 100644
--- a/r/man/csv_table_reader.Rd
+++ b/r/man/csv_table_reader.Rd
@@ -2,14 +2,14 @@
 % Please edit documentation in R/csv.R
 \name{csv_table_reader}
 \alias{csv_table_reader}
-\title{CSV table reader}
+\title{Arrow CSV table reader}
 \usage{
 csv_table_reader(file, read_options = csv_read_options(),
   parse_options = csv_parse_options(),
   convert_options = csv_convert_options(), ...)
 }
 \arguments{
-\item{file}{file}
+\item{file}{A character path to a local file, or an Arrow input stream}
 
 \item{read_options, }{see \code{\link[=csv_read_options]{csv_read_options()}}}
 
@@ -19,6 +19,12 @@ csv_table_reader(file, read_options = csv_read_options(),
 
 \item{...}{additional parameters.}
 }
+\value{
+An \code{arrow::csv::TableReader} R6 object. Call \code{$Read()} on it to get
+an Arrow Table.
+}
 \description{
-CSV table reader
+These methods wrap the Arrow C++ CSV table reader.
+For an interface to the CSV reader that's more familiar for R users, see
+\code{\link[=read_csv_arrow]{read_csv_arrow()}}
 }
diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_csv_arrow.Rd
index 42c9478d8994..c7315a8903f5 100644
--- a/r/man/read_csv_arrow.Rd
+++ b/r/man/read_csv_arrow.Rd
@@ -5,24 +5,57 @@
 \title{Read a CSV or other delimited file with Arrow}
 \usage{
 read_csv_arrow(file, delim = ",", quote = "\\"",
-  escape_double = TRUE, escape_backslash = FALSE, col_names = TRUE,
-  col_select = NULL, skip_empty_rows = TRUE, skip = 0L,
-  parse_options = NULL, convert_options = NULL,
-  read_options = csv_read_options(), as_tibble = TRUE)
+  escape_double = TRUE, escape_backslash = FALSE, col_select = NULL,
+  skip_empty_rows = TRUE, parse_options = NULL,
+  convert_options = NULL, read_options = csv_read_options(),
+  as_tibble = TRUE)
 }
 \arguments{
-\item{file}{file}
+\item{file}{A character path to a local file, or an Arrow input stream}
 
-\item{col_select}{\link[tidyselect:vars_select]{tidy selection specification} of columns}
+\item{delim}{Single character used to separate fields within a record.}
 
-\item{parse_options}{see \code{\link[=csv_parse_options]{csv_parse_options()}}}
+\item{quote}{Single character used to quote strings.}
 
-\item{convert_options}{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
+\item{escape_double}{Does the file escape quotes by doubling them?
+i.e. If this option is \code{TRUE}, the value \code{""""} represents
+a single quote, \code{\"}.}
 
-\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
+\item{escape_backslash}{Does the file use backslashes to escape special
+characters? This is more general than \code{escape_double} as backslashes
+can be used to escape the delimiter character, the quote character, or
+to add special characters like \code{\\n}.}
 
-\item{as_tibble}{Should the \link[=arrow__Table]{arrow::Table} be converted to a data frame.}
+\item{col_select}{A \link[tidyselect:vars_select]{tidy selection specification}
+of columns, as used in \code{dplyr::select()}.}
+
+\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
+option is \code{TRUE} then blank rows will not be represented at all.}
+
+\item{parse_options, }{see \code{\link[=csv_parse_options]{csv_parse_options()}}. If given, this overrides any
+parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, etc.).}
+
+\item{convert_options, }{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
+
+\item{read_options, }{see \code{\link[=csv_read_options]{csv_read_options()}}}
+
+\item{as_tibble}{Should the function return a \code{data.frame} or an
+\link[=arrow__Table]{arrow::Table}?}
+}
+\value{
+A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}.
 }
 \description{
-Use arrow::csv::TableReader from \code{\link[=csv_table_reader]{csv_table_reader()}}
+This function uses the Arrow C++ CSV reader to read into a \code{data.frame}.
+Arrow C++ options have been mapped to argument names that follow those of
+\code{\link[readr:read_delim]{readr::read_delim()}}, and \code{col_select} was inspired by \code{\link[vroom:vroom]{vroom::vroom()}}.
+}
+\details{
+Note that not all \code{readr} options are currently implemented here. Please file
+an issue if you encounter one that \code{arrow} should support.
+
+If you need to control Arrow-specific reader parameters that don't have an
+equivalent in \code{readr::read_csv()}, you can either provide them in the
+\code{parse_options}, \code{convert_options}, or \code{read_options} arguments, or you can
+call \code{\link[=csv_table_reader]{csv_table_reader()}} directly for lower-level access.
 }
diff --git a/r/tests/testthat/test-arrow-csv.R b/r/tests/testthat/test-arrow-csv.R
index 7f0c1ae497d9..330e17b5a9a7 100644
--- a/r/tests/testthat/test-arrow-csv.R
+++ b/r/tests/testthat/test-arrow-csv.R
@@ -19,8 +19,9 @@ context("arrow::csv::TableReader")
 
 test_that("Can read csv file", {
   tf <- tempfile()
+  on.exit(unlink(tf))
 
-  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+  write.csv(iris, tf, row.names = FALSE)
 
   tab1 <- read_csv_arrow(tf, as_tibble = FALSE)
   tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = FALSE)
@@ -31,14 +32,13 @@ test_that("Can read csv file", {
   expect_equal(tab0, tab1)
   expect_equal(tab0, tab2)
   expect_equal(tab0, tab3)
-
-  unlink(tf)
 })
 
 test_that("read_csv_arrow(as_tibble=TRUE)", {
   tf <- tempfile()
+  on.exit(unlink(tf))
 
-  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+  write.csv(iris, tf, row.names = FALSE)
 
   tab1 <- read_csv_arrow(tf, as_tibble = TRUE)
   tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = TRUE)
@@ -48,12 +48,85 @@ test_that("read_csv_arrow(as_tibble=TRUE)", {
   expect_equivalent(iris, tab1)
   expect_equivalent(iris, tab2)
   expect_equivalent(iris, tab3)
+})
+
+test_that("read_csv_arrow parsing options: delim", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  write.table(iris, tf, sep = "\t", row.names = FALSE)
+  tab1 <- read_csv_arrow(tf, delim = "\t")
+
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+})
 
-  unlink(tf)
+test_that("read_csv_arrow parsing options: quote", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  df <- data.frame(a=c(1, 2), b=c("'abc'", "'def'"))
+  write.table(df, sep=";", tf, row.names = FALSE, quote = FALSE)
+  tab1 <- read_csv_arrow(tf, delim = ";", quote = "'")
+
+  # Is this a problem?
+  # Component “a”: target is integer64, current is numeric
+  tab1$a <- as.numeric(tab1$a)
+  expect_equivalent(
+    tab1,
+    data.frame(a=c(1, 2), b=c("abc", "def"), stringsAsFactors = FALSE)
+  )
 })
 
+test_that("read_csv_arrow parsing options: col_names", {
+  skip("Invalid: Empty CSV file")
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  write.table(iris, tf, sep = ",", row.names = FALSE, col.names = FALSE)
+  tab1 <- read_csv_arrow(tf, col_names = FALSE)
+
+  expect_identical(names(tab1), names(iris))
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+})
+
+test_that("read_csv_arrow parsing options: skip", {
+  skip("Invalid: Empty CSV file")
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  cat("asdf\nqwer\n", file = tf)
+  suppressWarnings(write.table(iris, tf, sep = ",", row.names = FALSE, append = TRUE))
+  # This works:
+  # print(head(readr::read_csv(tf, skip = 2)))
+
+  # This errors:
+  tab1 <- read_csv_arrow(tf, skip = 2)
+
+  expect_identical(names(tab1), names(iris))
+  iris$Species <- as.character(iris$Species)
+  expect_equivalent(iris, tab1)
+})
+
+test_that("read_csv_arrow parsing options: skip_empty_rows", {
+  skip("Invalid: Empty CSV file")
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  write.csv(iris, tf, row.names = FALSE)
+  cat("\n\n", file = tf, append = TRUE) # two trailing blank rows after the data
+
+  tab1 <- read_csv_arrow(tf, skip_empty_rows = FALSE)
+
+  expect_equal(nrow(tab1), nrow(iris) + 2)
+  expect_true(is.na(tail(tab1, 1)[[1]])) # check the parsed result, not the iris fixture
+})
+
+
 test_that("read_csv_arrow() respects col_select", {
   tf <- tempfile()
+  on.exit(unlink(tf))
 
   write.csv(iris, tf, row.names = FALSE, quote = FALSE)
 
@@ -62,6 +135,4 @@ test_that("read_csv_arrow() respects col_select", {
 
   tib <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = TRUE)
   expect_equal(tib, tibble::tibble(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width))
-
-  unlink(tf)
 })

From fc156e3e8f2907bbcb9f30ee1c575b267561fbd5 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 26 Jun 2019 15:00:20 -0700
Subject: [PATCH 3/5] Doc :nailcare:, add read_delim_arrow and read_tsv_arrow

---
 r/NAMESPACE                                   |  2 +
 r/R/csv.R                                     | 93 +++++++++++++++----
 ...{read_csv_arrow.Rd => read_delim_arrow.Rd} | 34 +++++--
 r/tests/testthat/test-arrow-csv.R             | 10 +-
 4 files changed, 108 insertions(+), 31 deletions(-)
 rename r/man/{read_csv_arrow.Rd => read_delim_arrow.Rd} (65%)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index e82b30a4fea2..e4b367d0eaf5 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -162,6 +162,7 @@ export(parquet_arrow_reader_properties)
 export(parquet_file_reader)
 export(read_arrow)
 export(read_csv_arrow)
+export(read_delim_arrow)
 export(read_feather)
 export(read_json_arrow)
 export(read_message)
@@ -169,6 +170,7 @@ export(read_parquet)
 export(read_record_batch)
 export(read_schema)
 export(read_table)
+export(read_tsv_arrow)
 export(record_batch)
 export(schema)
 export(starts_with)
diff --git a/r/R/csv.R b/r/R/csv.R
index 7d7913e97261..8f4370ab7d31 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -17,10 +17,13 @@
 
 #' Read a CSV or other delimited file with Arrow
 #'
-#' This function uses the Arrow C++ CSV reader to read into a `data.frame`.
+#' These functions use the Arrow C++ CSV reader to read into a `data.frame`.
 #' Arrow C++ options have been mapped to argument names that follow those of
 #' [readr::read_delim()], and `col_select` was inspired by [vroom::vroom()].
 #'
+#' `read_csv_arrow()` and `read_tsv_arrow()` are wrappers around
+#' `read_delim_arrow()` that specify a delimiter.
+#'
 #' Note that not all `readr` options are currently implemented here. Please file
 #' an issue if you encounter one that `arrow` should support.
 #'
@@ -58,29 +61,33 @@
 #'
 #' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`.
 #' @export
-read_csv_arrow <- function(file,
-                           delim = ",",
-                           quote = '"',
-                           escape_double = TRUE,
-                           escape_backslash = FALSE,
-                           # col_names = TRUE,
-                           # col_types = TRUE,
-                           col_select = NULL,
-                           # na = c("", "NA"),
-                           # quoted_na = TRUE,
-                           skip_empty_rows = TRUE,
-                           # skip = 0L,
-                           parse_options = NULL,
-                           convert_options = NULL,
-                           read_options = csv_read_options(),
-                           as_tibble = TRUE) {
+read_delim_arrow <- function(file,
+                             delim = ",",
+                             quote = '"',
+                             escape_double = TRUE,
+                             escape_backslash = FALSE,
+                             # col_names = TRUE,
+                             # col_types = TRUE,
+                             col_select = NULL,
+                             # na = c("", "NA"),
+                             # quoted_na = TRUE,
+                             skip_empty_rows = TRUE,
+                             # skip = 0L,
+                             parse_options = NULL,
+                             convert_options = NULL,
+                             read_options = csv_read_options(),
+                             as_tibble = TRUE) {
+
+  # These are hardcoded pending https://issues.apache.org/jira/browse/ARROW-5747
+  col_names <- TRUE
+  skip <- 0L
 
-  col_names <- TRUE # Hardcoded pending fix
-  skip <- 0L # Hardcoded pending fix
   if (is.null(parse_options)) {
     if (isTRUE(col_names)) {
       # Add one row to skip, to match arrow's header_rows
       skip <- skip + 1L
+      # Note that with the hardcoding, header_rows is always 1, which
+      # turns out to be the only value that works meaningfully
     }
     parse_options <- readr_to_csv_parse_options(
       delim,
@@ -122,6 +129,54 @@ read_csv_arrow <- function(file,
   tab
 }
 
+#' @rdname read_delim_arrow
+#' @export
+read_csv_arrow <- function(file,
+                           quote = '"',
+                           escape_double = TRUE,
+                           escape_backslash = FALSE,
+                           # col_names = TRUE,
+                           # col_types = TRUE,
+                           col_select = NULL,
+                           # na = c("", "NA"),
+                           # quoted_na = TRUE,
+                           skip_empty_rows = TRUE,
+                           # skip = 0L,
+                           parse_options = NULL,
+                           convert_options = NULL,
+                           read_options = csv_read_options(),
+                           as_tibble = TRUE) {
+  # Re-issue the caller's call as read_delim_arrow(..., delim = ",")
+  mc <- match.call()
+  mc$delim <- ","
+  mc[[1]] <- read_delim_arrow # function, not name: works if arrow isn't attached
+  eval.parent(mc)
+}
+
+#' @rdname read_delim_arrow
+#' @export
+read_tsv_arrow <- function(file,
+                           quote = '"',
+                           escape_double = TRUE,
+                           escape_backslash = FALSE,
+                           # col_names = TRUE,
+                           # col_types = TRUE,
+                           col_select = NULL,
+                           # na = c("", "NA"),
+                           # quoted_na = TRUE,
+                           skip_empty_rows = TRUE,
+                           # skip = 0L,
+                           parse_options = NULL,
+                           convert_options = NULL,
+                           read_options = csv_read_options(),
+                           as_tibble = TRUE) {
+  # Re-issue the caller's call as read_delim_arrow(..., delim = "\t")
+  mc <- match.call()
+  mc$delim <- "\t"
+  mc[[1]] <- read_delim_arrow # function, not name: works if arrow isn't attached
+  eval.parent(mc)
+}
+
 #' @include R6.R
 
 `arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_delim_arrow.Rd
similarity index 65%
rename from r/man/read_csv_arrow.Rd
rename to r/man/read_delim_arrow.Rd
index c7315a8903f5..e1ca16f0d776 100644
--- a/r/man/read_csv_arrow.Rd
+++ b/r/man/read_delim_arrow.Rd
@@ -1,14 +1,28 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/csv.R
-\name{read_csv_arrow}
+\name{read_delim_arrow}
+\alias{read_delim_arrow}
 \alias{read_csv_arrow}
+\alias{read_tsv_arrow}
 \title{Read a CSV or other delimited file with Arrow}
 \usage{
-read_csv_arrow(file, delim = ",", quote = "\\"",
+read_delim_arrow(file, delim = ",", quote = "\\"",
   escape_double = TRUE, escape_backslash = FALSE, col_select = NULL,
   skip_empty_rows = TRUE, parse_options = NULL,
   convert_options = NULL, read_options = csv_read_options(),
   as_tibble = TRUE)
+
+read_csv_arrow(file, quote = "\\"", escape_double = TRUE,
+  escape_backslash = FALSE, col_select = NULL,
+  skip_empty_rows = TRUE, parse_options = NULL,
+  convert_options = NULL, read_options = csv_read_options(),
+  as_tibble = TRUE)
+
+read_tsv_arrow(file, quote = "\\"", escape_double = TRUE,
+  escape_backslash = FALSE, col_select = NULL,
+  skip_empty_rows = TRUE, parse_options = NULL,
+  convert_options = NULL, read_options = csv_read_options(),
+  as_tibble = TRUE)
 }
 \arguments{
 \item{file}{A character path to a local file, or an Arrow input stream}
@@ -29,15 +43,16 @@ to add special characters like \code{\\n}.}
 \item{col_select}{A \link[tidyselect:vars_select]{tidy selection specification}
 of columns, as used in \code{dplyr::select()}.}
 
-\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
-option is \code{TRUE} then blank rows will not be represented at all.}
+\item{skip_empty_rows}{Should blank rows be ignored altogether? If
+\code{TRUE}, blank rows will not be represented at all. If \code{FALSE}, they will be
+filled with missings.}
 
-\item{parse_options, }{see \code{\link[=csv_parse_options]{csv_parse_options()}}. If given, this overrides any
+\item{parse_options}{see \code{\link[=csv_parse_options]{csv_parse_options()}}. If given, this overrides any
 parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, etc.).}
 
-\item{convert_options, }{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
+\item{convert_options}{see \code{\link[=csv_convert_options]{csv_convert_options()}}}
 
-\item{read_options, }{see \code{\link[=csv_read_options]{csv_read_options()}}}
+\item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}}
 
 \item{as_tibble}{Should the function return a \code{data.frame} or an
 \link[=arrow__Table]{arrow::Table}?}
@@ -46,11 +61,14 @@ parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, et
 A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}.
 }
 \description{
-This function uses the Arrow C++ CSV reader to read into a \code{data.frame}.
+These functions use the Arrow C++ CSV reader to read into a \code{data.frame}.
 Arrow C++ options have been mapped to argument names that follow those of
 \code{\link[readr:read_delim]{readr::read_delim()}}, and \code{col_select} was inspired by \code{\link[vroom:vroom]{vroom::vroom()}}.
 }
 \details{
+\code{read_csv_arrow()} and \code{read_tsv_arrow()} are wrappers around
+\code{read_delim_arrow()} that specify a delimiter.
+
 Note that not all \code{readr} options are currently implemented here. Please file
 an issue if you encounter one that \code{arrow} should support.
 
diff --git a/r/tests/testthat/test-arrow-csv.R b/r/tests/testthat/test-arrow-csv.R
index 330e17b5a9a7..aed96387a829 100644
--- a/r/tests/testthat/test-arrow-csv.R
+++ b/r/tests/testthat/test-arrow-csv.R
@@ -50,24 +50,26 @@ test_that("read_csv_arrow(as_tibble=TRUE)", {
   expect_equivalent(iris, tab3)
 })
 
-test_that("read_csv_arrow parsing options: delim", {
+test_that("read_delim_arrow parsing options: delim", {
   tf <- tempfile()
   on.exit(unlink(tf))
 
   write.table(iris, tf, sep = "\t", row.names = FALSE)
-  tab1 <- read_csv_arrow(tf, delim = "\t")
+  tab1 <- read_tsv_arrow(tf)
+  tab2 <- read_delim_arrow(tf, delim = "\t")
+  expect_equivalent(tab1, tab2)
 
   iris$Species <- as.character(iris$Species)
   expect_equivalent(iris, tab1)
 })
 
-test_that("read_csv_arrow parsing options: quote", {
+test_that("read_delim_arrow parsing options: quote", {
   tf <- tempfile()
   on.exit(unlink(tf))
 
   df <- data.frame(a=c(1, 2), b=c("'abc'", "'def'"))
   write.table(df, sep=";", tf, row.names = FALSE, quote = FALSE)
-  tab1 <- read_csv_arrow(tf, delim = ";", quote = "'")
+  tab1 <- read_delim_arrow(tf, delim = ";", quote = "'")
 
   # Is this a problem?
   # Component “a”: target is integer64, current is numeric

From 22268d960cc2799c30bdd831868dea880dfddd00 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 26 Jun 2019 15:20:11 -0700
Subject: [PATCH 4/5] Rename man topic in pkgdown.yml

---
 r/_pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 1eadc75c1900..648085bb78ac 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -45,7 +45,7 @@ reference:
   - install_arrow
 - title: Reading and writing files
   contents:
-  - read_csv_arrow
+  - read_delim_arrow
   - read_json_arrow
   - read_feather
   - read_parquet

From 92b0a2788c98c7bae23c238438d8846502d55f19 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 27 Jun 2019 09:50:55 -0700
Subject: [PATCH 5/5] :rat:

---
 r/NEWS.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/r/NEWS.md b/r/NEWS.md
index 123d1d6a73a7..fa6b25a472f0 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -1,3 +1,22 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
 # arrow 0.13.0.9000
 
 Initial CRAN release of the `arrow` package. Key features include: