apache · jonkeane · Jan 13, 2021
diff --git a/r/R/record-batch.R b/r/R/record-batch.R
@@ -46,9 +46,10 @@
 #' - `$column(i)`: Extract an `Array` by integer position from the batch
 #' - `$column_name(i)`: Get a column's name by integer position
 #' - `$names()`: Get all column names (called by `names(batch)`)
+#' - `$RenameColumns(value)`: Set all column names (called by `names(batch) <- value`)
 #' - `$GetColumnByName(name)`: Extract an `Array` by string name
 #' - `$RemoveColumn(i)`: Drops a column from the batch by integer position
-#' - `$selectColumns(indices)`: Return a new record batch with a selection of columns, expressed as 0-based integers.
+#' - `$SelectColumns(indices)`: Return a new record batch with a selection of columns, expressed as 0-based integers.
 #' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the
 #'    indicated integer offset and going for the given length, or to the end
 #'    of the table if `NULL`, the default.

diff --git a/r/R/table.R b/r/R/table.R
@@ -53,6 +53,7 @@
 #'
 #' - `$column(i)`: Extract a `ChunkedArray` by integer position from the table
 #' - `$ColumnNames()`: Get all column names (called by `names(tab)`)
+#' - `$RenameColumns(value)`: Set all column names (called by `names(tab) <- value`)
 #' - `$GetColumnByName(name)`: Extract a `ChunkedArray` by string name
 #' - `$field(i)`: Extract a `Field` from the table schema by integer position
 #' - `$SelectColumns(indices)`: Return new `Table` with specified columns, expressed as 0-based integers.

diff --git a/r/README.md b/r/README.md
@@ -97,7 +97,7 @@ For the R package, you'll need to enable several features in the C++ library
 using `-D` flags:
 
 ```
-cmake
+cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
@@ -106,6 +106,7 @@ cmake
   -DARROW_JSON=ON \
   -DARROW_PARQUET=ON \
   -DCMAKE_BUILD_TYPE=release \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   ..
 ```
 
@@ -125,7 +126,6 @@ If you want to enable support for compression libraries, add some or all of thes
 Other flags that may be useful:
 
 * `-DARROW_EXTRA_ERROR_CONTEXT=ON` makes errors coming from the C++ library point to files and line numbers
-* `-DARROW_INSTALL_NAME_RPATH=OFF` may be needed on macOS if there are problems at link time
 * `-DBOOST_SOURCE=BUNDLED`, for example, or any other dependency `*_SOURCE`, if you have a system version of a C++ dependency that doesn't work correctly with Arrow. This tells the build to compile its own version of the dependency from source.
 
 Note that after any change to the C++ library, you must reinstall it and
@@ -161,8 +161,10 @@ If the package fails to install/load with an error like this:
     unable to load shared object '/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so':
     dlopen(/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so, 6): Library not loaded: @rpath/libarrow.14.dylib
 
-try setting the environment variable `R_LD_LIBRARY_PATH` to wherever
-Arrow C++ was put in `make install`, e.g. `export
+ensure that `-DARROW_INSTALL_NAME_RPATH=OFF` was passed (this is important on 
+macOS to prevent problems at link time and is a no-op on other platforms). 
+Alternatively, try setting the environment variable `R_LD_LIBRARY_PATH` to
+wherever Arrow C++ was put in `make install`, e.g. `export
 R_LD_LIBRARY_PATH=/usr/local/lib`, and retry installing the R package.
 
 When installing from source, if the R and C++ library versions do not

diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
@@ -85,7 +85,7 @@ reference:
 - title: C++ reader/writer interface
   contents:
   - ParquetFileReader
-  - ParquetReaderProperties
+  - ParquetArrowReaderProperties
   - ParquetFileWriter
   - ParquetWriterProperties
   - FeatherReader
@@ -143,10 +143,17 @@ reference:
   - compression
   - Codec
   - codec_is_available
+- title: Computation
+  contents:
+  - match_arrow
 - title: Configuration
   contents:
   - arrow_info
   - cpu_count
   - arrow_available
   - install_arrow
   - install_pyarrow
+
+repo:
+  url:
+    source: https://github.com/apache/arrow/blob/master/r/
diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd
diff --git a/r/man/Table.Rd b/r/man/Table.Rd
diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd
@@ -131,31 +131,31 @@ ds
 ```
 ```{r, echo = FALSE, eval = !file.exists("nyc-taxi")}
 cat("
-## FileSystemDataset with 125 Parquet files
-## vendor_id: string
-## pickup_at: timestamp[us]
-## dropoff_at: timestamp[us]
-## passenger_count: int8
-## trip_distance: float
-## pickup_longitude: float
-## pickup_latitude: float
-## rate_code_id: string
-## store_and_fwd_flag: string
-## dropoff_longitude: float
-## dropoff_latitude: float
-## payment_type: string
-## fare_amount: float
-## extra: float
-## mta_tax: float
-## tip_amount: float
-## tolls_amount: float
-## total_amount: float
-## improvement_surcharge: float
-## pickup_location_id: int32
-## dropoff_location_id: int32
-## congestion_surcharge: float
-## year: int32
-## month: int32
+FileSystemDataset with 125 Parquet files
+vendor_id: string
+pickup_at: timestamp[us]
+dropoff_at: timestamp[us]
+passenger_count: int8
+trip_distance: float
+pickup_longitude: float
+pickup_latitude: float
+rate_code_id: string
+store_and_fwd_flag: string
+dropoff_longitude: float
+dropoff_latitude: float
+payment_type: string
+fare_amount: float
+extra: float
+mta_tax: float
+tip_amount: float
+tolls_amount: float
+total_amount: float
+improvement_surcharge: float
+pickup_location_id: int32
+dropoff_location_id: int32
+congestion_surcharge: float
+year: int32
+month: int32
 
 See $metadata for additional Schema metadata
 ")
@@ -212,22 +212,22 @@ system.time(ds %>%
 
 ```{r, echo = FALSE, eval = !file.exists("nyc-taxi")}
 cat("
-## # A tibble: 10 x 3
-##    passenger_count tip_pct      n
-##              <int>   <dbl>  <int>
-##  1               0    9.84    380
-##  2               1   16.7  143087
-##  3               2   16.6   34418
-##  4               3   14.4    8922
-##  5               4   11.4    4771
-##  6               5   16.7    5806
-##  7               6   16.7    3338
-##  8               7   16.7      11
-##  9               8   16.7      32
-## 10               9   16.7      42
-##
-##    user  system elapsed
-##   4.436   1.012   1.402
+# A tibble: 10 x 3
+   passenger_count tip_pct      n
+             <int>   <dbl>  <int>
+ 1               0    9.84    380
+ 2               1   16.7  143087
+ 3               2   16.6   34418
+ 4               3   14.4    8922
+ 5               4   11.4    4771
+ 6               5   16.7    5806
+ 7               6   16.7    3338
+ 8               7   16.7      11
+ 9               8   16.7      32
+10               9   16.7      42
+
+   user  system elapsed
+  4.436   1.012   1.402
 ")
 ```
 
@@ -246,14 +246,14 @@ ds %>%
 
 ```{r, echo = FALSE, eval = !file.exists("nyc-taxi")}
 cat("
-## FileSystemDataset (query)
-## tip_amount: float
-## total_amount: float
-## passenger_count: int8
-##
-## * Filter: ((total_amount > 100:double) and (year == 2015:double))
-## * Grouped by passenger_count
-## See $.data for the source Arrow object
+FileSystemDataset (query)
+tip_amount: float
+total_amount: float
+passenger_count: int8
+
+* Filter: ((total_amount > 100:double) and (year == 2015:double))
+* Grouped by passenger_count
+See $.data for the source Arrow object
 ")
 ```