delegate SQL translations of case_when() and lubridate-family to dbplyr

See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations.
hoxo-m · May 10, 2019 · b232a57 · b232a57
1 parent 5c5bb77
commit b232a57
Show file tree

Hide file tree

Showing 7 changed files with 49 additions and 96 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: dplyr.teradata
 Type: Package
 Title: A 'Teradata' Backend for 'dplyr'
-Version: 0.3.1.9000
+Version: 0.3.2
 Description: A 'Teradata' backend for 'dplyr'. It makes it possible to operate
   'Teradata' database <https://www.teradata.com/products-and-services/teradata-database/>
   in the same way as manipulating data frames with 'dplyr'.
@@ -15,7 +15,7 @@ Encoding: UTF-8
 LazyData: true
 Depends:
     dplyr (>= 0.8.0),
-    dbplyr (>= 1.2.0)
+    dbplyr (>= 1.4.0)
 Imports:
     bit64,
     DBI (>= 0.8),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# dplyr.teradata 0.3.2
+
+## Changes
+
+- Delegate SQL translations of `case_when()` and **lubridate**-family (e.g. `year()`, `month()`, `day()`) to **dbplyr**. See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations.
+
 # dplyr.teradata 0.3.1
 
 ## Changes

diff --git a/R/translate-sql-base-teradata.R b/R/translate-sql-base-teradata.R
@@ -1,46 +1,6 @@
 #' @import dbplyr
 NULL
 
-# case when ---------------------------------------------------------------
-sql_case_when <- function(...) {
-  formulas <- list(...)
-  n <- length(formulas)
-  if (n == 0) {
-    stop("No cases provided", call. = FALSE)
-  }
-  query <- vector("list", n)
-  value <- vector("list", n)
-  for (i in seq_len(n)) {
-    f <- formulas[[i]]
-    if (length(f) != 3) {
-      stop("Case ", i, " (", f, ") is not a two-sided formula", call. = FALSE)
-    }
-    query[[i]] <- translate_sql_(list(f[[2]]))
-    value[[i]] <- translate_sql_(list(f[[3]]))
-  }
-  sql <- build_sql("CASE")
-  for (i in seq_len(n)) {
-    if (query[[i]] == "TRUE") break
-    sql <- build_sql(sql, " WHEN ", query[[i]], " THEN ", value[[i]])
-  }
-  if (query[[i]] == "TRUE") {
-    sql <- build_sql(sql, " ELSE ", value[[i]])
-  }
-  sql <- build_sql(sql, " END")
-  sql
-}
-
-# extract -----------------------------------------------------------------
-make_extract <- function(target) {
-  function(date_column) {
-    build_sql("EXTRACT(", sql(target), " FROM ", date_column, ")")
-  }
-}
-
-sql_extract <- function(target, date_column) {
-  make_extract(target)(date_column)
-}
-
 # cut ---------------------------------------------------------------------
 sql_cut <- function(x, breaks = NULL, labels = NULL,
                          include.lowest = FALSE, right = TRUE, dig.lab = 3,

diff --git a/R/zzz.R b/R/zzz.R
@@ -1,13 +1,4 @@
 .onAttach <- function(libname, pkgname) {
-  assign("case_when", sql_case_when, envir = dbplyr::base_odbc_scalar)
-  assign("extract", sql_extract, envir = dbplyr::base_odbc_scalar)
-  assign("year", make_extract("YEAR"), envir = dbplyr::base_odbc_scalar)
-  assign("month", make_extract("MONTH"), envir = dbplyr::base_odbc_scalar)
-  assign("day", make_extract("DAY"), envir = dbplyr::base_odbc_scalar)
-  assign("hour", make_extract("HOUR"), envir = dbplyr::base_odbc_scalar)
-  assign("minute", make_extract("MINUTE"), envir = dbplyr::base_odbc_scalar)
-  assign("second", make_extract("SECOND"), envir = dbplyr::base_odbc_scalar)
-  assign("as_date", dbplyr::base_odbc_scalar$as.Date, envir = dbplyr::base_odbc_scalar)
   assign("cut", sql_cut, envir = dbplyr::base_odbc_scalar)
   assign("like", sql_like, envir = dbplyr::base_odbc_scalar)
   assign("to_timestamp", sql_to_timestamp, envir = dbplyr::base_odbc_scalar)

diff --git a/README.Rmd b/README.Rmd
@@ -1,9 +1,9 @@
 ---
 output:
   md_document:
-    variant: markdown_github
+    variant: gfm
   html_document:
-    keep_md: true
+    keep_md: false
 ---
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->
@@ -16,9 +16,9 @@ output:
 knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
-  fig.path = "README-",
-  eval = FALSE,
-  message = FALSE
+  fig.path = "man/figures/README-",
+  message = FALSE,
+  eval = FALSE
 )
 ```
 
@@ -199,6 +199,8 @@ trans <- function(x) {
 
 #### 3.2.1 **lubridate** friendly functions
 
+⚠️ This has been supported by **dbplyr** 1.4.0. See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations.
+
 You might familiar the **lubridate** package to manipulate date and time data.
 
 **dplyr.teradata** has **lubridate** friendly functions:

diff --git a/README.md b/README.md
@@ -1,20 +1,17 @@
 <!-- README.md is generated from README.Rmd. Please edit that file -->
-A Teradata Backend for dplyr
-============================
+
+# A Teradata Backend for dplyr
 
 #### *Koji Makiyama (@hoxo\_m)*
 
 [![Travis-CI Build
 Status](https://travis-ci.org/hoxo-m/dplyr.teradata.svg?branch=master)](https://travis-ci.org/hoxo-m/dplyr.teradata)
 [![CRAN
 Version](http://www.r-pkg.org/badges/version-ago/dplyr.teradata)](https://cran.r-project.org/package=dplyr.teradata)
-[![CRAN
-Downloads](http://cranlogs.r-pkg.org/badges/dplyr.teradata)](http://cranlogs.r-pkg.org/badges/dplyr.teradata)
-[![Coverage
+<http://cranlogs.r-pkg.org/badges/dplyr.teradata> [![Coverage
 Status](https://img.shields.io/coveralls/hoxo-m/dplyr.teradata.svg)](https://coveralls.io/r/hoxo-m/dplyr.teradata?branch=master)
 
-1 Overview
-----------
+## 1 Overview
 
 The package provides a Teradata backend for **dplyr**.
 
@@ -57,16 +54,16 @@ df
 #>  3 2017-01-03 12131415
 ```
 
-2 Installation
---------------
+## 2 Installation
 
 You can install the **dplyr.teradata** package from CRAN.
 
 ``` r
 install.packages("dplyr.teradata")
 ```
 
-You can also install the development version of the package from GitHub.
+You can also install the development version of the package from
+GitHub.
 
 ``` r
 install.packages("devtools") # if you have not installed "devtools" package
@@ -75,10 +72,9 @@ devtools::install_github("hoxo-m/dplyr.teradata")
 
 The source code for **dplyr.teradata** package is available on GitHub at
 
--   <https://github.com/hoxo-m/dplyr.teradata>.
+  - <https://github.com/hoxo-m/dplyr.teradata>.
 
-3 Details
----------
+## 3 Details
 
 The package provides a Teradata backend for **dplyr**. It makes it
 possible to build SQL for [Teradata
@@ -97,7 +93,7 @@ The package uses the **odbc** package to connect database and the
 
 First, you need to establish an ODBC connection to Teradata. See:
 
--   [README - **odbc**
+  - [README - **odbc**
     package](https://CRAN.R-project.org/package=odbc/readme/README.html).
 
 The **dplyr.teradata** package has special driver function `todbc()`.
@@ -111,7 +107,7 @@ con <- dbConnect(todbc(),
 
 Second, you need to specify a table to build SQL. See:
 
--   [Introduction to dbplyr •
+  - [Introduction to dbplyr •
     dbplyr](http://dbplyr.tidyverse.org/articles/dbplyr.html).
 
 To specify a table, you can use `tbl()`:
@@ -127,17 +123,19 @@ my_table <- tbl(con, "my_schema_name.my_table_name")
 Third, you build queries. It can do in the same way as manipulating data
 frames with **dplyr**:
 
--   [A Grammar of Data Manipulation •
+  - [A Grammar of Data Manipulation •
     dplyr](http://dplyr.tidyverse.org/).
 
 For example, you can use follows:
 
--   `mutate()` adds new *columns* that are functions of existing
+  - `mutate()` adds new *columns* that are functions of existing
     *columns*.
--   `select()` picks *columns* based on their names.
--   `filter()` picks cases based on their values.
--   `summarise()` reduces multiple values down to a single summary.
--   `arrange()` changes the ordering of the rows.
+  - `select()` picks *columns* based on their names.
+  - `filter()` picks cases based on their values.
+  - `summarise()` reduces multiple values down to a single summary.
+  - `arrange()` changes the ordering of the rows.
+
+<!-- end list -->
 
 ``` r
 # Build a query
@@ -191,20 +189,23 @@ For instance, `n()` is translated to `count(*)` in the above example.
 
 To know translatable functions for Teradata, refer the following:
 
--   [Adds Teradata
+  - [Adds Teradata
     translation](https://github.com/tidyverse/dbplyr/pull/43)
 
 Here, we introduce the special translatable functions that it becomes
 available by **dplyr.teradata**.
 
 #### 3.2.1 **lubridate** friendly functions
 
+⚠️ This has been supported by **dbplyr** 1.4.0. See
+<https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations>.
+
 You might familiar the **lubridate** package to manipulate date and time
 data.
 
 **dplyr.teradata** has **lubridate** friendly functions:
 
--   `year()`, `month()`, `day()`, `hour()`, `minutes()` and `second()`.
+  - `year()`, `month()`, `day()`, `hour()`, `minutes()` and `second()`.
 
 For example, you can pick year from date type column.
 
@@ -214,7 +215,7 @@ mutate(year = year(date_type_column))
 
 Such as above manipulation is translated into SQL like following:
 
-    #> <SQL> EXTRACT(YEAR FROM `date_type_column`)
+    #> <SQL> EXTRACT(year FROM `date_type_column`)
 
 #### 3.2.2 Treat Boolean
 
@@ -230,7 +231,8 @@ mutate(is_positive = bool_to_int(x > 0L))
 
     #> <SQL> CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END
 
-`count_if()` or `n_if()` counts a number of rows satisfying a condition.
+`count_if()` or `n_if()` counts a number of rows satisfying a
+    condition.
 
 ``` r
 summarize(n = count_if(x > 0L))
@@ -249,7 +251,8 @@ to timestamp, you need to write complex SQL.
 mutate(ts = to_timestamp(unixtime_column))
 ```
 
-Such as above manipulation is translated into SQL like following:
+Such as above manipulation is translated into SQL like
+    following:
 
     #> <SQL> CAST(DATE '1970-01-01' + (`unixtime_column` / 86400) AS TIMESTAMP(0)) + (`unixtime_column` MOD 86400) * (INTERVAL '00:00:01' HOUR TO SECOND)
 
@@ -322,12 +325,11 @@ blob_to_string(x)
 #> [1] "476f6f64206d6f726e696e67"
 ```
 
-4 Related work
---------------
+## 4 Related work
 
--   [A ‘dplyr’ Backend for Databases •
+  - [A ‘dplyr’ Backend for Databases •
     dbplyr](http://dbplyr.tidyverse.org/)
--   [A Teradata backend for
+  - [A Teradata backend for
     dplyr](https://github.com/xiaodaigh/teradata.dplyr)
--   [Dplyr backends: the ultimate
+  - [Dplyr backends: the ultimate
     collection](https://gist.github.com/piccolbo/3d8ac40291f4eaee644b)
diff --git a/tests/testthat/test-translate-teradata.R b/tests/testthat/test-translate-teradata.R
@@ -7,15 +7,7 @@ test_that("custom scalar translated correctly", {
   }
 
   expect_equal(trans(case_when(x == 1L ~ 1L, x == 2L ~ 2L, TRUE ~ 3L)),
-               sql('CASE WHEN "x" = 1 THEN 1 WHEN "x" = 2 THEN 2 ELSE 3 END'))
-  expect_equal(trans(extract("YEAR", x)),   sql("EXTRACT(YEAR FROM `x`)"))
-  expect_equal(trans(year(x)),              sql("EXTRACT(YEAR FROM `x`)"))
-  expect_equal(trans(month(x)),             sql("EXTRACT(MONTH FROM `x`)"))
-  expect_equal(trans(day(x)),               sql("EXTRACT(DAY FROM `x`)"))
-  expect_equal(trans(hour(x)),              sql("EXTRACT(HOUR FROM `x`)"))
-  expect_equal(trans(minute(x)),            sql("EXTRACT(MINUTE FROM `x`)"))
-  expect_equal(trans(second(x)),            sql("EXTRACT(SECOND FROM `x`)"))
-  expect_equal(trans(as_date(x)),           sql("CAST(`x` AS DATE)"))
+               sql('CASE\nWHEN (`x` = 1) THEN (1)\nWHEN (`x` = 2) THEN (2)\nELSE (3)\nEND'))
   expect_equal(trans(cut(x, 1:3)),
                sql("CASE\n WHEN x > 1 AND x <= 2 THEN '(1,2]'\n WHEN x > 2 AND x <= 3 THEN '(2,3]'\n ELSE NULL\nEND"))
   expect_equal(trans(like(x, "%pattern_")), sql("`x` LIKE '%pattern_'"))