From b232a57dd0ba642d1ad87f8a8fa5d397316574ca Mon Sep 17 00:00:00 2001 From: Koji Makiyama Date: Fri, 10 May 2019 18:03:24 +0900 Subject: [PATCH] delegate SQL translations of case_when() and lubridate-family to dbplyr See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations. --- DESCRIPTION | 4 +- NEWS.md | 6 +++ R/translate-sql-base-teradata.R | 40 --------------- R/zzz.R | 9 ---- README.Rmd | 12 +++-- README.md | 64 ++++++++++++------------ tests/testthat/test-translate-teradata.R | 10 +--- 7 files changed, 49 insertions(+), 96 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3047479..010283b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dplyr.teradata Type: Package Title: A 'Teradata' Backend for 'dplyr' -Version: 0.3.1.9000 +Version: 0.3.2 Description: A 'Teradata' backend for 'dplyr'. It makes it possible to operate 'Teradata' database in the same way as manipulating data frames with 'dplyr'. @@ -15,7 +15,7 @@ Encoding: UTF-8 LazyData: true Depends: dplyr (>= 0.8.0), - dbplyr (>= 1.2.0) + dbplyr (>= 1.4.0) Imports: bit64, DBI (>= 0.8), diff --git a/NEWS.md b/NEWS.md index 2411e21..78b375f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# dplyr.teradata 0.3.2 + +## Changes + +- Delegate SQL translations of `case_when()` and **lubridate**-family (e.g. `year()`, `month()`, `day()`) to **dbplyr**. See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations. + # dplyr.teradata 0.3.1 ## Changes diff --git a/R/translate-sql-base-teradata.R b/R/translate-sql-base-teradata.R index 73d2d25..1a52d6e 100644 --- a/R/translate-sql-base-teradata.R +++ b/R/translate-sql-base-teradata.R @@ -1,46 +1,6 @@ #' @import dbplyr NULL -# case when --------------------------------------------------------------- -sql_case_when <- function(...) { - formulas <- list(...) - n <- length(formulas) - if (n == 0) { - stop("No cases provided", call. = FALSE) - } - query <- vector("list", n) - value <- vector("list", n) - for (i in seq_len(n)) { - f <- formulas[[i]] - if (length(f) != 3) { - stop("Case ", i, " (", f, ") is not a two-sided formula", call. = FALSE) - } - query[[i]] <- translate_sql_(list(f[[2]])) - value[[i]] <- translate_sql_(list(f[[3]])) - } - sql <- build_sql("CASE") - for (i in seq_len(n)) { - if (query[[i]] == "TRUE") break - sql <- build_sql(sql, " WHEN ", query[[i]], " THEN ", value[[i]]) - } - if (query[[i]] == "TRUE") { - sql <- build_sql(sql, " ELSE ", value[[i]]) - } - sql <- build_sql(sql, " END") - sql -} - -# extract ----------------------------------------------------------------- -make_extract <- function(target) { - function(date_column) { - build_sql("EXTRACT(", sql(target), " FROM ", date_column, ")") - } -} - -sql_extract <- function(target, date_column) { - make_extract(target)(date_column) -} - # cut --------------------------------------------------------------------- sql_cut <- function(x, breaks = NULL, labels = NULL, include.lowest = FALSE, right = TRUE, dig.lab = 3, diff --git a/R/zzz.R b/R/zzz.R index 9d1a3e4..a3ce7f9 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,13 +1,4 @@ .onAttach <- function(libname, pkgname) { - assign("case_when", sql_case_when, envir = dbplyr::base_odbc_scalar) - assign("extract", sql_extract, envir = dbplyr::base_odbc_scalar) - assign("year", make_extract("YEAR"), envir = dbplyr::base_odbc_scalar) - assign("month", make_extract("MONTH"), envir = dbplyr::base_odbc_scalar) - assign("day", make_extract("DAY"), envir = dbplyr::base_odbc_scalar) - assign("hour", make_extract("HOUR"), envir = dbplyr::base_odbc_scalar) - assign("minute", make_extract("MINUTE"), envir = dbplyr::base_odbc_scalar) - assign("second", make_extract("SECOND"), envir = dbplyr::base_odbc_scalar) - assign("as_date", dbplyr::base_odbc_scalar$as.Date, envir = dbplyr::base_odbc_scalar) assign("cut", sql_cut, envir = dbplyr::base_odbc_scalar) assign("like", sql_like, envir = dbplyr::base_odbc_scalar) assign("to_timestamp", sql_to_timestamp, envir = dbplyr::base_odbc_scalar) diff --git a/README.Rmd b/README.Rmd index 5e6cff1..b41f77d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,9 +1,9 @@ --- output: md_document: - variant: markdown_github + variant: gfm html_document: - keep_md: true + keep_md: false --- @@ -16,9 +16,9 @@ output: knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - fig.path = "README-", - eval = FALSE, - message = FALSE + fig.path = "man/figures/README-", + message = FALSE, + eval = FALSE ) ``` @@ -199,6 +199,8 @@ trans <- function(x) { #### 3.2.1 **lubridate** friendly functions +⚠️ This has been supported by **dbplyr** 1.4.0. See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations. + You might familiar the **lubridate** package to manipulate date and time data. **dplyr.teradata** has **lubridate** friendly functions: diff --git a/README.md b/README.md index 7812b59..2db5e7c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -A Teradata Backend for dplyr -============================ + +# A Teradata Backend for dplyr #### *Koji Makiyama (@hoxo\_m)* @@ -8,13 +8,10 @@ A Teradata Backend for dplyr Status](https://travis-ci.org/hoxo-m/dplyr.teradata.svg?branch=master)](https://travis-ci.org/hoxo-m/dplyr.teradata) [![CRAN Version](http://www.r-pkg.org/badges/version-ago/dplyr.teradata)](https://cran.r-project.org/package=dplyr.teradata) -[![CRAN -Downloads](http://cranlogs.r-pkg.org/badges/dplyr.teradata)](http://cranlogs.r-pkg.org/badges/dplyr.teradata) -[![Coverage + [![Coverage Status](https://img.shields.io/coveralls/hoxo-m/dplyr.teradata.svg)](https://coveralls.io/r/hoxo-m/dplyr.teradata?branch=master) -1 Overview ----------- +## 1 Overview The package provides a Teradata backend for **dplyr**. @@ -57,8 +54,7 @@ df #> 3 2017-01-03 12131415 ``` -2 Installation --------------- +## 2 Installation You can install the **dplyr.teradata** package from CRAN. @@ -66,7 +62,8 @@ You can install the **dplyr.teradata** package from CRAN. install.packages("dplyr.teradata") ``` -You can also install the development version of the package from GitHub. +You can also install the development version of the package from +GitHub. ``` r install.packages("devtools") # if you have not installed "devtools" package @@ -75,10 +72,9 @@ devtools::install_github("hoxo-m/dplyr.teradata") The source code for **dplyr.teradata** package is available on GitHub at -- . + - . -3 Details ---------- +## 3 Details The package provides a Teradata backend for **dplyr**. It makes it possible to build SQL for [Teradata @@ -97,7 +93,7 @@ The package uses the **odbc** package to connect database and the First, you need to establish an ODBC connection to Teradata. See: -- [README - **odbc** + - [README - **odbc** package](https://CRAN.R-project.org/package=odbc/readme/README.html). The **dplyr.teradata** package has special driver function `todbc()`. @@ -111,7 +107,7 @@ con <- dbConnect(todbc(), Second, you need to specify a table to build SQL. See: -- [Introduction to dbplyr • + - [Introduction to dbplyr • dbplyr](http://dbplyr.tidyverse.org/articles/dbplyr.html). To specify a table, you can use `tbl()`: @@ -127,17 +123,19 @@ my_table <- tbl(con, "my_schema_name.my_table_name") Third, you build queries. It can do in the same way as manipulating data frames with **dplyr**: -- [A Grammar of Data Manipulation • + - [A Grammar of Data Manipulation • dplyr](http://dplyr.tidyverse.org/). For example, you can use follows: -- `mutate()` adds new *columns* that are functions of existing + - `mutate()` adds new *columns* that are functions of existing *columns*. -- `select()` picks *columns* based on their names. -- `filter()` picks cases based on their values. -- `summarise()` reduces multiple values down to a single summary. -- `arrange()` changes the ordering of the rows. + - `select()` picks *columns* based on their names. + - `filter()` picks cases based on their values. + - `summarise()` reduces multiple values down to a single summary. + - `arrange()` changes the ordering of the rows. + + ``` r # Build a query @@ -191,7 +189,7 @@ For instance, `n()` is translated to `count(*)` in the above example. To know translatable functions for Teradata, refer the following: -- [Adds Teradata + - [Adds Teradata translation](https://github.com/tidyverse/dbplyr/pull/43) Here, we introduce the special translatable functions that it becomes @@ -199,12 +197,15 @@ available by **dplyr.teradata**. #### 3.2.1 **lubridate** friendly functions +⚠️ This has been supported by **dbplyr** 1.4.0. See +. + You might familiar the **lubridate** package to manipulate date and time data. **dplyr.teradata** has **lubridate** friendly functions: -- `year()`, `month()`, `day()`, `hour()`, `minutes()` and `second()`. + - `year()`, `month()`, `day()`, `hour()`, `minutes()` and `second()`. For example, you can pick year from date type column. @@ -214,7 +215,7 @@ mutate(year = year(date_type_column)) Such as above manipulation is translated into SQL like following: - #> EXTRACT(YEAR FROM `date_type_column`) + #> EXTRACT(year FROM `date_type_column`) #### 3.2.2 Treat Boolean @@ -230,7 +231,8 @@ mutate(is_positive = bool_to_int(x > 0L)) #> CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END -`count_if()` or `n_if()` counts a number of rows satisfying a condition. +`count_if()` or `n_if()` counts a number of rows satisfying a + condition. ``` r summarize(n = count_if(x > 0L)) @@ -249,7 +251,8 @@ to timestamp, you need to write complex SQL. mutate(ts = to_timestamp(unixtime_column)) ``` -Such as above manipulation is translated into SQL like following: +Such as above manipulation is translated into SQL like + following: #> CAST(DATE '1970-01-01' + (`unixtime_column` / 86400) AS TIMESTAMP(0)) + (`unixtime_column` MOD 86400) * (INTERVAL '00:00:01' HOUR TO SECOND) @@ -322,12 +325,11 @@ blob_to_string(x) #> [1] "476f6f64206d6f726e696e67" ``` -4 Related work --------------- +## 4 Related work -- [A ‘dplyr’ Backend for Databases • + - [A ‘dplyr’ Backend for Databases • dbplyr](http://dbplyr.tidyverse.org/) -- [A Teradata backend for + - [A Teradata backend for dplyr](https://github.com/xiaodaigh/teradata.dplyr) -- [Dplyr backends: the ultimate + - [Dplyr backends: the ultimate collection](https://gist.github.com/piccolbo/3d8ac40291f4eaee644b) diff --git a/tests/testthat/test-translate-teradata.R b/tests/testthat/test-translate-teradata.R index 082d964..18bbcb1 100644 --- a/tests/testthat/test-translate-teradata.R +++ b/tests/testthat/test-translate-teradata.R @@ -7,15 +7,7 @@ test_that("custom scalar translated correctly", { } expect_equal(trans(case_when(x == 1L ~ 1L, x == 2L ~ 2L, TRUE ~ 3L)), - sql('CASE WHEN "x" = 1 THEN 1 WHEN "x" = 2 THEN 2 ELSE 3 END')) - expect_equal(trans(extract("YEAR", x)), sql("EXTRACT(YEAR FROM `x`)")) - expect_equal(trans(year(x)), sql("EXTRACT(YEAR FROM `x`)")) - expect_equal(trans(month(x)), sql("EXTRACT(MONTH FROM `x`)")) - expect_equal(trans(day(x)), sql("EXTRACT(DAY FROM `x`)")) - expect_equal(trans(hour(x)), sql("EXTRACT(HOUR FROM `x`)")) - expect_equal(trans(minute(x)), sql("EXTRACT(MINUTE FROM `x`)")) - expect_equal(trans(second(x)), sql("EXTRACT(SECOND FROM `x`)")) - expect_equal(trans(as_date(x)), sql("CAST(`x` AS DATE)")) + sql('CASE\nWHEN (`x` = 1) THEN (1)\nWHEN (`x` = 2) THEN (2)\nELSE (3)\nEND')) expect_equal(trans(cut(x, 1:3)), sql("CASE\n WHEN x > 1 AND x <= 2 THEN '(1,2]'\n WHEN x > 2 AND x <= 3 THEN '(2,3]'\n ELSE NULL\nEND")) expect_equal(trans(like(x, "%pattern_")), sql("`x` LIKE '%pattern_'"))