Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve quoting of SQL identifiers #419

Merged
merged 13 commits into from
Jul 29, 2020
18 changes: 13 additions & 5 deletions R/db-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ queries_set_fk_relations <- function(dest, fk_information) {
db_parent_tables,
parent_pk_col
),
~ glue_sql("ALTER TABLE {`..1`} ADD FOREIGN KEY ({`..2`*}) REFERENCES {`..3`} ({`..4`*}) ON DELETE CASCADE ON UPDATE CASCADE", .con = dest)
~ glue_sql("ALTER TABLE {`DBI::SQL(..1)`} ADD FOREIGN KEY ({`..2`*}) REFERENCES {`DBI::SQL(..3)`} ({`..4`*}) ON DELETE CASCADE ON UPDATE CASCADE", .con = dest)
)
} else {
return(character())
Expand All @@ -125,7 +125,7 @@ get_db_table_names <- function(dm) {
}
tibble(
table_name = src_tbls(dm),
remote_name = map_chr(dm_get_tables_impl(dm), list("ops", "x"))
remote_name = map_chr(dm_get_tables_impl(dm), dbplyr::remote_name)
)
}

Expand Down Expand Up @@ -157,10 +157,18 @@ con_from_src_or_con <- function(dest) {
if (is.src(dest)) dest$con else dest
}

repair_table_names_for_db <- function(table_names, temporary) {
repair_table_names_for_db <- function(table_names, temporary, con) {
if (temporary) {
set_names(dbplyr::ident_q(unique_db_table_name(table_names)), table_names)
# FIXME: Better logic for temporary table names
if (is_mssql(con)) {
names <- paste0("#", table_names)
} else {
names <- table_names
}
names <- unique_db_table_name(names)
} else {
set_names(dbplyr::ident_q(table_names), table_names)
names <- table_names
}
names <- set_names(names, table_names)
quote_ids(names, con)
}
76 changes: 57 additions & 19 deletions R/db-interface.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#' @param temporary Boolean variable, if `TRUE`, only temporary tables will be created.
#' These tables will vanish when disconnecting from the database.
#' @param table_names Desired names for the tables on `dest`; the names within the `dm` remain unchanged.
#' Can be `NULL`, a named character vector, a function or a one-sided formula.
#' Can be `NULL`, a named character vector, a function or a one-sided formula.
#'
#' If left `NULL` (default), the names will be determined automatically depending on the `temporary` argument:
#'
Expand All @@ -29,17 +29,25 @@
#'
#' If a function or one-sided formula, `table_names` is converted to a function
#' using [rlang::as_function()].
#' This function is called with the table names of the `dm` object
#' as the only argument, and is expected to return a character vector
#' of the same length.
#' Use `table_names = ~ dbplyr::in_schema("schema_name", .x)`
#' This function is called with the unquoted table names of the `dm` object
#' as the only argument.
#' The output of this function is processed by [DBI::dbQuoteIdentifier()],
#' that result should be a vector of identifiers of the same length
#' as the original table names.
#'
#' Use a variant of
#' `table_names = ~ DBI::SQL(dbplyr::in_schema("schema_name", .x))`
#' to specify the same schema for all tables.
#' Use `table_names = identity` with `temporary = TRUE`
#' to avoid giving temporary tables unique names.
#'
#' If a named character vector,
#' the names of this vector need to correspond to the table names in the `dm`,
#' and its values are the desired names on `dest`.
#' The value is processed by [DBI::dbQuoteIdentifier()],
#' that result should be a vector of identifiers of the same length
#' as the original table names.
#'
#' Use qualified names corresponding to your database's syntax
#' to specify e.g. database and schema for your tables.
#' @param ... Passed on to [dplyr::copy_to()], which is used on each table.
Expand Down Expand Up @@ -109,31 +117,61 @@ copy_dm_to <- function(dest, dm, ...,
}
}

# in case `table_names` was chosen by the user, check if the input makes sense:
# 1. is there one name per dm-table?
# 2. are there any duplicated table names?
# 3. is it a named character or ident_q vector with the correct names?
if (is_null(table_names)) {
table_names <- repair_table_names_for_db(src_tbls(dm), temporary)
dest <- src_from_src_or_con(dest)
src_names <- src_tbls(dm)

if (is_db(dest)) {
dest_con <- con_from_src_or_con(dest)

# in case `table_names` was chosen by the user, check if the input makes sense:
# 1. is there one name per dm-table?
# 2. are there any duplicated table names?
# 3. is it a named character or ident_q vector with the correct names?
if (is.null(table_names)) {
table_names_out <- repair_table_names_for_db(src_names, temporary, dest_con)

# https://github.com/tidyverse/dbplyr/issues/487
if (is_mssql(dest)) {
temporary <- FALSE
}
} else {
if (is_function(table_names) || is_bare_formula(table_names)) {
table_name_fun <- as_function(table_names)
table_names_out <- set_names(table_name_fun(src_names), src_names)
} else {
table_names_out <- table_names
}
check_naming(names(table_names_out), src_names)

table_names_out <- unclass(DBI::dbQuoteIdentifier(dest_con, table_names_out[src_names]))
# names(table_names_out) <- src_names
}

# create `ident`-class objects from the table names
table_names_out <- map(table_names_out, dbplyr::ident_q)
} else {
if (is_function(table_names) || is_bare_formula(table_names)) {
table_name_fun <- as_function(table_names)
table_names <- set_names(table_name_fun(src_tbls(dm)), src_tbls(dm))
# FIXME: Other data sources than local and database possible
if (!is.null(table_names)) {
lifecycle::deprecate_soft(
"0.1.6", "copy_dm_to(table_names = 'must be NULL if copying to a local source')"
)
}
check_naming(names(table_names), src_tbls(dm))
# add the schema and create an `ident`-class object from the table names
table_names <- dbplyr::ident_q(table_names[src_tbls(dm)])
table_names_out <- set_names(src_names)
}

check_not_zoomed(dm)

# FIXME: if same_src(), can use compute() but need to set NOT NULL
# constraints

dest <- src_from_src_or_con(dest)
dm <- collect(dm)

copy_data <- build_copy_data(dm, dest, table_names)
# Shortcut necessary to avoid copying into .GlobalEnv
if (!is_db(dest)) {
return(dm)
}

copy_data <- build_copy_data(dm, dest, table_names_out)

new_tables <- copy_list_of_tables_to(
dest,
Expand Down
18 changes: 14 additions & 4 deletions R/dm-from-src.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dm_from_src <- function(src = NULL, table_names = NULL, learn_keys = NULL,
}
# both DBI-Connection and {dplyr}-src object are accepted
src <- src_from_src_or_con(src)
con <- con_from_src_or_con(src)

if (is.null(learn_keys) || isTRUE(learn_keys)) {
dm_learned <- dm_learn_from_db(src, ...)
Expand Down Expand Up @@ -76,14 +77,15 @@ dm_from_src <- function(src = NULL, table_names = NULL, learn_keys = NULL,
}
}

src_tbl_names <- unique(src_tbls(src))
if (!is_null(table_names)) {
if (is_null(table_names)) {
src_tbl_names <- unique(src_tbls(src))
} else {
src_tbl_names <- table_names
}

tbls <-
src_tbl_names %>%
set_names() %>%
set_names(src_tbl_names) %>%
quote_ids(con) %>%
map(possibly(tbl, NULL), src = src)

bad <- map_lgl(tbls, is_null)
Expand All @@ -99,6 +101,14 @@ dm_from_src <- function(src = NULL, table_names = NULL, learn_keys = NULL,
new_dm(tbls)
}

quote_ids <- function(x, con) {
if (is.null(con)) return(x)

map(
x,
~ dbplyr::ident_q(dbplyr::build_sql(dbplyr::ident(.x), con = con))
)
}

# Errors ------------------------------------------------------------------

Expand Down
13 changes: 8 additions & 5 deletions R/learn.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,17 @@ dm_learn_from_db <- function(dest, ...) {
dbGetQuery(con, sql) %>%
as_tibble()
if (nrow(overview) == 0) {
return(NULL)
} else {
overview <- arrange(overview, table)
return()
}

table_names <- overview %>%
table_names <-
overview %>%
arrange(table) %>%
select(schema, table) %>%
transmute(name = table, value = schema_if(schema, table)) %>%
transmute(
name = table,
value = schema_if(schema, DBI::dbQuoteIdentifier(con, table))
) %>%
deframe()

# FIXME: Use tbl_sql(vars = ...)
Expand Down
16 changes: 12 additions & 4 deletions man/copy_dm_to.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 39 additions & 13 deletions tests/testthat/test-db-interface.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,38 @@ test_that("data source found", {

skip_if_not_installed("dbplyr")

# ensure that we have one DB and one local `src`
if (inherits(my_test_src(), "src_dbi")) {
remote_test_src <- my_test_src()
local_test_src <- default_local_src()
} else {
remote_test_src <- sqlite()
local_test_src <- my_test_src()
}

test_that("copy_dm_to() copies data frames to databases", {
skip_if_local_src()

expect_equivalent_dm(
copy_dm_to(remote_test_src, collect(dm_for_filter())),
collect(dm_for_filter())
copy_dm_to(my_test_src(), collect(dm_for_filter())),
dm_for_filter()
)

# FIXME: How to test writing permanent tables without and be sure they are removed at the end independent what 'my_test_src()' is?
})

test_that("copy_dm_to() copies data frames from databases", {
test_that("copy_dm_to() copies data frames from any source", {
expect_equivalent_dm(
copy_dm_to(local_test_src, dm_for_filter_sqlite()),
copy_dm_to(default_local_src(), dm_for_filter()),
dm_for_filter()
)
})

test_that("copy_dm_to() copies to SQLite", {
skip_if_not_installed("RSQLite")

expect_equivalent_dm(
copy_dm_to(test_src_sqlite(), dm_for_filter()),
dm_for_filter()
)
})

test_that("copy_dm_to() copies from SQLite", {
skip_if_not_installed("RSQLite")

expect_equivalent_dm(
copy_dm_to(my_test_src(), dm_for_filter_sqlite()),
dm_for_filter_sqlite()
)
})
Expand Down Expand Up @@ -67,3 +78,18 @@ test_repair_table_names_for_db <- function(table_names, temporary) {
}
)
}

test_that("table identifiers are quoted", {
skip_if_local_src()

# Implicitly created with copy_dm_to()
dm <- dm_test_obj()
remote_names <-
dm %>%
dm_get_tables() %>%
map_chr(dbplyr::remote_name)

con <- dm_get_con(dm)
pattern <- unclass(DBI::dbQuoteIdentifier(con, "[a-z0-9_#]+"))
expect_true(all(grepl(pattern, remote_names)))
})
25 changes: 25 additions & 0 deletions tests/testthat/test-dm-from-src.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
test_that("table identifiers are quoted", {
skip_if_local_src()

dm <- dm_from_src(my_test_src())
remote_names <-
dm %>%
dm_get_tables() %>%
map_chr(dbplyr::remote_name)

con <- dm_get_con(dm)
expect_equal(unname(remote_names), unclass(DBI::dbQuoteIdentifier(con, names(dm))))
})

test_that("table identifiers are quoted with learn_keys = FALSE", {
skip_if_local_src()

dm <- dm_from_src(my_test_src(), learn_keys = FALSE)
remote_names <-
dm %>%
dm_get_tables() %>%
map_chr(dbplyr::remote_name)

con <- dm_get_con(dm)
expect_equal(unname(remote_names), unclass(DBI::dbQuoteIdentifier(con, names(dm))))
})