Skip to content
This repository has been archived by the owner on Apr 14, 2018. It is now read-only.

Commit

Permalink
Implement sql_join
Browse files Browse the repository at this point in the history
Basically because dplyr's sql_join uses the "USING" keyword which T-SQL
doesn't support.

Needed to bring back in a lot of unexported dplyr functions to make this
work
  • Loading branch information
imanuelcostigan committed Jul 19, 2015
1 parent 91316c4 commit 775b510
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 114 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ S3method(db_query_fields,SQLServerConnection)
S3method(db_query_rows,SQLServerConnection)
S3method(db_save_query,SQLServerConnection)
S3method(head,tbl_sqlserver)
S3method(sql_join,SQLServerConnection)
S3method(sql_select,SQLServerConnection)
S3method(src_desc,src_sqlserver)
S3method(src_translate_env,src_sqlserver)
Expand Down Expand Up @@ -45,6 +46,7 @@ importFrom(dplyr,db_list_tables)
importFrom(dplyr,db_query_fields)
importFrom(dplyr,db_query_rows)
importFrom(dplyr,db_save_query)
importFrom(dplyr,sql_join)
importFrom(dplyr,sql_select)
importFrom(dplyr,src_desc)
importFrom(dplyr,src_translate_env)
Expand Down
125 changes: 65 additions & 60 deletions R/dplyr-imports.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,70 @@ db_create_indexes <- function(con, table, indexes = NULL, ...) {
}
}

common_by <- function(by = NULL, x, y) {
if (is.list(by)) return(by)

if (!is.null(by)) {
x <- names(by) %||% by
y <- unname(by)

# If x partially named, assume unnamed are the same in both tables
x[x == ""] <- y[x == ""]

return(list(x = x, y = y))
}

by <- intersect(dplyr::tbl_vars(x), dplyr::tbl_vars(y))
if (length(by) == 0) {
stop("No common variables. Please specify `by` param.", call. = FALSE)
}
message("Joining by: ", capture.output(dput(by)))

list(
x = by,
y = by
)
}

auto_names <- function(x) {
nms <- names2(x)
missing <- nms == ""
if (all(!missing)) return(nms)

deparse2 <- function(x) paste(deparse(x, 500L), collapse = "")
defaults <- vapply(x[missing], deparse2, character(1), USE.NAMES = FALSE)

nms[missing] <- defaults
nms
}

unique_names <- function(x_names, y_names, by, x_suffix = ".x", y_suffix = ".y") {
# See: https://github.com/hadley/dplyr/issues/709
common <- intersect(x_names, y_names)
if (length(common) == 0) return(NULL)

x_match <- match(common, x_names)
x_new <- x_names
x_new[x_match] <- paste0(x_names[x_match], x_suffix)

y_match <- match(common, y_names)
y_new <- y_names
y_new[y_match] <- paste0(y_names[y_match], y_suffix)

list(x = setNames(x_new, x_names), y = setNames(y_new, y_names))
}

unique_name <- local({
i <- 0

function() {
i <<- i + 1
paste0("_W", i)
}
})



# all_calls <- function(x) {
# if (!is.call(x)) return(NULL)
#
Expand All @@ -69,17 +133,6 @@ db_create_indexes <- function(con, table, indexes = NULL, ...) {
# UseMethod("auto_copy")
# }
#
# auto_names <- function(x) {
# nms <- names2(x)
# missing <- nms == ""
# if (all(!missing)) return(nms)
#
# deparse2 <- function(x) paste(deparse(x, 500L), collapse = "")
# defaults <- vapply(x[missing], deparse2, character(1), USE.NAMES = FALSE)
#
# nms[missing] <- defaults
# nms
# }
#
# base_symbols <- dplyr::sql_translator(
# pi = dplyr::sql("PI()"),
Expand Down Expand Up @@ -151,31 +204,7 @@ db_create_indexes <- function(con, table, indexes = NULL, ...) {
# list2env(l, parent = parent)
# }
#
# common_by <- function(by = NULL, x, y) {
# if (is.list(by)) return(by)
#
# if (!is.null(by)) {
# x <- names(by) %||% by
# y <- unname(by)
#
# # If x partially named, assume unnamed are the same in both tables
# x[x == ""] <- y[x == ""]
#
# return(list(x = x, y = y))
# }
#
# by <- intersect(dplyr::tbl_vars(x), dplyr::tbl_vars(y))
# if (length(by) == 0) {
# stop("No common variables. Please specify `by` param.", call. = FALSE)
# }
# message("Joining by: ", capture.output(dput(by)))
#
# list(
# x = by,
# y = by
# )
# }
#

# compact <- function(x) Filter(Negate(is.null), x)
#
# copy_env <- function(from, to = NULL, parent = parent.env(from)) {
Expand Down Expand Up @@ -308,30 +337,6 @@ db_create_indexes <- function(con, table, indexes = NULL, ...) {
# )
# }
#
# unique_name <- local({
# i <- 0
#
# function() {
# i <<- i + 1
# paste0("_W", i)
# }
# })
#
# unique_names <- function(x_names, y_names, by, x_suffix = ".x", y_suffix = ".y") {
# # See: https://github.com/hadley/dplyr/issues/709
# common <- intersect(x_names, y_names)
# if (length(common) == 0) return(NULL)
#
# x_match <- match(common, x_names)
# x_new <- x_names
# x_new[x_match] <- paste0(x_names[x_match], x_suffix)
#
# y_match <- match(common, y_names)
# y_new <- y_names
# y_new[y_match] <- paste0(y_names[y_match], y_suffix)
#
# list(x = setNames(x_new, x_names), y = setNames(y_new, y_names))
# }
#
# uses_window_fun <- function(x, tbl) {
# if (is.null(x)) return(FALSE)
Expand Down
105 changes: 51 additions & 54 deletions R/sql-backends.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,60 +118,57 @@ build_query <- function (x, top = NULL) {
dplyr::query(x$src$con, sql, vars)
}

#' @importFrom dplyr sql_join
#' @export
sql_join.SQLServerConnection <- function(con, x, y, type = "inner",
by = NULL, ...) {
join <- switch(type,
left = dplyr::sql("LEFT"),
inner = dplyr::sql("INNER"),
right = dplyr::sql("RIGHT"),
full = dplyr::sql("FULL"),
stop("Unknown join type:", type, call. = FALSE)
)

by <- common_by(by, x, y)

# Ensure tables have unique names
x_names <- auto_names(x$select)
y_names <- auto_names(y$select)
uniques <- unique_names(x_names, y_names, by$x[by$x == by$y])

if (is.null(uniques)) {
sel_vars <- c(x_names, y_names)
} else {
x <- update(x, select = setNames(x$select, uniques$x))
y <- update(y, select = setNames(y$select, uniques$y))

by$x <- unname(uniques$x[by$x])
by$y <- unname(uniques$y[by$y])

sel_vars <- unique(c(uniques$x, uniques$y))
}

xname <- unique_name()
yname <- unique_name()
on <- sql_vector(paste0(
paste0(dplyr::sql_escape_ident(con, xname), ".",
dplyr::sql_escape_ident(con, by$x)), " = ",
paste0(dplyr::sql_escape_ident(con, yname), ".",
dplyr::sql_escape_ident(con, by$y)),
collapse = " AND "), parens = TRUE)
cond <- dplyr::build_sql("ON ", on, con = con)

from <- dplyr::build_sql(
'SELECT * FROM ',
dplyr::sql_subquery(con, x$query$sql, xname), "\n\n",
join, " JOIN \n\n" ,
dplyr::sql_subquery(con, y$query$sql, yname), "\n\n",
cond, con = con
)
attr(from, "vars") <- lapply(sel_vars, as.name)

from
}

# #' @importFrom dplyr sql_join
# #' @export
# sql_join.SQLServerConnection <- function(con, x, y, type = "inner",
# by = NULL, ...) {
# join <- switch(type,
# left = dplyr::sql("LEFT"),
# inner = dplyr::sql("INNER"),
# right = dplyr::sql("RIGHT"),
# full = dplyr::sql("FULL"),
# stop("Unknown join type:", type, call. = FALSE)
# )
#
# by <- common_by(by, x, y)
#
# # Ensure tables have unique names
# x_names <- auto_names(x$select)
# y_names <- auto_names(y$select)
# uniques <- unique_names(x_names, y_names, by$x[by$x == by$y])
#
# if (is.null(uniques)) {
# sel_vars <- c(x_names, y_names)
# } else {
# x <- update(x, select = setNames(x$select, uniques$x))
# y <- update(y, select = setNames(y$select, uniques$y))
#
# by$x <- unname(uniques$x[by$x])
# by$y <- unname(uniques$y[by$y])
#
# sel_vars <- unique(c(uniques$x, uniques$y))
# }
#
# xname <- unique_name()
# yname <- unique_name()
# on <- sql_vector(paste0(
# paste0(dplyr::sql_escape_ident(con, xname), ".",
# dplyr::sql_escape_ident(con, by$x)),
# " = ",
# paste0(dplyr::sql_escape_ident(con, yname), ".",
# dplyr::sql_escape_ident(con, by$y)),
# collapse = " AND "), parens = TRUE)
# cond <- dplyr::build_sql("ON ", on, con = con)
#
# from <- dplyr::build_sql(
# 'SELECT * FROM ',
# dplyr::sql_subquery(con, x$query$sql, xname), "\n\n",
# join, " JOIN \n\n" ,
# dplyr::sql_subquery(con, y$query$sql, yname), "\n\n",
# cond, con = con
# )
# attr(from, "vars") <- lapply(sel_vars, as.name)
#
# from
# }
#

0 comments on commit 775b510

Please sign in to comment.