-
Notifications
You must be signed in to change notification settings - Fork 2
/
duckdb_config.R
151 lines (132 loc) · 5.77 KB
/
duckdb_config.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# FIXME: should we expose a generic interface for setting any pragma listed in
# https://duckdb.org/docs/sql/configuration.html
duckdb_set <- function(x, conn = cached_connection()) {
if(!is.null(x)) {
name <- deparse(substitute(x))
cmd <- paste0("SET ", name, "='", x, "';")
DBI::dbExecute(conn, cmd)
}
}
#' Configure S3 settings for database connection
#'
#' This function is used to configure S3 settings for a database connection.
#' It allows you to set various S3-related parameters such as access key,
#' secret access key, endpoint, region, session token, uploader settings,
#' URL compatibility mode, URL style, and SSL usage.
#'
#' @param conn A database connection object created using the
#' \code{cache_connection} function (default: \code{cache_connection()}).
#' @param s3_access_key_id The S3 access key ID (default: \code{NULL}).
#' @param s3_secret_access_key The S3 secret access key (default: \code{NULL}).
#' @param s3_endpoint The S3 endpoint (default: \code{NULL}).
#' @param s3_region The S3 region (default: \code{NULL}).
#' @param s3_session_token The S3 session token (default: \code{NULL}).
#' @param s3_uploader_max_filesize The maximum filesize for S3 uploader
#' (between 50GB and 5TB, default 800GB).
#' @param s3_uploader_max_parts_per_file The maximum number of parts per file
#' for S3 uploader (between 1 and 10000, default 10000).
#' @param s3_uploader_thread_limit The thread limit for S3 uploader
#' (default: 50).
#' @param s3_url_compatibility_mode Disable Globs and Query Parameters on
#' S3 URLs (default: 0, allows globs/queries).
#' @param s3_url_style The style of S3 URLs to use. Default is
#' "vhost" unless s3_endpoint is set, which makes default "path"
#' (i.e. MINIO systems).
#' @param s3_use_ssl Enable or disable SSL for S3 connections
#' (default: 1 (TRUE)).
#' @param anonymous request anonymous access (sets `s3_access_key_id` and
#' `s3_secret_access_key` to `""`, allowing anonymous access to public buckets).
#' @details see <https://duckdb.org/docs/sql/configuration.html>
#' @return Returns silently (NULL) if successful.
#'
#' @examplesIf interactive()
#' # Configure S3 settings
#' duckdb_s3_config(
#' s3_access_key_id = "YOUR_ACCESS_KEY_ID",
#' s3_secret_access_key = "YOUR_SECRET_ACCESS_KEY",
#' s3_endpoint = "YOUR_S3_ENDPOINT",
#' s3_region = "YOUR_S3_REGION",
#' s3_uploader_max_filesize = "800GB",
#' s3_uploader_max_parts_per_file = 100,
#' s3_uploader_thread_limit = 8,
#' s3_url_compatibility_mode = FALSE,
#' s3_url_style = "vhost",
#' s3_use_ssl = TRUE,
#' anonymous = TRUE)
#'
#' @export
duckdb_s3_config <- function(conn = cached_connection(),
s3_access_key_id = NULL,
s3_secret_access_key = NULL,
s3_endpoint = NULL,
s3_region = NULL,
s3_session_token = NULL,
s3_uploader_max_filesize = NULL,
s3_uploader_max_parts_per_file = NULL,
s3_uploader_thread_limit = NULL,
s3_url_compatibility_mode = NULL,
s3_url_style = NULL,
s3_use_ssl = NULL,
anonymous = NULL) {
if (!is.null(s3_endpoint) && is.null(s3_url_style)) {
s3_url_style <- "path"
}
if(!is.null(s3_endpoint))
s3_endpoint <- gsub("^http[s]://", "", s3_endpoint)
if(!is.null(anonymous)){
if(!is.null(s3_access_key_id) || !is.null(s3_secret_access_key))
warning(paste("access keys provided when anonymous access requested.\n",
"keys will be ignored"))
s3_access_key_id <- ""
s3_secret_access_key <- ""
}
load_httpfs(conn)
duckdb_set(s3_access_key_id, conn = conn)
duckdb_set(s3_secret_access_key, conn = conn)
duckdb_set(s3_endpoint, conn = conn)
duckdb_set(s3_region, conn = conn)
duckdb_set(s3_secret_access_key, conn = conn)
duckdb_set(s3_session_token, conn = conn)
duckdb_set(s3_uploader_max_filesize, conn = conn)
duckdb_set(s3_uploader_max_parts_per_file, conn = conn)
duckdb_set(s3_uploader_thread_limit, conn = conn)
duckdb_set(s3_url_compatibility_mode, conn = conn)
duckdb_set(s3_url_style, conn = conn)
duckdb_set(s3_use_ssl, conn = conn)
}
load_httpfs <- function(conn = cached_connection()) {
status <- DBI::dbExecute(conn, "INSTALL 'httpfs';")
status <- DBI::dbExecute(conn, "LOAD 'httpfs';")
invisible(status)
}
enable_parallel <- function(conn = cached_connection(),
duckdb_cores = parallel::detectCores()){
status <- DBI::dbExecute(conn, paste0("PRAGMA threads=", duckdb_cores))
invisible(status)
}
#' load the duckdb geospatial data plugin
#'
#' @inheritParams duckdb_s3_config
#' @param nightly should we use the nightly version or not?
#' default FALSE, configurable as `duckdbfs_use_nightly` option.
#' @return loads the extension and returns status invisibly.
#' @references <https://duckdb.org/docs/extensions/spatial.html>
#' @export
load_spatial <- function(conn = cached_connection(),
nightly=getOption("duckdbfs_use_nightly", FALSE)) {
if (nightly) {
status <- DBI::dbExecute(conn,
paste0("FORCE INSTALL 'spatial'",
" FROM 'http://nightly-extensions.duckdb.org'",
";"))
} else {
status <- DBI::dbExecute(conn,
paste0("INSTALL 'spatial';"))
}
status <- DBI::dbExecute(conn, "LOAD 'spatial';")
invisible(status)
}
duckdb_extensions <- function(conn = cached_connection()) {
query <- "SELECT * FROM duckdb_extensions();"
DBI::dbGetQuery(conn, query)
}