-
-
Notifications
You must be signed in to change notification settings - Fork 29
/
objects.R
509 lines (443 loc) · 16.9 KB
/
objects.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
#' List objects in a bucket
#'
#' @param bucket bucket containing the objects
#' @param detail Set level of detail
#' @param prefix Filter results to objects whose names begin with this prefix
#' @param delimiter Use to list objects like a directory listing.
#' @details
#'
#' Columns returned by \code{detail} are:
#'
#' \itemize{
#' \item \code{summary} - name, size, updated
#' \item \code{more} - as above plus: bucket, contentType, storageClass, timeCreated
#' \item \code{full} - as above plus: id, selfLink, generation, metageneration, md5Hash, mediaLink, crc32c, etag
#' }
#'
#' \code{delimited} returns results in a directory-like mode: items will contain only objects whose names,
#' aside from the prefix, do not contain delimiter. In conjunction with the prefix filter,
#' the use of the delimiter parameter allows the list method to operate like a directory listing,
#' despite the object namespace being flat.
#' For example, if delimiter were set to \code{"/"}, then listing objects from a bucket that contains the
#' objects \code{"a/b", "a/c", "dddd", "eeee", "e/f"} would return objects \code{"dddd" and "eeee"},
#' and prefixes \code{"a/" and "e/"}.
#'
#'
#' @return A data.frame of the objects
#'
#' @family object functions
#' @export
#' @importFrom googleAuthR gar_api_generator gar_api_page
gcs_list_objects <- function(bucket = gcs_get_global_bucket(),
detail = c("summary","more","full"),
prefix = NULL,
delimiter = NULL){
detail <- match.arg(detail)
bucket <- as.bucket_name(bucket)
pars <- list(prefix = prefix,
delimiter = delimiter,
pageToken = "")
pars <- rmNullObs(pars)
lo <- gar_api_generator("https://www.googleapis.com/storage/v1/",
path_args = list(b = bucket,
o = ""),
pars_args = pars,
data_parse_function = parse_lo)
req <- gar_api_page(lo,
page_f = function(x) attr(x, "nextPageToken"),
page_method = "param",
page_arg = "pageToken")
limit_columns(my_reduce_rbind(req), detail = detail)
}
limit_columns <- function(req, detail){
if(nrow(req) == 0){
return(data.frame())
}
out_names <- switch(detail,
summary = c("name", "size", "updated"),
more = c("name", "size", "bucket", "contentType",
"timeCreated", "updated", "storageClass"),
full = TRUE)
req[,out_names]
}
## parse
parse_lo <- function(x){
nextPageToken <- x$nextPageToken
if(is.null(x$items)){
myMessage("No objects found", level = 3)
return(data.frame())
}
x <- x$items
if(is.null(x$prefixes)){
myMessage("No prefixes found", level = 2)
prefixes <- NULL
} else {
prefixes <- x$prefixes
}
x$timeCreated <- timestamp_to_r(x$timeCreated)
x$updated <- timestamp_to_r(x$updated)
x$kind <- NULL
x$size <- vapply(as.numeric(x$size), function(x) format_object_size(x, "auto"), character(1))
## extra columns for composite objects (#73)
x$componentCount <- if(is.null(x$componentCount)) NA else x$componentCount
x$contentLanguage <- if(is.null(x$contentLanguage)) NA else x$contentLanguage
attr(x, "nextPageToken") <- nextPageToken
attr(x, "prefixes") <- prefixes
attr(x, "metadata") <- x$metadata
x$metadata <- NULL
x
}
# Parse gs:// URIs to bucket and name
gcs_parse_gsurls <- function(gsurl){
assertthat::assert_that(is.character(gsurl))
if(grepl("^gs://", gsurl)){
## parse out bucket and object name
bucket <- gsub("^gs://(.+?)/(.+)$","\\1", gsurl)
obj <- gsub(paste0("^gs://",bucket,"/"), "", gsurl)
out <- list(bucket = bucket, obj = obj)
} else {
# not a gs:// URL
out <- NULL
}
out
}
#' Get an object in a bucket directly
#'
#' This retrieves an object directly.
#'
#'
#' @param object_name name of object in the bucket that will be URL encoded, or a \code{gs://} URL
#' @param bucket bucket containing the objects. Not needed if using a \code{gs://} URL
#' @param meta If TRUE then get info about the object, not the object itself
#' @param saveToDisk Specify a filename to save directly to disk
#' @param overwrite If saving to a file, whether to overwrite it
#' @param parseObject If saveToDisk is NULL, whether to parse with \code{parseFunction}
#' @param parseFunction If saveToDisk is NULL, the function that will parse the download. Defaults to \link{gcs_parse_download}
#'
#' @details
#'
#' This differs from providing downloads via a download link as you can
#' do via \link{gcs_download_url}
#'
#' \code{object_name} can use a \code{gs://} URI instead,
#' in which case it will take the bucket name from that URI and \code{bucket} argument
#' will be overridden. The URLs should be in the form \code{gs://bucket/object/name}
#'
#' By default if you want to get the object straight into an R session the parseFunction is \link{gcs_parse_download} which wraps \code{httr}'s \link[httr]{content}.
#'
#' If you want to use your own function (say to unzip the object) then supply it here. The first argument should take the downloaded object.
#'
#' @return The object, or TRUE if successfully saved to disk.
#'
#' @examples
#'
#' \dontrun{
#'
#' ## something to download
#' ## data.frame that defaults to be called "mtcars.csv"
#' gcs_upload(mtcars)
#'
#' ## get the mtcars csv from GCS, convert it to an R obj
#' gcs_get_object("mtcars.csv")
#'
#' ## get the mtcars csv from GCS, save it to disk
#' gcs_get_object("mtcars.csv", saveToDisk = "mtcars.csv")
#'
#'
#' ## default gives a warning about missing column name.
#' ## custom parse function to suppress warning
#' f <- function(object){
#' suppressWarnings(httr::content(object, encoding = "UTF-8"))
#' }
#'
#' ## get mtcars csv with custom parse function.
#' gcs_get_object("mtcars_meta.csv", parseFunction = f)
#'
#' }
#'
#' @family object functions
#' @importFrom utils URLdecode
#' @import assertthat
#' @importFrom httr write_disk
#' @importFrom googleAuthR gar_api_generator
#' @export
gcs_get_object <- function(object_name,
bucket = gcs_get_global_bucket(),
meta = FALSE,
saveToDisk = NULL,
overwrite = FALSE,
parseObject = TRUE,
parseFunction = gcs_parse_download){
assert_that(
is.string(object_name),
is.flag(meta),
is.flag(parseObject)
)
parse_gsurl <- gcs_parse_gsurls(object_name)
if(!is.null(parse_gsurl)){
object_name <- parse_gsurl$obj
bucket <- parse_gsurl$bucket
}
bucket <- as.bucket_name(bucket)
object_name <- URLencode(object_name, reserved = TRUE)
if(meta){
alt = "json"
} else {
options(googleAuthR.rawResponse = TRUE)
on.exit(options(googleAuthR.rawResponse = FALSE))
alt = "media"
}
## download directly to disk
if(!is.null(saveToDisk)){
assert_that(is.logical(overwrite))
customConfig <- list(write_disk(saveToDisk, overwrite = overwrite))
} else {
customConfig <- NULL
}
ob <- gar_api_generator("https://www.googleapis.com/storage/v1/",
path_args = list(b = bucket,
o = object_name),
pars_args = list(alt = alt),
customConfig = customConfig)
req <- ob()
if(!meta){
if(req$status_code == 404){
stop("File not found. Check object_name and if you have read permissions.
Looked for ", object_name)
}
if(!is.null(saveToDisk)){
myMessage("Saved ", URLdecode(object_name), " to ", saveToDisk,
" (",format_object_size(file.size(saveToDisk), "auto"),")",
level = 3)
out <- TRUE
} else {
message("Downloaded ", object_name)
if(parseObject){
out <- try(parseFunction(req))
if(is.error(out)){
stop("Problem parsing the object with supplied parseFunction.")
}
} else {
out <- req
}
}
} else {
out <- structure(req$content, class = "gcs_objectmeta")
}
out
}
#' Make metadata for an object
#'
#' Use this to pass to uploads in \link{gcs_upload}
#'
#' @inheritParams Object
#' @param object_name Name of the object. GCS uses this version if also set elsewhere, or a \code{gs://} URL
#'
#' @return Object metadata for uploading of class \code{gar_Object}
#' @family object functions
#' @export
gcs_metadata_object <- function(object_name = NULL,
metadata = NULL,
md5Hash = NULL,
crc32c = NULL,
contentLanguage = NULL,
contentEncoding = NULL,
contentDisposition = NULL,
cacheControl = NULL){
parse_gsurl <- gcs_parse_gsurls(object_name)
if(!is.null(parse_gsurl)){
object_name <- parse_gsurl$obj
}
object_name <- if(!is.null(object_name)) URLencode(object_name, reserved = TRUE)
out <- Object(name = object_name,
metadata = metadata,
md5Hash = md5Hash,
crc32c = crc32c,
contentLanguage = contentLanguage,
contentEncoding = contentEncoding,
contentDisposition = contentDisposition,
cacheControl = cacheControl)
out
}
#' Delete an object
#'
#' Deletes an object from a bucket
#'
#' @param object_name Object to be deleted, or a \code{gs://} URL
#' @param bucket Bucket to delete object from
#' @param generation If present, deletes a specific version.
#'
#' Default if \code{generation} is NULL is to delete the latest version.
#'
#' @return If successful, TRUE.
#' @family object functions
#' @import assertthat
#' @importFrom googleAuthR gar_api_generator
#' @export
gcs_delete_object <- function(object_name,
bucket = gcs_get_global_bucket(),
generation = NULL){
assert_that(
is.string(object_name)
)
parse_gsurl <- gcs_parse_gsurls(object_name)
if(!is.null(parse_gsurl)){
object_name <- parse_gsurl$obj
bucket <- parse_gsurl$bucket
}
bucket <- as.bucket_name(bucket)
object_name <- URLencode(object_name, reserved = TRUE)
pars <- list(generation = generation)
pars <- rmNullObs(pars)
ob <- gar_api_generator("https://www.googleapis.com/storage/v1/",
"DELETE",
path_args = list(b = bucket,
o = object_name),
pars_args = pars)
## suppress warnings of no JSON content detected
suppressWarnings(ob())
myMessage("Deleted '", object_name, "' from bucket '", bucket,"'")
TRUE
}
#' Copy an object
#'
#' Copies an object to a new destination
#'
#' @param source_object The name of the object to copy, or a \code{gs://} URL
#' @param destination_object The name of where to copy the object to, or a \code{gs://} URL
#' @param source_bucket The bucket of the source object
#' @param destination_bucket The bucket of the destination
#' @param rewriteToken Include this field (from the previous rewrite response) on each rewrite request after the first one, until the rewrite response 'done' flag is true.
#' @param destinationPredefinedAcl Apply a predefined set of access controls to the destination object. If not NULL must be one of the predefined access controls such as \code{"bucketOwnerFullControl"}
#'
#' @return If successful, a rewrite object.
#' @family object functions
#' @import assertthat
#' @importFrom googleAuthR gar_api_generator
#' @export
gcs_copy_object <- function(source_object,
destination_object,
source_bucket = gcs_get_global_bucket(),
destination_bucket = gcs_get_global_bucket(),
rewriteToken = NULL,
destinationPredefinedAcl = NULL){
assert_that(
is.string(source_object),
is.string(destination_object)
)
source_gcs <- gcs_parse_gsurls(source_object)
if(!is.null(source_gcs)){
source_object <- source_gcs$obj
source_bucket <- source_gcs$bucket
}
destination_gcs <- gcs_parse_gsurls(destination_object)
if(!is.null(destination_gcs)){
destination_object <- destination_gcs$obj
destination_bucket <- destination_gcs$bucket
}
source_bucket <- as.bucket_name(source_bucket)
destination_bucket <- as.bucket_name(destination_bucket)
source_object <- URLencode(source_object, reserved = TRUE)
destination_object <- URLencode(destination_object, reserved = TRUE)
the_url <- sprintf("https://www.googleapis.com/storage/v1/b/%s/o/%s/rewriteTo/b/%s/o/%s",
source_bucket, source_object, destination_bucket, destination_object)
pars <- NULL
if(!is.null(rewriteToken)){
pars <- list(rewriteToken = rewriteToken)
}
if(!is.null(destinationPredefinedAcl)){
assert_that(is.string(destinationPredefinedAcl))
pars <- c(pars, list(destinationPredefinedAcl = destinationPredefinedAcl))
}
ob <- gar_api_generator(the_url,
"POST",
pars_args = pars,
data_parse_function = function(x) x)
ob()
}
#' Object Object
#'
#' @details
#' An object.
#'
#' @param acl Access controls on the object
#' @param bucket The name of the bucket containing this object
#' @param cacheControl Cache-Control directive for the object data
#' @param componentCount Number of underlying components that make up this object
#' @param contentDisposition Content-Disposition of the object data
#' @param contentEncoding Content-Encoding of the object data
#' @param contentLanguage Content-Language of the object data
#' @param contentType Content-Type of the object data
#' @param crc32c CRC32c checksum, as described in RFC 4960, Appendix B; encoded using base64 in big-endian byte order
#' @param customerEncryption Metadata of customer-supplied encryption key, if the object is encrypted by such a key
#' @param etag HTTP 1
#' @param generation The content generation of this object
#' @param id The ID of the object
#' @param md5Hash MD5 hash of the data; encoded using base64
#' @param mediaLink Media download link
#' @param metadata User-provided metadata, in key/value pairs
#' @param metageneration The version of the metadata for this object at this generation
#' @param name The name of this object
#' @param owner The owner of the object
#' @param selfLink The link to this object
#' @param size Content-Length of the data in bytes
#' @param storageClass Storage class of the object
#' @param timeCreated The creation time of the object in RFC 3339 format
#' @param timeDeleted The deletion time of the object in RFC 3339 format
#' @param updated The modification time of the object metadata in RFC 3339 format
#'
#' @return Object object
#'
#' @family Object functions
#' @keywords internal
Object <- function(acl = NULL,
bucket = NULL,
cacheControl = NULL,
componentCount = NULL,
contentDisposition = NULL,
contentEncoding = NULL,
contentLanguage = NULL,
contentType = NULL,
crc32c = NULL,
customerEncryption = NULL,
etag = NULL,
generation = NULL,
id = NULL,
md5Hash = NULL,
mediaLink = NULL,
metadata = NULL,
metageneration = NULL,
name = NULL,
owner = NULL,
selfLink = NULL,
size = NULL,
storageClass = NULL,
timeCreated = NULL,
timeDeleted = NULL,
updated = NULL) {
structure(rmNullObs(list(acl = acl,
bucket = bucket,
cacheControl = cacheControl,
componentCount = componentCount,
contentDisposition = contentDisposition,
contentEncoding = contentEncoding,
contentLanguage = contentLanguage,
contentType = contentType,
crc32c = crc32c,
customerEncryption = customerEncryption,
etag = etag,
generation = generation,
id = id,
kind = "storage#object",
md5Hash = md5Hash,
mediaLink = mediaLink,
metadata = metadata,
metageneration = metageneration,
name = name,
owner = owner,
selfLink = selfLink,
size = size,
storageClass = storageClass,
timeCreated = timeCreated,
timeDeleted = timeDeleted, updated = updated)), class = "gar_Object")
}