/
CSWClient.R
413 lines (387 loc) · 20.6 KB
/
CSWClient.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
#' CSWClient
#'
#' @docType class
#' @export
#' @keywords OGC CSW catalogue service web
#' @return Object of \code{\link{R6Class}} with methods for interfacing an OGC
#' Catalogue Service for the Web.
#' @format \code{\link{R6Class}} object.
#'
#' @examples
#' \dontrun{
#' #example based on CSW endpoint responding at http://localhost:8000/csw
#' csw <- CSWClient$new("http://localhost:8000/csw", serviceVersion = "2.0.2")
#'
#' #get capabilities
#' caps <- csw$getCapabilities()
#'
#' #get records
#' records <- csw$getRecords()
#'
#' #get record by id
#' record <- csw$getRecordById("my-metadata-id")
#'
#' #Advanced examples at https://github.com/eblondel/ows4R/wiki#csw
#' }
#'
#' @author Emmanuel Blondel <emmanuel.blondel1@@gmail.com>
#'
CSWClient <- R6Class("CSWClient",
inherit = OWSClient,
private = list(
serviceName = "CSW"
),
public = list(
#'@description This method is used to instantiate a CSWClient with the \code{url} of the
#' OGC service. Authentication is supported using basic auth (using \code{user}/\code{pwd} arguments),
#' bearer token (using \code{token} argument), or custom (using \code{headers} argument). By default, the \code{logger}
#' argument will be set to \code{NULL} (no logger). This argument accepts two possible
#' values: \code{INFO}: to print only \pkg{ows4R} logs, \code{DEBUG}: to print more verbose logs
#'@param url url
#'@param serviceVersion CSW service version
#'@param user user
#'@param pwd password
#'@param token token
#'@param headers headers
#'@param config config
#'@param cas_url Central Authentication Service (CAS) URL
#'@param logger logger
initialize = function(url, serviceVersion = NULL,
user = NULL, pwd = NULL, token = NULL, headers = c(), config = httr::config(), cas_url = NULL,
logger = NULL) {
if(startsWith(serviceVersion, "3.0")) serviceVersion <- "3.0.0"
super$initialize(url, service = private$serviceName, serviceVersion = serviceVersion,
user = user, pwd = pwd, token = token, headers = headers, config = config, cas_url = cas_url,
logger = logger)
self$capabilities = CSWCapabilities$new(self$url, self$version,
user = user, pwd = pwd, token = token, headers = headers, config = config,
logger = logger)
self$capabilities$setClient(self)
},
#'@description Get CSW capabilities
#'@return an object of class \link{CSWCapabilities}
getCapabilities = function(){
return(self$capabilities)
},
#'@description Reloads CSW capabilities
reloadCapabilities = function(){
self$capabilities = CSWCapabilities$new(self$url, self$version,
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
logger = self$loggerType)
self$capabilities$setClient(self)
},
#'@description Describe records. Retrieves the XML schema for CSW records. By default, returns the XML schema
#' for the CSW records (http://www.opengis.net/cat/csw/2.0.2). For other schemas, specify the
#' \code{outputSchema} required, e.g. http://www.isotc211.org/2005/gmd for ISO 19115/19139 schema
#'@param namespace namespace
#'@param ... any other parameter to pass to the \link{CSWDescribeRecord} service request
#'@return the service record description
describeRecord = function(namespace, ...){
self$INFO("Fetching schema...")
operations <- self$capabilities$getOperationsMetadata()$getOperations()
op <- operations[sapply(operations,function(x){x$getName()=="DescribeRecord"})]
if(length(op)>0){
op <- op[[1]]
}else{
errorMsg <- "Operation 'DescribeRecord' not supported by this service"
self$ERROR(errorMsg)
stop(errorMsg)
}
request <- CSWDescribeRecord$new(self$capabilities, op, self$getUrl(), self$getVersion(),
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
namespace = namespace, logger = self$loggerType, ...)
#exception handling
if(request$hasException()){
return(request$getException())
}
#response handling
return(request$getResponse())
},
#'@description Get a record by Id. By default, the record will be returned following the CSW schema
#' (http://www.opengis.net/cat/csw/2.0.2). For other schemas, specify the \code{outputSchema}
#' required, e.g. http://www.isotc211.org/2005/gmd for ISO 19115/19139 records.
#' The parameter \code{elementSetName} should among values "full", "brief", "summary". The default
#' "full" corresponds to the full metadata sheet returned. "brief" and "summary" will contain only
#' a subset of the metadata content.
#' @param id record id
#' @param elementSetName element set name. Default is "full"
#' @param ... any other parameter to pass to \link{CSWGetRecordById} service request
#' @return the fetched record, \code{NULL} otherwise
getRecordById = function(id, elementSetName = "full", ...){
self$INFO(sprintf("Fetching record '%s' ...", id))
operations <- self$capabilities$getOperationsMetadata()$getOperations()
op <- operations[sapply(operations,function(x){x$getName()=="GetRecordById"})]
if(length(op)>0){
op <- op[[1]]
}else{
errorMsg <- "Operation 'GetRecordById' not supported by this service"
self$ERROR(errorMsg)
stop(errorMsg)
}
request <- CSWGetRecordById$new(self$capabilities, op, self$getUrl(), self$getVersion(),
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
id = id, elementSetName = elementSetName,
logger = self$loggerType, ...)
#exception handling
if(request$hasException()){
return(request$getException())
}
#response handling
return(request$getResponse())
},
#'@description Get records based on a query, object of class \code{CSWQuery}. The maximum number of records can be
#' set either for the full query (\code{maxRecords}) or per request (\code{maxRecordsPerRequest}, default set to 10 records)
#' considering this operation is paginated. By default, the record will be returned following the CSW schema
#' (http://www.opengis.net/cat/csw/2.0.2). For other schemas, specify the \code{outputSchema}
#' required, e.g. http://www.isotc211.org/2005/gmd for ISO 19115/19139 records.
#'@param query an object of class \link{CSWQuery}. By default, an empty query is set.
#'@param maxRecords max number of total records. Default is \code{NULL} meaning all records are returned.
#'@param maxRecordsPerRequest max number of records to return per request. Default set to 10.
#'@param ... any other parameter to be passed to \link{CSWGetRecords} service request
#'@return the list of records. By default each record will be returned as Dublin Core \code{list} object. In case
#' ISO 19115/19139 is set as \code{outputSchema}, each record will be object of class \code{ISOMetadata} from
#' \pkg{geometa}.
getRecords = function(query = CSWQuery$new(), maxRecords = NULL, maxRecordsPerRequest = 10L, ...){
self$INFO("Fetching records ...")
operations <- self$capabilities$getOperationsMetadata()$getOperations()
op <- operations[sapply(operations,function(x){x$getName()=="GetRecords"})]
if(length(op)>0){
op <- op[[1]]
}else{
errorMsg <- "Operation 'GetRecords' not supported by this service"
self$ERROR(errorMsg)
stop(errorMsg)
}
query$setServiceVersion(self$getVersion())
hasMaxRecords <- !is.null(maxRecords)
if(hasMaxRecords) if(maxRecords < maxRecordsPerRequest) maxRecordsPerRequest <- maxRecords
firstRequest <- CSWGetRecords$new(self$capabilities, op, self$getUrl(), self$getVersion(),
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
query = query, logger = self$loggerType,
maxRecords = maxRecordsPerRequest, ...)
#exception handling
if(firstRequest$hasException()){
return(firstRequest$getException())
}
#response handling
records <- firstRequest$getResponse()
numberOfRecordsMatched <- attr(records, "numberOfRecordsMatched")
numberOfRecordsMatchedSafe <- numberOfRecordsMatched
if(hasMaxRecords){
numberOfRecordsMatched <- maxRecords
if(length(records) >= maxRecords){
records <- records[1:maxRecords]
return(records)
}
}
nextRecord <- attr(records, "nextRecord")
while(nextRecord != 0L){
if(hasMaxRecords) {
if(maxRecords - length(records) < maxRecordsPerRequest){
maxRecordsPerRequest <- maxRecords - length(records)
}
}else{
if(numberOfRecordsMatched - length(records) < maxRecordsPerRequest){
maxRecordsPerRequest <- numberOfRecordsMatched - length(records)
}
}
nextRequest <- CSWGetRecords$new(self$capabilities, op, self$getUrl(), self$getVersion(),
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
query = query, logger = self$loggerType,
startPosition = nextRecord,
maxRecords = maxRecordsPerRequest, ...)
#exception handling
if(nextRequest$hasException()){
return(nextRequest$getException())
}
#response handling
nextRecords <- nextRequest$getResponse()
records <- c(records, nextRecords)
if(length(records) == numberOfRecordsMatched) break
nextRecord <- attr(nextRecords, "nextRecord")
if(nextRecord > numberOfRecordsMatchedSafe) nextRecord <- 0L
}
return(records)
},
#'@description Generic transaction method. Used for inserting, updating or deleting metadata using the transactional CSW service.
#' The \code{type} gives the type of transaction (Insert, Update, or Delete). The record
#'@param type of transaction either "Insert", "Update" or "Delete"
#'@param record the record subject of the transaction
#'@param recordProperty record property, object of class \link{CSWRecordProperty}
#'@param constraint constraint, object of class \link{CSWConstraint}
#'@param ... any other parameter to pass to \link{CSWTransaction} service request
#'@return \code{TRUE} if transaction succeeded, \code{FALSE} otherwise
transaction = function(type, record = NULL, recordProperty = NULL, constraint = NULL, ...){
self$INFO(sprintf("Transaction (%s) ...", type))
operations <- self$capabilities$getOperationsMetadata()$getOperations()
op <- operations[sapply(operations,function(x){x$getName()=="Transaction"})]
if(length(op)>0){
op <- op[[1]]
}else{
errorMsg <- "Operation 'Transaction' not supported by this service"
self$ERROR(errorMsg)
stop(errorMsg)
}
cswt_url <- self$getUrl()
#special check for Geonetwork url
if(regexpr("geonetwork",cswt_url) > 0){
cswt_url <- paste0(cswt_url, "-publication")
if(is.null(self$getUser()) || is.null(self$getPwd())){
stop("Geonetwork CSW Transaction service requires user authentication")
}
}
#transation
transaction <- CSWTransaction$new(self$capabilities, op, cswt_url, self$getVersion(), type = type,
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
record = record, recordProperty = recordProperty, constraint = constraint,
logger = self$loggerType, ...)
#exception handling
if(transaction$hasException()){
return(transaction$getException())
}
#response handling
summaryKey <- switch(type,
"Insert" = "Inserted",
"Update" = "Updated",
"Delete" = "Deleted"
)
transaction$setResult(FALSE)
if(is.null(xmlNamespaces(transaction$getResponse())$csw)){
return(transaction)
}else{
ns <- ifelse(self$getVersion()=="3.0.0", "csw30", "csw")
nsUri <- xmlNamespaces(transaction$getResponse())[[ns]]$uri
names(nsUri) <- ifelse(self$getVersion()=="3.0.0", "csw30", "csw")
result <- getNodeSet(transaction$getResponse(),paste0("//",ns,":total",summaryKey), nsUri)
if(length(result)>0){
result <- result[[1]]
if(xmlValue(result)>0) transaction$setResult(TRUE)
}
if(transaction$getResult()){
self$INFO(sprintf("Successful transaction (%s)!", type))
}
}
return(transaction)
},
#'@description Inserts a new record
#'@param record record subject of the Insertion
#'@param ... any other parameter to pass to the transaction
#'@return \code{TRUE} if insertion succeeded, \code{FALSE} otherwise
insertRecord = function(record, ...){
return(self$transaction("Insert", record = record, constraint = NULL, ...))
},
#'@description Updates an existing record
#'@param record record subject of the Insertion
#'@param recordProperty record property, object of class \link{CSWRecordProperty}
#'@param constraint constraint, object of class \link{CSWConstraint}
#'@param ... any other parameter to pass to the transaction
#'@return \code{TRUE} if update succeeded, \code{FALSE} otherwise
updateRecord = function(record = NULL, recordProperty = NULL, constraint = NULL, ...){
if(!is.null(recordProperty)) if(!is(recordProperty, "CSWRecordProperty")){
stop("The argument recordProperty should be an object of class 'CSWRecordProperty'")
}
if(!is.null(constraint)) if(!is(constraint, "CSWConstraint")){
stop("The argument constraint should be an object of class 'CSWConstraint'")
}
if(!is.null(constraint)) constraint$setServiceVersion(self$getVersion())
return(self$transaction("Update", record = record, recordProperty = recordProperty, constraint = constraint, ...))
},
#'@description Deletes an existing (set of) record(s). A constraint (object of class \code{CSWConstraint})
#' can be specified to limit the deletion to some records.
#'@param record record subject of the Insertion
#'@param constraint constraint, object of class \link{CSWConstraint}
#'@param ... any other parameter to pass to the transaction
#'@return \code{TRUE} if deletion succeeded, \code{FALSE} otherwise
deleteRecord = function(record = NULL, constraint = NULL, ...){
if(!is.null(constraint)) constraint$setServiceVersion(self$getVersion())
return(self$transaction("Delete", record = record, constraint = constraint, ...))
},
#'@description Deletes an existing record by identifier (constraint used to identify the record based on its identifier).
#'@param id record id
#'@return \code{TRUE} if deletion succeeded, \code{FALSE} otherwise
deleteRecordById = function(id){
ogcFilter = OGCFilter$new( PropertyIsEqualTo$new("apiso:Identifier", id) )
cswConstraint = CSWConstraint$new(filter = ogcFilter)
cswConstraint$setServiceVersion(self$getVersion())
return(self$deleteRecord(constraint = cswConstraint))
},
#'@description Harvests a single record from a \code{sourceUrl}, given a \code{resourceType} (by default "http://www.isotc211.org/2005/gmd").
#'@param sourceUrl source URL
#'@param resourceType resource type. Default is "http://www.isotc211.org/2005/gmd"
#'@return \code{TRUE} if harvesting succeeded, \code{FALSE} otherwise
harvestRecord = function(sourceUrl, resourceType = "http://www.isotc211.org/2005/gmd"){
operations <- self$capabilities$getOperationsMetadata()$getOperations()
op <- operations[sapply(operations,function(x){x$getName()=="Harvest"})]
if(length(op)>0){
op <- op[[1]]
}else{
errorMsg <- "Operation 'Harvest' not supported by this service"
self$ERROR(errorMsg)
stop(errorMsg)
}
self$INFO(sprintf("Harvesting '%s' ...", sourceUrl))
harvest <- CSWHarvest$new(self$capabilities, op, self$getUrl(), self$getVersion(),
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
source = sourceUrl, resourceType = resourceType, resourceFormat = "application/xml",
logger = self$loggerType)
#exception handling
if(harvest$hasException()){
return(harvest$getException())
}
#response handling
harvest$setResult(FALSE)
if(is.null(xmlNamespaces(harvest$getResponse())$csw)){
return(harvest)
}else{
ns <- ifelse(self$getVersion()=="3.0.0", "csw30", "csw")
nsUri <- xmlNamespaces(harvest$getResponse())[[ns]]$uri
names(nsUri) <- ifelse(self$getVersion()=="3.0.0", "csw30", "csw")
inserted <- getNodeSet(harvest$getResponse(),paste0("//",ns,":totalInserted"), nsUri)[[1]]
updated <- getNodeSet(harvest$getResponse(),paste0("//",ns,":totalUpdated"), nsUri)[[1]]
deleted <- getNodeSet(harvest$getResponse(),paste0("//",ns,":totalDeleted"), nsUri)[[1]]
if(xmlValue(inserted)>0 || xmlValue(updated)>0 || xmlValue(deleted)>0){
harvest$setResult(TRUE)
}
if(harvest$getResult()){
self$INFO("Successful record harvesting (%s)!")
}
}
return(harvest)
},
#'@description Harvests a CSW node (having its endpoint defined by an \code{url}). A \code{query} (object of class \code{CSWQuery}) can be
#' specificed if needed to restrain the harvesting to a subset. The \code{resourceType} defines the type of resources to be harvested
#' (by default "http://www.isotc211.org/2005/gmd")
#'@param url CSW node URL
#'@param query a CSW query, object of class \link{CSWQuery}
#'@param resourceType resource type. Default is "http://www.isotc211.org/2005/gmd"
#'@param sourceBaseUrl source base URL
#'@return an object of class \code{list} giving the number of records \code{found} and those actually \code{harvested}
harvestNode = function(url,
query = CSWQuery$new(), resourceType = "http://www.isotc211.org/2005/gmd",
sourceBaseUrl){
nodeHarvest <- NULL
csw <- CSWClient$new(url = url, serviceVersion = self$getVersion(),
user = self$getUser(), pwd = self$getPwd(), token = self$getToken(),
headers = self$getHeaders(), config = self$getConfig(),
logger = self$loggerType)
if(!is.null(csw)){
records <- csw$getRecords(query = query)
nodeHarvest$found <- length(records)
nodeHarvest$harvested <- 0
for(record in records){
sourceUrl <- paste0(sourceBaseUrl, record$identifier)
recHarvest <- self$harvestRecord(sourceUrl, resourceType = resourceType)
if(recHarvest$getResult()) nodeHarvest$harvested <- nodeHarvest$harvested+1
}
}
return(nodeHarvest)
}
)
)