diff --git a/pom.xml b/pom.xml index 05a8697d..fe061fd1 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ 4.0.0 ingrid-iplug-csw-dsc - 4.0.1-SNAPSHOT + 4.0.0.1-SNAPSHOT jar InGrid iPlug-csw-dsc diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 5521c73a..8f86e5c0 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -28,6 +28,12 @@ Joachim Müller + + + Cannot consume documents with gmd:series element and resulting multiple gmd:fileIdentifier Elements. + + + Updated codelist. diff --git a/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java b/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java index 51f68c4e..9c6e54de 100644 --- a/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java +++ b/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java @@ -1,367 +1,367 @@ -/* +/* * **************************************************- * ingrid-iplug-csw-dsc:war * ================================================== * Copyright (C) 2014 - 2016 wemove digital solutions GmbH * ================================================== - * Licensed under the EUPL, Version 1.1 or – as soon they will be - * approved by the European Commission - subsequent versions of the - * EUPL (the "Licence"); - * - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * - * http://ec.europa.eu/idabc/eupl5 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and + * Licensed under the EUPL, Version 1.1 or – as soon they will be + * approved by the European Commission - subsequent versions of the + * EUPL (the "Licence"); + * + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * + * http://ec.europa.eu/idabc/eupl5 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and * limitations under the Licence. - * **************************************************# - */ -/* - * Copyright (c) 2009 wemove digital solutions. All rights reserved. - */ - -package de.ingrid.iplug.csw.dsc.cache.impl; - -import java.io.StringReader; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - -import org.apache.commons.lang.exception.ExceptionUtils; -import org.apache.commons.logging.Log; -import org.springframework.beans.factory.annotation.Autowired; -import org.w3c.dom.Document; -import org.xml.sax.InputSource; - -import de.ingrid.admin.elasticsearch.StatusProvider; -import de.ingrid.admin.elasticsearch.StatusProvider.Classification; -import de.ingrid.iplug.csw.dsc.CswDscSearchPlug; -import de.ingrid.iplug.csw.dsc.cache.Cache; -import de.ingrid.iplug.csw.dsc.cache.ExecutionContext; -import de.ingrid.iplug.csw.dsc.cache.UpdateStrategy; -import de.ingrid.iplug.csw.dsc.cswclient.CSWClient; -import de.ingrid.iplug.csw.dsc.cswclient.CSWFactory; -import de.ingrid.iplug.csw.dsc.cswclient.CSWQuery; -import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord; -import de.ingrid.iplug.csw.dsc.cswclient.CSWSearchResult; -import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName; -import de.ingrid.iplug.csw.dsc.cswclient.constants.ResultType; -import de.ingrid.iplug.csw.dsc.tools.StringUtils; - -public abstract class AbstractUpdateStrategy implements UpdateStrategy { - - @Autowired - protected StatusProvider statusProvider; - - DocumentBuilder docBuilder = null; - - /** The time in msec the strategy pauses between different requests to the CSW server. */ - int requestPause = 1000; - - /** The default number of records the strategy requests at once during fetching of records. */ - int recordsPerCall = 10; - - - /** - * Set the time in msec the strategy pauses between requests to the CSW server. - * - * @param requestPause the requestPause to set - */ - public void setRequestPause(int requestPause) { - this.requestPause = requestPause; - } - - /** - * Set the number of records the strategy requests at once during fetching of records. - * - * @param recordsPerCall the recordsPerCall to set - */ - public void setRecordsPerCall(int recordsPerCall) { - this.recordsPerCall = recordsPerCall; - } - - - /** - * Create a filter Document from a filter string. Replace any filter - * variables. TODO: if there should be more variables, this could be done - * more generic - * - * @param filterStr - * @return Document - * @throws Exception - */ - protected Document createFilterDocument(String filterStr) throws Exception { - - ExecutionContext context = this.getExecutionContext(); - - if (this.docBuilder == null) { - DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); - docBuilder = docBuilderFactory.newDocumentBuilder(); - } - - // replace last update date variable - Pattern lastUpdateDatePattern = Pattern.compile("\\{LAST_UPDATE_DATE\\}", Pattern.MULTILINE); - Matcher matcher = lastUpdateDatePattern.matcher(filterStr); - if (matcher.find()) { - Date lastUpdateDate = context.getLastExecutionDate(); - SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd"); - filterStr = matcher.replaceAll(df.format(lastUpdateDate)); - } - - return docBuilder.parse(new InputSource(new StringReader(filterStr))); - } - - /** - * Fetch all records that satisfy the given filter using the GetRecords and - * return the ids and put them into the cache - * @note This method guarantees to query the server without a constraint, if the - * provided filter set is empty - * - * @param client The CSWClient to use - * @param elementSetName The ElementSetName of the records to fetch - * @param filterSet The filter set used to select the records - * @param doCache Determines wether to cache the record or not - * @return A list of ids of the fetched records - * @throws Exception - */ - protected List fetchRecords(CSWClient client, ElementSetName elementSetName, - Set filterSet, boolean doCache) throws Exception { - - CSWFactory factory = client.getFactory(); - Log log = this.getLog(); - - // if the filter set is empty, we add a null a least - // this causes execution of the iteration below, but - // but will not add a constraint definition to the request - if (filterSet == null) - filterSet = new HashSet(); - if (filterSet.size() == 0) - filterSet.add(null); - - // variables for complete fetch process - // int numTotal = 0; - List fetchedRecordIds = new CopyOnWriteArrayList(); - - // iterate over all filters - int filterIndex = 1; - for (Document filter : filterSet) { - if (log.isDebugEnabled()) - log.debug("Processing filter "+filterIndex+": "+ - StringUtils.nodeToString(filter).replace("\n", "")+"."); - - // variables for current fetch process (current filter) - int numRecordsTotal = 0; - int numRecordsFetched = 0; - List currentFetchedRecordIds = new ArrayList(); - - // create the query - CSWQuery query = factory.createQuery(); - query.setConstraint(filter); - query.setResultType(ResultType.RESULTS); - query.setElementSetName(elementSetName); - query.setMaxRecords(this.recordsPerCall); - query.setStartPosition(1); - - // do requests - - // do first request - - CSWSearchResult result = client.getRecords(query); - numRecordsFetched += result.getNumberOfRecords(); - numRecordsTotal = result.getNumberOfRecordsTotal(); - if (log.isInfoEnabled()) - log.info(numRecordsTotal+" record(s) from filter "+filterIndex+":"); - - if (numRecordsTotal > 0) { - - if (log.isInfoEnabled()) { - log.info("\nPARAMETERS OF FETCHING PROCESS:" + - "\nrecords per chunk (request): " + recordsPerCall + - "\ngeneral pause between requesting next chunk (msec): " + requestPause + - "\nnum retries per chunk: " + CswDscSearchPlug.conf.numRetriesPerRequest + - "\npause between retries (msec): " + CswDscSearchPlug.conf.timeBetweenRetries + - "\nmax number of lost chunks: " + CswDscSearchPlug.conf.maxNumSkippedRequests); - } - - // process - currentFetchedRecordIds.addAll(processResult(result, doCache)); - - int numSkippedRequests = 0; - String logLostRecordChunks = ""; - int numLostRecords = 0; - while (numRecordsFetched < numRecordsTotal) { - if (CswDscSearchPlug.conf.maxNumSkippedRequests > -1) { - // fetching should end when a maximum number of failures (in a row) is reached. - if (numSkippedRequests > CswDscSearchPlug.conf.maxNumSkippedRequests) { - log.error("Problems fetching records. Total number of skipped requests reached (" + CswDscSearchPlug.conf.maxNumSkippedRequests + - " requests without results). We end fetching process for this filter."); - statusProvider.addState( "ERROR_FETCH", "Error during fetch, since more than " + CswDscSearchPlug.conf.maxNumSkippedRequests + " records have been skipped.", Classification.ERROR ); - break; - } - } - - // generic pause between requests, set via spring - Thread.sleep(this.requestPause); - - String logCurrRecordChunk = ""; - try { - // prepare next request - // Just for safety: get number of last fetched records from last result, if we have a result and records. - int numLastFetch = query.getMaxRecords(); - if (result != null && (result.getNumberOfRecords() > 0)) { - numLastFetch = result.getNumberOfRecords(); - } - numRecordsFetched += numLastFetch; - statusProvider.addState( "FETCH", "Fetching record " + (numRecordsFetched-numLastFetch+1) + "-" + numRecordsFetched + " / " + numRecordsTotal + " from " + client.getFactory().getServiceUrl() ); - - query.setStartPosition(query.getStartPosition() + numLastFetch); - - // for logging below - logCurrRecordChunk = "" + query.getStartPosition() + " - " + (query.getStartPosition() + query.getMaxRecords()); - - // do next request, if problems retry with increasing pause in between - int numRetries = 0; - while (true) { - try { - result = null; - result = client.getRecords(query); - break; - - } catch (Exception e) { - if (numRetries == CswDscSearchPlug.conf.numRetriesPerRequest) { - log.error("Retried " + numRetries + " times ! We skip records " + logCurrRecordChunk, e); - break; - } - - numRetries++; - int timeBetweenRetry = numRetries * CswDscSearchPlug.conf.timeBetweenRetries; - log.error("Error fetching records " + logCurrRecordChunk + ". We retry " + - numRetries + ". time after " + timeBetweenRetry + " msec !", e); - Thread.sleep(timeBetweenRetry); - } - } - - - // process - if (result == null || result.getNumberOfRecords() == 0) { - // no result from this query, we count the failures to check whether fetching process should be ended ! - numSkippedRequests++; - numLostRecords += query.getMaxRecords(); - logLostRecordChunks += logCurrRecordChunk + "\n"; - - } else { - currentFetchedRecordIds.addAll(processResult(result, doCache)); - } - } catch (Exception e) { - statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during processing record: " + logCurrRecordChunk, Classification.ERROR ); - log.error("Error processing records " + logCurrRecordChunk); - log.error( ExceptionUtils.getStackTrace(e) ); - } - } - - if (numLostRecords > 0) { - statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during fetching of record: " + logLostRecordChunks, Classification.ERROR ); - log.error("\nWe had failed GetRecords requests !!!" + - "\nThe following " + numLostRecords + " records were NOT fetched and are \"lost\":" + - "\n" + logLostRecordChunks); - } - } - - // collect record ids - fetchedRecordIds.addAll(currentFetchedRecordIds); - // numTotal += currentFetchedRecordIds.size(); - filterIndex++; - } - return fetchedRecordIds; - } - - /** - * Fetch all records from a id list using the GetRecordById and put them in the cache - * - * @param client The CSWClient to use - * @param elementSetName The ElementSetName of the records to fetch - * @param recordIds The list of ids - * @param requestPause The time between two requests in milliseconds - * @throws Exception - */ - protected void fetchRecords(CSWClient client, ElementSetName elementSetName, - List recordIds, int requestPause) throws Exception { - - CSWFactory factory = client.getFactory(); - Cache cache = this.getExecutionContext().getCache(); - Log log = this.getLog(); - - CSWQuery query = factory.createQuery(); - query.setElementSetName(elementSetName); - - int cnt = 1; - int max = recordIds.size(); - Iterator it = recordIds.iterator(); - while (it.hasNext()) { - String id = it.next(); - query.setId(id); - CSWRecord record = null; - try { - record = client.getRecordById(query); - if (log.isDebugEnabled()) - log.debug("Fetched record: "+id+" "+record.getElementSetName() + " (" + cnt + "/" + max + ")"); - cache.putRecord(record); - } catch (Exception e) { - log.error("Error fetching record '" + query.getId() + "'! Removing record from cache.", e); - cache.removeRecord(query.getId()); - recordIds.remove(id); - } - cnt++; - Thread.sleep(requestPause); - } - } - - /** - * Process a fetched search result (collect ids and cache records) - * - * @param result The search result - * @param doCache Determines wether to cache the record or not - * @return The list of ids of the fetched records - * @throws Exception - */ - private List processResult(CSWSearchResult result, boolean doCache) - throws Exception { - - Cache cache = this.getExecutionContext().getCache(); - Log log = this.getLog(); - - List fetchedRecordIds = new ArrayList(); - for (CSWRecord record : result.getRecordList()) { - String id = record.getId(); - - if (log.isDebugEnabled()) - log.debug("Fetched record: "+id+" "+record.getElementSetName()); - if (fetchedRecordIds.contains(id)) { - log.warn("Duplicated id: "+id+". Overriding previous entry."); - } - fetchedRecordIds.add(id); - - // cache only if requested - if (doCache) - cache.putRecord(record); - } - if (log.isInfoEnabled()) - log.info("Fetched "+fetchedRecordIds.size()+" of "+result.getNumberOfRecordsTotal()+ - " [starting from "+result.getQuery().getStartPosition() + "]"); - return fetchedRecordIds; - } -} + * **************************************************# + */ +/* + * Copyright (c) 2009 wemove digital solutions. All rights reserved. + */ + +package de.ingrid.iplug.csw.dsc.cache.impl; + +import java.io.StringReader; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.commons.logging.Log; +import org.springframework.beans.factory.annotation.Autowired; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; + +import de.ingrid.admin.elasticsearch.StatusProvider; +import de.ingrid.admin.elasticsearch.StatusProvider.Classification; +import de.ingrid.iplug.csw.dsc.CswDscSearchPlug; +import de.ingrid.iplug.csw.dsc.cache.Cache; +import de.ingrid.iplug.csw.dsc.cache.ExecutionContext; +import de.ingrid.iplug.csw.dsc.cache.UpdateStrategy; +import de.ingrid.iplug.csw.dsc.cswclient.CSWClient; +import de.ingrid.iplug.csw.dsc.cswclient.CSWFactory; +import de.ingrid.iplug.csw.dsc.cswclient.CSWQuery; +import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord; +import de.ingrid.iplug.csw.dsc.cswclient.CSWSearchResult; +import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName; +import de.ingrid.iplug.csw.dsc.cswclient.constants.ResultType; +import de.ingrid.iplug.csw.dsc.tools.StringUtils; + +public abstract class AbstractUpdateStrategy implements UpdateStrategy { + + @Autowired + protected StatusProvider statusProvider; + + DocumentBuilder docBuilder = null; + + /** The time in msec the strategy pauses between different requests to the CSW server. */ + int requestPause = 1000; + + /** The default number of records the strategy requests at once during fetching of records. */ + int recordsPerCall = 10; + + + /** + * Set the time in msec the strategy pauses between requests to the CSW server. + * + * @param requestPause the requestPause to set + */ + public void setRequestPause(int requestPause) { + this.requestPause = requestPause; + } + + /** + * Set the number of records the strategy requests at once during fetching of records. + * + * @param recordsPerCall the recordsPerCall to set + */ + public void setRecordsPerCall(int recordsPerCall) { + this.recordsPerCall = recordsPerCall; + } + + + /** + * Create a filter Document from a filter string. Replace any filter + * variables. TODO: if there should be more variables, this could be done + * more generic + * + * @param filterStr + * @return Document + * @throws Exception + */ + protected Document createFilterDocument(String filterStr) throws Exception { + + ExecutionContext context = this.getExecutionContext(); + + if (this.docBuilder == null) { + DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); + docBuilder = docBuilderFactory.newDocumentBuilder(); + } + + // replace last update date variable + Pattern lastUpdateDatePattern = Pattern.compile("\\{LAST_UPDATE_DATE\\}", Pattern.MULTILINE); + Matcher matcher = lastUpdateDatePattern.matcher(filterStr); + if (matcher.find()) { + Date lastUpdateDate = context.getLastExecutionDate(); + SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + filterStr = matcher.replaceAll(df.format(lastUpdateDate)); + } + + return docBuilder.parse(new InputSource(new StringReader(filterStr))); + } + + /** + * Fetch all records that satisfy the given filter using the GetRecords and + * return the ids and put them into the cache + * @note This method guarantees to query the server without a constraint, if the + * provided filter set is empty + * + * @param client The CSWClient to use + * @param elementSetName The ElementSetName of the records to fetch + * @param filterSet The filter set used to select the records + * @param doCache Determines wether to cache the record or not + * @return A list of ids of the fetched records + * @throws Exception + */ + protected List fetchRecords(CSWClient client, ElementSetName elementSetName, + Set filterSet, boolean doCache) throws Exception { + + CSWFactory factory = client.getFactory(); + Log log = this.getLog(); + + // if the filter set is empty, we add a null a least + // this causes execution of the iteration below, but + // but will not add a constraint definition to the request + if (filterSet == null) + filterSet = new HashSet(); + if (filterSet.size() == 0) + filterSet.add(null); + + // variables for complete fetch process + // int numTotal = 0; + List fetchedRecordIds = new CopyOnWriteArrayList(); + + // iterate over all filters + int filterIndex = 1; + for (Document filter : filterSet) { + if (log.isDebugEnabled()) + log.debug("Processing filter "+filterIndex+": "+ + StringUtils.nodeToString(filter).replace("\n", "")+"."); + + // variables for current fetch process (current filter) + int numRecordsTotal = 0; + int numRecordsFetched = 0; + List currentFetchedRecordIds = new ArrayList(); + + // create the query + CSWQuery query = factory.createQuery(); + query.setConstraint(filter); + query.setResultType(ResultType.RESULTS); + query.setElementSetName(elementSetName); + query.setMaxRecords(this.recordsPerCall); + query.setStartPosition(1); + + // do requests + + // do first request + + CSWSearchResult result = client.getRecords(query); + numRecordsFetched += result.getNumberOfRecords(); + numRecordsTotal = result.getNumberOfRecordsTotal(); + if (log.isInfoEnabled()) + log.info(numRecordsTotal+" record(s) from filter "+filterIndex+":"); + + if (numRecordsTotal > 0) { + + if (log.isInfoEnabled()) { + log.info("\nPARAMETERS OF FETCHING PROCESS:" + + "\nrecords per chunk (request): " + recordsPerCall + + "\ngeneral pause between requesting next chunk (msec): " + requestPause + + "\nnum retries per chunk: " + CswDscSearchPlug.conf.numRetriesPerRequest + + "\npause between retries (msec): " + CswDscSearchPlug.conf.timeBetweenRetries + + "\nmax number of lost chunks: " + CswDscSearchPlug.conf.maxNumSkippedRequests); + } + + // process + currentFetchedRecordIds.addAll(processResult(result, doCache)); + + int numSkippedRequests = 0; + String logLostRecordChunks = ""; + int numLostRecords = 0; + while (numRecordsFetched < numRecordsTotal) { + if (CswDscSearchPlug.conf.maxNumSkippedRequests > -1) { + // fetching should end when a maximum number of failures (in a row) is reached. + if (numSkippedRequests > CswDscSearchPlug.conf.maxNumSkippedRequests) { + log.error("Problems fetching records. Total number of skipped requests reached (" + CswDscSearchPlug.conf.maxNumSkippedRequests + + " requests without results). We end fetching process for this filter."); + statusProvider.addState( "ERROR_FETCH", "Error during fetch, since more than " + CswDscSearchPlug.conf.maxNumSkippedRequests + " records have been skipped.", Classification.ERROR ); + break; + } + } + + // generic pause between requests, set via spring + Thread.sleep(this.requestPause); + + String logCurrRecordChunk = ""; + try { + // prepare next request + // Just for safety: get number of last fetched records from last result, if we have a result and records. + int numLastFetch = query.getMaxRecords(); + if (result != null && (result.getNumberOfRecords() > 0)) { + numLastFetch = result.getNumberOfRecords(); + } + numRecordsFetched += numLastFetch; + statusProvider.addState( "FETCH", "Fetching record " + (numRecordsFetched-numLastFetch+1) + "-" + numRecordsFetched + " / " + numRecordsTotal + " from " + client.getFactory().getServiceUrl() ); + + query.setStartPosition(query.getStartPosition() + numLastFetch); + + // for logging below + logCurrRecordChunk = "" + query.getStartPosition() + " - " + (query.getStartPosition() + query.getMaxRecords()); + + // do next request, if problems retry with increasing pause in between + int numRetries = 0; + while (true) { + try { + result = null; + result = client.getRecords(query); + break; + + } catch (Exception e) { + if (numRetries == CswDscSearchPlug.conf.numRetriesPerRequest) { + log.error("Retried " + numRetries + " times ! We skip records " + logCurrRecordChunk, e); + break; + } + + numRetries++; + int timeBetweenRetry = numRetries * CswDscSearchPlug.conf.timeBetweenRetries; + log.error("Error fetching records " + logCurrRecordChunk + ". We retry " + + numRetries + ". time after " + timeBetweenRetry + " msec !", e); + Thread.sleep(timeBetweenRetry); + } + } + + + // process + if (result == null || result.getNumberOfRecords() == 0) { + // no result from this query, we count the failures to check whether fetching process should be ended ! + numSkippedRequests++; + numLostRecords += query.getMaxRecords(); + logLostRecordChunks += logCurrRecordChunk + "\n"; + + } else { + currentFetchedRecordIds.addAll(processResult(result, doCache)); + } + } catch (Exception e) { + statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during processing record: " + logCurrRecordChunk, Classification.ERROR ); + log.error("Error processing records " + logCurrRecordChunk); + log.error( ExceptionUtils.getStackTrace(e) ); + } + } + + if (numLostRecords > 0) { + statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during fetching of record: " + logLostRecordChunks, Classification.ERROR ); + log.error("\nWe had failed GetRecords requests !!!" + + "\nThe following " + numLostRecords + " records were NOT fetched and are \"lost\":" + + "\n" + logLostRecordChunks); + } + } + + // collect record ids + fetchedRecordIds.addAll(currentFetchedRecordIds); + // numTotal += currentFetchedRecordIds.size(); + filterIndex++; + } + return fetchedRecordIds; + } + + /** + * Fetch all records from a id list using the GetRecordById and put them in the cache + * + * @param client The CSWClient to use + * @param elementSetName The ElementSetName of the records to fetch + * @param recordIds The list of ids + * @param requestPause The time between two requests in milliseconds + * @throws Exception + */ + protected void fetchRecords(CSWClient client, ElementSetName elementSetName, + List recordIds, int requestPause) throws Exception { + + CSWFactory factory = client.getFactory(); + Cache cache = this.getExecutionContext().getCache(); + Log log = this.getLog(); + + CSWQuery query = factory.createQuery(); + query.setElementSetName(elementSetName); + + int cnt = 1; + int max = recordIds.size(); + Iterator it = recordIds.iterator(); + while (it.hasNext()) { + String id = it.next(); + query.setId(id); + CSWRecord record = null; + try { + record = client.getRecordById(query); + if (log.isDebugEnabled()) + log.debug("Fetched record: "+id+" "+record.getElementSetName() + " (" + cnt + "/" + max + ")"); + cache.putRecord(record); + } catch (Exception e) { + log.error("Error fetching record '" + query.getId() + "'! Removing record from cache.", e); + cache.removeRecord(query.getId()); + recordIds.remove(id); + } + cnt++; + Thread.sleep(requestPause); + } + } + + /** + * Process a fetched search result (collect ids and cache records) + * + * @param result The search result + * @param doCache Determines wether to cache the record or not + * @return The list of ids of the fetched records + * @throws Exception + */ + private List processResult(CSWSearchResult result, boolean doCache) + throws Exception { + + Cache cache = this.getExecutionContext().getCache(); + Log log = this.getLog(); + + List fetchedRecordIds = new ArrayList(); + for (CSWRecord record : result.getRecordList()) { + String id = record.getId(); + + if (log.isInfoEnabled()) + log.info("Fetched record: "+id+" "+record.getElementSetName()); + if (fetchedRecordIds.contains(id)) { + log.warn("Duplicated id: "+id+". Overriding previous entry."); + } + fetchedRecordIds.add(id); + + // cache only if requested + if (doCache) + cache.putRecord(record); + } + if (log.isInfoEnabled()) + log.info("Fetched "+fetchedRecordIds.size()+" of "+result.getNumberOfRecordsTotal()+ + " [starting from "+result.getQuery().getStartPosition() + "]"); + return fetchedRecordIds; + } +} diff --git a/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java b/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java index f2ae1c7b..921de935 100644 --- a/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java +++ b/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java @@ -1,108 +1,108 @@ -/* +/* * **************************************************- * ingrid-iplug-csw-dsc:war * ================================================== * Copyright (C) 2014 - 2016 wemove digital solutions GmbH * ================================================== - * Licensed under the EUPL, Version 1.1 or – as soon they will be - * approved by the European Commission - subsequent versions of the - * EUPL (the "Licence"); - * - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * - * http://ec.europa.eu/idabc/eupl5 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and + * Licensed under the EUPL, Version 1.1 or – as soon they will be + * approved by the European Commission - subsequent versions of the + * EUPL (the "Licence"); + * + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * + * http://ec.europa.eu/idabc/eupl5 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and * limitations under the Licence. - * **************************************************# - */ -/* - * Copyright (c) 2008 wemove digital solutions. All rights reserved. - */ - -package de.ingrid.iplug.csw.dsc.cswclient.impl; - -import org.w3c.dom.Comment; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord; -import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName; -import de.ingrid.iplug.csw.dsc.tools.NodeUtils; -import de.ingrid.iplug.csw.dsc.tools.StringUtils; -import de.ingrid.utils.xml.IDFNamespaceContext; -import de.ingrid.utils.xpath.XPathUtils; - -public class GenericRecord implements CSWRecord { - - final private XPathUtils xPathUtils = new XPathUtils(new IDFNamespaceContext()); - - protected String id = null; - protected ElementSetName elementSetName = null; - protected Node node = null; - - /** - * Initializes the record. The node will be detached (cloned) from it's - * owner document. - * - * @param elementSetName - * The {@link ElementSetName} of this record. - * @param node - * The DOM Node describing the record. The node will be detached - * (cloned). - * - * @see de.ingrid.iplug.csw.dsc.cswclient.CSWRecord#initialize(de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName, - * org.w3c.dom.Node) - */ - @Override - public void initialize(ElementSetName elementSetName, Node node) throws Exception { - // detach node from whole document inkl. all namespace definitions - while (node instanceof Comment) { - node = node.getNextSibling(); - } - this.node = NodeUtils.detachWithNameSpaces(node); - this.elementSetName = elementSetName; - - // get the record id - NodeList idNodes = xPathUtils - .getNodeList(this.node, "//gmd:fileIdentifier/gco:CharacterString"); - if (idNodes == null || idNodes.item(0) == null) - throw new RuntimeException( - "CSWRecord does not contain an id (looking for //gmd:fileIdentifier/gco:CharacterString):\n" - + StringUtils.nodeToString(this.node)); - if (idNodes.getLength() > 1) - throw new RuntimeException( - "CSWRecord contains more than one id (looking for //gmd:fileIdentifier/gco:CharacterString):\n" - + StringUtils.nodeToString(this.node)); - - this.id = idNodes.item(0).getTextContent().trim(); - } - - @Override - public String getId() { - if (this.id != null) { - return this.id; - } else - throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize."); - } - - @Override - public ElementSetName getElementSetName() { - if (this.elementSetName != null) { - return this.elementSetName; - } else - throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize."); - } - - @Override - public Node getOriginalResponse() { - if (this.node != null) { - return this.node; - } else - throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize."); - } -} + * **************************************************# + */ +/* + * Copyright (c) 2008 wemove digital solutions. All rights reserved. + */ + +package de.ingrid.iplug.csw.dsc.cswclient.impl; + +import org.w3c.dom.Comment; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord; +import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName; +import de.ingrid.iplug.csw.dsc.tools.NodeUtils; +import de.ingrid.iplug.csw.dsc.tools.StringUtils; +import de.ingrid.utils.xml.IDFNamespaceContext; +import de.ingrid.utils.xpath.XPathUtils; + +public class GenericRecord implements CSWRecord { + + final private XPathUtils xPathUtils = new XPathUtils(new IDFNamespaceContext()); + + protected String id = null; + protected ElementSetName elementSetName = null; + protected Node node = null; + + /** + * Initializes the record. The node will be detached (cloned) from it's + * owner document. + * + * @param elementSetName + * The {@link ElementSetName} of this record. + * @param node + * The DOM Node describing the record. The node will be detached + * (cloned). + * + * @see de.ingrid.iplug.csw.dsc.cswclient.CSWRecord#initialize(de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName, + * org.w3c.dom.Node) + */ + @Override + public void initialize(ElementSetName elementSetName, Node node) throws Exception { + // detach node from whole document inkl. all namespace definitions + while (node instanceof Comment) { + node = node.getNextSibling(); + } + this.node = NodeUtils.detachWithNameSpaces(node); + this.elementSetName = elementSetName; + + // get the record id + NodeList idNodes = xPathUtils + .getNodeList(this.node, "/gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString | /idf:html/idf:body/idf:idfMdMetadata/gmd:fileIdentifier/gco:CharacterString"); + if (idNodes == null || idNodes.item(0) == null) + throw new RuntimeException( + "CSWRecord does not contain an id (looking for /gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString | /idf:html/idf:body/idf:idfMdMetadata/gmd:fileIdentifier/gco:CharacterString):\n" + + StringUtils.nodeToString(this.node)); + if (idNodes.getLength() > 1) + throw new RuntimeException( + "CSWRecord contains more than one id (looking for /gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString | /idf:html/idf:body/idf:idfMdMetadata/gmd:fileIdentifier/gco:CharacterString):\n" + + StringUtils.nodeToString(this.node)); + + this.id = idNodes.item(0).getTextContent().trim(); + } + + @Override + public String getId() { + if (this.id != null) { + return this.id; + } else + throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize."); + } + + @Override + public ElementSetName getElementSetName() { + if (this.elementSetName != null) { + return this.elementSetName; + } else + throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize."); + } + + @Override + public Node getOriginalResponse() { + if (this.node != null) { + return this.node; + } else + throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize."); + } +} diff --git a/src/main/resources/mapping/idf_to_lucene.js b/src/main/resources/mapping/idf_to_lucene.js index 32f429d8..cbf337c2 100644 --- a/src/main/resources/mapping/idf_to_lucene.js +++ b/src/main/resources/mapping/idf_to_lucene.js @@ -1,1008 +1,1008 @@ -/* +/* * **************************************************- * ingrid-iplug-csw-dsc:war * ================================================== * Copyright (C) 2014 - 2016 wemove digital solutions GmbH * ================================================== - * Licensed under the EUPL, Version 1.1 or – as soon they will be - * approved by the European Commission - subsequent versions of the - * EUPL (the "Licence"); - * - * You may not use this work except in compliance with the Licence. - * You may obtain a copy of the Licence at: - * - * http://ec.europa.eu/idabc/eupl5 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Licence is distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Licence for the specific language governing permissions and + * Licensed under the EUPL, Version 1.1 or – as soon they will be + * approved by the European Commission - subsequent versions of the + * EUPL (the "Licence"); + * + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * + * http://ec.europa.eu/idabc/eupl5 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and * limitations under the Licence. - * **************************************************# - */ -/** - * CSW 2.0.2 AP ISO 1.0 Record (full) to Lucene Document mapping according to mapping IGC 1.0.3 - * Copyright (c) 2008 wemove digital solutions. All rights reserved. - * - * The following global variable are passed from the application: - * - * @param cswRecord A CSWRecord instance, that defines the input - * @param document A lucene Document instance, that defines the output - * @param log A Log instance - * - */ -if (javaVersion.indexOf( "1.8" ) === 0) { - load("nashorn:mozilla_compat.js"); -} - -importPackage(Packages.de.ingrid.iplug.csw.dsc.tools); -importPackage(Packages.de.ingrid.iplug.csw.dsc.index); -importPackage(Packages.de.ingrid.utils.udk); -importPackage(Packages.org.w3c.dom); - -//constant to punish the rank of a service/data object, which has no coupled resource -var BOOST_NO_COUPLED_RESOURCE = 0.9; -//constant to boost the rank of a service/data object, which has at least one coupled resource -var BOOST_HAS_COUPLED_RESOURCE = 1.0; - - -if (log.isDebugEnabled()) { - log.debug("Mapping csw record "+cswRecord.getId()+" to lucene document"); -} - -// get the xml content of the record -var recordNode = cswRecord.getOriginalResponse(); - -// define one-to-one mappings -/** each entry consists off the following possible values: - - indexField: The name of the field in the index the data will be put into. - xpath: The xpath expression for the data in the XML input file. Multiple xpath - results will be put in the same index field. - transform: The transformation to be executed on the value - funct: The transformation function to use. - params: The parameters for the transformation function additional to the value - from the xpath expression that is always the first parameter. - execute: The function to be executed. No xpath value is obtained. Instead the recordNode of the - source XML is put as default parameter to the function. All other parameters are ignored. - funct: The function to execute. - params: The parameters for the function additional to the recordNode - that is always the first parameter. - tokenized: If set to false no tokenizing will take place before the value is put into the index. - additionalTokenize: constant specifying additional method to tokenize value and write tokenized value to index -*/ -var transformationDescriptions = [ - { "indexField":"t01_object.obj_id", - "tokenized":true, - "xpath":"//gmd:fileIdentifier/gco:CharacterString" - }, - { "indexField":"title", - "tokenized":true, - "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString" - }, - { "indexField":"t01_object.org_obj_id", - "tokenized":true, - "xpath":"//gmd:fileIdentifier/gco:CharacterString" - }, - { "indexField":"summary", - "xpath":"//gmd:identificationInfo//gmd:abstract/gco:CharacterString" - }, - { "indexField":"t01_object.info_note", - "xpath":"//gmd:identificationInfo//gmd:purpose/gco:CharacterString" - }, - { "indexField":"t01_object.loc_descr", - "xpath":"//gmd:identificationInfo//gmd:EX_Extent/gmd:description/gco:CharacterString" - }, - { "indexField":"t01_object.dataset_alternate_name", - "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:alternateTitle/gco:CharacterString" - }, - { "indexField":"t01_object.time_status", - "xpath":"//gmd:identificationInfo//gmd:status/gmd:MD_ProgressCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[523] - } - }, - { "execute":{ - "funct":mapObjectClass, - "params":[recordNode] - } - }, - { "indexField":"t01_object.dataset_character_set", - "xpath":"//gmd:identificationInfo//gmd:characterSet/gmd:MD_CharacterSetCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[510] - } - }, - { "indexField":"t01_object.dataset_usage", - "xpath":"//gmd:identificationInfo//gmd:resourceSpecificUsage/gmd:MD_Usage/gmd:specificUsage/gco:CharacterString" - }, - { "indexField":"t01_object.data_language_code", - "xpath":"//gmd:identificationInfo//gmd:language/gco:CharacterString", - "transform":{ - "funct":transformISO639_2ToISO639_1 - } - }, - { "indexField":"t01_object.metadata_character_set", - "xpath":"//gmd:characterSet/gmd:MD_CharacterSetCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[510] - } - }, - { "indexField":"t01_object.metadata_standard_name", - "xpath":"//gmd:metadataStandardName/gco:CharacterString" - }, - { "indexField":"t01_object.metadata_standard_version", - "xpath":"//gmd:metadataStandardVersion/gco:CharacterString" - }, - { "indexField":"t01_object.metadata_language_code", - "xpath":"//gmd:language/gco:CharacterString", - "transform":{ - "funct":transformISO639_2ToISO639_1 - } - }, - { "indexField":"t01_object.vertical_extent_minimum", - "xpath":"//gmd:identificationInfo//gmd:extent/gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:minimumValue/gco:Real" - }, - { "indexField":"t01_object.vertical_extent_maximum", - "xpath":"//gmd:identificationInfo//gmd:extent/gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:maximumValue/gco:Real" - }, - { "indexField":"t01_object.vertical_extent_unit", - "xpath":"//gmd:identificationInfo//gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:verticalCRS/gmd:verticalCRS/gml:verticalCS/gml:VerticalCS/gml:axis/gml:CoordinateSystemAxis/@uom", - "transform":{ - "funct":transformToIgcDomainId, - "params":[102] - } - }, - { "indexField":"t01_object.vertical_extent_vdatum", - "xpath":"//gmd:identificationInfo//gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:verticalCRS/gml:verticalCRS/gml:verticalDatum/gml:VerticalDatum/gml:identifier", - "transform":{ - "funct":transformToIgcDomainId, - "params":[101] - } - }, - { "indexField":"t01_object.ordering_instructions", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributor/gmd:MD_Distributor/gmd:distributionOrderProcess/gmd:MD_StandardOrderProcess/gmd:orderingInstructions/gco:CharacterString" - }, - { "indexField":"t01_object.mod_time", - "xpath":"//gmd:dateStamp/gco:DateTime | //gmd:dateStamp/gco:Date[not(../gco:DateTime)]", - "transform":{ - "funct":UtilsCSWDate.mapDateFromIso8601ToIndex - } - }, - // object_access - { "indexField":"object_access.restriction_key", - "xpath":"//gmd:identificationInfo//gmd:resourceConstraints//gmd:otherConstraints/gco:CharacterString", - "transform":{ - "funct":transformToIgcDomainId, - "params":[6010] - } - }, - { "indexField":"object_access.restriction_value", - "xpath":"//gmd:identificationInfo//gmd:resourceConstraints//gmd:otherConstraints/gco:CharacterString" - }, - { "indexField":"object_access.terms_of_use", - "xpath":"//gmd:identificationInfo//gmd:resourceConstraints//gmd:useLimitation/gco:CharacterString" - }, - // t0110_avail_format - { "indexField":"t0110_avail_format.name", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:name/gco:CharacterString" - }, - { "indexField":"t0110_avail_format.version", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:version/gco:CharacterString" - }, - { "indexField":"t0110_avail_format.file_decompression_technique", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:fileDecompressionTechnique/gco:CharacterString" - }, - { "indexField":"t0110_avail_format.specification", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:specification/gco:CharacterString" - }, - // t0113_dataset_reference - { "indexField":"t0113_dataset_reference.reference_date", - "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date", - "transform":{ - "funct":UtilsCSWDate.mapDateFromIso8601ToIndex - } - }, - { "indexField":"t0113_dataset_reference.type", - "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:dateType/gmd:CI_DateTypeCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[502] - } - }, - // t011_obj_serv - { "indexField":"t011_obj_serv.type", - "xpath":"//gmd:identificationInfo//srv:serviceType/gco:LocalName" - }, - { "indexField":"t011_obj_serv.history", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:processStep/gmd:LI_ProcessStep/gmd:description/gco:CharacterString" - }, - { "indexField":"t011_obj_serv.base", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/source/LI_Source/gmd:description/gco:CharacterString" - }, - // t011_obj_serv_op_connpoint - { "indexField":"t011_obj_serv_op_connpoint.connect_point", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:connectPoint/gmd:CI_OnlineResource/gmd:linkage/gmd:URL", - "additionalTokenize":"SPLIT_URL" - }, - // t011_obj_serv_op_depends - { "indexField":"t011_obj_serv_op_depends.depends_on", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:dependsOn/srv:SV_OperationMetadata/srv:operationName/gco:CharacterString" - }, - // t011_obj_serv_op_para - { "indexField":"t011_obj_serv_op_para.name", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/srv:name" - }, - { "indexField":"t011_obj_serv_op_para.direction", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/direction/SV_ParameterDirection" - }, - { "indexField":"t011_obj_serv_op_para.descr", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/gmd:description/gco:CharacterString" - }, - { "indexField":"t011_obj_serv_op_para.optional", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/srv:optionality/gco:CharacterString", - "transform":{ - "funct":transformGeneric, - "params":[{"optional":"1", "mandatory":"0"}, false] - } - }, - { "indexField":"t011_obj_serv_op_para.repeatability", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/srv:repeatability/gco:Boolean", - "transform":{ - "funct":transformGeneric, - "params":[{"true":"1", "false":"0"}, false] - } - }, - // t011_obj_serv_op_platform - { "indexField":"t011_obj_serv_op_platform.platform", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:DCP/srv:DCPList/@codeListValue" - }, - // t011_obj_serv_operation - { "indexField":"t011_obj_serv_operation.name", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:operationName/gco:CharacterString" - }, - { "indexField":"t011_obj_serv_operation.descr", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:operationDescription/gco:CharacterString" - }, - { "indexField":"t011_obj_serv_operation.invocation_name", - "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:invocationName/gco:CharacterString" - }, - // t011_obj_serv_version - { "indexField":"t011_obj_serv_version.serv_version", - "xpath":"//gmd:identificationInfo//srv:serviceTypeVersion/gco:CharacterString" - }, - // t011_obj_topic_cat - { "indexField":"t011_obj_topic_cat.topic_category", - "xpath":"//gmd:identificationInfo//gmd:topicCategory/gmd:MD_TopicCategoryCode", - "transform":{ - "funct":transformToIgcDomainId, - "params":[527] - } - }, - // t011_obj_geo - { "indexField":"t011_obj_geo.special_base", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:statement/gco:CharacterString" - }, - { "indexField":"t011_obj_geo.data_base", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:source/gmd:LI_Source/gmd:description/gco:CharacterString" - }, - { "indexField":"t011_obj_geo.method", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:processStep/gmd:LI_ProcessStep/gmd:description/gco:CharacterString" - }, - { "execute":{ - "funct":mapReferenceSystemInfo - } - }, - { "indexField":"t011_obj_geo.rec_exact", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/report/gmd:DQ_RelativeInternalPositionalAccuracy/gmd:DQ_QuantitativeResult/gmd:value/gco:Record" - }, - { "indexField":"t011_obj_geo.rec_grade", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/report/DQ_CompletenessCommission/gmd:DQ_QuantitativeResult/gmd:value/gco:Record" - }, - { "indexField":"t011_obj_geo.hierarchy_level", - "xpath":"//gmd:hierarchyLevel/gmd:MD_ScopeCode/@codeListValue", - "transform":{ - "funct":transformGeneric, - "params":[{"dataset":"5", "series":"6"}, false] - } - }, - { "indexField":"t011_obj_geo.vector_topology_level", - "xpath":"//gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/gmd:topologyLevel/gmd:MD_TopologyLevelCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[528] - } - }, - { "indexField":"t011_obj_geo.pos_accuracy_vertical", - "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_RelativeInternalPositionalAccuracy[gmd:measureDescription/gco:CharacterString='vertical']/gmd:DQ_QuantitativeResult/gmd:value/gmd:Record" - }, - { "indexField":"t011_obj_geo.keyc_incl_w_dataset", - "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:includedWithDataset/gco:Boolean", - "transform":{ - "funct":transformGeneric, - "params":[{"true":"1", "false":"0"}, false] - } - }, - // accept RS_Indentifier and MD_Identifier with xpath: "...identifier//code..." - { "indexField":"t011_obj_geo.datasource_uuid", - "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:identifier//gmd:code/gco:CharacterString" - }, - // t011_obj_geo_keyc - { "indexField":"t011_obj_geo_keyc.subject_cat", - "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:title/gco:CharacterString" - }, - { "indexField":"t011_obj_geo_keyc.key_date", - "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date", - "transform":{ - "funct":UtilsCSWDate.mapDateFromIso8601ToIndex - } - }, - { "indexField":"t011_obj_geo_keyc.edition", - "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:edition/gco:CharacterString" - }, - // t011_obj_geo_scale - { "indexField":"t011_obj_geo_scale.scale", - "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer" - }, - { "indexField":"t011_obj_geo_scale.resolution_ground", - "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gmd:Distance[@uom='meter']" - }, - { "indexField":"t011_obj_geo_scale.resolution_scan", - "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gmd:Distance[@uom='dpi']" - }, - // t011_obj_geo_spatial_rep - { "indexField":"t011_obj_geo_spatial_rep.type", - "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialRepresentationType/MD_SpatialRepresentationTypeCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[526] - } - }, - // t011_obj_geo_supplinfo - { "indexField":"t011_obj_geo_supplinfo.feature_type", - "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureTypes/gco:LocalName" - }, - // t011_obj_geo_symc - { "indexField":"t011_obj_geo_symc.symbol_cat", - "xpath":"//gmd:portrayalCatalogueInfo/gmd:MD_PortrayalCatalogueReference/gmd:portrayalCatalogueCitation/gmd:CI_Citation/gmd:title/gco:CharacterString" - }, - { "indexField":"t011_obj_geo_symc.symbol_date", - "xpath":"//gmd:portrayalCatalogueInfo/gmd:MD_PortrayalCatalogueReference/gmd:portrayalCatalogueCitation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date", - "transform":{ - "funct":UtilsCSWDate.mapDateFromIso8601ToIndex - } - }, - { "indexField":"t011_obj_geo_symc.edition", - "xpath":"//gmd:portrayalCatalogueInfo/gmd:MD_PortrayalCatalogueReference/gmd:portrayalCatalogueCitation/gmd:CI_Citation /gco:CharacterString" - }, - // t011_obj_geo_vector - { "indexField":"t011_obj_geo_vector.geometric_object_type", - "xpath":"//gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/gmd:geometricObjects/gmd:MD_GeometricObjects/gmd:geometricObjectType/gmd:MD_GeometricObjectTypeCode/@codeListValue", - "transform":{ - "funct":transformToIgcDomainId, - "params":[515] - } - }, - { "indexField":"t011_obj_geo_vector.geometric_object_count", - "xpath":"//gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/gmd:geometricObjects/gmd:MD_GeometricObjects/gmd:geometricObjectCount/gco:Integer" - }, - // t017_url_ref - { "indexField":"t017_url_ref.url_link", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/gmd:linkage/gmd:URL" - }, - { "indexField":"t017_url_ref.content", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/name/gco:CharacterString" - }, - { "indexField":"t017_url_ref.descr", - "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/gmd:description/gco:CharacterString" - }, - // add MD_BrowseGraphic as additional html - { "indexField":"additional_html_1", - "xpath":"//gmd:identificationInfo//gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileName/gco:CharacterString", - "transform":{ - "funct":transformToPreviewGraphic - } - }, - { "indexField":"t017_url_ref.content", - "xpath":"//gmd:identificationInfo//gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileDescription/gco:CharacterString" - }, - // object_references - { "execute":{ - "funct":mapReferences, - "params":[recordNode] - } - }, - // keywords - { "execute":{ - "funct":mapKeywords, - "params":[recordNode] - } - }, - // geographic elements - { "execute":{ - "funct":mapGeographicElements, - "params":[recordNode] - } - }, - // time constraints - { "execute":{ - "funct":addTimeConstraints, - "params":[recordNode] - } - }, - // resource maintenance - { "execute":{ - "funct":addResourceMaintenance, - "params":[recordNode] - } - }, - // addresses - { "execute":{ - "funct":mapAddresses, - "params":[recordNode] - } - }, - { "execute":{ - "funct":addCoupledServices, - "params":[recordNode] - } - } - ]; - -document.put( "datatype", "default" ); - -// iterate over all transformation descriptions -var value; -for (var i in transformationDescriptions) { - var t = transformationDescriptions[i]; - - // check for execution (special function) - if (hasValue(t.execute)) { - if (log.isDebugEnabled()) { - log.debug("Execute function: " + t.execute.funct.name) - } - call_f(t.execute.funct, t.execute.params) - } else { - if (log.isDebugEnabled()) { - log.debug("Working on " + t.indexField) - } - var tokenized = true; - // iterate over all xpath results - var nodeList = XPathUtils.getNodeList(recordNode, t.xpath); - if (nodeList && nodeList.getLength() > 0) { - for (j=0; j"; - return previewImageHtmlTag; - } - return ""; -} - - -function addResourceMaintenance() { - var maintenanceFrequencyCode = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue") - if (hasValue(maintenanceFrequencyCode)) { - // transform to IGC domain id - var idcCode = codelistService.getCodeListEntryId("518", maintenanceFrequencyCode, "iso"); - if (hasValue(idcCode)) { - addToDoc("t01_object.time_period", idcCode, false); - addToDoc("t01_object.time_descr", XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString"), true); - var periodDuration = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:userDefinedMaintenanceFrequency/gmd:TM_PeriodDuration"); - addToDoc("t01_object.time_interval", new TM_PeriodDurationToTimeInterval().parse(periodDuration), false); - addToDoc("t01_object.time_alle", new TM_PeriodDurationToTimeAlle().parse(periodDuration), false); - } else { - if (log.isDebugEnabled()) { - log.debug("MD_MaintenanceFrequencyCode '" + maintenanceFrequencyCode + "' unknown.") - } - } - } -} - -/* - * Set the boundaries of dates to values that can be compared with lucene. The - * value of inifinite pas is '00000000' and the value for inifinit future is '99999999'. - * - * Makes sure that the fields are only set, if we have a UDK date type of 'seit' or 'bis'. - * We can do this because the mapping filters and maps the dates to t0 in case of date type - * 'am' and to t1 in case of 'seit', even if the database fields are the same. Thus we do not - * need to look at the DB field time_type which controls the date - * type ('am', 'seit', 'bis', 'von (von-bis)') - * - */ -function addTimeConstraints() { - var t1 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition")); - var t2 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition")); - var timeType; - if (hasValue(t1) && hasValue(t2)) { - if (t1 == t2) { - addToDoc("t01_object.time_type", "am", false); - addToDoc("t0", t1, false); - } else { - addToDoc("t01_object.time_type", "von", false); - addToDoc("t1", t1, false); - addToDoc("t2", t2, false); - } - } else if (hasValue(t1) && !hasValue(t2)) { - addToDoc("t01_object.time_type", "seit", false); - addToDoc("t1", t1, false); - addToDoc("t2", "99999999", false); - } else if (!hasValue(t1) && hasValue(t2)) { - addToDoc("t01_object.time_type", "bis", false); - addToDoc("t1", "00000000", false); - addToDoc("t2", t2, false); - } -} - -function mapObjectClass() { - var hierarchyLevel = XPathUtils.getString(recordNode, "//gmd:hierarchyLevel/gmd:MD_ScopeCode/@codeListValue"); - var hierarchyLevelName = XPathUtils.getString(recordNode, "//gmd:hierarchyLevelName/gco:CharacterString"); - var objectClass = "1"; - if (hasValue(hierarchyLevel)) { - if (hierarchyLevel.toLowerCase() == "service") { - // "Geodatendienst" - objectClass = "3"; - } else if (hierarchyLevel.toLowerCase() == "application") { - // "Dienst / Anwendung / Informationssystem" - objectClass = "6"; - } else if (hierarchyLevel.toLowerCase() == "nongeographicdataset") { - if (hasValue(hierarchyLevelName)) { - if (hierarchyLevelName == "job") { - // "Organisation/Fachaufgabe" - objectClass = "0"; - } else if (hierarchyLevelName == "document") { - objectClass = "2"; - } else if (hierarchyLevelName == "project") { - objectClass = "4"; - } else if (hierarchyLevelName == "database") { - objectClass = "5"; - } - } - } - } - addToDoc("t01_object.obj_class", objectClass, false); -} - -function addCoupledServices() { - var crossReferences = XPathUtils.getNodeList(recordNode, "//idf:crossReference[./idf:objectType=3]"); - if (hasValue(crossReferences)) { - for (i=0; i 0) { + for (j=0; j"; + return previewImageHtmlTag; + } + return ""; +} + + +function addResourceMaintenance() { + var maintenanceFrequencyCode = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue") + if (hasValue(maintenanceFrequencyCode)) { + // transform to IGC domain id + var idcCode = codelistService.getCodeListEntryId("518", maintenanceFrequencyCode, "iso"); + if (hasValue(idcCode)) { + addToDoc("t01_object.time_period", idcCode, false); + addToDoc("t01_object.time_descr", XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString"), true); + var periodDuration = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:userDefinedMaintenanceFrequency/gmd:TM_PeriodDuration"); + addToDoc("t01_object.time_interval", new TM_PeriodDurationToTimeInterval().parse(periodDuration), false); + addToDoc("t01_object.time_alle", new TM_PeriodDurationToTimeAlle().parse(periodDuration), false); + } else { + if (log.isDebugEnabled()) { + log.debug("MD_MaintenanceFrequencyCode '" + maintenanceFrequencyCode + "' unknown.") + } + } + } +} + +/* + * Set the boundaries of dates to values that can be compared with lucene. The + * value of inifinite pas is '00000000' and the value for inifinit future is '99999999'. + * + * Makes sure that the fields are only set, if we have a UDK date type of 'seit' or 'bis'. + * We can do this because the mapping filters and maps the dates to t0 in case of date type + * 'am' and to t1 in case of 'seit', even if the database fields are the same. Thus we do not + * need to look at the DB field time_type which controls the date + * type ('am', 'seit', 'bis', 'von (von-bis)') + * + */ +function addTimeConstraints() { + var t1 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition")); + var t2 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition")); + var timeType; + if (hasValue(t1) && hasValue(t2)) { + if (t1 == t2) { + addToDoc("t01_object.time_type", "am", false); + addToDoc("t0", t1, false); + } else { + addToDoc("t01_object.time_type", "von", false); + addToDoc("t1", t1, false); + addToDoc("t2", t2, false); + } + } else if (hasValue(t1) && !hasValue(t2)) { + addToDoc("t01_object.time_type", "seit", false); + addToDoc("t1", t1, false); + addToDoc("t2", "99999999", false); + } else if (!hasValue(t1) && hasValue(t2)) { + addToDoc("t01_object.time_type", "bis", false); + addToDoc("t1", "00000000", false); + addToDoc("t2", t2, false); + } +} + +function mapObjectClass() { + var hierarchyLevel = XPathUtils.getString(recordNode, "//gmd:hierarchyLevel/gmd:MD_ScopeCode/@codeListValue"); + var hierarchyLevelName = XPathUtils.getString(recordNode, "//gmd:hierarchyLevelName/gco:CharacterString"); + var objectClass = "1"; + if (hasValue(hierarchyLevel)) { + if (hierarchyLevel.toLowerCase() == "service") { + // "Geodatendienst" + objectClass = "3"; + } else if (hierarchyLevel.toLowerCase() == "application") { + // "Dienst / Anwendung / Informationssystem" + objectClass = "6"; + } else if (hierarchyLevel.toLowerCase() == "nongeographicdataset") { + if (hasValue(hierarchyLevelName)) { + if (hierarchyLevelName == "job") { + // "Organisation/Fachaufgabe" + objectClass = "0"; + } else if (hierarchyLevelName == "document") { + objectClass = "2"; + } else if (hierarchyLevelName == "project") { + objectClass = "4"; + } else if (hierarchyLevelName == "database") { + objectClass = "5"; + } + } + } + } + addToDoc("t01_object.obj_class", objectClass, false); +} + +function addCoupledServices() { + var crossReferences = XPathUtils.getNodeList(recordNode, "//idf:crossReference[./idf:objectType=3]"); + if (hasValue(crossReferences)) { + for (i=0; i