diff --git a/pom.xml b/pom.xml
index 05a8697d..fe061fd1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
4.0.0
ingrid-iplug-csw-dsc
- 4.0.1-SNAPSHOT
+ 4.0.0.1-SNAPSHOT
jar
InGrid iPlug-csw-dsc
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 5521c73a..8f86e5c0 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -28,6 +28,12 @@
Joachim Müller
+
+
+ Cannot consume documents that contain a gmd:series element and therefore carry multiple gmd:fileIdentifier elements.
+
+
+
Updated codelist.
diff --git a/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java b/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java
index 51f68c4e..9c6e54de 100644
--- a/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java
+++ b/src/main/java/de/ingrid/iplug/csw/dsc/cache/impl/AbstractUpdateStrategy.java
@@ -1,367 +1,367 @@
-/*
+/*
* **************************************************-
* ingrid-iplug-csw-dsc:war
* ==================================================
* Copyright (C) 2014 - 2016 wemove digital solutions GmbH
* ==================================================
- * Licensed under the EUPL, Version 1.1 or – as soon they will be
- * approved by the European Commission - subsequent versions of the
- * EUPL (the "Licence");
- *
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- *
- * http://ec.europa.eu/idabc/eupl5
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
+ * Licensed under the EUPL, Version 1.1 or – as soon they will be
+ * approved by the European Commission - subsequent versions of the
+ * EUPL (the "Licence");
+ *
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ *
+ * http://ec.europa.eu/idabc/eupl5
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
* limitations under the Licence.
- * **************************************************#
- */
-/*
- * Copyright (c) 2009 wemove digital solutions. All rights reserved.
- */
-
-package de.ingrid.iplug.csw.dsc.cache.impl;
-
-import java.io.StringReader;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
-import org.apache.commons.lang.exception.ExceptionUtils;
-import org.apache.commons.logging.Log;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.w3c.dom.Document;
-import org.xml.sax.InputSource;
-
-import de.ingrid.admin.elasticsearch.StatusProvider;
-import de.ingrid.admin.elasticsearch.StatusProvider.Classification;
-import de.ingrid.iplug.csw.dsc.CswDscSearchPlug;
-import de.ingrid.iplug.csw.dsc.cache.Cache;
-import de.ingrid.iplug.csw.dsc.cache.ExecutionContext;
-import de.ingrid.iplug.csw.dsc.cache.UpdateStrategy;
-import de.ingrid.iplug.csw.dsc.cswclient.CSWClient;
-import de.ingrid.iplug.csw.dsc.cswclient.CSWFactory;
-import de.ingrid.iplug.csw.dsc.cswclient.CSWQuery;
-import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord;
-import de.ingrid.iplug.csw.dsc.cswclient.CSWSearchResult;
-import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName;
-import de.ingrid.iplug.csw.dsc.cswclient.constants.ResultType;
-import de.ingrid.iplug.csw.dsc.tools.StringUtils;
-
-public abstract class AbstractUpdateStrategy implements UpdateStrategy {
-
- @Autowired
- protected StatusProvider statusProvider;
-
- DocumentBuilder docBuilder = null;
-
- /** The time in msec the strategy pauses between different requests to the CSW server. */
- int requestPause = 1000;
-
- /** The default number of records the strategy requests at once during fetching of records. */
- int recordsPerCall = 10;
-
-
- /**
- * Set the time in msec the strategy pauses between requests to the CSW server.
- *
- * @param requestPause the requestPause to set
- */
- public void setRequestPause(int requestPause) {
- this.requestPause = requestPause;
- }
-
- /**
- * Set the number of records the strategy requests at once during fetching of records.
- *
- * @param recordsPerCall the recordsPerCall to set
- */
- public void setRecordsPerCall(int recordsPerCall) {
- this.recordsPerCall = recordsPerCall;
- }
-
-
- /**
- * Create a filter Document from a filter string. Replace any filter
- * variables. TODO: if there should be more variables, this could be done
- * more generic
- *
- * @param filterStr
- * @return Document
- * @throws Exception
- */
- protected Document createFilterDocument(String filterStr) throws Exception {
-
- ExecutionContext context = this.getExecutionContext();
-
- if (this.docBuilder == null) {
- DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
- docBuilder = docBuilderFactory.newDocumentBuilder();
- }
-
- // replace last update date variable
- Pattern lastUpdateDatePattern = Pattern.compile("\\{LAST_UPDATE_DATE\\}", Pattern.MULTILINE);
- Matcher matcher = lastUpdateDatePattern.matcher(filterStr);
- if (matcher.find()) {
- Date lastUpdateDate = context.getLastExecutionDate();
- SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd");
- filterStr = matcher.replaceAll(df.format(lastUpdateDate));
- }
-
- return docBuilder.parse(new InputSource(new StringReader(filterStr)));
- }
-
- /**
- * Fetch all records that satisfy the given filter using the GetRecords and
- * return the ids and put them into the cache
- * @note This method guarantees to query the server without a constraint, if the
- * provided filter set is empty
- *
- * @param client The CSWClient to use
- * @param elementSetName The ElementSetName of the records to fetch
- * @param filterSet The filter set used to select the records
- * @param doCache Determines wether to cache the record or not
- * @return A list of ids of the fetched records
- * @throws Exception
- */
- protected List fetchRecords(CSWClient client, ElementSetName elementSetName,
- Set filterSet, boolean doCache) throws Exception {
-
- CSWFactory factory = client.getFactory();
- Log log = this.getLog();
-
- // if the filter set is empty, we add a null a least
- // this causes execution of the iteration below, but
- // but will not add a constraint definition to the request
- if (filterSet == null)
- filterSet = new HashSet();
- if (filterSet.size() == 0)
- filterSet.add(null);
-
- // variables for complete fetch process
- // int numTotal = 0;
- List fetchedRecordIds = new CopyOnWriteArrayList();
-
- // iterate over all filters
- int filterIndex = 1;
- for (Document filter : filterSet) {
- if (log.isDebugEnabled())
- log.debug("Processing filter "+filterIndex+": "+
- StringUtils.nodeToString(filter).replace("\n", "")+".");
-
- // variables for current fetch process (current filter)
- int numRecordsTotal = 0;
- int numRecordsFetched = 0;
- List currentFetchedRecordIds = new ArrayList();
-
- // create the query
- CSWQuery query = factory.createQuery();
- query.setConstraint(filter);
- query.setResultType(ResultType.RESULTS);
- query.setElementSetName(elementSetName);
- query.setMaxRecords(this.recordsPerCall);
- query.setStartPosition(1);
-
- // do requests
-
- // do first request
-
- CSWSearchResult result = client.getRecords(query);
- numRecordsFetched += result.getNumberOfRecords();
- numRecordsTotal = result.getNumberOfRecordsTotal();
- if (log.isInfoEnabled())
- log.info(numRecordsTotal+" record(s) from filter "+filterIndex+":");
-
- if (numRecordsTotal > 0) {
-
- if (log.isInfoEnabled()) {
- log.info("\nPARAMETERS OF FETCHING PROCESS:" +
- "\nrecords per chunk (request): " + recordsPerCall +
- "\ngeneral pause between requesting next chunk (msec): " + requestPause +
- "\nnum retries per chunk: " + CswDscSearchPlug.conf.numRetriesPerRequest +
- "\npause between retries (msec): " + CswDscSearchPlug.conf.timeBetweenRetries +
- "\nmax number of lost chunks: " + CswDscSearchPlug.conf.maxNumSkippedRequests);
- }
-
- // process
- currentFetchedRecordIds.addAll(processResult(result, doCache));
-
- int numSkippedRequests = 0;
- String logLostRecordChunks = "";
- int numLostRecords = 0;
- while (numRecordsFetched < numRecordsTotal) {
- if (CswDscSearchPlug.conf.maxNumSkippedRequests > -1) {
- // fetching should end when a maximum number of failures (in a row) is reached.
- if (numSkippedRequests > CswDscSearchPlug.conf.maxNumSkippedRequests) {
- log.error("Problems fetching records. Total number of skipped requests reached (" + CswDscSearchPlug.conf.maxNumSkippedRequests +
- " requests without results). We end fetching process for this filter.");
- statusProvider.addState( "ERROR_FETCH", "Error during fetch, since more than " + CswDscSearchPlug.conf.maxNumSkippedRequests + " records have been skipped.", Classification.ERROR );
- break;
- }
- }
-
- // generic pause between requests, set via spring
- Thread.sleep(this.requestPause);
-
- String logCurrRecordChunk = "";
- try {
- // prepare next request
- // Just for safety: get number of last fetched records from last result, if we have a result and records.
- int numLastFetch = query.getMaxRecords();
- if (result != null && (result.getNumberOfRecords() > 0)) {
- numLastFetch = result.getNumberOfRecords();
- }
- numRecordsFetched += numLastFetch;
- statusProvider.addState( "FETCH", "Fetching record " + (numRecordsFetched-numLastFetch+1) + "-" + numRecordsFetched + " / " + numRecordsTotal + " from " + client.getFactory().getServiceUrl() );
-
- query.setStartPosition(query.getStartPosition() + numLastFetch);
-
- // for logging below
- logCurrRecordChunk = "" + query.getStartPosition() + " - " + (query.getStartPosition() + query.getMaxRecords());
-
- // do next request, if problems retry with increasing pause in between
- int numRetries = 0;
- while (true) {
- try {
- result = null;
- result = client.getRecords(query);
- break;
-
- } catch (Exception e) {
- if (numRetries == CswDscSearchPlug.conf.numRetriesPerRequest) {
- log.error("Retried " + numRetries + " times ! We skip records " + logCurrRecordChunk, e);
- break;
- }
-
- numRetries++;
- int timeBetweenRetry = numRetries * CswDscSearchPlug.conf.timeBetweenRetries;
- log.error("Error fetching records " + logCurrRecordChunk + ". We retry " +
- numRetries + ". time after " + timeBetweenRetry + " msec !", e);
- Thread.sleep(timeBetweenRetry);
- }
- }
-
-
- // process
- if (result == null || result.getNumberOfRecords() == 0) {
- // no result from this query, we count the failures to check whether fetching process should be ended !
- numSkippedRequests++;
- numLostRecords += query.getMaxRecords();
- logLostRecordChunks += logCurrRecordChunk + "\n";
-
- } else {
- currentFetchedRecordIds.addAll(processResult(result, doCache));
- }
- } catch (Exception e) {
- statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during processing record: " + logCurrRecordChunk, Classification.ERROR );
- log.error("Error processing records " + logCurrRecordChunk);
- log.error( ExceptionUtils.getStackTrace(e) );
- }
- }
-
- if (numLostRecords > 0) {
- statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during fetching of record: " + logLostRecordChunks, Classification.ERROR );
- log.error("\nWe had failed GetRecords requests !!!" +
- "\nThe following " + numLostRecords + " records were NOT fetched and are \"lost\":" +
- "\n" + logLostRecordChunks);
- }
- }
-
- // collect record ids
- fetchedRecordIds.addAll(currentFetchedRecordIds);
- // numTotal += currentFetchedRecordIds.size();
- filterIndex++;
- }
- return fetchedRecordIds;
- }
-
- /**
- * Fetch all records from a id list using the GetRecordById and put them in the cache
- *
- * @param client The CSWClient to use
- * @param elementSetName The ElementSetName of the records to fetch
- * @param recordIds The list of ids
- * @param requestPause The time between two requests in milliseconds
- * @throws Exception
- */
- protected void fetchRecords(CSWClient client, ElementSetName elementSetName,
- List recordIds, int requestPause) throws Exception {
-
- CSWFactory factory = client.getFactory();
- Cache cache = this.getExecutionContext().getCache();
- Log log = this.getLog();
-
- CSWQuery query = factory.createQuery();
- query.setElementSetName(elementSetName);
-
- int cnt = 1;
- int max = recordIds.size();
- Iterator it = recordIds.iterator();
- while (it.hasNext()) {
- String id = it.next();
- query.setId(id);
- CSWRecord record = null;
- try {
- record = client.getRecordById(query);
- if (log.isDebugEnabled())
- log.debug("Fetched record: "+id+" "+record.getElementSetName() + " (" + cnt + "/" + max + ")");
- cache.putRecord(record);
- } catch (Exception e) {
- log.error("Error fetching record '" + query.getId() + "'! Removing record from cache.", e);
- cache.removeRecord(query.getId());
- recordIds.remove(id);
- }
- cnt++;
- Thread.sleep(requestPause);
- }
- }
-
- /**
- * Process a fetched search result (collect ids and cache records)
- *
- * @param result The search result
- * @param doCache Determines wether to cache the record or not
- * @return The list of ids of the fetched records
- * @throws Exception
- */
- private List processResult(CSWSearchResult result, boolean doCache)
- throws Exception {
-
- Cache cache = this.getExecutionContext().getCache();
- Log log = this.getLog();
-
- List fetchedRecordIds = new ArrayList();
- for (CSWRecord record : result.getRecordList()) {
- String id = record.getId();
-
- if (log.isDebugEnabled())
- log.debug("Fetched record: "+id+" "+record.getElementSetName());
- if (fetchedRecordIds.contains(id)) {
- log.warn("Duplicated id: "+id+". Overriding previous entry.");
- }
- fetchedRecordIds.add(id);
-
- // cache only if requested
- if (doCache)
- cache.putRecord(record);
- }
- if (log.isInfoEnabled())
- log.info("Fetched "+fetchedRecordIds.size()+" of "+result.getNumberOfRecordsTotal()+
- " [starting from "+result.getQuery().getStartPosition() + "]");
- return fetchedRecordIds;
- }
-}
+ * **************************************************#
+ */
+/*
+ * Copyright (c) 2009 wemove digital solutions. All rights reserved.
+ */
+
+package de.ingrid.iplug.csw.dsc.cache.impl;
+
+import java.io.StringReader;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.commons.logging.Log;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+
+import de.ingrid.admin.elasticsearch.StatusProvider;
+import de.ingrid.admin.elasticsearch.StatusProvider.Classification;
+import de.ingrid.iplug.csw.dsc.CswDscSearchPlug;
+import de.ingrid.iplug.csw.dsc.cache.Cache;
+import de.ingrid.iplug.csw.dsc.cache.ExecutionContext;
+import de.ingrid.iplug.csw.dsc.cache.UpdateStrategy;
+import de.ingrid.iplug.csw.dsc.cswclient.CSWClient;
+import de.ingrid.iplug.csw.dsc.cswclient.CSWFactory;
+import de.ingrid.iplug.csw.dsc.cswclient.CSWQuery;
+import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord;
+import de.ingrid.iplug.csw.dsc.cswclient.CSWSearchResult;
+import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName;
+import de.ingrid.iplug.csw.dsc.cswclient.constants.ResultType;
+import de.ingrid.iplug.csw.dsc.tools.StringUtils;
+
+public abstract class AbstractUpdateStrategy implements UpdateStrategy {
+
+ @Autowired
+ protected StatusProvider statusProvider;
+
+ DocumentBuilder docBuilder = null; // created lazily on first use in createFilterDocument()
+
+ /** The time in milliseconds the strategy pauses between consecutive GetRecords requests to the CSW server. */
+ int requestPause = 1000;
+
+ /** The default number of records the strategy requests per GetRecords call while fetching records. */
+ int recordsPerCall = 10;
+
+
+ /**
+ * Set the time in milliseconds the strategy pauses between requests to the CSW server.
+ *
+ * @param requestPause the pause in milliseconds; replaces the default of 1000
+ */
+ public void setRequestPause(int requestPause) {
+ this.requestPause = requestPause;
+ }
+
+ /**
+ * Set the number of records the strategy requests at once during fetching of records.
+ *
+ * @param recordsPerCall the number of records per request; replaces the default of 10
+ */
+ public void setRecordsPerCall(int recordsPerCall) {
+ this.recordsPerCall = recordsPerCall;
+ }
+
+
+ /**
+ * Create a filter Document from a filter string. Every {LAST_UPDATE_DATE}
+ * placeholder is replaced by the context's last execution date (yyyy-MM-dd).
+ * TODO: if more variables are needed, make the replacement generic.
+ *
+ * @param filterStr the filter XML, possibly containing {LAST_UPDATE_DATE}
+ * @return the parsed filter as a DOM Document
+ * @throws Exception if the DocumentBuilder cannot be created or the filter cannot be parsed
+ */
+ protected Document createFilterDocument(String filterStr) throws Exception {
+
+ ExecutionContext context = this.getExecutionContext();
+
+ if (this.docBuilder == null) {
+ DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
+ docBuilder = docBuilderFactory.newDocumentBuilder();
+ }
+
+ // replace the last update date variable (only if it occurs in the filter)
+ Pattern lastUpdateDatePattern = Pattern.compile("\\{LAST_UPDATE_DATE\\}", Pattern.MULTILINE);
+ Matcher matcher = lastUpdateDatePattern.matcher(filterStr);
+ if (matcher.find()) {
+ Date lastUpdateDate = context.getLastExecutionDate();
+ SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd");
+ filterStr = matcher.replaceAll(df.format(lastUpdateDate));
+ }
+
+ return docBuilder.parse(new InputSource(new StringReader(filterStr)));
+ }
+
+ /**
+ * Fetch all records that satisfy the given filters using GetRecords,
+ * return their ids and, if requested, put the records into the cache.
+ * @note This method guarantees to query the server without a constraint, if the
+ * provided filter set is null or empty
+ *
+ * @param client The CSWClient to use
+ * @param elementSetName The ElementSetName of the records to fetch
+ * @param filterSet The filter set used to select the records; an empty set gets a null entry added
+ * @param doCache Determines whether to cache the records or not
+ * @return A list of ids of the fetched records
+ * @throws Exception
+ */
+ protected List fetchRecords(CSWClient client, ElementSetName elementSetName,
+ Set filterSet, boolean doCache) throws Exception {
+
+ CSWFactory factory = client.getFactory();
+ Log log = this.getLog();
+
+ // if the filter set is null or empty, we add at least a null entry;
+ // this causes one execution of the iteration below, which
+ // will not add a constraint definition to the request
+ if (filterSet == null)
+ filterSet = new HashSet();
+ if (filterSet.size() == 0)
+ filterSet.add(null);
+
+ // variables for complete fetch process
+ // int numTotal = 0;
+ List fetchedRecordIds = new CopyOnWriteArrayList();
+
+ // iterate over all filters
+ int filterIndex = 1;
+ for (Document filter : filterSet) {
+ if (log.isDebugEnabled())
+ log.debug("Processing filter "+filterIndex+": "+
+ StringUtils.nodeToString(filter).replace("\n", "")+".");
+
+ // variables for current fetch process (current filter)
+ int numRecordsTotal = 0;
+ int numRecordsFetched = 0;
+ List currentFetchedRecordIds = new ArrayList();
+
+ // create the query
+ CSWQuery query = factory.createQuery();
+ query.setConstraint(filter);
+ query.setResultType(ResultType.RESULTS);
+ query.setElementSetName(elementSetName);
+ query.setMaxRecords(this.recordsPerCall);
+ query.setStartPosition(1);
+
+ // do requests
+
+ // do first request (not retried; an exception here aborts the whole fetch)
+
+ CSWSearchResult result = client.getRecords(query);
+ numRecordsFetched += result.getNumberOfRecords();
+ numRecordsTotal = result.getNumberOfRecordsTotal();
+ if (log.isInfoEnabled())
+ log.info(numRecordsTotal+" record(s) from filter "+filterIndex+":");
+
+ if (numRecordsTotal > 0) {
+
+ if (log.isInfoEnabled()) {
+ log.info("\nPARAMETERS OF FETCHING PROCESS:" +
+ "\nrecords per chunk (request): " + recordsPerCall +
+ "\ngeneral pause between requesting next chunk (msec): " + requestPause +
+ "\nnum retries per chunk: " + CswDscSearchPlug.conf.numRetriesPerRequest +
+ "\npause between retries (msec): " + CswDscSearchPlug.conf.timeBetweenRetries +
+ "\nmax number of lost chunks: " + CswDscSearchPlug.conf.maxNumSkippedRequests);
+ }
+
+ // process
+ currentFetchedRecordIds.addAll(processResult(result, doCache));
+
+ int numSkippedRequests = 0;
+ String logLostRecordChunks = "";
+ int numLostRecords = 0;
+ while (numRecordsFetched < numRecordsTotal) {
+ if (CswDscSearchPlug.conf.maxNumSkippedRequests > -1) {
+ // fetching ends when the total number of skipped requests for this filter exceeds the maximum (the counter is never reset).
+ if (numSkippedRequests > CswDscSearchPlug.conf.maxNumSkippedRequests) {
+ log.error("Problems fetching records. Total number of skipped requests reached (" + CswDscSearchPlug.conf.maxNumSkippedRequests +
+ " requests without results). We end fetching process for this filter.");
+ statusProvider.addState( "ERROR_FETCH", "Error during fetch, since more than " + CswDscSearchPlug.conf.maxNumSkippedRequests + " records have been skipped.", Classification.ERROR );
+ break;
+ }
+ }
+
+ // generic pause between requests, set via spring
+ Thread.sleep(this.requestPause);
+
+ String logCurrRecordChunk = "";
+ try {
+ // prepare next request
+ // Just for safety: get number of last fetched records from last result, if we have a result and records.
+ int numLastFetch = query.getMaxRecords();
+ if (result != null && (result.getNumberOfRecords() > 0)) {
+ numLastFetch = result.getNumberOfRecords();
+ }
+ numRecordsFetched += numLastFetch;
+ statusProvider.addState( "FETCH", "Fetching record " + (numRecordsFetched-numLastFetch+1) + "-" + numRecordsFetched + " / " + numRecordsTotal + " from " + client.getFactory().getServiceUrl() );
+
+ query.setStartPosition(query.getStartPosition() + numLastFetch);
+
+ // for logging below; the upper bound is start + max, i.e. one past the last requested position
+ logCurrRecordChunk = "" + query.getStartPosition() + " - " + (query.getStartPosition() + query.getMaxRecords());
+
+ // do next request, if problems retry with increasing pause in between
+ int numRetries = 0;
+ while (true) {
+ try {
+ result = null;
+ result = client.getRecords(query);
+ break;
+
+ } catch (Exception e) {
+ if (numRetries == CswDscSearchPlug.conf.numRetriesPerRequest) {
+ log.error("Retried " + numRetries + " times ! We skip records " + logCurrRecordChunk, e);
+ break;
+ }
+
+ numRetries++;
+ int timeBetweenRetry = numRetries * CswDscSearchPlug.conf.timeBetweenRetries;
+ log.error("Error fetching records " + logCurrRecordChunk + ". We retry " +
+ numRetries + ". time after " + timeBetweenRetry + " msec !", e);
+ Thread.sleep(timeBetweenRetry);
+ }
+ }
+
+
+ // process
+ if (result == null || result.getNumberOfRecords() == 0) {
+ // no result from this query, we count the failures to check whether fetching process should be ended !
+ numSkippedRequests++;
+ numLostRecords += query.getMaxRecords();
+ logLostRecordChunks += logCurrRecordChunk + "\n";
+
+ } else {
+ currentFetchedRecordIds.addAll(processResult(result, doCache));
+ }
+ } catch (Exception e) {
+ statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during processing record: " + logCurrRecordChunk, Classification.ERROR );
+ log.error("Error processing records " + logCurrRecordChunk);
+ log.error( ExceptionUtils.getStackTrace(e) );
+ }
+ }
+
+ if (numLostRecords > 0) {
+ statusProvider.addState( "ERROR_FETCH_PROCESS", "Error during fetching of record: " + logLostRecordChunks, Classification.ERROR );
+ log.error("\nWe had failed GetRecords requests !!!" +
+ "\nThe following " + numLostRecords + " records were NOT fetched and are \"lost\":" +
+ "\n" + logLostRecordChunks);
+ }
+ }
+
+ // collect record ids
+ fetchedRecordIds.addAll(currentFetchedRecordIds);
+ // numTotal += currentFetchedRecordIds.size();
+ filterIndex++;
+ }
+ return fetchedRecordIds;
+ }
+
+ /**
+ * Fetch every record of an id list using GetRecordById and put it into the cache.
+ * A record whose fetch or caching fails is removed from the cache and from recordIds.
+ * @param client The CSWClient to use
+ * @param elementSetName The ElementSetName of the records to fetch
+ * @param recordIds The list of ids; modified while iterated (NOTE(review): presumably needs a list that tolerates this, e.g. CopyOnWriteArrayList - confirm)
+ * @param requestPause The time between two requests in milliseconds
+ * @throws Exception
+ */
+ protected void fetchRecords(CSWClient client, ElementSetName elementSetName,
+ List recordIds, int requestPause) throws Exception {
+
+ CSWFactory factory = client.getFactory();
+ Cache cache = this.getExecutionContext().getCache();
+ Log log = this.getLog();
+
+ CSWQuery query = factory.createQuery();
+ query.setElementSetName(elementSetName);
+
+ int cnt = 1;
+ int max = recordIds.size();
+ Iterator it = recordIds.iterator();
+ while (it.hasNext()) {
+ String id = it.next();
+ query.setId(id);
+ CSWRecord record = null;
+ try {
+ record = client.getRecordById(query);
+ if (log.isDebugEnabled())
+ log.debug("Fetched record: "+id+" "+record.getElementSetName() + " (" + cnt + "/" + max + ")");
+ cache.putRecord(record);
+ } catch (Exception e) {
+ log.error("Error fetching record '" + query.getId() + "'! Removing record from cache.", e);
+ cache.removeRecord(query.getId());
+ recordIds.remove(id); // removes from the list that is currently being iterated
+ }
+ cnt++;
+ Thread.sleep(requestPause);
+ }
+ }
+
+ /**
+ * Process a fetched search result (collect ids and cache records)
+ *
+ * @param result The search result
+ * @param doCache Determines whether to cache the records or not
+ * @return The list of ids of the fetched records (a duplicated id is appended again)
+ * @throws Exception
+ */
+ private List processResult(CSWSearchResult result, boolean doCache)
+ throws Exception {
+
+ Cache cache = this.getExecutionContext().getCache();
+ Log log = this.getLog();
+
+ List fetchedRecordIds = new ArrayList();
+ for (CSWRecord record : result.getRecordList()) {
+ String id = record.getId();
+
+ if (log.isInfoEnabled()) // NOTE(review): logs every record at INFO level - confirm this is intended
+ log.info("Fetched record: "+id+" "+record.getElementSetName());
+ if (fetchedRecordIds.contains(id)) {
+ log.warn("Duplicated id: "+id+". Overriding previous entry.");
+ }
+ fetchedRecordIds.add(id);
+
+ // cache only if requested
+ if (doCache)
+ cache.putRecord(record);
+ }
+ if (log.isInfoEnabled())
+ log.info("Fetched "+fetchedRecordIds.size()+" of "+result.getNumberOfRecordsTotal()+
+ " [starting from "+result.getQuery().getStartPosition() + "]");
+ return fetchedRecordIds;
+ }
+}
diff --git a/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java b/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java
index f2ae1c7b..921de935 100644
--- a/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java
+++ b/src/main/java/de/ingrid/iplug/csw/dsc/cswclient/impl/GenericRecord.java
@@ -1,108 +1,108 @@
-/*
+/*
* **************************************************-
* ingrid-iplug-csw-dsc:war
* ==================================================
* Copyright (C) 2014 - 2016 wemove digital solutions GmbH
* ==================================================
- * Licensed under the EUPL, Version 1.1 or – as soon they will be
- * approved by the European Commission - subsequent versions of the
- * EUPL (the "Licence");
- *
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- *
- * http://ec.europa.eu/idabc/eupl5
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
+ * Licensed under the EUPL, Version 1.1 or – as soon they will be
+ * approved by the European Commission - subsequent versions of the
+ * EUPL (the "Licence");
+ *
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ *
+ * http://ec.europa.eu/idabc/eupl5
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
* limitations under the Licence.
- * **************************************************#
- */
-/*
- * Copyright (c) 2008 wemove digital solutions. All rights reserved.
- */
-
-package de.ingrid.iplug.csw.dsc.cswclient.impl;
-
-import org.w3c.dom.Comment;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord;
-import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName;
-import de.ingrid.iplug.csw.dsc.tools.NodeUtils;
-import de.ingrid.iplug.csw.dsc.tools.StringUtils;
-import de.ingrid.utils.xml.IDFNamespaceContext;
-import de.ingrid.utils.xpath.XPathUtils;
-
-public class GenericRecord implements CSWRecord {
-
- final private XPathUtils xPathUtils = new XPathUtils(new IDFNamespaceContext());
-
- protected String id = null;
- protected ElementSetName elementSetName = null;
- protected Node node = null;
-
- /**
- * Initializes the record. The node will be detached (cloned) from it's
- * owner document.
- *
- * @param elementSetName
- * The {@link ElementSetName} of this record.
- * @param node
- * The DOM Node describing the record. The node will be detached
- * (cloned).
- *
- * @see de.ingrid.iplug.csw.dsc.cswclient.CSWRecord#initialize(de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName,
- * org.w3c.dom.Node)
- */
- @Override
- public void initialize(ElementSetName elementSetName, Node node) throws Exception {
- // detach node from whole document inkl. all namespace definitions
- while (node instanceof Comment) {
- node = node.getNextSibling();
- }
- this.node = NodeUtils.detachWithNameSpaces(node);
- this.elementSetName = elementSetName;
-
- // get the record id
- NodeList idNodes = xPathUtils
- .getNodeList(this.node, "//gmd:fileIdentifier/gco:CharacterString");
- if (idNodes == null || idNodes.item(0) == null)
- throw new RuntimeException(
- "CSWRecord does not contain an id (looking for //gmd:fileIdentifier/gco:CharacterString):\n"
- + StringUtils.nodeToString(this.node));
- if (idNodes.getLength() > 1)
- throw new RuntimeException(
- "CSWRecord contains more than one id (looking for //gmd:fileIdentifier/gco:CharacterString):\n"
- + StringUtils.nodeToString(this.node));
-
- this.id = idNodes.item(0).getTextContent().trim();
- }
-
- @Override
- public String getId() {
- if (this.id != null) {
- return this.id;
- } else
- throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize.");
- }
-
- @Override
- public ElementSetName getElementSetName() {
- if (this.elementSetName != null) {
- return this.elementSetName;
- } else
- throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize.");
- }
-
- @Override
- public Node getOriginalResponse() {
- if (this.node != null) {
- return this.node;
- } else
- throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize.");
- }
-}
+ * **************************************************#
+ */
+/*
+ * Copyright (c) 2008 wemove digital solutions. All rights reserved.
+ */
+
+package de.ingrid.iplug.csw.dsc.cswclient.impl;
+
+import org.w3c.dom.Comment;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import de.ingrid.iplug.csw.dsc.cswclient.CSWRecord;
+import de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName;
+import de.ingrid.iplug.csw.dsc.tools.NodeUtils;
+import de.ingrid.iplug.csw.dsc.tools.StringUtils;
+import de.ingrid.utils.xml.IDFNamespaceContext;
+import de.ingrid.utils.xpath.XPathUtils;
+
+public class GenericRecord implements CSWRecord {
+
+ final private XPathUtils xPathUtils = new XPathUtils(new IDFNamespaceContext());
+
+ protected String id = null; // set at the end of initialize(); null until then
+ protected ElementSetName elementSetName = null; // set by initialize(); null until then
+ protected Node node = null; // detached record node, set by initialize(); null until then
+
+ /**
+ * Initializes the record. The node will be detached (cloned) from its
+ * owner document and the record id is read from its gmd:fileIdentifier.
+ *
+ * @param elementSetName
+ * The {@link ElementSetName} of this record.
+ * @param node
+ * The DOM Node describing the record. The node will be detached
+ * (cloned). Leading comment nodes are skipped.
+ * @throws RuntimeException if the record contains no id or more than one id
+ * @see de.ingrid.iplug.csw.dsc.cswclient.CSWRecord#initialize(de.ingrid.iplug.csw.dsc.cswclient.constants.ElementSetName,
+ * org.w3c.dom.Node)
+ */
+ @Override
+ public void initialize(ElementSetName elementSetName, Node node) throws Exception {
+ // skip leading comment nodes, then detach the node from the whole document incl. all namespace definitions
+ while (node instanceof Comment) {
+ node = node.getNextSibling();
+ }
+ this.node = NodeUtils.detachWithNameSpaces(node);
+ this.elementSetName = elementSetName;
+
+ // get the record id; only the fileIdentifier directly below the ISO or IDF metadata root is considered
+ NodeList idNodes = xPathUtils
+ .getNodeList(this.node, "/gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString | /idf:html/idf:body/idf:idfMdMetadata/gmd:fileIdentifier/gco:CharacterString");
+ if (idNodes == null || idNodes.item(0) == null)
+ throw new RuntimeException(
+ "CSWRecord does not contain an id (looking for /gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString | /idf:html/idf:body/idf:idfMdMetadata/gmd:fileIdentifier/gco:CharacterString):\n"
+ + StringUtils.nodeToString(this.node));
+ if (idNodes.getLength() > 1)
+ throw new RuntimeException(
+ "CSWRecord contains more than one id (looking for /gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString | /idf:html/idf:body/idf:idfMdMetadata/gmd:fileIdentifier/gco:CharacterString):\n"
+ + StringUtils.nodeToString(this.node));
+
+ this.id = idNodes.item(0).getTextContent().trim();
+ }
+
+ @Override
+ public String getId() { // returns the trimmed id read by initialize()
+ if (this.id != null) {
+ return this.id;
+ } else // id is still null, i.e. initialize() has not completed
+ throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize.");
+ }
+
+ @Override
+ public ElementSetName getElementSetName() { // returns the value passed to initialize()
+ if (this.elementSetName != null) {
+ return this.elementSetName;
+ } else // still null, i.e. initialize() has not set it
+ throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize.");
+ }
+
+ @Override
+ public Node getOriginalResponse() { // returns the detached record node stored by initialize()
+ if (this.node != null) {
+ return this.node;
+ } else // still null, i.e. initialize() has not set it
+ throw new RuntimeException("CSWRecord is not initialized properly. Make sure to call CSWRecord.initialize.");
+ }
+}
diff --git a/src/main/resources/mapping/idf_to_lucene.js b/src/main/resources/mapping/idf_to_lucene.js
index 32f429d8..cbf337c2 100644
--- a/src/main/resources/mapping/idf_to_lucene.js
+++ b/src/main/resources/mapping/idf_to_lucene.js
@@ -1,1008 +1,1008 @@
-/*
+/*
* **************************************************-
* ingrid-iplug-csw-dsc:war
* ==================================================
* Copyright (C) 2014 - 2016 wemove digital solutions GmbH
* ==================================================
- * Licensed under the EUPL, Version 1.1 or – as soon they will be
- * approved by the European Commission - subsequent versions of the
- * EUPL (the "Licence");
- *
- * You may not use this work except in compliance with the Licence.
- * You may obtain a copy of the Licence at:
- *
- * http://ec.europa.eu/idabc/eupl5
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Licence is distributed on an "AS IS" basis,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Licence for the specific language governing permissions and
+ * Licensed under the EUPL, Version 1.1 or – as soon they will be
+ * approved by the European Commission - subsequent versions of the
+ * EUPL (the "Licence");
+ *
+ * You may not use this work except in compliance with the Licence.
+ * You may obtain a copy of the Licence at:
+ *
+ * http://ec.europa.eu/idabc/eupl5
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Licence is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Licence for the specific language governing permissions and
* limitations under the Licence.
- * **************************************************#
- */
-/**
- * CSW 2.0.2 AP ISO 1.0 Record (full) to Lucene Document mapping according to mapping IGC 1.0.3
- * Copyright (c) 2008 wemove digital solutions. All rights reserved.
- *
- * The following global variable are passed from the application:
- *
- * @param cswRecord A CSWRecord instance, that defines the input
- * @param document A lucene Document instance, that defines the output
- * @param log A Log instance
- *
- */
-if (javaVersion.indexOf( "1.8" ) === 0) {
- load("nashorn:mozilla_compat.js");
-}
-
-importPackage(Packages.de.ingrid.iplug.csw.dsc.tools);
-importPackage(Packages.de.ingrid.iplug.csw.dsc.index);
-importPackage(Packages.de.ingrid.utils.udk);
-importPackage(Packages.org.w3c.dom);
-
-//constant to punish the rank of a service/data object, which has no coupled resource
-var BOOST_NO_COUPLED_RESOURCE = 0.9;
-//constant to boost the rank of a service/data object, which has at least one coupled resource
-var BOOST_HAS_COUPLED_RESOURCE = 1.0;
-
-
-if (log.isDebugEnabled()) {
- log.debug("Mapping csw record "+cswRecord.getId()+" to lucene document");
-}
-
-// get the xml content of the record
-var recordNode = cswRecord.getOriginalResponse();
-
-// define one-to-one mappings
-/** each entry consists off the following possible values:
-
- indexField: The name of the field in the index the data will be put into.
- xpath: The xpath expression for the data in the XML input file. Multiple xpath
- results will be put in the same index field.
- transform: The transformation to be executed on the value
- funct: The transformation function to use.
- params: The parameters for the transformation function additional to the value
- from the xpath expression that is always the first parameter.
- execute: The function to be executed. No xpath value is obtained. Instead the recordNode of the
- source XML is put as default parameter to the function. All other parameters are ignored.
- funct: The function to execute.
- params: The parameters for the function additional to the recordNode
- that is always the first parameter.
- tokenized: If set to false no tokenizing will take place before the value is put into the index.
- additionalTokenize: constant specifying additional method to tokenize value and write tokenized value to index
-*/
-var transformationDescriptions = [
- { "indexField":"t01_object.obj_id",
- "tokenized":true,
- "xpath":"//gmd:fileIdentifier/gco:CharacterString"
- },
- { "indexField":"title",
- "tokenized":true,
- "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"
- },
- { "indexField":"t01_object.org_obj_id",
- "tokenized":true,
- "xpath":"//gmd:fileIdentifier/gco:CharacterString"
- },
- { "indexField":"summary",
- "xpath":"//gmd:identificationInfo//gmd:abstract/gco:CharacterString"
- },
- { "indexField":"t01_object.info_note",
- "xpath":"//gmd:identificationInfo//gmd:purpose/gco:CharacterString"
- },
- { "indexField":"t01_object.loc_descr",
- "xpath":"//gmd:identificationInfo//gmd:EX_Extent/gmd:description/gco:CharacterString"
- },
- { "indexField":"t01_object.dataset_alternate_name",
- "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:alternateTitle/gco:CharacterString"
- },
- { "indexField":"t01_object.time_status",
- "xpath":"//gmd:identificationInfo//gmd:status/gmd:MD_ProgressCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[523]
- }
- },
- { "execute":{
- "funct":mapObjectClass,
- "params":[recordNode]
- }
- },
- { "indexField":"t01_object.dataset_character_set",
- "xpath":"//gmd:identificationInfo//gmd:characterSet/gmd:MD_CharacterSetCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[510]
- }
- },
- { "indexField":"t01_object.dataset_usage",
- "xpath":"//gmd:identificationInfo//gmd:resourceSpecificUsage/gmd:MD_Usage/gmd:specificUsage/gco:CharacterString"
- },
- { "indexField":"t01_object.data_language_code",
- "xpath":"//gmd:identificationInfo//gmd:language/gco:CharacterString",
- "transform":{
- "funct":transformISO639_2ToISO639_1
- }
- },
- { "indexField":"t01_object.metadata_character_set",
- "xpath":"//gmd:characterSet/gmd:MD_CharacterSetCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[510]
- }
- },
- { "indexField":"t01_object.metadata_standard_name",
- "xpath":"//gmd:metadataStandardName/gco:CharacterString"
- },
- { "indexField":"t01_object.metadata_standard_version",
- "xpath":"//gmd:metadataStandardVersion/gco:CharacterString"
- },
- { "indexField":"t01_object.metadata_language_code",
- "xpath":"//gmd:language/gco:CharacterString",
- "transform":{
- "funct":transformISO639_2ToISO639_1
- }
- },
- { "indexField":"t01_object.vertical_extent_minimum",
- "xpath":"//gmd:identificationInfo//gmd:extent/gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:minimumValue/gco:Real"
- },
- { "indexField":"t01_object.vertical_extent_maximum",
- "xpath":"//gmd:identificationInfo//gmd:extent/gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:maximumValue/gco:Real"
- },
- { "indexField":"t01_object.vertical_extent_unit",
- "xpath":"//gmd:identificationInfo//gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:verticalCRS/gmd:verticalCRS/gml:verticalCS/gml:VerticalCS/gml:axis/gml:CoordinateSystemAxis/@uom",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[102]
- }
- },
- { "indexField":"t01_object.vertical_extent_vdatum",
- "xpath":"//gmd:identificationInfo//gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent/gmd:verticalCRS/gml:verticalCRS/gml:verticalDatum/gml:VerticalDatum/gml:identifier",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[101]
- }
- },
- { "indexField":"t01_object.ordering_instructions",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributor/gmd:MD_Distributor/gmd:distributionOrderProcess/gmd:MD_StandardOrderProcess/gmd:orderingInstructions/gco:CharacterString"
- },
- { "indexField":"t01_object.mod_time",
- "xpath":"//gmd:dateStamp/gco:DateTime | //gmd:dateStamp/gco:Date[not(../gco:DateTime)]",
- "transform":{
- "funct":UtilsCSWDate.mapDateFromIso8601ToIndex
- }
- },
- // object_access
- { "indexField":"object_access.restriction_key",
- "xpath":"//gmd:identificationInfo//gmd:resourceConstraints//gmd:otherConstraints/gco:CharacterString",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[6010]
- }
- },
- { "indexField":"object_access.restriction_value",
- "xpath":"//gmd:identificationInfo//gmd:resourceConstraints//gmd:otherConstraints/gco:CharacterString"
- },
- { "indexField":"object_access.terms_of_use",
- "xpath":"//gmd:identificationInfo//gmd:resourceConstraints//gmd:useLimitation/gco:CharacterString"
- },
- // t0110_avail_format
- { "indexField":"t0110_avail_format.name",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:name/gco:CharacterString"
- },
- { "indexField":"t0110_avail_format.version",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:version/gco:CharacterString"
- },
- { "indexField":"t0110_avail_format.file_decompression_technique",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:fileDecompressionTechnique/gco:CharacterString"
- },
- { "indexField":"t0110_avail_format.specification",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:distributionFormat/gmd:MD_Format/gmd:specification/gco:CharacterString"
- },
- // t0113_dataset_reference
- { "indexField":"t0113_dataset_reference.reference_date",
- "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date",
- "transform":{
- "funct":UtilsCSWDate.mapDateFromIso8601ToIndex
- }
- },
- { "indexField":"t0113_dataset_reference.type",
- "xpath":"//gmd:identificationInfo//gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:dateType/gmd:CI_DateTypeCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[502]
- }
- },
- // t011_obj_serv
- { "indexField":"t011_obj_serv.type",
- "xpath":"//gmd:identificationInfo//srv:serviceType/gco:LocalName"
- },
- { "indexField":"t011_obj_serv.history",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:processStep/gmd:LI_ProcessStep/gmd:description/gco:CharacterString"
- },
- { "indexField":"t011_obj_serv.base",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/source/LI_Source/gmd:description/gco:CharacterString"
- },
- // t011_obj_serv_op_connpoint
- { "indexField":"t011_obj_serv_op_connpoint.connect_point",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:connectPoint/gmd:CI_OnlineResource/gmd:linkage/gmd:URL",
- "additionalTokenize":"SPLIT_URL"
- },
- // t011_obj_serv_op_depends
- { "indexField":"t011_obj_serv_op_depends.depends_on",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:dependsOn/srv:SV_OperationMetadata/srv:operationName/gco:CharacterString"
- },
- // t011_obj_serv_op_para
- { "indexField":"t011_obj_serv_op_para.name",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/srv:name"
- },
- { "indexField":"t011_obj_serv_op_para.direction",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/direction/SV_ParameterDirection"
- },
- { "indexField":"t011_obj_serv_op_para.descr",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/gmd:description/gco:CharacterString"
- },
- { "indexField":"t011_obj_serv_op_para.optional",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/srv:optionality/gco:CharacterString",
- "transform":{
- "funct":transformGeneric,
- "params":[{"optional":"1", "mandatory":"0"}, false]
- }
- },
- { "indexField":"t011_obj_serv_op_para.repeatability",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:parameters/srv:SV_Parameter/srv:repeatability/gco:Boolean",
- "transform":{
- "funct":transformGeneric,
- "params":[{"true":"1", "false":"0"}, false]
- }
- },
- // t011_obj_serv_op_platform
- { "indexField":"t011_obj_serv_op_platform.platform",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:DCP/srv:DCPList/@codeListValue"
- },
- // t011_obj_serv_operation
- { "indexField":"t011_obj_serv_operation.name",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:operationName/gco:CharacterString"
- },
- { "indexField":"t011_obj_serv_operation.descr",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:operationDescription/gco:CharacterString"
- },
- { "indexField":"t011_obj_serv_operation.invocation_name",
- "xpath":"//gmd:identificationInfo//srv:containsOperations/srv:SV_OperationMetadata/srv:invocationName/gco:CharacterString"
- },
- // t011_obj_serv_version
- { "indexField":"t011_obj_serv_version.serv_version",
- "xpath":"//gmd:identificationInfo//srv:serviceTypeVersion/gco:CharacterString"
- },
- // t011_obj_topic_cat
- { "indexField":"t011_obj_topic_cat.topic_category",
- "xpath":"//gmd:identificationInfo//gmd:topicCategory/gmd:MD_TopicCategoryCode",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[527]
- }
- },
- // t011_obj_geo
- { "indexField":"t011_obj_geo.special_base",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:statement/gco:CharacterString"
- },
- { "indexField":"t011_obj_geo.data_base",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:source/gmd:LI_Source/gmd:description/gco:CharacterString"
- },
- { "indexField":"t011_obj_geo.method",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:processStep/gmd:LI_ProcessStep/gmd:description/gco:CharacterString"
- },
- { "execute":{
- "funct":mapReferenceSystemInfo
- }
- },
- { "indexField":"t011_obj_geo.rec_exact",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/report/gmd:DQ_RelativeInternalPositionalAccuracy/gmd:DQ_QuantitativeResult/gmd:value/gco:Record"
- },
- { "indexField":"t011_obj_geo.rec_grade",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/report/DQ_CompletenessCommission/gmd:DQ_QuantitativeResult/gmd:value/gco:Record"
- },
- { "indexField":"t011_obj_geo.hierarchy_level",
- "xpath":"//gmd:hierarchyLevel/gmd:MD_ScopeCode/@codeListValue",
- "transform":{
- "funct":transformGeneric,
- "params":[{"dataset":"5", "series":"6"}, false]
- }
- },
- { "indexField":"t011_obj_geo.vector_topology_level",
- "xpath":"//gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/gmd:topologyLevel/gmd:MD_TopologyLevelCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[528]
- }
- },
- { "indexField":"t011_obj_geo.pos_accuracy_vertical",
- "xpath":"//gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_RelativeInternalPositionalAccuracy[gmd:measureDescription/gco:CharacterString='vertical']/gmd:DQ_QuantitativeResult/gmd:value/gmd:Record"
- },
- { "indexField":"t011_obj_geo.keyc_incl_w_dataset",
- "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:includedWithDataset/gco:Boolean",
- "transform":{
- "funct":transformGeneric,
- "params":[{"true":"1", "false":"0"}, false]
- }
- },
- // accept RS_Indentifier and MD_Identifier with xpath: "...identifier//code..."
- { "indexField":"t011_obj_geo.datasource_uuid",
- "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:identifier//gmd:code/gco:CharacterString"
- },
- // t011_obj_geo_keyc
- { "indexField":"t011_obj_geo_keyc.subject_cat",
- "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:title/gco:CharacterString"
- },
- { "indexField":"t011_obj_geo_keyc.key_date",
- "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date",
- "transform":{
- "funct":UtilsCSWDate.mapDateFromIso8601ToIndex
- }
- },
- { "indexField":"t011_obj_geo_keyc.edition",
- "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureCatalogueCitation/gmd:CI_Citation/gmd:edition/gco:CharacterString"
- },
- // t011_obj_geo_scale
- { "indexField":"t011_obj_geo_scale.scale",
- "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer"
- },
- { "indexField":"t011_obj_geo_scale.resolution_ground",
- "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gmd:Distance[@uom='meter']"
- },
- { "indexField":"t011_obj_geo_scale.resolution_scan",
- "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gmd:Distance[@uom='dpi']"
- },
- // t011_obj_geo_spatial_rep
- { "indexField":"t011_obj_geo_spatial_rep.type",
- "xpath":"//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialRepresentationType/MD_SpatialRepresentationTypeCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[526]
- }
- },
- // t011_obj_geo_supplinfo
- { "indexField":"t011_obj_geo_supplinfo.feature_type",
- "xpath":"//gmd:contentInfo/gmd:MD_FeatureCatalogueDescription/gmd:featureTypes/gco:LocalName"
- },
- // t011_obj_geo_symc
- { "indexField":"t011_obj_geo_symc.symbol_cat",
- "xpath":"//gmd:portrayalCatalogueInfo/gmd:MD_PortrayalCatalogueReference/gmd:portrayalCatalogueCitation/gmd:CI_Citation/gmd:title/gco:CharacterString"
- },
- { "indexField":"t011_obj_geo_symc.symbol_date",
- "xpath":"//gmd:portrayalCatalogueInfo/gmd:MD_PortrayalCatalogueReference/gmd:portrayalCatalogueCitation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date",
- "transform":{
- "funct":UtilsCSWDate.mapDateFromIso8601ToIndex
- }
- },
- { "indexField":"t011_obj_geo_symc.edition",
- "xpath":"//gmd:portrayalCatalogueInfo/gmd:MD_PortrayalCatalogueReference/gmd:portrayalCatalogueCitation/gmd:CI_Citation /gco:CharacterString"
- },
- // t011_obj_geo_vector
- { "indexField":"t011_obj_geo_vector.geometric_object_type",
- "xpath":"//gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/gmd:geometricObjects/gmd:MD_GeometricObjects/gmd:geometricObjectType/gmd:MD_GeometricObjectTypeCode/@codeListValue",
- "transform":{
- "funct":transformToIgcDomainId,
- "params":[515]
- }
- },
- { "indexField":"t011_obj_geo_vector.geometric_object_count",
- "xpath":"//gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/gmd:geometricObjects/gmd:MD_GeometricObjects/gmd:geometricObjectCount/gco:Integer"
- },
- // t017_url_ref
- { "indexField":"t017_url_ref.url_link",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/gmd:linkage/gmd:URL"
- },
- { "indexField":"t017_url_ref.content",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/name/gco:CharacterString"
- },
- { "indexField":"t017_url_ref.descr",
- "xpath":"//gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource/gmd:description/gco:CharacterString"
- },
- // add MD_BrowseGraphic as additional html
- { "indexField":"additional_html_1",
- "xpath":"//gmd:identificationInfo//gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileName/gco:CharacterString",
- "transform":{
- "funct":transformToPreviewGraphic
- }
- },
- { "indexField":"t017_url_ref.content",
- "xpath":"//gmd:identificationInfo//gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileDescription/gco:CharacterString"
- },
- // object_references
- { "execute":{
- "funct":mapReferences,
- "params":[recordNode]
- }
- },
- // keywords
- { "execute":{
- "funct":mapKeywords,
- "params":[recordNode]
- }
- },
- // geographic elements
- { "execute":{
- "funct":mapGeographicElements,
- "params":[recordNode]
- }
- },
- // time constraints
- { "execute":{
- "funct":addTimeConstraints,
- "params":[recordNode]
- }
- },
- // resource maintenance
- { "execute":{
- "funct":addResourceMaintenance,
- "params":[recordNode]
- }
- },
- // addresses
- { "execute":{
- "funct":mapAddresses,
- "params":[recordNode]
- }
- },
- { "execute":{
- "funct":addCoupledServices,
- "params":[recordNode]
- }
- }
- ];
-
-document.put( "datatype", "default" );
-
-// iterate over all transformation descriptions
-var value;
-for (var i in transformationDescriptions) {
- var t = transformationDescriptions[i];
-
- // check for execution (special function)
- if (hasValue(t.execute)) {
- if (log.isDebugEnabled()) {
- log.debug("Execute function: " + t.execute.funct.name)
- }
- call_f(t.execute.funct, t.execute.params)
- } else {
- if (log.isDebugEnabled()) {
- log.debug("Working on " + t.indexField)
- }
- var tokenized = true;
- // iterate over all xpath results
- var nodeList = XPathUtils.getNodeList(recordNode, t.xpath);
- if (nodeList && nodeList.getLength() > 0) {
- for (j=0; j";
- return previewImageHtmlTag;
- }
- return "";
-}
-
-
-function addResourceMaintenance() {
- var maintenanceFrequencyCode = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue")
- if (hasValue(maintenanceFrequencyCode)) {
- // transform to IGC domain id
- var idcCode = codelistService.getCodeListEntryId("518", maintenanceFrequencyCode, "iso");
- if (hasValue(idcCode)) {
- addToDoc("t01_object.time_period", idcCode, false);
- addToDoc("t01_object.time_descr", XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString"), true);
- var periodDuration = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:userDefinedMaintenanceFrequency/gmd:TM_PeriodDuration");
- addToDoc("t01_object.time_interval", new TM_PeriodDurationToTimeInterval().parse(periodDuration), false);
- addToDoc("t01_object.time_alle", new TM_PeriodDurationToTimeAlle().parse(periodDuration), false);
- } else {
- if (log.isDebugEnabled()) {
- log.debug("MD_MaintenanceFrequencyCode '" + maintenanceFrequencyCode + "' unknown.")
- }
- }
- }
-}
-
-/*
- * Set the boundaries of dates to values that can be compared with lucene. The
- * value of inifinite pas is '00000000' and the value for inifinit future is '99999999'.
- *
- * Makes sure that the fields are only set, if we have a UDK date type of 'seit' or 'bis'.
- * We can do this because the mapping filters and maps the dates to t0 in case of date type
- * 'am' and to t1 in case of 'seit', even if the database fields are the same. Thus we do not
- * need to look at the DB field time_type which controls the date
- * type ('am', 'seit', 'bis', 'von (von-bis)')
- *
- */
-function addTimeConstraints() {
- var t1 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition"));
- var t2 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition"));
- var timeType;
- if (hasValue(t1) && hasValue(t2)) {
- if (t1 == t2) {
- addToDoc("t01_object.time_type", "am", false);
- addToDoc("t0", t1, false);
- } else {
- addToDoc("t01_object.time_type", "von", false);
- addToDoc("t1", t1, false);
- addToDoc("t2", t2, false);
- }
- } else if (hasValue(t1) && !hasValue(t2)) {
- addToDoc("t01_object.time_type", "seit", false);
- addToDoc("t1", t1, false);
- addToDoc("t2", "99999999", false);
- } else if (!hasValue(t1) && hasValue(t2)) {
- addToDoc("t01_object.time_type", "bis", false);
- addToDoc("t1", "00000000", false);
- addToDoc("t2", t2, false);
- }
-}
-
-function mapObjectClass() {
- var hierarchyLevel = XPathUtils.getString(recordNode, "//gmd:hierarchyLevel/gmd:MD_ScopeCode/@codeListValue");
- var hierarchyLevelName = XPathUtils.getString(recordNode, "//gmd:hierarchyLevelName/gco:CharacterString");
- var objectClass = "1";
- if (hasValue(hierarchyLevel)) {
- if (hierarchyLevel.toLowerCase() == "service") {
- // "Geodatendienst"
- objectClass = "3";
- } else if (hierarchyLevel.toLowerCase() == "application") {
- // "Dienst / Anwendung / Informationssystem"
- objectClass = "6";
- } else if (hierarchyLevel.toLowerCase() == "nongeographicdataset") {
- if (hasValue(hierarchyLevelName)) {
- if (hierarchyLevelName == "job") {
- // "Organisation/Fachaufgabe"
- objectClass = "0";
- } else if (hierarchyLevelName == "document") {
- objectClass = "2";
- } else if (hierarchyLevelName == "project") {
- objectClass = "4";
- } else if (hierarchyLevelName == "database") {
- objectClass = "5";
- }
- }
- }
- }
- addToDoc("t01_object.obj_class", objectClass, false);
-}
-
-function addCoupledServices() {
- var crossReferences = XPathUtils.getNodeList(recordNode, "//idf:crossReference[./idf:objectType=3]");
- if (hasValue(crossReferences)) {
- for (i=0; i 0) {
+ for (j=0; j";
+ return previewImageHtmlTag;
+ }
+ return "";
+}
+
+
+function addResourceMaintenance() {
+ var maintenanceFrequencyCode = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue")
+ if (hasValue(maintenanceFrequencyCode)) {
+ // transform to IGC domain id
+ var idcCode = codelistService.getCodeListEntryId("518", maintenanceFrequencyCode, "iso");
+ if (hasValue(idcCode)) {
+ addToDoc("t01_object.time_period", idcCode, false);
+ addToDoc("t01_object.time_descr", XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString"), true);
+ var periodDuration = XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:userDefinedMaintenanceFrequency/gmd:TM_PeriodDuration");
+ addToDoc("t01_object.time_interval", new TM_PeriodDurationToTimeInterval().parse(periodDuration), false);
+ addToDoc("t01_object.time_alle", new TM_PeriodDurationToTimeAlle().parse(periodDuration), false);
+ } else {
+ if (log.isDebugEnabled()) {
+ log.debug("MD_MaintenanceFrequencyCode '" + maintenanceFrequencyCode + "' unknown.")
+ }
+ }
+ }
+}
+
+/*
+ * Set the boundaries of dates to values that can be compared with lucene. The
+ * value of inifinite pas is '00000000' and the value for inifinit future is '99999999'.
+ *
+ * Makes sure that the fields are only set, if we have a UDK date type of 'seit' or 'bis'.
+ * We can do this because the mapping filters and maps the dates to t0 in case of date type
+ * 'am' and to t1 in case of 'seit', even if the database fields are the same. Thus we do not
+ * need to look at the DB field time_type which controls the date
+ * type ('am', 'seit', 'bis', 'von (von-bis)')
+ *
+ */
+function addTimeConstraints() {
+ var t1 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition"));
+ var t2 = UtilsCSWDate.mapDateFromIso8601ToIndex(XPathUtils.getString(recordNode, "//gmd:identificationInfo//gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition"));
+ var timeType;
+ if (hasValue(t1) && hasValue(t2)) {
+ if (t1 == t2) {
+ addToDoc("t01_object.time_type", "am", false);
+ addToDoc("t0", t1, false);
+ } else {
+ addToDoc("t01_object.time_type", "von", false);
+ addToDoc("t1", t1, false);
+ addToDoc("t2", t2, false);
+ }
+ } else if (hasValue(t1) && !hasValue(t2)) {
+ addToDoc("t01_object.time_type", "seit", false);
+ addToDoc("t1", t1, false);
+ addToDoc("t2", "99999999", false);
+ } else if (!hasValue(t1) && hasValue(t2)) {
+ addToDoc("t01_object.time_type", "bis", false);
+ addToDoc("t1", "00000000", false);
+ addToDoc("t2", t2, false);
+ }
+}
+
+function mapObjectClass() {
+ var hierarchyLevel = XPathUtils.getString(recordNode, "//gmd:hierarchyLevel/gmd:MD_ScopeCode/@codeListValue");
+ var hierarchyLevelName = XPathUtils.getString(recordNode, "//gmd:hierarchyLevelName/gco:CharacterString");
+ var objectClass = "1";
+ if (hasValue(hierarchyLevel)) {
+ if (hierarchyLevel.toLowerCase() == "service") {
+ // "Geodatendienst"
+ objectClass = "3";
+ } else if (hierarchyLevel.toLowerCase() == "application") {
+ // "Dienst / Anwendung / Informationssystem"
+ objectClass = "6";
+ } else if (hierarchyLevel.toLowerCase() == "nongeographicdataset") {
+ if (hasValue(hierarchyLevelName)) {
+ if (hierarchyLevelName == "job") {
+ // "Organisation/Fachaufgabe"
+ objectClass = "0";
+ } else if (hierarchyLevelName == "document") {
+ objectClass = "2";
+ } else if (hierarchyLevelName == "project") {
+ objectClass = "4";
+ } else if (hierarchyLevelName == "database") {
+ objectClass = "5";
+ }
+ }
+ }
+ }
+ addToDoc("t01_object.obj_class", objectClass, false);
+}
+
+function addCoupledServices() {
+ var crossReferences = XPathUtils.getNodeList(recordNode, "//idf:crossReference[./idf:objectType=3]");
+ if (hasValue(crossReferences)) {
+ for (i=0; i