Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: download linked objects in a WFS using "resolvedepth" #1180

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public interface GMLConstants {
*/
public static final String NS_GML_32 = "http://www.opengis.net/gml/3.2";

/**
* The GML 3.2 namespace
*/
public static final String NS_WFS = "http://www.opengis.net/wfs";

}
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,41 @@
assertEquals(expected, count)
}

@Test
public void testWfsPagination() {
/*
* FIXME relies on external resources that are not guaranteed to exist and is thus not enabled for automated testing.
* Better would be a test that could mock the WFS responses (e.g. a mock service running w/ testcontainers)
*/
def schemaUrl = 'https://geodienste.komm.one/ows/services/org.107.7e499bca-5e63-4595-b3c4-eaece8b68608_wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=DescribeFeatureType'
def dataUrl = 'https://geodienste.komm.one/ows/services/org.107.7e499bca-5e63-4595-b3c4-eaece8b68608_wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&typenames=xplan:BP_Plan&resolvedepth=*'
def paging = 100
def expected = 754

def schema = loadSchema(URI.create(schemaUrl))

Map<String, String> params = [
(StreamGmlReader.PARAM_FEATURES_PER_WFS_REQUEST): paging as String,
(StreamGmlReader.PARAM_PAGINATE_REQUEST): 'true'
]

def instances = loadGml(URI.create(dataUrl), schema, params)

int count = 0
instances.iterator().withCloseable { it ->
while (it.hasNext()) {
((InstanceIterator) it).skip()
count++
if (count % 100 == 0) {
println("$count instances skipped")
}
}
}

println("$count instances skipped")
assertEquals(expected, count)

Check failure on line 172 in io/plugins/eu.esdihumboldt.hale.io.gml.test/src/eu/esdihumboldt/hale/io/gml/reader/internal/StreamGmlReaderTest.groovy

View workflow job for this annotation

GitHub Actions / check

StreamGmlReaderTest.testWfsPagination

expected:<754> but was:<876>
}

// helpers

Schema loadSchema(URI schemaLocation) throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Stack;

import javax.annotation.Nullable;
import javax.xml.XMLConstants;
Expand All @@ -50,6 +51,7 @@
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection;
import eu.esdihumboldt.hale.common.instance.model.InstanceReference;
import eu.esdihumboldt.hale.common.instance.model.InstanceResolver;
import eu.esdihumboldt.hale.common.instance.model.MutableInstance;
import eu.esdihumboldt.hale.common.instance.model.ResourceIterator;
import eu.esdihumboldt.hale.common.instance.model.ext.InstanceIterator;
import eu.esdihumboldt.hale.common.instance.model.impl.FilteredInstanceCollection;
Expand All @@ -61,6 +63,7 @@
import eu.esdihumboldt.hale.common.schema.model.TypeDefinition;
import eu.esdihumboldt.hale.common.schema.model.TypeIndex;
import eu.esdihumboldt.hale.common.schema.model.constraint.type.MappingRelevantFlag;
import eu.esdihumboldt.hale.io.gml.geometry.GMLConstants;
import eu.esdihumboldt.hale.io.gml.reader.internal.instance.StreamGmlHelper;
import eu.esdihumboldt.hale.io.gml.reader.internal.instance.StreamGmlInstance;
import eu.esdihumboldt.hale.io.xsd.constraint.XmlAttributeFlag;
Expand All @@ -75,6 +78,8 @@
*/
public class GmlInstanceCollection implements InstanceCollection, LogAware {

public static final String ADDITIONAL_OBJECTS = "ADDITIONAL_OBJECTS";

/**
* Iterates over {@link Instance}s in an XML/GML stream
*/
Expand All @@ -90,6 +95,7 @@ public class GmlInstanceIterator implements InstanceIterator {
private Map<QName, TypeDefinition> allElements;

private TypeDefinition nextType;
private Map<TypeDefinition, String> nextTypeDetails = new HashMap<>();

/**
* The index in the stream for the element returned next with
Expand All @@ -109,6 +115,7 @@ public class GmlInstanceIterator implements InstanceIterator {
* Uses a linked list to allow null items.
*/
private final Deque<TypeDefinition> typeStack = new LinkedList<>();
private final Stack<QName> elementStack = new Stack<>();

/**
* Default constructor
Expand All @@ -117,6 +124,7 @@ public GmlInstanceIterator() {
super();

nextType = null;
nextTypeDetails = new HashMap<>();

try {
in = new BufferedInputStream(source.getInput());
Expand Down Expand Up @@ -205,6 +213,17 @@ private void proceedToNext() throws XMLStreamException {
}
}
typeStack.push(def);
elementStack.push(elementName);

boolean isAnAdditionalObject = false;
// Now you want to find the parent element
for (int i = elementStack.size() - 1; i >= 0; i--) {
if (elementStack.get(i).getNamespaceURI().startsWith(GMLConstants.NS_WFS)
&& elementStack.get(i).getLocalPart().equals("additionalObjects")) {
isAnAdditionalObject = true;
break;
}
}

if (!rootEncountered) {
rootEncountered = true;
Expand All @@ -220,10 +239,14 @@ private void proceedToNext() throws XMLStreamException {

if (def != null && isAllowedType(def)) {
nextType = def;
if (isAnAdditionalObject) {
nextTypeDetails.put(def, ADDITIONAL_OBJECTS);
}
}
}
else if (event == XMLStreamConstants.END_ELEMENT) {
typeStack.pop();
elementStack.pop();
}
}
}
Expand Down Expand Up @@ -568,13 +591,25 @@ public synchronized Instance next() {
}

try {
return StreamGmlHelper.parseInstance(reader, nextType, elementIndex++, strict, null,
crsProvider, nextType, null, false, ignoreNamespaces, ioProvider);
Instance instance = StreamGmlHelper.parseInstance(reader, nextType, elementIndex++,
strict, null, crsProvider, nextType, null, false, ignoreNamespaces,
ioProvider);

if (nextTypeDetails != null && !nextTypeDetails.isEmpty()) {
for (Map.Entry<TypeDefinition, String> entry : nextTypeDetails.entrySet()) {
TypeDefinition key = entry.getKey();
String val = entry.getValue();
((MutableInstance) instance).setMetaData(ADDITIONAL_OBJECTS, key);
}
}
return instance;
} catch (XMLStreamException e) {
throw new IllegalStateException(e);
} finally {
nextType = null;
nextTypeDetails = new HashMap<>();
typeStack.pop(); // parseInstance consumes END_ELEMENT
elementStack.pop();
}
}

Expand Down Expand Up @@ -651,6 +686,7 @@ public synchronized void close() {
// ignore
}
nextType = null;
nextTypeDetails = null;
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import com.google.common.collect.Iterables;
import org.locationtech.jts.geom.Geometry;

import com.google.common.collect.Iterables;

import de.fhg.igd.slf4jplus.ALogger;
import de.fhg.igd.slf4jplus.ALoggerFactory;
import eu.esdihumboldt.hale.common.core.io.IOProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
import java.net.URL;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import javax.xml.namespace.QName;

import org.apache.commons.io.FileUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URIBuilder;
Expand All @@ -47,6 +50,7 @@
import eu.esdihumboldt.hale.common.instance.model.impl.IndexInstanceReference;
import eu.esdihumboldt.hale.common.schema.model.TypeDefinition;
import eu.esdihumboldt.hale.common.schema.model.TypeIndex;
import eu.esdihumboldt.hale.io.gml.geometry.GMLConstants;
import eu.esdihumboldt.hale.io.gml.reader.internal.GmlInstanceCollection;
import eu.esdihumboldt.hale.io.gml.reader.internal.GmlInstanceCollection.GmlInstanceIterator;
import eu.esdihumboldt.hale.io.gml.reader.internal.instance.StreamGmlInstance;
Expand Down Expand Up @@ -225,9 +229,15 @@ public WfsBackedGmlInstanceCollection(LocatableInputSupplier<? extends InputStre
}
}

// for a query containing RESOLVEDEPTH we disable the pagination
// if (primordialQueryParams.containsKey("RESOLVEDEPTH")) {
// featuresPerRequest = UNLIMITED;
// }

// Use primordial URI and issue "hits" request to check if the WFS will
// return anything at all
int hits;

if (ignoreNumberMatched) {
hits = UNKNOWN_SIZE;
}
Expand Down Expand Up @@ -265,6 +275,7 @@ public WfsBackedGmlInstanceCollection(LocatableInputSupplier<? extends InputStre
"featuresPerRequest must be a positive integer or {0} to disable pagination",
UNLIMITED));
}

this.featuresPerRequest = featuresPerRequest;
}

Expand Down Expand Up @@ -417,6 +428,10 @@ public class WfsBackedGmlInstanceIterator implements InstanceIterator {
private GmlInstanceCollection currentCollection;
private GmlInstanceIterator iterator;
private int totalFeaturesProcessed;
// store the additional objects
private final HashSet<String> uniqueIDInstancesAdditionalObjects = new HashSet<String>();
// store the "main" features of the GML
private final HashSet<String> uniqueIDMainInstances = new HashSet<String>();

/**
* Create the iterator
Expand Down Expand Up @@ -556,9 +571,14 @@ public boolean hasNext() {
* number of results reported by the WFS.
*/
protected boolean isFeatureLimitReached() {
// the condition (totalFeaturesProcessed >= size &&
// !iterator.hasNext()) should be there in order to process the
// instances coming from the additionalObjects after the last "main"
// instance
return (maxNumberOfFeatures != UNLIMITED
&& totalFeaturesProcessed >= maxNumberOfFeatures)
|| (size != UNKNOWN_SIZE && totalFeaturesProcessed >= size);
|| (size != UNKNOWN_SIZE && totalFeaturesProcessed >= size
&& !iterator.hasNext());
}

/**
Expand All @@ -571,7 +591,127 @@ public Instance next() {
}

Instance instance = iterator.next();
return new StreamGmlInstance(instance, totalFeaturesProcessed++);

Copy link
Contributor Author

@emanuelaepure10 emanuelaepure10 Jun 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stempler Could you help me understand how is it possible that for a total or
totalFeaturesProcessed: 754 which is the BP_Plan and additionalFeatureProcessed: 1844 processed here with a sum of 2598,
HS shows the following numbers:
image
Thank you

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest to add a unit test where a WFS FeatureCollection file with additional objects is read. That doesn't allow to test the pagination, but if you include at least one object with a duplicate GML ID you can use this to test:

  1. The correct detection of the "additional objects"
  2. If your mechanism for detecting GML ID duplicates works as expected

Once it has been ruled out that a problem is hidden there the paging behavior can be analysed.

if (primordialQueryParams.containsKey("RESOLVEDEPTH")) {
return processInstanceWithResolveDepth(instance);
}
else {
return new StreamGmlInstance(instance, totalFeaturesProcessed++);
}
}

/**
* @param instance Instance
* @return Instance
*/
private Instance processInstanceWithResolveDepth(Instance instance) {
for (QName propertyName : instance.getPropertyNames()) {
if (isGmlIdProperty(propertyName)) {
Object[] gmlID = instance.getProperty(propertyName);
if (gmlID[0] != null) {
String gmlIDToCheck = (String) gmlID[0];

if (instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null
&& !instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS)
.isEmpty()) {
if (!uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) {
if (uniqueIDMainInstances.contains(gmlIDToCheck)) {
if (iterator.hasNext()) {
return next();
}
}
uniqueIDInstancesAdditionalObjects.add(gmlIDToCheck);
return new StreamGmlInstance(instance, totalFeaturesProcessed);
}
}
else {
if (!uniqueIDMainInstances.contains(gmlIDToCheck)) {
uniqueIDMainInstances.add(gmlIDToCheck);
totalFeaturesProcessed++;
if (uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) {
uniqueIDInstancesAdditionalObjects.remove(gmlIDToCheck);
if (iterator.hasNext()) {
return next();
}
}
return new StreamGmlInstance(instance, totalFeaturesProcessed);
}
}
}
}
}
return processRemainingInstances();
}

private Instance processRemainingInstances() {
if (iterator.hasNext()) {
return next();
}
else {
_closeAndRecreateIterator();
if (iterator.hasNext()) {
return next();
}
else {
return iterator.next();
}
}
}

private void _closeAndRecreateIterator() {
close();
createNextIterator();
}

private boolean isGmlIdProperty(QName propertyName) {
return (propertyName.getNamespaceURI().startsWith(GMLConstants.NS_WFS)
|| propertyName.getNamespaceURI().startsWith(GMLConstants.GML_NAMESPACE_CORE))
&& "id".equals(propertyName.getLocalPart())
&& "gml".equals(propertyName.getPrefix());
}

private String _getGmlId(Instance instance, QName propertyName) {
Object[] gmlID = instance.getProperty(propertyName);
return gmlID != null && gmlID.length > 0 ? (String) gmlID[0] : null;
}

private boolean _handleAdditionalObjects(Instance instance, String gmlIDToCheck) {
if (instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS) != null
&& !instance.getMetaData(GmlInstanceCollection.ADDITIONAL_OBJECTS).isEmpty()) {
if (!uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) {
if (uniqueIDMainInstances.contains(gmlIDToCheck) && iterator.hasNext()) {
next();
System.out.println("totalFeaturesProcessed:" + totalFeaturesProcessed
+ " - uniqueIDMainInstances:" + uniqueIDMainInstances.size()
+ " - uniqueIDInstancesAdditionalObjects:"
+ uniqueIDInstancesAdditionalObjects.size() + " 1SKIP");
return true;
}
uniqueIDInstancesAdditionalObjects.add(gmlIDToCheck);
return true;
}
}
return false;
}

private boolean _handleMainInstances(String gmlIDToCheck) {
if (!uniqueIDMainInstances.contains(gmlIDToCheck)) {
uniqueIDMainInstances.add(gmlIDToCheck);
totalFeaturesProcessed++;
if (uniqueIDInstancesAdditionalObjects.contains(gmlIDToCheck)) {
uniqueIDInstancesAdditionalObjects.remove(gmlIDToCheck);
if (iterator.hasNext()) {
System.out.println("totalFeaturesProcessed:" + totalFeaturesProcessed
+ " - uniqueIDMainInstances:" + uniqueIDMainInstances.size()
+ " - uniqueIDInstancesAdditionalObjects:"
+ uniqueIDInstancesAdditionalObjects.size() + " 2SKIP");
next();
return true;
}
}
return true;
}
return false;
}

/**
Expand Down Expand Up @@ -635,4 +775,4 @@ public void skip() {

}

}
}
Loading