Skip to content

Commit

Permalink
Merge pull request #435 from bennofs/fix-property-register
Browse files Browse the repository at this point in the history
Two small fixes for PropertyRegister API fetcher
  • Loading branch information
Tpt committed Aug 29, 2019
2 parents 79eaa01 + 05aa60c commit a3ca364
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 27 deletions.
27 changes: 15 additions & 12 deletions wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/PropertyRegister.java
Expand Up @@ -22,11 +22,7 @@
* #L%
*/

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.Map.Entry;

import org.slf4j.Logger;
Expand Down Expand Up @@ -105,6 +101,12 @@ public class PropertyRegister {
*/
int smallestUnfetchedPropertyIdNumber = 1;

/**
* Properties that are known to be missing. This is used to avoid making
* a request for this property again.
*/
final Set<String> knownMissing;

static final PropertyRegister WIKIDATA_PROPERTY_REGISTER = new PropertyRegister(
"P1921", ApiConnection.getWikidataApiConnection(),
Datamodel.SITE_WIKIDATA);
Expand All @@ -127,6 +129,7 @@ public PropertyRegister(String uriPatternPropertyId,
ApiConnection apiConnection, String siteUri) {
this.uriPatternPropertyId = uriPatternPropertyId;
this.siteUri = siteUri;
this.knownMissing = new HashSet<>();
dataFetcher = new WikibaseDataFetcher(apiConnection, siteUri);
}

Expand Down Expand Up @@ -312,7 +315,10 @@ protected void fetchPropertyInformation(PropertyIdValue property) {
// Don't do anything if all properties up to this index have already
// been fetched. In particular, don't try indefinitely to find a
// certain property type (maybe the property was deleted).
if (this.smallestUnfetchedPropertyIdNumber > propertyIdNumber) {
//
// If we previously tried to fetch this property and didn't
// find it, there is no point in trying again either.
if (this.smallestUnfetchedPropertyIdNumber > propertyIdNumber || knownMissing.contains(property.getId())) {
return;
}

Expand All @@ -331,11 +337,7 @@ protected void fetchPropertyInformation(PropertyIdValue property) {
Map<String, EntityDocument> properties;
try {
properties = dataFetcher.getEntityDocuments(propertyIds);
} catch (MediaWikiApiErrorException e) {
logger.error("Error when trying to fetch property data: "
+ e.toString());
properties = Collections.emptyMap();
} catch (IOException e) {
} catch (MediaWikiApiErrorException|IOException e) {
logger.error("Error when trying to fetch property data: "
+ e.toString());
properties = Collections.emptyMap();
Expand All @@ -353,7 +355,7 @@ protected void fetchPropertyInformation(PropertyIdValue property) {
logger.info("Fetched type information for property "
+ entry.getKey() + " online: " + datatype);

if (!DatatypeIdValue.DT_STRING.equals(datatype)) {
if (!DatatypeIdValue.DT_STRING.equals(datatype) && !DatatypeIdValue.DT_EXTERNAL_ID.equals(datatype)) {
continue;
}

Expand All @@ -380,6 +382,7 @@ protected void fetchPropertyInformation(PropertyIdValue property) {
if (!this.datatypes.containsKey(property.getId())) {
logger.error("Failed to fetch type information for property "
+ property.getId() + " online.");
knownMissing.add(property.getId());
}
}
}
Expand Up @@ -25,14 +25,12 @@
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;

import org.hamcrest.core.IsCollectionContaining;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Matchers;
import org.mockito.Mockito;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
Expand Down Expand Up @@ -67,6 +65,8 @@ public void setUp() throws MediaWikiApiErrorException, IOException {

PropertyIdValue pid434 = dataObjectFactory.getPropertyIdValue("P434",
this.siteIri);
PropertyIdValue pid508 = dataObjectFactory.getPropertyIdValue("P508",
this.siteIri);
PropertyIdValue pid23 = dataObjectFactory.getPropertyIdValue("P23",
this.siteIri);
PropertyIdValue pid1921 = dataObjectFactory.getPropertyIdValue("P1921",
Expand All @@ -90,6 +90,18 @@ Collections.<SnakGroup> emptyList(),
Collections.<Reference> emptyList(),
StatementRank.NORMAL, "000");

Statement p1921StatementExternalID = dataObjectFactory
.getStatement(
pid508,
dataObjectFactory
.getValueSnak(
pid1921,
dataObjectFactory
.getStringValue("http://purl.org/bncf/tid/$1")),
Collections.<SnakGroup> emptyList(),
Collections.<Reference> emptyList(),
StatementRank.NORMAL, "000");

mockStatementGroups.add(dataObjectFactory.getStatementGroup(Collections
.singletonList(p23Statement)));
mockStatementGroups.add(dataObjectFactory.getStatementGroup(Collections
Expand All @@ -110,20 +122,25 @@ Collections.<MonolingualTextValue> emptyList(),
Collections.<StatementGroup> emptyList(),
dataObjectFactory.getDatatypeIdValue(DatatypeIdValue.DT_ITEM),
0));
mockResult.put("P508", dataObjectFactory.getPropertyDocument(pid508,
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),
Collections.singletonList(dataObjectFactory.getStatementGroup(
Collections.singletonList(p1921StatementExternalID)
)),
dataObjectFactory.getDatatypeIdValue(DatatypeIdValue.DT_EXTERNAL_ID),
0));

this.propertyRegister = new PropertyRegister("P1921",
new ApiConnection("http://localhost/"), this.siteIri);

WikibaseDataFetcher dataFetcher = Mockito
.mock(WikibaseDataFetcher.class);

List<String> propertyIds = new ArrayList<String>();
propertyIds.add("P434");
for (int i = 1; i < 50; i++) {
propertyIds.add("P" + i);
}
Mockito.when(dataFetcher.getEntityDocuments(propertyIds)).thenReturn(
mockResult);
Mockito.when(dataFetcher.getEntityDocuments((List<String>)Matchers.argThat(IsCollectionContaining.hasItems("P434"))))
.thenReturn(mockResult);
Mockito.when(dataFetcher.getEntityDocuments((List<String>)Matchers.argThat(IsCollectionContaining.hasItems("P508"))))
.thenReturn(mockResult);
Mockito.when(dataFetcher.getFilter()).thenReturn(
new DocumentDataFilter());
this.propertyRegister.dataFetcher = dataFetcher;
Expand All @@ -148,6 +165,14 @@ public void testFetchPropertyUriPattern() {
assertTrue(this.propertyRegister.datatypes.keySet().contains("P434"));
}

@Test
public void testFetchPropertyUriPatternExternalID() {
PropertyIdValue pid = this.dataObjectFactory.getPropertyIdValue("P508",
this.siteIri);
assertEquals("http://purl.org/bncf/tid/$1",
this.propertyRegister.getPropertyUriPattern(pid));
}

@Test
public void testGetPropertyType() {
assertEquals(DatatypeIdValue.DT_STRING,
Expand All @@ -165,10 +190,13 @@ public void testGetPropertyType() {
@Test
public void testGetMissingPropertyType() {
assertNull(this.propertyRegister.getPropertyType(dataObjectFactory
.getPropertyIdValue("P10", this.siteIri)));
.getPropertyIdValue("P10000", this.siteIri)));
final int smallestBefore = this.propertyRegister.smallestUnfetchedPropertyIdNumber;
// Check twice to test fast failing on retry
assertNull(this.propertyRegister.getPropertyType(dataObjectFactory
.getPropertyIdValue("P10", this.siteIri)));
.getPropertyIdValue("P10000", this.siteIri)));
assertEquals("no requests should be made if the property is known to be missing",
smallestBefore, this.propertyRegister.smallestUnfetchedPropertyIdNumber);
}

@Test
Expand Down

0 comments on commit a3ca364

Please sign in to comment.