diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index e3b353e78..ef3bc3e57 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -4,6 +4,10 @@ Wikidata Toolkit Release Notes Version 0.4.0 ------------- +New features: +* Support statements on property documents +* More robust JSON parsing: recover after errors to process remaining file + Bug fixes: * Support RDF export of Monolingual Text Value data in statements. * Significant performance improvements in RDF export of taxonomy data. diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Datamodel.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Datamodel.java index 93a1e2721..e3917e3e1 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Datamodel.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Datamodel.java @@ -428,7 +428,7 @@ public static SiteLink makeSiteLink(String title, String siteKey, } /** - * Creates a {@link PropertyDocument}. + * Creates a {@link PropertyDocument} without statements. * * @param propertyId * the id of the property that data is about @@ -452,6 +452,35 @@ public static PropertyDocument makePropertyDocument( aliases, datatypeId); } + /** + * Creates a {@link PropertyDocument}. 
+ * + * @param propertyId + * the id of the property that data is about + * @param labels + * the list of labels of this property, with at most one label + * for each language code + * @param descriptions + * the list of descriptions of this property, with at most one + * description for each language code + * @param aliases + * the list of aliases of this property + * @param statementGroups + * the list of statement groups of this property; all of them must + * have the given propertyId as their subject + * @param datatypeId + * the datatype of that property + * @return a {@link PropertyDocument} corresponding to the input + */ + public static PropertyDocument makePropertyDocument( + PropertyIdValue propertyId, List labels, + List descriptions, + List aliases, + List statementGroups, DatatypeIdValue datatypeId) { + return factory.getPropertyDocument(propertyId, labels, descriptions, + aliases, statementGroups, datatypeId); + } + /** * Creates an {@link ItemDocument}. * diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/DatamodelConverter.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/DatamodelConverter.java index f367b80f6..f4bfc1f1f 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/DatamodelConverter.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/DatamodelConverter.java @@ -164,7 +164,8 @@ public PropertyDocument copy(PropertyDocument object) { return this.dataObjectFactory.getPropertyDocument(object .getPropertyId(), new ArrayList<>(object.getLabels().values()), new ArrayList<>(object.getDescriptions().values()), - convertAliasList(object.getAliases()), object.getDatatype()); + convertAliasList(object.getAliases()), object + .getStatementGroups(), object.getDatatype()); } public ItemDocument copy(ItemDocument object) { diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Equality.java
b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Equality.java index 58c2c6d36..6f2f5e721 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Equality.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Equality.java @@ -505,7 +505,8 @@ public static boolean equalsPropertyDocument(PropertyDocument o1, Object o2) { } PropertyDocument other = (PropertyDocument) o2; // Note: property id already compared by equalsTermedDocument() - return o1.getDatatype().equals(other.getDatatype()); + return o1.getDatatype().equals(other.getDatatype()) + && o1.getStatementGroups().equals(other.getStatementGroups()); } /** diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Hash.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Hash.java index 5dac6e2d2..8314bdd70 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Hash.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/Hash.java @@ -308,6 +308,7 @@ public static int hashCode(SiteLink o) { public static int hashCode(PropertyDocument o) { int result; result = hashCodeForTermedDocument(o); + result = prime * result + o.getStatementGroups().hashCode(); result = prime * result + o.getDatatype().hashCode(); return result; } diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/ToString.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/ToString.java index 78506a774..da314dc56 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/ToString.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/helpers/ToString.java @@ -40,6 +40,7 @@ import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; import org.wikidata.wdtk.datamodel.interfaces.SomeValueSnak; import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementDocument; import 
org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.StatementRank; import org.wikidata.wdtk.datamodel.interfaces.StringValue; @@ -353,7 +354,8 @@ public static String toString(SiteLink o) { public static String toString(PropertyDocument o) { return "==PropertyDocument " + o.getPropertyId().getIri() + "==\n" + "* Datatype: " + o.getDatatype() - + toStringForTermedDocument(o); + + toStringForTermedDocument(o) + + toStringForStatementDocument(o); } /** @@ -368,16 +370,10 @@ public static String toString(ItemDocument o) { StringBuilder sb = new StringBuilder(); sb.append("==ItemDocument ").append(o.getItemId().getIri()); sb.append("==").append(toStringForTermedDocument(o)); - boolean first; - - sb.append("\n===Statements===\n"); - for (StatementGroup sg : o.getStatementGroups()) { - sb.append(toString(sg)); - } - sb.append("\n===End of statements===\n"); + sb.append(toStringForStatementDocument(o)); sb.append("* Site links: "); - first = true; + boolean first = true; SortedSet siteKeys = new TreeSet(o.getSiteLinks() .keySet()); for (String key : siteKeys) { @@ -392,6 +388,18 @@ public static String toString(ItemDocument o) { return sb.toString(); } + protected static String toStringForStatementDocument(StatementDocument o) { + StringBuilder sb = new StringBuilder(); + + sb.append("\n===Statements===\n"); + for (StatementGroup sg : o.getStatementGroups()) { + sb.append(toString(sg)); + } + sb.append("\n===End of statements===\n"); + + return sb.toString(); + } + /** * Returns a human-readable string representation of the given * {@link TermedDocument}. 
diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImpl.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImpl.java index a8dcf52b9..3a90107a3 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImpl.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImpl.java @@ -21,6 +21,7 @@ */ import java.math.BigDecimal; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -165,7 +166,17 @@ public PropertyDocument getPropertyDocument(PropertyIdValue propertyId, List descriptions, List aliases, DatatypeIdValue datatypeId) { return new PropertyDocumentImpl(propertyId, labels, descriptions, - aliases, datatypeId); + aliases, Collections. emptyList(), datatypeId); + } + + @Override + public PropertyDocument getPropertyDocument(PropertyIdValue propertyId, + List labels, + List descriptions, + List aliases, + List statementGroups, DatatypeIdValue datatypeId) { + return new PropertyDocumentImpl(propertyId, labels, descriptions, + aliases, statementGroups, datatypeId); } @Override diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImpl.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImpl.java index 6df06602a..318aa906c 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImpl.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImpl.java @@ -21,7 +21,6 @@ */ import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; @@ -34,15 +33,12 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.SiteLink; -import 
org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; -import org.wikidata.wdtk.util.NestedIterator; -public class ItemDocumentImpl extends TermedDocumentImpl implements +public class ItemDocumentImpl extends TermedStatementDocumentImpl implements ItemDocument { final ItemIdValue itemId; - final List statementGroups; final Map siteLinks; /** @@ -70,24 +66,11 @@ public class ItemDocumentImpl extends TermedDocumentImpl implements List aliases, List statementGroups, Map siteLinks) { - super(labels, descriptions, aliases); + super(itemIdValue, labels, descriptions, aliases, statementGroups); Validate.notNull(itemIdValue, "item ID cannot be null"); - Validate.notNull(statementGroups, "statement list cannot be null"); Validate.notNull(siteLinks, "site links cannot be null"); - if (!statementGroups.isEmpty()) { - for (StatementGroup sg : statementGroups) { - if (!itemIdValue.equals(sg.getSubject())) { - throw new IllegalArgumentException( - "All statement groups in a document must have the same subject: found " - + sg.getSubject() + " but expected " - + itemIdValue); - } - } - } - this.itemId = itemIdValue; - this.statementGroups = statementGroups; this.siteLinks = siteLinks; } @@ -101,16 +84,6 @@ public ItemIdValue getItemId() { return itemId; } - @Override - public List getStatementGroups() { - return Collections.unmodifiableList(statementGroups); - } - - @Override - public Iterator getAllStatements() { - return new NestedIterator<>(statementGroups); - } - @Override public Map getSiteLinks() { return Collections.unmodifiableMap(siteLinks); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImpl.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImpl.java index 164f854e9..f3b753348 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImpl.java +++ 
b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImpl.java @@ -31,6 +31,7 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; /** * Implementation of {@link PropertyDocument}. @@ -38,8 +39,8 @@ * @author Markus Kroetzsch * */ -public class PropertyDocumentImpl extends TermedDocumentImpl implements -PropertyDocument { +public class PropertyDocumentImpl extends TermedStatementDocumentImpl implements + PropertyDocument { final PropertyIdValue propertyId; final DatatypeIdValue datatypeId; @@ -57,14 +58,18 @@ public class PropertyDocumentImpl extends TermedDocumentImpl implements * description for each language code * @param aliases * the list of aliases of this property + * @param statementGroups + * the list of statement groups of this property; all of them must + * have the given propertyId as their subject * @param datatypeId * the datatype of that property */ PropertyDocumentImpl(PropertyIdValue propertyId, List labels, List descriptions, - List aliases, DatatypeIdValue datatypeId) { - super(labels, descriptions, aliases); + List aliases, + List statementGroups, DatatypeIdValue datatypeId) { + super(propertyId, labels, descriptions, aliases, statementGroups); Validate.notNull(propertyId, "property ID cannot be null"); Validate.notNull(datatypeId, "datatype ID cannot be null"); this.propertyId = propertyId; diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/SitesImpl.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/SitesImpl.java index ff33107e5..ef5df7f50 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/SitesImpl.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/SitesImpl.java @@ -9,9 +9,9 @@
* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/TermedDocumentImpl.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/TermedStatementDocumentImpl.java similarity index 69% rename from wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/TermedDocumentImpl.java rename to wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/TermedStatementDocumentImpl.java index 90770ce6f..20c65b140 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/TermedDocumentImpl.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/TermedStatementDocumentImpl.java @@ -23,12 +23,18 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.lang3.Validate; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementDocument; +import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.TermedDocument; +import org.wikidata.wdtk.util.NestedIterator; /** * Implementation of {@link TermedDocument}. 
This abstract class defines the @@ -37,15 +43,20 @@ * @author Markus Kroetzsch * */ -public abstract class TermedDocumentImpl implements TermedDocument { +public abstract class TermedStatementDocumentImpl implements TermedDocument, + StatementDocument { final Map labels; final Map descriptions; final Map> aliases; + final List statementGroups; /** * Constructor. * + * @param entityIdValue + * the entity that this document refers to; used to validate + * statements * @param labels * the list of labels of this entity, with at most one label for * each language code @@ -54,13 +65,19 @@ public abstract class TermedDocumentImpl implements TermedDocument { * description for each language code * @param aliases * the list of aliases of this entity + * @param statementGroups + * the list of statement groups of this entity; all of them must + * have the given entityIdValue as their subject */ - TermedDocumentImpl(List labels, + TermedStatementDocumentImpl(EntityIdValue entityIdValue, + List labels, List descriptions, - List aliases) { + List aliases, + List statementGroups) { Validate.notNull(labels, "list of labels cannot be null"); Validate.notNull(descriptions, "list of descriptions cannot be null"); Validate.notNull(aliases, "list of aliases cannot be null"); + Validate.notNull(statementGroups, "statement list cannot be null"); this.labels = new HashMap(); for (MonolingualTextValue label : labels) { @@ -93,6 +110,19 @@ public abstract class TermedDocumentImpl implements TermedDocument { this.aliases.put(alias.getLanguageCode(), aliasesForLanguage); } } + + if (!statementGroups.isEmpty()) { + for (StatementGroup sg : statementGroups) { + if (!entityIdValue.equals(sg.getSubject())) { + throw new IllegalArgumentException( + "All statement groups in a document must have the same subject: found " + + sg.getSubject() + " but expected " + + entityIdValue); + } + } + } + + this.statementGroups = statementGroups; } @Override @@ -112,4 +142,14 @@ public Map> getAliases() { return
Collections.unmodifiableMap(aliases); } + @Override + public List getStatementGroups() { + return Collections.unmodifiableList(statementGroups); + } + + @Override + public Iterator getAllStatements() { + return new NestedIterator<>(statementGroups); + } + } diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DataObjectFactory.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DataObjectFactory.java index 149018192..1b4b17a56 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DataObjectFactory.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DataObjectFactory.java @@ -261,7 +261,9 @@ Statement getStatement(Claim claim, List references, SiteLink getSiteLink(String title, String siteKey, List badges); /** - * Creates a {@link PropertyDocument}. + * Creates a {@link PropertyDocument} without any statements. This is + * provided for backwards compatibility (property documents did not support + * statements in the past). * * @param propertyId * the id of the property that data is about @@ -282,6 +284,32 @@ PropertyDocument getPropertyDocument(PropertyIdValue propertyId, List descriptions, List aliases, DatatypeIdValue datatypeId); + /** + * Creates a {@link PropertyDocument}. 
+ * + * @param propertyId + * the id of the property that data is about + * @param labels + * the list of labels of this property, with at most one label + * for each language code + * @param descriptions + * the list of descriptions of this property, with at most one + * description for each language code + * @param aliases + * the list of aliases of this property + * @param statementGroups + * the list of statement groups of this property; all of them must + * have the given propertyId as their subject + * @param datatypeId + * the datatype of that property + * @return a {@link PropertyDocument} corresponding to the input + */ + PropertyDocument getPropertyDocument(PropertyIdValue propertyId, + List labels, + List descriptions, + List aliases, + List statementGroups, DatatypeIdValue datatypeId); + /** * Creates an {@link ItemDocument}. * diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DatatypeIdValue.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DatatypeIdValue.java index bea69feb3..d2d75abd8 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DatatypeIdValue.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/DatatypeIdValue.java @@ -24,15 +24,19 @@ * A value that represents one of the available Wikibase datatypes. The method * {@link IriIdentifiedValue#getIri() getIri()} will always return one of the * datatype IRIs defined in this interface. - * + * * @author Markus Kroetzsch - * + * */ public interface DatatypeIdValue extends IriIdentifiedValue { /** * IRI of the item datatype in Wikibase. */ static final String DT_ITEM = "http://www.wikidata.org/ontology#propertyTypeItem"; + /** + * IRI of the property datatype in Wikibase. + */ + static final String DT_PROPERTY = "http://www.wikidata.org/ontology#propertyTypeProperty"; /** * IRI of the string datatype in Wikibase.
*/ diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/EntityDocumentProcessorBroker.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/EntityDocumentProcessorBroker.java index 3522b65f1..4e5ce625f 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/EntityDocumentProcessorBroker.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/EntityDocumentProcessorBroker.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/ItemDocument.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/ItemDocument.java index 6e6778529..d9f292c09 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/ItemDocument.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/ItemDocument.java @@ -20,47 +20,29 @@ * #L% */ -import java.util.Iterator; -import java.util.List; import java.util.Map; /** * Interface for datasets that describe items. It extends {@link EntityDocument} * with information about site links and statements. - * + * * @author Markus Kroetzsch - * + * */ -public interface ItemDocument extends TermedDocument { +public interface ItemDocument extends TermedDocument, StatementDocument { /** * Return the ID of the item that the data refers to. The result is the same * as that of {@link EntityDocument#getEntityId()}, but declared with a more * specific result type. 
- * + * * @return item id */ ItemIdValue getItemId(); - /** - * Return the list of all StatementGroups stored for this item. The order of - * StatementGroups is significant. - * - * @return list of StatementGroups - */ - List getStatementGroups(); - - /** - * Returns an iterator that provides access to all statements, without - * considering the statement groups. The order of statements is preserved. - * - * @return iterator over all statements - */ - Iterator getAllStatements(); - /** * Get a Map of site keys to {@link SiteLink} objects. - * + * * @return map of SiteLinks */ Map getSiteLinks(); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/PropertyDocument.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/PropertyDocument.java index 807a74beb..7c0766d91 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/PropertyDocument.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/PropertyDocument.java @@ -25,24 +25,24 @@ * {@link EntityDocument} with information about the datatype of a property. *

* Claims or Statements on properties might be supported in the future. - * + * * @author Markus Kroetzsch - * + * */ -public interface PropertyDocument extends TermedDocument { +public interface PropertyDocument extends TermedDocument, StatementDocument { /** * Return the ID of the property that the data refers to. The result is the * same as that of {@link EntityDocument#getEntityId()}, but declared with a * more specific result type. - * + * * @return property id */ PropertyIdValue getPropertyId(); /** * Get the datatype id of the datatype defined for this property. - * + * * @return {@link DatatypeIdValue} */ DatatypeIdValue getDatatype(); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/Sites.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/Sites.java index fc89c63d7..a40214052 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/Sites.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/Sites.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/StatementDocument.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/StatementDocument.java new file mode 100644 index 000000000..7731cdfb2 --- /dev/null +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/interfaces/StatementDocument.java @@ -0,0 +1,49 @@ +package org.wikidata.wdtk.datamodel.interfaces; + +/* + * #%L + * Wikidata Toolkit Data Model + * %% + * Copyright (C) 2014 Wikidata Toolkit Developers + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.Iterator; +import java.util.List; + +/** + * Interface for EntityDocuments that can have statements. + * + * @author Markus Kroetzsch + */ +public interface StatementDocument extends EntityDocument { + + /** + * Return the list of all StatementGroups stored for this item. The order of + * StatementGroups is significant. + * + * @return list of StatementGroups + */ + List getStatementGroups(); + + /** + * Returns an iterator that provides access to all statements, without + * considering the statement groups. The order of statements is preserved. 
+ * + * @return iterator over all statements + */ + Iterator getAllStatements(); + +} diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/AliasesDeserializer.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/AliasesDeserializer.java index b283e6d31..a6daa6a4e 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/AliasesDeserializer.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/AliasesDeserializer.java @@ -36,14 +36,14 @@ /** * A deserializer implementation for the aliases in an - * {@link JacksonTermedDocument}. + * {@link JacksonTermedStatementDocument}. *

* It implements a workaround to cope with empty aliases being represented as * "aliases":[] despite its declaration as map and not as list or * array. This is neither nice nor fast, and should be obsolete as soon as * possible. * - * @see JacksonTermedDocument#setAliases(Map) + * @see JacksonTermedStatementDocument#setAliases(Map) * * @author Fredo Erxleben * diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/ClaimFromJson.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/ClaimFromJson.java index ed8304b5b..0d36d0013 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/ClaimFromJson.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/ClaimFromJson.java @@ -27,6 +27,7 @@ import org.wikidata.wdtk.datamodel.helpers.Hash; import org.wikidata.wdtk.datamodel.helpers.ToString; import org.wikidata.wdtk.datamodel.interfaces.Claim; +import org.wikidata.wdtk.datamodel.interfaces.EntityDocument; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; @@ -51,7 +52,7 @@ public ClaimFromJson(JacksonStatement statement) { @Override public EntityIdValue getSubject() { - JacksonItemDocument parentDocument = this.statement.getParentDocument(); + EntityDocument parentDocument = this.statement.getParentDocument(); if (parentDocument != null) { return parentDocument.getEntityId(); } else { diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonDatatypeId.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonDatatypeId.java index 22103713c..320606634 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonDatatypeId.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonDatatypeId.java @@ -40,6 +40,11 @@ public class 
JacksonDatatypeId implements DatatypeIdValue { * {@link DatatypeIdValue#DT_ITEM} in JSON. */ public static final String JSON_DT_ITEM = "wikibase-item"; + /** + * String used to refer to the property datatype + * {@link DatatypeIdValue#DT_PROPERTY} in JSON. + */ + public static final String JSON_DT_PROPERTY = "wikibase-property"; /** * String used to refer to the property datatype * {@link DatatypeIdValue#DT_GLOBE_COORDINATES} in JSON. @@ -94,6 +99,8 @@ public static String getDatatypeIriFromJsonDatatype(String jsonDatatype) { switch (jsonDatatype) { case JSON_DT_ITEM: return DT_ITEM; + case JSON_DT_PROPERTY: + return DT_PROPERTY; case JSON_DT_GLOBE_COORDINATES: return DT_GLOBE_COORDINATES; case JSON_DT_URL: diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonItemDocument.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonItemDocument.java index a7fc9c372..385504ce3 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonItemDocument.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonItemDocument.java @@ -20,13 +20,9 @@ * #L% */ -import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; -import java.util.List; import java.util.Map; -import java.util.Map.Entry; import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Equality; @@ -36,9 +32,6 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.SiteLink; -import org.wikidata.wdtk.datamodel.interfaces.Statement; -import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; -import org.wikidata.wdtk.util.NestedIterator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -57,25 +50,14 @@ */ 
@JsonInclude(Include.NON_EMPTY) @JsonIgnoreProperties(ignoreUnknown = true) -public class JacksonItemDocument extends JacksonTermedDocument implements +public class JacksonItemDocument extends JacksonTermedStatementDocument implements ItemDocument { - /** - * This is what is called claim in the JSON model. It corresponds to - * the statement group in the WDTK model. - */ - Map> claims = new HashMap<>(); /** * Map to store site links. */ private Map sitelinks = new HashMap<>(); - /** - * Statement groups. This member is initialized when statements are - * accessed. - */ - private List statementGroups = null; - /** * Constructor. Creates an empty object that can be populated during JSON * deserialization. Should only be used by Jackson for this very purpose. @@ -85,7 +67,7 @@ public JacksonItemDocument() { @Override public String getJsonType() { - return JacksonTermedDocument.JSON_TYPE_ITEM; + return JacksonTermedStatementDocument.JSON_TYPE_ITEM; } @JsonIgnore @@ -104,19 +86,6 @@ public EntityIdValue getEntityId() { return getItemId(); } - @JsonIgnore - @Override - public List getStatementGroups() { - if (this.statementGroups == null) { - this.statementGroups = new ArrayList<>(this.claims.size()); - for (List statements : this.claims.values()) { - this.statementGroups - .add(new StatementGroupFromJson(statements)); - } - } - return this.statementGroups; - } - /** * Sets the site links to the given value. Only for use by Jackson during * deserialization. @@ -135,60 +104,6 @@ public Map getSiteLinks() { return Collections. unmodifiableMap(this.sitelinks); } - /** - * Sets the "claims" to the given value. Only for use by Jackson during - * deserialization. - *

- * The name refers to the JSON model, where claims are similar to statement - * groups. This should not be confused with claims as used in the WDTK data - * model. This will probably only be used by the Jacksons' ObjectMapper. - * - * @param claims - */ - @JsonProperty("claims") - public void setJsonClaims(Map> claims) { - this.claims = claims; - this.statementGroups = null; // clear cache - updateClaims(); - } - - /** - * Sets the subject of each of the current statements ("claims" in JSON) to - * the current entity id. This is required since the JSON serialization of - * statements does not contain a subject id, but subject ids are part of the - * statement data in WDTK. The update is needed whenever the statements have - * changed. - */ - private void updateClaims() { - this.statementGroups = null; // clear cache - - for (Entry> entry : this.claims - .entrySet()) { - for (JacksonStatement statement : entry.getValue()) { - statement.setParentDocument(this); - } - } - } - - /** - * Returns the "claims". Only used by Jackson. - *

- * JSON "claims" correspond to statement groups in the WDTK model. You - * should use {@link JacksonItemDocument#getStatementGroups()} to obtain - * this data. - * - * @return map of statement groups - */ - @JsonProperty("claims") - public Map> getJsonClaims() { - return this.claims; - } - - @Override - public Iterator getAllStatements() { - return new NestedIterator<>(this.getStatementGroups()); - } - @Override public int hashCode() { return Hash.hashCode(this); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonMonolingualTextValue.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonMonolingualTextValue.java index 7d0f1aa9d..c57e1bac2 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonMonolingualTextValue.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonMonolingualTextValue.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonObjectFactory.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonObjectFactory.java index 4b6aeb65f..1ea8213e6 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonObjectFactory.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonObjectFactory.java @@ -27,8 +27,8 @@ import java.util.List; import java.util.Map; -import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter; import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter; import org.wikidata.wdtk.datamodel.interfaces.Claim; import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory; import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue; @@ -62,6 +62,7 @@ import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueGlobeCoordinates; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueItemId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueMonolingualText; +import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValuePropertyId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueQuantity; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueString; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueTime; @@ -82,7 +83,8 @@ public class JacksonObjectFactory implements DataObjectFactory { public ItemIdValue getItemIdValue(String id, String siteIri) { if (id.length() > 0 && id.charAt(0) == 'Q') { Integer numericId = Integer.valueOf(id.substring(1)); - JacksonInnerEntityId innerEntity = new JacksonInnerEntityId( + JacksonInnerEntityId innerEntity; + innerEntity = new JacksonInnerEntityId( JacksonInnerEntityId.JSON_ENTITY_TYPE_ITEM, numericId); JacksonValueItemId result = new JacksonValueItemId(); @@ -96,8 +98,19 @@ public 
ItemIdValue getItemIdValue(String id, String siteIri) { @Override public PropertyIdValue getPropertyIdValue(String id, String siteIri) { - // Jackson has no dedicated property id values: - return Datamodel.makePropertyIdValue(id, siteIri); + if (id.length() > 0 && id.charAt(0) == 'P') { + Integer numericId = Integer.valueOf(id.substring(1)); + JacksonInnerEntityId innerEntity; + innerEntity = new JacksonInnerEntityId( + JacksonInnerEntityId.JSON_ENTITY_TYPE_PROPERTY, numericId); + + JacksonValuePropertyId result = new JacksonValuePropertyId(); + result.setValue(innerEntity); + result.setParentDocument(getParentItemDocument("Qunknown", siteIri)); + return result; + } else { + throw new IllegalArgumentException("Illegal property id: " + id); + } } @Override @@ -121,9 +134,10 @@ public TimeValue getTimeValue(long year, byte month, byte day, byte hour, public GlobeCoordinatesValue getGlobeCoordinatesValue(long latitude, long longitude, long precision, String globeIri) { JacksonInnerGlobeCoordinates innerCoordinates = new JacksonInnerGlobeCoordinates( - ((double)latitude / GlobeCoordinatesValue.PREC_DEGREE), - ((double)longitude / GlobeCoordinatesValue.PREC_DEGREE), - ((double)precision / GlobeCoordinatesValue.PREC_DEGREE), globeIri); + ((double) latitude / GlobeCoordinatesValue.PREC_DEGREE), + ((double) longitude / GlobeCoordinatesValue.PREC_DEGREE), + ((double) precision / GlobeCoordinatesValue.PREC_DEGREE), + globeIri); JacksonValueGlobeCoordinates result = new JacksonValueGlobeCoordinates(); result.setValue(innerCoordinates); return result; @@ -237,8 +251,8 @@ public Statement getStatement(Claim claim, if (claim.getMainSnak() instanceof JacksonSnak) { result.setMainsnak((JacksonSnak) claim.getMainSnak()); } else { - result.setMainsnak((JacksonSnak) dataModelConverter - .copySnak(claim.getMainSnak())); + result.setMainsnak((JacksonSnak) dataModelConverter.copySnak(claim + .getMainSnak())); } Map> qualifiers = new HashMap<>(); @@ -312,9 +326,19 @@ public 
PropertyDocument getPropertyDocument(PropertyIdValue propertyId, List labels, List descriptions, List aliases, DatatypeIdValue datatypeId) { + return getPropertyDocument(propertyId, labels, descriptions, aliases, + Collections. emptyList(), datatypeId); + } + + @Override + public PropertyDocument getPropertyDocument(PropertyIdValue propertyId, + List labels, + List descriptions, + List aliases, + List statementGroups, DatatypeIdValue datatypeId) { JacksonPropertyDocument result = new JacksonPropertyDocument(); - initializeTermedDocument(result, propertyId, labels, descriptions, - aliases); + initializeTermedStatementDocument(result, propertyId, labels, + descriptions, aliases, statementGroups); switch (datatypeId.getIri()) { case DatatypeIdValue.DT_ITEM: @@ -354,27 +378,8 @@ public ItemDocument getItemDocument(ItemIdValue itemIdValue, List statementGroups, Map siteLinks) { JacksonItemDocument result = new JacksonItemDocument(); - initializeTermedDocument(result, itemIdValue, labels, descriptions, - aliases); - - Map> jacksonStatements = new HashMap<>(); - for (StatementGroup sg : statementGroups) { - String propertyId = sg.getProperty().getId(); - List propertyStatements = new ArrayList<>(sg - .getStatements().size()); - jacksonStatements.put(propertyId, propertyStatements); - - for (Statement s : sg) { - if (s instanceof JacksonStatement) { - propertyStatements.add((JacksonStatement) s); - } else { - propertyStatements - .add((JacksonStatement) this.dataModelConverter - .copy(s)); - } - } - } - result.setJsonClaims(jacksonStatements); + initializeTermedStatementDocument(result, itemIdValue, labels, + descriptions, aliases, statementGroups); Map jacksonSiteLinks = new HashMap<>( siteLinks.size()); @@ -393,10 +398,12 @@ public ItemDocument getItemDocument(ItemIdValue itemIdValue, return result; } - private void initializeTermedDocument(JacksonTermedDocument document, + private void initializeTermedStatementDocument( + JacksonTermedStatementDocument document, 
EntityIdValue entityIdValue, List labels, List descriptions, - List aliases) { + List aliases, + List statementGroups) { document.setJsonId(entityIdValue.getId()); document.setSiteIri(entityIdValue.getSiteIri()); @@ -415,6 +422,25 @@ private void initializeTermedDocument(JacksonTermedDocument document, document.setLabels(buildTermMapFromTermList(labels)); document.setDescriptions(buildTermMapFromTermList(descriptions)); + + Map> jacksonStatements = new HashMap<>(); + for (StatementGroup sg : statementGroups) { + String propertyId = sg.getProperty().getId(); + List propertyStatements = new ArrayList<>(sg + .getStatements().size()); + jacksonStatements.put(propertyId, propertyStatements); + + for (Statement s : sg) { + if (s instanceof JacksonStatement) { + propertyStatements.add((JacksonStatement) s); + } else { + propertyStatements + .add((JacksonStatement) this.dataModelConverter + .copy(s)); + } + } + } + document.setJsonClaims(jacksonStatements); } private Map buildTermMapFromTermList( diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonPropertyDocument.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonPropertyDocument.java index c5b47d75f..9a31c99ef 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonPropertyDocument.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonPropertyDocument.java @@ -43,7 +43,7 @@ * */ @JsonIgnoreProperties(ignoreUnknown = true) -public class JacksonPropertyDocument extends JacksonTermedDocument implements +public class JacksonPropertyDocument extends JacksonTermedStatementDocument implements PropertyDocument { /** @@ -108,7 +108,7 @@ public DatatypeIdValue getDatatype() { @Override public String getJsonType() { - return JacksonTermedDocument.JSON_TYPE_PROPERTY; + return JacksonTermedStatementDocument.JSON_TYPE_PROPERTY; } @Override diff --git 
a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonSnak.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonSnak.java index c5c2b0e3b..f268a2bd9 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonSnak.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonSnak.java @@ -75,10 +75,10 @@ public abstract class JacksonSnak implements Snak { * snaks, but is needed in WDTK to build {@link PropertyIdValue} objects * etc. Thus, it is necessary to set this information after each * deserialization using - * {@link JacksonSnak#setParentDocument(JacksonItemDocument)}. + * {@link JacksonSnak#setParentDocument(JacksonTermedStatementDocument)}. */ @JsonIgnore - JacksonTermedDocument parentDocument; + JacksonTermedStatementDocument parentDocument; /** * Constructor. Creates an empty object that can be populated during JSON @@ -153,7 +153,7 @@ public void setSnakType(String snacktype) { * new value */ @JsonIgnore - void setParentDocument(JacksonTermedDocument parentDocument) { + void setParentDocument(JacksonTermedStatementDocument parentDocument) { this.parentDocument = parentDocument; } diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonStatement.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonStatement.java index 005aef061..9052e82dd 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonStatement.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonStatement.java @@ -30,6 +30,7 @@ import org.wikidata.wdtk.datamodel.helpers.Hash; import org.wikidata.wdtk.datamodel.helpers.ToString; import org.wikidata.wdtk.datamodel.interfaces.Claim; +import org.wikidata.wdtk.datamodel.interfaces.EntityDocument; import org.wikidata.wdtk.datamodel.interfaces.Reference; import org.wikidata.wdtk.datamodel.interfaces.Statement; 
import org.wikidata.wdtk.datamodel.interfaces.StatementRank; @@ -70,10 +71,10 @@ public class JacksonStatement implements Statement { * serialization of statements, but is needed in WDTK as part of * {@link Claim}. Thus, it is necessary to set this information after each * deserialization using - * {@link JacksonStatement#setParentDocument(JacksonItemDocument)}. + * {@link JacksonStatement#setParentDocument(JacksonTermedStatementDocument)}. */ @JsonIgnore - JacksonItemDocument parentDocument; + JacksonTermedStatementDocument parentDocument; /** * Rank of this statement. @@ -139,7 +140,7 @@ public String getJsonType() { * @return the parent document of this statement */ @JsonIgnore - JacksonItemDocument getParentDocument() { + EntityDocument getParentDocument() { return this.parentDocument; } @@ -153,7 +154,7 @@ JacksonItemDocument getParentDocument() { * new value */ @JsonIgnore - void setParentDocument(JacksonItemDocument parentDocument) { + void setParentDocument(JacksonTermedStatementDocument parentDocument) { this.parentDocument = parentDocument; this.mainsnak.setParentDocument(parentDocument); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonTermedDocument.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonTermedStatementDocument.java similarity index 66% rename from wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonTermedDocument.java rename to wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonTermedStatementDocument.java index 7267ec613..c2d5f7705 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonTermedDocument.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonTermedStatementDocument.java @@ -20,15 +20,21 @@ * #L% */ +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; 
import java.util.Map; import java.util.Map.Entry; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementDocument; +import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.TermedDocument; +import org.wikidata.wdtk.util.NestedIterator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; @@ -48,9 +54,10 @@ */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") @JsonSubTypes({ - @Type(value = JacksonItemDocument.class, name = JacksonTermedDocument.JSON_TYPE_ITEM), - @Type(value = JacksonPropertyDocument.class, name = JacksonTermedDocument.JSON_TYPE_PROPERTY) }) -public abstract class JacksonTermedDocument implements TermedDocument { + @Type(value = JacksonItemDocument.class, name = JacksonTermedStatementDocument.JSON_TYPE_ITEM), + @Type(value = JacksonPropertyDocument.class, name = JacksonTermedStatementDocument.JSON_TYPE_PROPERTY) }) +public abstract class JacksonTermedStatementDocument implements TermedDocument, + StatementDocument { /** * String used to refer to items in JSON. @@ -66,6 +73,18 @@ public abstract class JacksonTermedDocument implements TermedDocument { protected Map labels = new HashMap<>(); protected Map descriptions = new HashMap<>(); + /** + * This is what is called claim in the JSON model. It corresponds to + * the statement group in the WDTK model. + */ + private Map> claims = new HashMap<>(); + + /** + * Statement groups. This member is initialized when statements are + * accessed. + */ + private List statementGroups = null; + /** * The id of the entity that the document refers to. This is not mapped to * JSON directly by Jackson but split into two fields, "type" and "id". 
The @@ -91,7 +110,7 @@ public abstract class JacksonTermedDocument implements TermedDocument { * Constructor. Creates an empty object that can be populated during JSON * deserialization. Should only be used by Jackson for this very purpose. */ - public JacksonTermedDocument() { + public JacksonTermedStatementDocument() { } /** @@ -199,12 +218,79 @@ public String getSiteIri() { return this.siteIri; } + @JsonIgnore + @Override + public List getStatementGroups() { + if (this.statementGroups == null) { + this.statementGroups = new ArrayList<>(this.claims.size()); + for (List statements : this.claims.values()) { + this.statementGroups + .add(new StatementGroupFromJson(statements)); + } + } + return this.statementGroups; + } + + /** + * Sets the "claims" to the given value. Only for use by Jackson during + * deserialization. + *

+ * The name refers to the JSON model, where claims are similar to statement + * groups. This should not be confused with claims as used in the WDTK data + * model. This will probably only be used by the Jacksons' ObjectMapper. + * + * @param claims + */ + @JsonProperty("claims") + public void setJsonClaims(Map> claims) { + this.claims = claims; + this.statementGroups = null; // clear cache + updateClaims(); + } + + /** + * Sets the subject of each of the current statements ("claims" in JSON) to + * the current entity id. This is required since the JSON serialization of + * statements does not contain a subject id, but subject ids are part of the + * statement data in WDTK. The update is needed whenever the statements have + * changed. + */ + private void updateClaims() { + this.statementGroups = null; // clear cache + + for (Entry> entry : this.claims + .entrySet()) { + for (JacksonStatement statement : entry.getValue()) { + statement.setParentDocument(this); + } + } + } + + /** + * Returns the "claims". Only used by Jackson. + *

+ * JSON "claims" correspond to statement groups in the WDTK model. You + * should use {@link JacksonItemDocument#getStatementGroups()} to obtain + * this data. + * + * @return map of statement groups + */ + @JsonProperty("claims") + public Map> getJsonClaims() { + return this.claims; + } + + @Override + public Iterator getAllStatements() { + return new NestedIterator<>(this.getStatementGroups()); + } + /** * Returns the JSON type string of the entity that this document refers to. * Only used by Jackson. * - * @return either {@link JacksonTermedDocument#JSON_TYPE_ITEM} or - * {@link JacksonTermedDocument#JSON_TYPE_PROPERTY} + * @return either {@link JacksonTermedStatementDocument#JSON_TYPE_ITEM} or + * {@link JacksonTermedStatementDocument#JSON_TYPE_PROPERTY} */ @JsonProperty("type") public abstract String getJsonType(); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonValueSnak.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonValueSnak.java index c61ec9bbf..0cba7d1ab 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonValueSnak.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/JacksonValueSnak.java @@ -27,7 +27,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Value; import org.wikidata.wdtk.datamodel.interfaces.ValueSnak; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValue; -import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueItemId; +import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueEntityId; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -108,10 +108,10 @@ public JacksonValue getDatavalue() { } @Override - void setParentDocument(JacksonTermedDocument parentDocument) { + void setParentDocument(JacksonTermedStatementDocument parentDocument) { super.setParentDocument(parentDocument); - if (this.datavalue instanceof JacksonValueItemId) { - 
((JacksonValueItemId) this.datavalue) + if (this.datavalue instanceof JacksonValueEntityId) { + ((JacksonValueEntityId) this.datavalue) .setParentDocument(parentDocument); } } diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerEntityId.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerEntityId.java index 732141715..6cc426285 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerEntityId.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerEntityId.java @@ -32,15 +32,19 @@ * */ public class JacksonInnerEntityId { - // TODO replace IllegalArgumentException with a checked one - // NOTE make sure to adapt all methods, once more types than only - // "item" are supported + // TODO maybe replace IllegalArgumentException with a checked one; maybe do + // the check when the type is set /** * The string used in JSON to denote the type of entity id values that are * items. */ public final static String JSON_ENTITY_TYPE_ITEM = "item"; + /** + * The string used in JSON to denote the type of entity id values that are + * properties. + */ + public final static String JSON_ENTITY_TYPE_PROPERTY = "property"; @JsonProperty("entity-type") private String entityType; @@ -56,19 +60,14 @@ public JacksonInnerEntityId() { } /** - * Constructor. The only known entity type so far is "item". In the future - * "property" might also be available. + * Constructor. Supported entity types so far are "item" and "property". 
* * @param entityType * (case-sensitive) * @param numericId - * @throws IllegalArgumentException - * if the entity type was unrecognized */ - public JacksonInnerEntityId(String entityType, int numericId) - throws IllegalArgumentException { - - setEntityType(entityType); + public JacksonInnerEntityId(String entityType, int numericId) { + setJsonEntityType(entityType); this.numericId = numericId; } @@ -79,7 +78,7 @@ public JacksonInnerEntityId(String entityType, int numericId) * @return the entity type string */ @JsonProperty("entity-type") - public String getEntityType() { + public String getJsonEntityType() { return entityType; } @@ -91,14 +90,7 @@ public String getEntityType() { * new value */ @JsonProperty("entity-type") - public void setEntityType(String entityType) - throws IllegalArgumentException { - - if (!JSON_ENTITY_TYPE_ITEM.equals(entityType)) { - throw new IllegalArgumentException("Entities of type " + entityType - + " are not supported in property values."); - } - + public void setJsonEntityType(String entityType) { this.entityType = entityType; } @@ -114,7 +106,7 @@ public int getNumericId() { } /** - * Sets thenumeric item id to the given value. Only for use by Jackson + * Sets the numeric item id to the given value. Only for use by Jackson * during deserialization. * * @param numericId @@ -131,10 +123,21 @@ public void setNumericId(int numericId) { * normally identified as "Q42". 
* * @return the string id + * @throws IllegalArgumentException + * if the entity type of this value is unknown and can thus not + * be mapped to a string id */ @JsonIgnore - public String getStringId() { - return "Q" + this.numericId; + public String getStringId() throws IllegalArgumentException { + switch (entityType) { + case JSON_ENTITY_TYPE_ITEM: + return "Q" + this.numericId; + case JSON_ENTITY_TYPE_PROPERTY: + return "P" + this.numericId; + default: + throw new IllegalArgumentException("Entities of type \"" + + entityType + "\" are not supported in property values."); + } } @Override diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerGlobeCoordinates.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerGlobeCoordinates.java index a5acfe23b..e87df7a77 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerGlobeCoordinates.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerGlobeCoordinates.java @@ -13,9 +13,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerMonolingualText.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerMonolingualText.java index 475efbcfd..359ab75f3 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerMonolingualText.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerMonolingualText.java @@ -11,9 +11,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerQuantity.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerQuantity.java index 3118e6651..3fbe0d0b6 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerQuantity.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonInnerQuantity.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValue.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValue.java index 70d6d5fc4..101b1dc84 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValue.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValue.java @@ -23,9 +23,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Value; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonSubTypes.Type; -import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * Abstract Jackson implementation of {@link Value}. @@ -33,14 +31,7 @@ * @author Fredo Erxleben * */ -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") -@JsonSubTypes({ - @Type(value = JacksonValueString.class, name = "string"), - @Type(value = JacksonValueTime.class, name = "time"), - @Type(value = JacksonValueItemId.class, name = "wikibase-entityid"), - @Type(value = JacksonValueGlobeCoordinates.class, name = "globecoordinate"), - @Type(value = JacksonValueQuantity.class, name = "quantity"), - @Type(value = JacksonValueMonolingualText.class, name = "monolingualtext") }) +@JsonDeserialize(using = JacksonValueDeserializer.class) @JsonIgnoreProperties(ignoreUnknown = true) public abstract class JacksonValue implements Value { diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueDeserializer.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueDeserializer.java new file mode 100644 index 000000000..2f7edf7f6 --- /dev/null +++ 
b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueDeserializer.java @@ -0,0 +1,114 @@ +package org.wikidata.wdtk.datamodel.json.jackson.datavalues; + +/* + * #%L + * Wikidata Toolkit Data Model + * %% + * Copyright (C) 2014 Wikidata Toolkit Developers + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.IOException; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; + +/** + * Custom Jackson deserializer that maps the JSON representation of Wikibase + * values to WDTK classes. In most cases, the class to use is defined by the + * value of the "type" field, but for entities one has to look deeper into the + * structure to get the "entity-type" field as well. This is not possible using + * simpler mechanisms. + * + * @author Markus Kroetzsch + * + */ +public class JacksonValueDeserializer extends StdDeserializer { + + /** + * + */ + private static final long serialVersionUID = -2851517075035995962L; + + /** + * Constructor. 
+ */ + public JacksonValueDeserializer() { + super(JacksonValue.class); + } + + @Override + public JacksonValue deserialize(JsonParser jsonParser, + DeserializationContext ctxt) throws IOException, + JsonProcessingException { + + ObjectMapper mapper = (ObjectMapper) jsonParser.getCodec(); + JsonNode root = mapper.readTree(jsonParser); + Class valueClass = getValueClass(root); + + return mapper.treeToValue(root, valueClass); + } + + /** + * Finds the Java class to use for deserializing the JSON structure + * represented by the given node. + * + * @param jsonNode + * the JSON node that represents the value to deserialize + * @return the Java class to use for deserialization + * @throws JsonMappingException + * if we do not have a class for the given JSON + */ + private Class getValueClass(JsonNode jsonNode) + throws JsonMappingException { + String jsonType = jsonNode.get("type").asText(); + + switch (jsonType) { + case JacksonValue.JSON_VALUE_TYPE_ENTITY_ID: + JsonNode valueNode = jsonNode.get("value"); + if (valueNode != null) { + String entityType = valueNode.get("entity-type").asText(); + switch (entityType) { + case JacksonInnerEntityId.JSON_ENTITY_TYPE_ITEM: + return JacksonValueItemId.class; + case JacksonInnerEntityId.JSON_ENTITY_TYPE_PROPERTY: + return JacksonValuePropertyId.class; + default: + throw new JsonMappingException("Entities of type \"" + + entityType + + "\" are not supported as property values yet."); + } + } + case JacksonValue.JSON_VALUE_TYPE_STRING: + return JacksonValueString.class; + case JacksonValue.JSON_VALUE_TYPE_TIME: + return JacksonValueTime.class; + case JacksonValue.JSON_VALUE_TYPE_GLOBE_COORDINATES: + return JacksonValueGlobeCoordinates.class; + case JacksonValue.JSON_VALUE_TYPE_QUANTITY: + return JacksonValueQuantity.class; + case JacksonValue.JSON_VALUE_TYPE_MONOLINGUAL_TEXT: + return JacksonValueMonolingualText.class; + default: + throw new JsonMappingException("Property values of type \"" + + jsonType + "\" are not supported 
yet."); + } + } +} diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueEntityId.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueEntityId.java new file mode 100644 index 000000000..a4df0112e --- /dev/null +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueEntityId.java @@ -0,0 +1,121 @@ +package org.wikidata.wdtk.datamodel.json.jackson.datavalues; + +/* + * #%L + * Wikidata Toolkit Data Model + * %% + * Copyright (C) 2014 Wikidata Toolkit Developers + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedStatementDocument; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +/** + * Abstract base implementation of {@link EntityIdValue} for Jackson. + * + * @author Markus Kroetzsch + * + */ +public abstract class JacksonValueEntityId extends JacksonValue implements + EntityIdValue { + + /** + * The parent document that this value is part of. This is needed since the + * site that this value refers to is not part of the JSON serialization of + * value, but is needed in WDTK to build all current types of + * {@link EntityIdValue} objects. 
Thus, it is necessary to set this + * information after each deserialization using + * {@link JacksonValueEntityId#setParentDocument(JacksonTermedStatementDocument)} + * . + */ + @JsonIgnore + JacksonTermedStatementDocument parentDocument; + + /** + * Inner helper object to store the actual data. Used to get the nested JSON + * structure that is required here. + */ + protected JacksonInnerEntityId value; + + /** + * Constructor. Creates an empty object that can be populated during JSON + * deserialization. Should only be used by Jackson for this very purpose. + */ + public JacksonValueEntityId() { + super(JSON_VALUE_TYPE_ENTITY_ID); + } + + /** + * Returns the inner value helper object. Only for use by Jackson during + * serialization. + * + * @return the inner entity id value + */ + public JacksonInnerEntityId getValue() { + return value; + } + + /** + * Sets the inner value helper object to the given value. Only for use by + * Jackson during deserialization. + * + * @param value + * new value + */ + public void setValue(JacksonInnerEntityId value) { + this.value = value; + } + + @JsonIgnore + @Override + public String getIri() { + return this.getSiteIri().concat(this.getId()); + } + + @JsonIgnore + @Override + public String getId() { + return this.value.getStringId(); + } + + @JsonIgnore + @Override + public String getSiteIri() { + if (this.parentDocument != null + && this.parentDocument.getSiteIri() != null) { + return this.parentDocument.getSiteIri(); + } else { + throw new RuntimeException( + "Cannot access the site IRI id of an insufficiently initialised Jackson value."); + } + } + + /** + * Sets the parent document of this value to the given value. This document + * provides the value with information about its site IRI, which is not part + * of the JSON serialization of values. This method should only be used + * during deserialization. 
+ * + * @param parentDocument + * new value + */ + @JsonIgnore + public void setParentDocument(JacksonTermedStatementDocument parentDocument) { + this.parentDocument = parentDocument; + } +} diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueGlobeCoordinates.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueGlobeCoordinates.java index 1d8fb1a85..440a855c0 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueGlobeCoordinates.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueGlobeCoordinates.java @@ -28,6 +28,8 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * Jackson implementation of {@link GlobeCoordinatesValue}. 
@@ -36,6 +38,7 @@ * */ @JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = None.class) public class JacksonValueGlobeCoordinates extends JacksonValue implements GlobeCoordinatesValue { @@ -77,19 +80,19 @@ public void setValue(JacksonInnerGlobeCoordinates value) { @JsonIgnore @Override public long getLatitude() { - return (long)(this.value.getLatitude() * GlobeCoordinatesValue.PREC_DEGREE); + return (long) (this.value.getLatitude() * GlobeCoordinatesValue.PREC_DEGREE); } @JsonIgnore @Override public long getLongitude() { - return (long)(this.value.getLongitude() * GlobeCoordinatesValue.PREC_DEGREE); + return (long) (this.value.getLongitude() * GlobeCoordinatesValue.PREC_DEGREE); } @JsonIgnore @Override public long getPrecision() { - return (long)(this.value.getPrecision() * GlobeCoordinatesValue.PREC_DEGREE); + return (long) (this.value.getPrecision() * GlobeCoordinatesValue.PREC_DEGREE); } @JsonIgnore diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueItemId.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueItemId.java index e25fb8636..4e49074a4 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueItemId.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueItemId.java @@ -6,11 +6,11 @@ import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor; -import org.wikidata.wdtk.datamodel.json.jackson.JacksonItemDocument; -import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedDocument; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /* 
* #%L @@ -33,48 +33,15 @@ */ /** - * Jackson implementation of {@link ItemIdValue}. So far this is the only kind - * of {@link EntityIdValue} that can occur as a value of properties. + * Jackson implementation of {@link ItemIdValue}. * * @author Fredo Erxleben * */ @JsonIgnoreProperties(ignoreUnknown = true) -public class JacksonValueItemId extends JacksonValue implements ItemIdValue { - - /** - * The parent document that this value is part of. This is needed since the - * site that this value refers to is not part of the JSON serialization of - * value, but is needed in WDTK to build {@link ItemIdValue} objects. Thus, - * it is necessary to set this information after each deserialization using - * {@link JacksonValueItemId#setParentDocument(JacksonItemDocument)}. - */ - @JsonIgnore - JacksonTermedDocument parentDocument; - - /** - * Inner helper object to store the actual data. Used to get the nested JSON - * structure that is required here. - */ - private JacksonInnerEntityId value; - - /** - * Constructor. Creates an empty object that can be populated during JSON - * deserialization. Should only be used by Jackson for this very purpose. - */ - public JacksonValueItemId() { - super(JSON_VALUE_TYPE_ENTITY_ID); - } - - /** - * Returns the inner value helper object. Only for use by Jackson during - * serialization. - * - * @return the inner entity id value - */ - public JacksonInnerEntityId getValue() { - return value; - } +@JsonDeserialize(using = None.class) +public class JacksonValueItemId extends JacksonValueEntityId implements + ItemIdValue { /** * Sets the inner value helper object to the given value. 
Only for use by @@ -83,32 +50,14 @@ public JacksonInnerEntityId getValue() { * @param value * new value */ - public void setValue(JacksonInnerEntityId value) { - this.value = value; - } - - @JsonIgnore - @Override - public String getIri() { - return this.getSiteIri().concat(this.getId()); - } - - @JsonIgnore - @Override - public String getId() { - return value.getStringId(); - } - - @JsonIgnore @Override - public String getSiteIri() { - if (this.parentDocument != null - && this.parentDocument.getSiteIri() != null) { - return this.parentDocument.getSiteIri(); - } else { - throw new RuntimeException( - "Cannot access the site IRI id of an insufficiently initialised Jackson value."); + public void setValue(JacksonInnerEntityId value) { + if (!JacksonInnerEntityId.JSON_ENTITY_TYPE_ITEM.equals(value + .getJsonEntityType())) { + throw new RuntimeException("Unexpected inner value type: " + + value.getJsonEntityType()); } + this.value = value; } @JsonIgnore @@ -117,20 +66,6 @@ public String getEntityType() { return EntityIdValue.ET_ITEM; } - /** - * Sets the parent document of this value to the given value. This document - * provides the value with information about its site IRI, which is not part - * of the JSON serialization of values. This method should only be used - * during deserialization. 
- * - * @param parentDocument - * new value - */ - @JsonIgnore - public void setParentDocument(JacksonTermedDocument parentDocument) { - this.parentDocument = parentDocument; - } - @Override public T accept(ValueVisitor valueVisitor) { return valueVisitor.visit(this); diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueMonolingualText.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueMonolingualText.java index 20c8c670a..dce46dafe 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueMonolingualText.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueMonolingualText.java @@ -28,6 +28,8 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * Jackson implementation of {@link MonolingualTextValue}. 
Java attributes are @@ -42,6 +44,7 @@ * */ @JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = None.class) public class JacksonValueMonolingualText extends JacksonValue implements MonolingualTextValue { diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValuePropertyId.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValuePropertyId.java new file mode 100644 index 000000000..fe32e6ec7 --- /dev/null +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValuePropertyId.java @@ -0,0 +1,88 @@ +package org.wikidata.wdtk.datamodel.json.jackson.datavalues; + +/* + * #%L + * Wikidata Toolkit Data Model + * %% + * Copyright (C) 2014 Wikidata Toolkit Developers + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import org.wikidata.wdtk.datamodel.helpers.Equality; +import org.wikidata.wdtk.datamodel.helpers.Hash; +import org.wikidata.wdtk.datamodel.helpers.ToString; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; + +/** + * Jackson implementation of {@link PropertyIdValue}. + * + * @author Markus Kroetzsch + * + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = None.class) +public class JacksonValuePropertyId extends JacksonValueEntityId implements + PropertyIdValue { + + /** + * Sets the inner value helper object to the given value. Only for use by + * Jackson during deserialization. 
+ * + * @param value + * new value + */ + @Override + public void setValue(JacksonInnerEntityId value) { + if (!JacksonInnerEntityId.JSON_ENTITY_TYPE_PROPERTY.equals(value + .getJsonEntityType())) { + throw new RuntimeException("Unexpected inner value type: " + + value.getJsonEntityType()); + } + this.value = value; + } + + @JsonIgnore + @Override + public String getEntityType() { + return EntityIdValue.ET_PROPERTY; + } + + @Override + public T accept(ValueVisitor valueVisitor) { + return valueVisitor.visit(this); + } + + @Override + public int hashCode() { + return Hash.hashCode(this); + } + + @Override + public boolean equals(Object obj) { + return Equality.equalsEntityIdValue(this, obj); + } + + @Override + public String toString() { + return ToString.toString(this); + } +} diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueQuantity.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueQuantity.java index 9f1ae2ace..34bb6da64 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueQuantity.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueQuantity.java @@ -30,6 +30,8 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * Jackson implementation of {@link QuantityValue}. 
@@ -38,6 +40,7 @@ * */ @JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = None.class) public class JacksonValueQuantity extends JacksonValue implements QuantityValue { /** diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueString.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueString.java index 3a02b3df3..3d3e95169 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueString.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueString.java @@ -8,6 +8,8 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /* * #%L @@ -36,6 +38,7 @@ * */ @JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = None.class) public class JacksonValueString extends JacksonValue implements StringValue { private String value; diff --git a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueTime.java b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueTime.java index cf0c5b091..9b031310c 100644 --- a/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueTime.java +++ b/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/json/jackson/datavalues/JacksonValueTime.java @@ -28,6 +28,8 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonDeserializer.None; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * Jackson implementation of {@link TimeValue}. 
@@ -36,6 +38,7 @@ * */ @JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = None.class) public class JacksonValueTime extends JacksonValue implements TimeValue { /** diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/DumpProcessingTest.java b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/DumpProcessingTest.java index c087d6654..ce580ecaa 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/DumpProcessingTest.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/DumpProcessingTest.java @@ -25,7 +25,7 @@ import javax.swing.JFileChooser; -import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedDocument; +import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedStatementDocument; import org.wikidata.wdtk.datamodel.json.jackson.JacksonItemDocument; import org.wikidata.wdtk.datamodel.json.jackson.JacksonPropertyDocument; @@ -47,7 +47,7 @@ public class DumpProcessingTest { public static void main(String[] args){ ObjectMapper mapper = new ObjectMapper(); - ObjectReader reader = mapper.reader(JacksonTermedDocument.class); + ObjectReader reader = mapper.reader(JacksonTermedStatementDocument.class); //ObjectReader propReader = mapper.reader(PropertyDocumentImpl.class); File dumpFile; @@ -67,10 +67,10 @@ public static void main(String[] args){ int props = 0; int lastReport = 0; try { - MappingIterator documentIter = reader.readValues(dumpFile); + MappingIterator documentIter = reader.readValues(dumpFile); while(documentIter.hasNextValue()){ - JacksonTermedDocument document = documentIter.nextValue(); + JacksonTermedStatementDocument document = documentIter.nextValue(); if(document != null){ // TODO do more useful and thorough check here processed++; if(document instanceof JacksonItemDocument){ diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImplTest.java 
b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImplTest.java index 00438136a..3a388c9a3 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImplTest.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/DataObjectFactoryImplTest.java @@ -34,6 +34,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Claim; import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory; import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue; import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; @@ -113,6 +114,18 @@ public static PropertyIdValue getTestPropertyIdValue(int seed) { return new PropertyIdValueImpl("P4" + seed, "foo:"); } + public static EntityIdValue getTestEntityIdValue(int seed, String entityType) { + switch (entityType) { + case EntityIdValue.ET_ITEM: + return getTestItemIdValue(seed); + case EntityIdValue.ET_PROPERTY: + return getTestPropertyIdValue(seed); + default: + throw new IllegalArgumentException("Unsupported entity type " + + entityType); + } + } + @Test public final void testGetDatatypeId() { DatatypeIdValue o1 = new DatatypeIdImpl(DatatypeIdValue.DT_TIME); @@ -281,17 +294,18 @@ public static List getTestValueSnakGroups(int seed, int size) { @Test public final void testGetClaim() { - Claim o1 = getTestClaim(0, 0, 2); + Claim o1 = getTestClaim(0, 0, 2, EntityIdValue.ET_ITEM); Claim o2 = converter.copy(o1); assertEquals(o1.toString(), o2.toString()); assertEquals(o1.hashCode(), o2.hashCode()); assertEquals(o2, o1); } - public static Claim getTestClaim(int subjectSeed, int seed, int size) { - return new ClaimImpl(getTestItemIdValue(subjectSeed), getTestValueSnak( - ValueType.fromInt(seed), seed, seed), getTestValueSnakGroups( - seed * 100, 
size)); + public static Claim getTestClaim(int subjectSeed, int seed, int size, + String entityType) { + return new ClaimImpl(getTestEntityIdValue(subjectSeed, entityType), + getTestValueSnak(ValueType.fromInt(seed), seed, seed), + getTestValueSnakGroups(seed * 100, size)); } @Test @@ -314,22 +328,24 @@ public static List getReferenceList(int seed, int size) { @Test public final void testGetStatement() { - Statement o1 = getTestStatement(0, 42, 3); + Statement o1 = getTestStatement(0, 42, 3, EntityIdValue.ET_ITEM); Statement o2 = converter.copy(o1); assertEquals(o1.toString(), o2.toString()); assertEquals(o1.hashCode(), o2.hashCode()); assertEquals(o2, o1); } - public static Statement getTestStatement(int subjectSeed, int seed, int size) { - return new StatementImpl(getTestClaim(subjectSeed, seed, size), - getReferenceList(seed, size), StatementRank.NORMAL, "MyId" - + seed); + public static Statement getTestStatement(int subjectSeed, int seed, + int size, String entityType) { + return new StatementImpl(getTestClaim(subjectSeed, seed, size, + entityType), getReferenceList(seed, size), + StatementRank.NORMAL, "MyId" + seed); } @Test public final void testGetStatementGroup() { - StatementGroup o1 = getTestStatementGroup(0, 17, 10); + StatementGroup o1 = getTestStatementGroup(0, 17, 10, + EntityIdValue.ET_ITEM); StatementGroup o2 = converter.copy(o1); assertEquals(o1.toString(), o2.toString()); @@ -338,20 +354,20 @@ public final void testGetStatementGroup() { } public static StatementGroup getTestStatementGroup(int subjectSeed, - int seed, int size) { + int seed, int size, String entityType) { List statements = new ArrayList<>(size); for (int i = 0; i < size; i++) { - statements.add(getTestStatement(subjectSeed, seed, i)); + statements.add(getTestStatement(subjectSeed, seed, i, entityType)); } return new StatementGroupImpl(statements); } public static List getTestStatementGroups(int subjectSeed, - int seed, int size) { + int seed, int size, String entityType) { List 
statementGroups = new ArrayList<>(size); for (int i = 0; i < size; i++) { statementGroups.add(getTestStatementGroup(subjectSeed, i + seed, - i * 2 + 1)); + i * 2 + 1, entityType)); } return statementGroups; } @@ -367,9 +383,11 @@ public final void testGetSiteLink() { @Test public final void testGetPropertyDocument() { PropertyDocument o1 = new PropertyDocumentImpl( - getTestPropertyIdValue(2), getTestMtvList(1, 0), // labels + getTestPropertyIdValue(2), + getTestMtvList(1, 0), // labels getTestMtvList(4, 13), // descriptions getTestMtvList(0, 0), // aliases + getTestStatementGroups(2, 17, 1, EntityIdValue.ET_PROPERTY), new DatatypeIdImpl(DatatypeIdValue.DT_TIME)); PropertyDocument o2 = converter.copy(o1); @@ -380,11 +398,13 @@ public final void testGetPropertyDocument() { @Test public final void testGetItemDocument() { - ItemDocument o1 = new ItemDocumentImpl(getTestItemIdValue(2), + ItemDocument o1 = new ItemDocumentImpl( + getTestItemIdValue(2), getTestMtvList(5, 0), // labels getTestMtvList(0, 0), // descriptions getTestMtvList(15, 12), // aliases - getTestStatementGroups(2, 17, 1), getTestSiteLinks(20)); + getTestStatementGroups(2, 17, 1, EntityIdValue.ET_ITEM), + getTestSiteLinks(20)); ItemDocument o2 = converter.copy(o1); assertEquals(o1.toString(), o2.toString()); diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImplTest.java b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImplTest.java index 04333561b..297cff985 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImplTest.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/ItemDocumentImplTest.java @@ -112,7 +112,8 @@ Collections. emptyList(), "P42", "foo"), Collections. emptyList(), Collections. emptyList(), Collections. emptyList(), - new DatatypeIdImpl(DatatypeIdValue.DT_STRING)); + Collections. 
emptyList(), new DatatypeIdImpl( + DatatypeIdValue.DT_STRING)); // we need to use empty lists of Statement groups to test inequality // based on different item ids with all other data being equal diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImplTest.java b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImplTest.java index f78583e7f..3a6f5b617 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImplTest.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/implementation/PropertyDocumentImplTest.java @@ -35,6 +35,7 @@ import org.junit.Before; import org.junit.Test; import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument; @@ -54,11 +55,12 @@ public class PropertyDocumentImplTest { List labels; List descriptions; List aliases; + List statementGroups; DatatypeIdValue datatypeId; @Before public void setUp() throws Exception { - pid = new PropertyIdValueImpl("P42", "http://wikibase.org/entity/"); + pid = DataObjectFactoryImplTest.getTestPropertyIdValue(2); labelMap = new HashMap(); labelMap.put("en", new MonolingualTextValueImpl("Property 42", "en")); @@ -90,12 +92,15 @@ public void setUp() throws Exception { aliases.add(alias2); aliases.add(alias3); + statementGroups = DataObjectFactoryImplTest.getTestStatementGroups(2, + 10, 3, EntityIdValue.ET_PROPERTY); + datatypeId = new DatatypeIdImpl(DatatypeIdValue.DT_ITEM); pd1 = new PropertyDocumentImpl(pid, labels, descriptions, aliases, - datatypeId); + statementGroups, datatypeId); pd2 = new PropertyDocumentImpl(pid, labels, descriptions, aliases, - datatypeId); + statementGroups, datatypeId); } @Test @@ -111,19 
+116,24 @@ public void fieldsAreCorrect() { @Test public void equalityBasedOnContent() { PropertyDocument pdDiffSubject = new PropertyDocumentImpl( - new PropertyIdValueImpl("P43", "http://wikibase.org/entity/"), - labels, descriptions, aliases, datatypeId); + DataObjectFactoryImplTest.getTestPropertyIdValue(3), labels, + descriptions, aliases, + DataObjectFactoryImplTest.getTestStatementGroups(3, 10, 3, + EntityIdValue.ET_PROPERTY), datatypeId); PropertyDocument pdDiffLabels = new PropertyDocumentImpl(pid, Collections. emptyList(), descriptions, - aliases, datatypeId); + aliases, statementGroups, datatypeId); PropertyDocument pdDiffDescriptions = new PropertyDocumentImpl(pid, labels, Collections. emptyList(), - aliases, datatypeId); + aliases, statementGroups, datatypeId); PropertyDocument pdDiffAliases = new PropertyDocumentImpl(pid, labels, descriptions, Collections. emptyList(), - datatypeId); + statementGroups, datatypeId); + PropertyDocument pdDiffStatements = new PropertyDocumentImpl(pid, + labels, descriptions, aliases, + Collections. emptyList(), datatypeId); PropertyDocument pdDiffDatatype = new PropertyDocumentImpl(pid, labels, - descriptions, aliases, new DatatypeIdImpl( + descriptions, aliases, statementGroups, new DatatypeIdImpl( DatatypeIdValue.DT_STRING)); ItemDocument id = new ItemDocumentImpl( @@ -137,6 +147,7 @@ aliases, Collections. 
emptyList(), assertThat(pd1, not(equalTo(pdDiffLabels))); assertThat(pd1, not(equalTo(pdDiffDescriptions))); assertThat(pd1, not(equalTo(pdDiffAliases))); + assertThat(pd1, not(equalTo(pdDiffStatements))); assertThat(pd1, not(equalTo(pdDiffDatatype))); assertFalse(pd1.equals(id)); assertThat(pd1, not(equalTo(null))); @@ -151,27 +162,37 @@ public void hashBasedOnContent() { @Test(expected = NullPointerException.class) public void idNotNull() { new PropertyDocumentImpl(null, labels, descriptions, aliases, - datatypeId); + statementGroups, datatypeId); } @Test(expected = NullPointerException.class) public void labelsNotNull() { - new PropertyDocumentImpl(pid, null, descriptions, aliases, datatypeId); + new PropertyDocumentImpl(pid, null, descriptions, aliases, + statementGroups, datatypeId); } @Test(expected = NullPointerException.class) public void descriptionsNotNull() { - new PropertyDocumentImpl(pid, labels, null, aliases, datatypeId); + new PropertyDocumentImpl(pid, labels, null, aliases, statementGroups, + datatypeId); } @Test(expected = NullPointerException.class) public void aliasesNotNull() { - new PropertyDocumentImpl(pid, labels, descriptions, null, datatypeId); + new PropertyDocumentImpl(pid, labels, descriptions, null, + statementGroups, datatypeId); + } + + @Test(expected = NullPointerException.class) + public void statementGroupsNotNull() { + new PropertyDocumentImpl(pid, labels, descriptions, aliases, null, + datatypeId); } @Test(expected = NullPointerException.class) public void datatypeNotNull() { - new PropertyDocumentImpl(pid, labels, descriptions, aliases, null); + new PropertyDocumentImpl(pid, labels, descriptions, aliases, + statementGroups, null); } @Test(expected = IllegalArgumentException.class) @@ -181,7 +202,8 @@ public void labelUniquePerLanguage() { labels2.add(new MonolingualTextValueImpl("Property 42 label duplicate", "en")); - new PropertyDocumentImpl(pid, labels2, descriptions, aliases, null); + new PropertyDocumentImpl(pid, labels2, 
descriptions, aliases, + statementGroups, null); } @Test(expected = IllegalArgumentException.class) @@ -191,7 +213,8 @@ public void descriptionUniquePerLanguage() { descriptions2.add(new MonolingualTextValueImpl( "Noch eine Beschreibung fuer P42", "de")); - new PropertyDocumentImpl(pid, labels, descriptions2, aliases, null); + new PropertyDocumentImpl(pid, labels, descriptions2, aliases, + statementGroups, null); } } diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/JsonTestData.java b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/JsonTestData.java index ed9555bb4..35af61b5e 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/JsonTestData.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/JsonTestData.java @@ -37,6 +37,7 @@ import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueGlobeCoordinates; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueItemId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueMonolingualText; +import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValuePropertyId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueQuantity; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueString; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueTime; @@ -52,14 +53,13 @@ public class JsonTestData { public static final DataObjectFactory JACKSON_OBJECT_FACTORY = new JacksonObjectFactory(); - // TODO maybe decompose the time a bit to have less magic strings in it + // TODO maybe decompose the time a bit to have fewer magic strings in it public static final String JSON_ENTITY_TYPE_ITEM = "item"; + public static final String JSON_ENTITY_TYPE_PROPERTY = "property"; // the id's used in the tests public static final String TEST_PROPERTY_ID = "P1"; - public static final PropertyIdValue TEST_PROPERTY_ID_VALUE = Datamodel 
- .makeWikidataPropertyIdValue(TEST_PROPERTY_ID); public static final String TEST_ITEM_ID = "Q1"; public static final int TEST_NUMERIC_ID = 1; public static final String TEST_STATEMENT_ID = "statement_foobar"; @@ -71,10 +71,14 @@ public class JsonTestData { // stand-alone descriptions of Value-parts public static final String JSON_STRING_VALUE = "{\"type\":\"" + JacksonValue.JSON_VALUE_TYPE_STRING + "\",\"value\":\"foobar\"}"; - public static final String JSON_ENTITY_ID_VALUE = "{\"type\":\"" + public static final String JSON_ITEM_ID_VALUE = "{\"type\":\"" + JacksonValue.JSON_VALUE_TYPE_ENTITY_ID + "\",\"value\":{\"entity-type\":\"" + JSON_ENTITY_TYPE_ITEM + "\",\"numeric-id\":" + TEST_NUMERIC_ID + "}}"; + public static final String JSON_PROPERTY_ID_VALUE = "{\"type\":\"" + + JacksonValue.JSON_VALUE_TYPE_ENTITY_ID + + "\",\"value\":{\"entity-type\":\"" + JSON_ENTITY_TYPE_PROPERTY + + "\",\"numeric-id\":" + TEST_NUMERIC_ID + "}}"; public static final String JSON_TIME_VALUE = "{\"type\":\"" + JacksonValue.JSON_VALUE_TYPE_TIME + "\", \"value\":{\"time\":\"+00000002013-10-28T00:00:00Z\",\"timezone\":0,\"before\":0,\"after\":0,\"precision\":11,\"calendarmodel\":\"http://www.wikidata.org/entity/Q1985727\"}}"; @@ -130,8 +134,10 @@ public class JsonTestData { public static final JacksonValueString TEST_STRING_VALUE = (JacksonValueString) JACKSON_OBJECT_FACTORY .getStringValue("foobar"); - public static final JacksonValueItemId TEST_ENTITY_ID_VALUE = (JacksonValueItemId) JACKSON_OBJECT_FACTORY + public static final JacksonValueItemId TEST_ITEM_ID_VALUE = (JacksonValueItemId) JACKSON_OBJECT_FACTORY .getItemIdValue("Q1", Datamodel.SITE_WIKIDATA); + public static final JacksonValuePropertyId TEST_PROPERTY_ID_VALUE = (JacksonValuePropertyId) JACKSON_OBJECT_FACTORY + .getPropertyIdValue("P1", Datamodel.SITE_WIKIDATA); public static final JacksonValueTime TEST_TIME_VALUE = (JacksonValueTime) JACKSON_OBJECT_FACTORY .getTimeValue(2013, (byte) 10, (byte) 28, (byte) 0, (byte) 0, 
(byte) 0, (byte) 11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO); diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestInnerValueObjects.java b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestInnerValueObjects.java index c9432a6c5..1a5875ae5 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestInnerValueObjects.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestInnerValueObjects.java @@ -28,6 +28,8 @@ import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonInnerEntityId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonInnerMonolingualText; +import com.fasterxml.jackson.databind.JsonMappingException; + /** * This class tests the inner objects lying behind the …ValueImpl-classes. * @@ -43,7 +45,7 @@ public class TestInnerValueObjects { private JacksonInnerMonolingualText testMonolingualText; @Before - public void setupTestEntityIds() { + public void setupTestEntityIds() throws JsonMappingException { this.testEntityId = new JacksonInnerEntityId(itemType, 1); } @@ -54,20 +56,21 @@ public void setupTestMonolingualText() { } @Test(expected = IllegalArgumentException.class) - public void testEntityIdConstructor() { - new JacksonInnerEntityId(wrongType, 1); + public void testEntityIdConstructor() throws JsonMappingException { + JacksonInnerEntityId testId = new JacksonInnerEntityId(wrongType, 1); + testId.getStringId(); // should fail } @Test(expected = IllegalArgumentException.class) - public void testEntityIdSetter() { - JacksonInnerEntityId emptyId = new JacksonInnerEntityId(); - emptyId.setNumericId(1); - emptyId.setEntityType(itemType); // should work - emptyId.setEntityType(wrongType); // should fail + public void testEntityIdSetter() throws JsonMappingException { + JacksonInnerEntityId testId = new JacksonInnerEntityId(); + testId.setNumericId(1); + testId.setJsonEntityType(wrongType); + testId.getStringId(); // should 
fail } @Test - public void testEntityIdMethods() { + public void testEntityIdMethods() throws JsonMappingException { assertEquals("Q1", this.testEntityId.getStringId()); assertEquals(this.testEntityId.getNumericId(), 1); diff --git a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestValue.java b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestValue.java index bfb2462de..5f58f26d5 100644 --- a/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestValue.java +++ b/wdtk-datamodel/src/test/java/org/wikidata/wdtk/datamodel/json/jackson/TestValue.java @@ -29,9 +29,10 @@ import org.junit.Test; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonInnerTime; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValue; -import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueItemId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueGlobeCoordinates; +import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueItemId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueMonolingualText; +import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValuePropertyId; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueQuantity; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueString; import org.wikidata.wdtk.datamodel.json.jackson.datavalues.JacksonValueTime; @@ -67,25 +68,47 @@ public void testStringValueToJava() throws JsonParseException, } @Test - public void testEntityIdValueToJson() throws JsonProcessingException { + public void testItemIdValueToJson() throws JsonProcessingException { String result = mapper - .writeValueAsString(JsonTestData.TEST_ENTITY_ID_VALUE); - JsonComparator.compareJsonStrings(JsonTestData.JSON_ENTITY_ID_VALUE, + .writeValueAsString(JsonTestData.TEST_ITEM_ID_VALUE); + JsonComparator.compareJsonStrings(JsonTestData.JSON_ITEM_ID_VALUE, result); } 
@Test - public void testEntityIdValueToJava() throws JsonParseException, + public void testItemIdValueToJava() throws JsonParseException, JsonMappingException, IOException { - JacksonValue result = mapper.readValue( - JsonTestData.JSON_ENTITY_ID_VALUE, JacksonValue.class); + JacksonValue result = mapper.readValue(JsonTestData.JSON_ITEM_ID_VALUE, + JacksonValue.class); assertNotNull(result); assertTrue(result instanceof JacksonValueItemId); assertEquals(result.getType(), - JsonTestData.TEST_ENTITY_ID_VALUE.getType()); + JsonTestData.TEST_ITEM_ID_VALUE.getType()); assertEquals(((JacksonValueItemId) result).getValue(), - JsonTestData.TEST_ENTITY_ID_VALUE.getValue()); + JsonTestData.TEST_ITEM_ID_VALUE.getValue()); + } + + @Test + public void testPropertyIdValueToJson() throws JsonProcessingException { + String result = mapper + .writeValueAsString(JsonTestData.TEST_PROPERTY_ID_VALUE); + JsonComparator.compareJsonStrings(JsonTestData.JSON_PROPERTY_ID_VALUE, + result); + } + + @Test + public void testPropertyIdValueToJava() throws JsonParseException, + JsonMappingException, IOException { + JacksonValue result = mapper.readValue( + JsonTestData.JSON_PROPERTY_ID_VALUE, JacksonValue.class); + + assertNotNull(result); + assertTrue(result instanceof JacksonValuePropertyId); + assertEquals(result.getType(), + JsonTestData.TEST_PROPERTY_ID_VALUE.getType()); + assertEquals(((JacksonValuePropertyId) result).getValue(), + JsonTestData.TEST_PROPERTY_ID_VALUE.getValue()); } @Test diff --git a/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/DumpProcessingController.java b/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/DumpProcessingController.java index 8c30bd495..75027dc58 100644 --- a/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/DumpProcessingController.java +++ b/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/DumpProcessingController.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use 
this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/JsonDumpFileProcessor.java b/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/JsonDumpFileProcessor.java index 10e9e6937..de117319a 100644 --- a/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/JsonDumpFileProcessor.java +++ b/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/JsonDumpFileProcessor.java @@ -20,14 +20,19 @@ * #L% */ +import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.wikidata.wdtk.datamodel.interfaces.EntityDocumentProcessor; import org.wikidata.wdtk.datamodel.json.jackson.JacksonItemDocument; import org.wikidata.wdtk.datamodel.json.jackson.JacksonPropertyDocument; -import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedDocument; +import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedStatementDocument; +import com.fasterxml.jackson.core.JsonParser.Feature; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.MappingIterator; import com.fasterxml.jackson.databind.ObjectMapper; @@ -41,9 +46,12 @@ */ public class JsonDumpFileProcessor implements MwDumpFileProcessor { + static final Logger logger = LoggerFactory + .getLogger(JsonDumpFileProcessor.class); + private final ObjectMapper mapper = new ObjectMapper(); - private final ObjectReader documentReader = mapper - .reader(JacksonTermedDocument.class); + private final ObjectReader documentReader = this.mapper + .reader(JacksonTermedStatementDocument.class); 
private final EntityDocumentProcessor entityDocumentProcessor; private final String siteIri; @@ -54,40 +62,123 @@ public JsonDumpFileProcessor( this.siteIri = siteIri; } + /** + * Process dump file data from the given input stream. This method uses the + * efficient Jackson {@link MappingIterator}. However, this class cannot + * recover from processing errors. If an error occurs in one entity, the + * (presumably) less efficient processing method + * {@link #processDumpFileContentsRecovery(InputStream)} is used instead. + * + * @see MwDumpFileProcessor#processDumpFileContents(InputStream, MwDumpFile) + */ @Override public void processDumpFileContents(InputStream inputStream, MwDumpFile dumpFile) { try { - MappingIterator documentIterator = documentReader - .readValues(inputStream); - - while (documentIterator.hasNextValue()) { - JacksonTermedDocument document = documentIterator.nextValue(); - document.setSiteIri(siteIri); - if (document != null) { - if (document instanceof JacksonItemDocument) { - this.handleItemDocument((JacksonItemDocument) document); - } else if (document instanceof JacksonPropertyDocument) { - this.handlePropertyDocument((JacksonPropertyDocument) document); - } + try { + MappingIterator documentIterator = documentReader + .readValues(inputStream); + documentIterator.getParser().disable(Feature.AUTO_CLOSE_SOURCE); + + while (documentIterator.hasNextValue()) { + JacksonTermedStatementDocument document = documentIterator + .nextValue(); + handleDocument(document); } + documentIterator.close(); + } catch (JsonProcessingException e) { + logJsonProcessingException(e); + processDumpFileContentsRecovery(inputStream); } - - } catch (JsonProcessingException e) { - e.printStackTrace(); } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeException("Cannot read JSON input: " + + e.getMessage(), e); } } - private void handleItemDocument(JacksonItemDocument document) { - this.entityDocumentProcessor.processItemDocument(document); + /** + * 
Reports the error of a JSON processing exception that was caught when + * trying to read an entity. + * + * @param exception + * the exception to log + */ + private void logJsonProcessingException(JsonProcessingException exception) { + JsonDumpFileProcessor.logger + .error("Error when reading JSON for entity: " + + exception.getMessage()); } - private void handlePropertyDocument(JacksonPropertyDocument document) { - this.entityDocumentProcessor.processPropertyDocument(document); + /** + * Handles a {@link JacksonTermedStatementDocument} that was retrieved by + * parsing the JSON input. It will call appropriate processing methods + * depending on the type of document. + * + * @param document + * the document to process + */ + private void handleDocument(JacksonTermedStatementDocument document) { + document.setSiteIri(siteIri); + if (document != null) { + if (document instanceof JacksonItemDocument) { + this.entityDocumentProcessor + .processItemDocument((JacksonItemDocument) document); + } else if (document instanceof JacksonPropertyDocument) { + this.entityDocumentProcessor + .processPropertyDocument((JacksonPropertyDocument) document); + } + } } + /** + * Process dump file data from the given input stream. The method can + * recover from an errors that occurred while processing an input stream, + * which is assumed to contain the JSON serialization of a list of JSON + * entities, with each entity serialization in one line. To recover from the + * previous error, the first line is skipped. + * + * @param inputStream + * the stream to read from + * @throws IOException + * if there is a problem reading the stream + */ + private void processDumpFileContentsRecovery(InputStream inputStream) + throws IOException { + JsonDumpFileProcessor.logger + .warn("Entering recovery mode to parse rest of file. 
This might be slightly slower."); + + BufferedReader br = new BufferedReader(new InputStreamReader( + inputStream)); + + String line = br.readLine(); + if (line.length() >= 100) { + line = line.substring(0, 100) + "[...]" + + line.substring(line.length() - 50); + } + JsonDumpFileProcessor.logger.warn("Skipping rest of current line: " + + line); + + line = br.readLine(); + while (line != null && line.length() > 1) { + try { + JacksonTermedStatementDocument document; + if (line.charAt(line.length() - 1) == ',') { + document = documentReader.readValue(line.substring(0, + line.length() - 1)); + } else { + document = documentReader.readValue(line); + } + handleDocument(document); + } catch (JsonProcessingException e) { + logJsonProcessingException(e); + JsonDumpFileProcessor.logger.error("Problematic line was: " + + line.substring(0, Math.min(50, line.length())) + + "..."); + } + + line = br.readLine(); + } + } } diff --git a/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/MwSitesDumpFileProcessor.java b/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/MwSitesDumpFileProcessor.java index 2332fe44c..8eeba7032 100644 --- a/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/MwSitesDumpFileProcessor.java +++ b/wdtk-dumpfiles/src/main/java/org/wikidata/wdtk/dumpfiles/MwSitesDumpFileProcessor.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/EntityStatisticsProcessor.java b/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/EntityStatisticsProcessor.java index 8a1514c22..61f41d9ec 100644 --- a/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/EntityStatisticsProcessor.java +++ b/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/EntityStatisticsProcessor.java @@ -32,7 +32,9 @@ import org.wikidata.wdtk.datamodel.interfaces.Reference; import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementDocument; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; +import org.wikidata.wdtk.datamodel.interfaces.TermedDocument; /** * A simple example class that processes EntityDocuments to compute basic @@ -45,19 +47,30 @@ */ class EntityStatisticsProcessor implements EntityDocumentProcessor { - // Counters to keep track of overall numbers: - long countItems = 0; - long countProperties = 0; - long countLabels = 0; - long countDescriptions = 0; - long countAliases = 0; - long countStatements = 0; - long countSiteLinks = 0; + /** + * Simple record class to keep track of some usage numbers for one type of + * entity. 
+ * + * @author Markus Kroetzsch + * + */ + class UsageStatistics { + long count = 0; + long countLabels = 0; + long countDescriptions = 0; + long countAliases = 0; + long countStatements = 0; + + // Maps to store property usage data for each property: + final HashMap propertyCountsMain = new HashMap(); + final HashMap propertyCountsQualifier = new HashMap(); + final HashMap propertyCountsReferences = new HashMap(); - // Maps to store property usage data for each property: - final HashMap propertyCountsMain = new HashMap(); - final HashMap propertyCountsQualifier = new HashMap(); - final HashMap propertyCountsReferences = new HashMap(); + } + + UsageStatistics itemStatistics = new UsageStatistics(); + UsageStatistics propertyStatistics = new UsageStatistics(); + long countSiteLinks = 0; /** * Main method. Processes the whole dump using this processor and writes the @@ -80,49 +93,82 @@ public static void main(String[] args) throws IOException { @Override public void processItemDocument(ItemDocument itemDocument) { // Count items: - this.countItems++; + this.itemStatistics.count++; + + countTerms(this.itemStatistics, itemDocument); + countStatements(this.itemStatistics, itemDocument); + + // Count site links: + this.countSiteLinks += itemDocument.getSiteLinks().size(); + // Print a report every 10000 items: + if (this.itemStatistics.count % 10000 == 0) { + printStatus(); + } + } + + @Override + public void processPropertyDocument(PropertyDocument propertyDocument) { + // Count properties: + this.propertyStatistics.count++; + + countTerms(this.propertyStatistics, propertyDocument); + countStatements(this.propertyStatistics, propertyDocument); + } + + /** + * Count the terms (labels, descriptions, aliases) of an item or property + * document. 
+ * + * @param usageStatistics + * statistics object to store counters in + * @param termedDocument + * document to count the terms of + */ + protected void countTerms(UsageStatistics usageStatistics, + TermedDocument termedDocument) { // Count several kinds of terms: - this.countLabels += itemDocument.getLabels().size(); - this.countDescriptions += itemDocument.getDescriptions().size(); - for (String languageKey : itemDocument.getAliases().keySet()) { - this.countAliases += itemDocument.getAliases().get(languageKey) - .size(); + usageStatistics.countLabels += termedDocument.getLabels().size(); + usageStatistics.countDescriptions += termedDocument.getDescriptions() + .size(); + for (String languageKey : termedDocument.getAliases().keySet()) { + usageStatistics.countAliases += termedDocument.getAliases() + .get(languageKey).size(); } + } + /** + * Count the statements and property uses of an item or property document. + * + * @param usageStatistics + * statistics object to store counters in + * @param statementDocument + * document to count the statements of + */ + protected void countStatements(UsageStatistics usageStatistics, + StatementDocument statementDocument) { // Count Statement data: - for (StatementGroup sg : itemDocument.getStatementGroups()) { + for (StatementGroup sg : statementDocument.getStatementGroups()) { // Count Statements: - this.countStatements += sg.getStatements().size(); + usageStatistics.countStatements += sg.getStatements().size(); // Count uses of properties in Statements: - countPropertyMain(sg.getProperty(), sg.getStatements().size()); + countPropertyMain(usageStatistics, sg.getProperty(), sg + .getStatements().size()); for (Statement s : sg.getStatements()) { for (SnakGroup q : s.getClaim().getQualifiers()) { - countPropertyQualifier(q.getProperty(), q.getSnaks().size()); + countPropertyQualifier(usageStatistics, q.getProperty(), q + .getSnaks().size()); } for (Reference r : s.getReferences()) { for (SnakGroup snakGroup : 
r.getSnakGroups()) { - countPropertyReference(snakGroup.getProperty(), - snakGroup.getSnaks().size()); + countPropertyReference(usageStatistics, + snakGroup.getProperty(), snakGroup.getSnaks() + .size()); } } } } - - // Count site links: - this.countSiteLinks += itemDocument.getSiteLinks().size(); - - // Print a report every 10000 items: - if (this.countItems % 10000 == 0) { - printStatus(); - } - } - - @Override - public void processPropertyDocument(PropertyDocument propertyDocument) { - // Count properties: - this.countProperties++; } /** @@ -142,10 +188,12 @@ public void writeFinalResults() { out.println("Property id,in statements,in qualifiers,in references,total"); - for (Entry entry : this.propertyCountsMain + for (Entry entry : this.propertyStatistics.propertyCountsMain .entrySet()) { - int qCount = this.propertyCountsQualifier.get(entry.getKey()); - int rCount = this.propertyCountsReferences.get(entry.getKey()); + int qCount = this.propertyStatistics.propertyCountsQualifier + .get(entry.getKey()); + int rCount = this.propertyStatistics.propertyCountsReferences + .get(entry.getKey()); int total = entry.getValue() + qCount + rCount; out.println(entry.getKey().getId() + "," + entry.getValue() + "," + qCount + "," + rCount + "," + total); @@ -160,14 +208,29 @@ public void writeFinalResults() { * Prints a report about the statistics gathered so far. 
*/ private void printStatus() { - System.out.println("Processed " + this.countItems + " items:"); - System.out.println(" * Labels: " + this.countLabels); - System.out.println(" * Descriptions: " + this.countDescriptions); - System.out.println(" * Aliases: " + this.countAliases); - System.out.println(" * Statements: " + this.countStatements); + printStatistics(this.itemStatistics, "items"); System.out.println(" * Site links: " + this.countSiteLinks); - System.out - .println("Processed " + this.countProperties + " properties."); + + printStatistics(this.propertyStatistics, "properties"); + } + + /** + * Prints a report about the statistics stored in the given data object. + * + * @param usageStatistics + * the statistics object to print + * @param entityLabel + * the label to use to refer to this kind of entities ("items" or + * "properties") + */ + private void printStatistics(UsageStatistics usageStatistics, + String entityLabel) { + System.out.println("Processed " + usageStatistics.count + " " + + entityLabel + ":"); + System.out.println(" * Labels: " + usageStatistics.countLabels + + ", descriptions: " + usageStatistics.countDescriptions + + ", aliases: " + usageStatistics.countAliases); + System.out.println(" * Statements: " + usageStatistics.countStatements); } /** @@ -193,57 +256,69 @@ public static void printDocumentation() { * Counts additional occurrences of a property as the main property of * statements. 
* + * @param usageStatistics + * statistics object where count is stored * @param property * the property to count * @param count * the number of times to count the property */ - private void countPropertyMain(PropertyIdValue property, int count) { - addPropertyCounters(property); - this.propertyCountsMain.put(property, - this.propertyCountsMain.get(property) + count); + private void countPropertyMain(UsageStatistics usageStatistics, + PropertyIdValue property, int count) { + addPropertyCounters(usageStatistics, property); + usageStatistics.propertyCountsMain.put(property, + usageStatistics.propertyCountsMain.get(property) + count); } /** * Counts additional occurrences of a property as qualifier property of * statements. * + * @param usageStatistics + * statistics object where count is stored * @param property * the property to count * @param count * the number of times to count the property */ - private void countPropertyQualifier(PropertyIdValue property, int count) { - addPropertyCounters(property); - this.propertyCountsQualifier.put(property, - this.propertyCountsQualifier.get(property) + count); + private void countPropertyQualifier(UsageStatistics usageStatistics, + PropertyIdValue property, int count) { + addPropertyCounters(usageStatistics, property); + usageStatistics.propertyCountsQualifier.put(property, + usageStatistics.propertyCountsQualifier.get(property) + count); } /** * Counts additional occurrences of a property as property in references. 
* + * @param usageStatistics + * statistics object where count is stored * @param property * the property to count * @param count * the number of times to count the property */ - private void countPropertyReference(PropertyIdValue property, int count) { - addPropertyCounters(property); - this.propertyCountsReferences.put(property, - this.propertyCountsReferences.get(property) + count); + private void countPropertyReference(UsageStatistics usageStatistics, + PropertyIdValue property, int count) { + addPropertyCounters(usageStatistics, property); + usageStatistics.propertyCountsReferences.put(property, + usageStatistics.propertyCountsReferences.get(property) + count); } /** * Initializes the counters for a property to zero if not done yet. * + * @param usageStatistics + * statistics object to initialize * @param property * the property to count */ - private void addPropertyCounters(PropertyIdValue property) { - if (!this.propertyCountsMain.containsKey(property)) { - this.propertyCountsMain.put(property, 0); - this.propertyCountsQualifier.put(property, 0); - this.propertyCountsReferences.put(property, 0); + private void addPropertyCounters(UsageStatistics usageStatistics, + PropertyIdValue property) { + if (!usageStatistics.propertyCountsMain.containsKey(property)) { + usageStatistics.propertyCountsMain.put(property, 0); + usageStatistics.propertyCountsQualifier.put(property, 0); + usageStatistics.propertyCountsReferences.put(property, 0); } } -} \ No newline at end of file +} diff --git a/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/ExampleHelpers.java b/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/ExampleHelpers.java index ed8e5cf3f..60baaf832 100644 --- a/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/ExampleHelpers.java +++ b/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/ExampleHelpers.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the 
License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/RdfSerializationExample.java b/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/RdfSerializationExample.java index 0bd39bd60..e241a8f70 100644 --- a/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/RdfSerializationExample.java +++ b/wdtk-examples/src/main/java/org/wikidata/wdtk/examples/RdfSerializationExample.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/OwlDeclarationBuffer.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/OwlDeclarationBuffer.java index 3f18212c9..44b1a69dd 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/OwlDeclarationBuffer.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/OwlDeclarationBuffer.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java index cfe3c2ff8..ad44d85bc 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -41,6 +41,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementDocument; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.TermedDocument; import org.wikidata.wdtk.datamodel.interfaces.ValueSnak; @@ -201,17 +202,21 @@ public void writePropertyDocument(PropertyDocument document) this.rdfWriter.getUri(document.getDatatype().getIri())); } - // Not needed for properties -- might change in future: - // this.snakRdfConverter.writeAuxiliaryTriples(); - // this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, - // hasTask(RdfSerializer.TASK_STATEMENTS), - // hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)); - // this.referenceRdfConverter.writeReferences(); + if (hasTask(RdfSerializer.TASK_STATEMENTS)) { + writeStatements(subject, document); + } + + this.snakRdfConverter.writeAuxiliaryTriples(); + this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, + hasTask(RdfSerializer.TASK_STATEMENTS), + hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)); + 
this.referenceRdfConverter.writeReferences(); } - void writeStatements(Resource subject, ItemDocument itemDocument) + void writeStatements(Resource subject, StatementDocument statementDocument) throws RDFHandlerException { - for (StatementGroup statementGroup : itemDocument.getStatementGroups()) { + for (StatementGroup statementGroup : statementDocument + .getStatementGroups()) { URI property = this.rdfWriter.getUri(Vocabulary.getPropertyUri( statementGroup.getProperty(), PropertyContext.STATEMENT)); for (Statement statement : statementGroup.getStatements()) { @@ -220,15 +225,18 @@ void writeStatements(Resource subject, ItemDocument itemDocument) } } - for (StatementGroup statementGroup : itemDocument.getStatementGroups()) { + for (StatementGroup statementGroup : statementDocument + .getStatementGroups()) { for (Statement statement : statementGroup.getStatements()) { writeStatement(statement); } } } - void writeSimpleStatements(Resource subject, ItemDocument itemDocument) { - for (StatementGroup statementGroup : itemDocument.getStatementGroups()) { + void writeSimpleStatements(Resource subject, + StatementDocument statementDocument) { + for (StatementGroup statementGroup : statementDocument + .getStatementGroups()) { for (Statement statement : statementGroup.getStatements()) { if (statement.getClaim().getQualifiers().size() == 0) { this.snakRdfConverter.setSnakContext(subject, @@ -273,8 +281,8 @@ void writeInstanceOfStatements(Resource subject, ItemDocument itemDocument) { void writeSubclassOfStatements(Resource subject, ItemDocument itemDocument) { for (StatementGroup statementGroup : itemDocument.getStatementGroups()) { - boolean isSubClassOf = "P279".equals(statementGroup - .getProperty().getId()); + boolean isSubClassOf = "P279".equals(statementGroup.getProperty() + .getId()); boolean isInstanceOf = "P31".equals(statementGroup.getProperty() .getId()); if (!isInstanceOf && !isSubClassOf) { diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java 
b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java index 61c50ebba..7fdce31b5 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/WikidataPropertyTypes.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/WikidataPropertyTypes.java index 688476572..940b7cc7f 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/WikidataPropertyTypes.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/WikidataPropertyTypes.java @@ -9,9 +9,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.