Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into clt-with-metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
guenthermi committed Jan 7, 2015
2 parents 32778dc + 3d9946d commit c5df517
Show file tree
Hide file tree
Showing 60 changed files with 1,220 additions and 531 deletions.
4 changes: 4 additions & 0 deletions RELEASE-NOTES.md
Expand Up @@ -4,6 +4,10 @@ Wikidata Toolkit Release Notes
Version 0.4.0
-------------

New features:
* Support statements on property documents.
* More robust JSON parsing: recover after errors to process the remaining file.

Bug fixes:
* Support RDF export of Monolingual Text Value data in statements.
* Significant performance improvements in RDF export of taxonomy data.
Expand Down
Expand Up @@ -428,7 +428,7 @@ public static SiteLink makeSiteLink(String title, String siteKey,
}

/**
* Creates a {@link PropertyDocument}.
* Creates a {@link PropertyDocument} without statements.
*
* @param propertyId
* the id of the property that data is about
Expand All @@ -452,6 +452,35 @@ public static PropertyDocument makePropertyDocument(
aliases, datatypeId);
}

/**
 * Creates a {@link PropertyDocument} that carries statements in addition to
 * its terms.
 *
 * @param propertyId
 *            the id of the property that data is about
 * @param labels
 *            the list of labels of this property, with at most one label
 *            for each language code
 * @param descriptions
 *            the list of descriptions of this property, with at most one
 *            description for each language code
 * @param aliases
 *            the list of aliases of this property
 * @param statementGroups
 *            the list of statement groups of this property; all of them
 *            must have the given propertyId as their subject
 * @param datatypeId
 *            the datatype of that property
 * @return a {@link PropertyDocument} corresponding to the input
 */
public static PropertyDocument makePropertyDocument(
		PropertyIdValue propertyId, List<MonolingualTextValue> labels,
		List<MonolingualTextValue> descriptions,
		List<MonolingualTextValue> aliases,
		List<StatementGroup> statementGroups, DatatypeIdValue datatypeId) {
	// Pure delegation: the configured factory decides which implementation
	// class is instantiated.
	final PropertyDocument document = factory.getPropertyDocument(
			propertyId, labels, descriptions, aliases, statementGroups,
			datatypeId);
	return document;
}

/**
* Creates an {@link ItemDocument}.
*
Expand Down
Expand Up @@ -164,7 +164,8 @@ public PropertyDocument copy(PropertyDocument object) {
return this.dataObjectFactory.getPropertyDocument(object
.getPropertyId(), new ArrayList<>(object.getLabels().values()),
new ArrayList<>(object.getDescriptions().values()),
convertAliasList(object.getAliases()), object.getDatatype());
convertAliasList(object.getAliases()), object
.getStatementGroups(), object.getDatatype());
}

public ItemDocument copy(ItemDocument object) {
Expand Down
Expand Up @@ -505,7 +505,8 @@ public static boolean equalsPropertyDocument(PropertyDocument o1, Object o2) {
}
PropertyDocument other = (PropertyDocument) o2;
// Note: property id already compared by equalsTermedDocument()
return o1.getDatatype().equals(other.getDatatype());
return o1.getDatatype().equals(other.getDatatype())
&& o1.getStatementGroups().equals(other.getStatementGroups());
}

/**
Expand Down
Expand Up @@ -308,6 +308,7 @@ public static int hashCode(SiteLink o) {
public static int hashCode(PropertyDocument o) {
	// Start from the hash of the termed-document part, then mix in the
	// statement groups and finally the datatype, multiplying by the shared
	// prime at each step. Operands are evaluated left to right, so the
	// getters are called in the order: terms, statement groups, datatype.
	return prime
			* (prime * hashCodeForTermedDocument(o) + o
					.getStatementGroups().hashCode())
			+ o.getDatatype().hashCode();
}
Expand Down
Expand Up @@ -40,6 +40,7 @@
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.SomeValueSnak;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementDocument;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
Expand Down Expand Up @@ -353,7 +354,8 @@ public static String toString(SiteLink o) {
public static String toString(PropertyDocument o) {
return "==PropertyDocument " + o.getPropertyId().getIri() + "==\n"
+ "* Datatype: " + o.getDatatype()
+ toStringForTermedDocument(o);
+ toStringForTermedDocument(o)
+ toStringForStatementDocument(o);
}

/**
Expand All @@ -368,16 +370,10 @@ public static String toString(ItemDocument o) {
StringBuilder sb = new StringBuilder();
sb.append("==ItemDocument ").append(o.getItemId().getIri());
sb.append("==").append(toStringForTermedDocument(o));
boolean first;

sb.append("\n===Statements===\n");
for (StatementGroup sg : o.getStatementGroups()) {
sb.append(toString(sg));
}
sb.append("\n===End of statements===\n");
sb.append(toStringForStatementDocument(o));

sb.append("* Site links: ");
first = true;
boolean first = true;
SortedSet<String> siteKeys = new TreeSet<String>(o.getSiteLinks()
.keySet());
for (String key : siteKeys) {
Expand All @@ -392,6 +388,18 @@ public static String toString(ItemDocument o) {
return sb.toString();
}

/**
 * Returns a human-readable string representation of the statements of the
 * given {@link StatementDocument}. The output consists of a "Statements"
 * header line, the string form of every statement group in the order
 * returned by the document, and an "End of statements" footer line.
 *
 * @param o
 *            the document whose statement groups should be printed
 * @return a string representation of the statements of the document
 */
protected static String toStringForStatementDocument(StatementDocument o) {
	StringBuilder builder = new StringBuilder("\n===Statements===\n");

	for (StatementGroup group : o.getStatementGroups()) {
		builder.append(toString(group));
	}

	return builder.append("\n===End of statements===\n").toString();
}

/**
* Returns a human-readable string representation of the given
* {@link TermedDocument}.
Expand Down
Expand Up @@ -21,6 +21,7 @@
*/

import java.math.BigDecimal;
import java.util.Collections;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -165,7 +166,17 @@ public PropertyDocument getPropertyDocument(PropertyIdValue propertyId,
List<MonolingualTextValue> descriptions,
List<MonolingualTextValue> aliases, DatatypeIdValue datatypeId) {
return new PropertyDocumentImpl(propertyId, labels, descriptions,
aliases, datatypeId);
aliases, Collections.<StatementGroup> emptyList(), datatypeId);
}

@Override
public PropertyDocument getPropertyDocument(PropertyIdValue propertyId,
		List<MonolingualTextValue> labels,
		List<MonolingualTextValue> descriptions,
		List<MonolingualTextValue> aliases,
		List<StatementGroup> statementGroups, DatatypeIdValue datatypeId) {
	// Variant with statements: all arguments are handed to the
	// implementation class unchanged.
	final PropertyDocument document = new PropertyDocumentImpl(propertyId,
			labels, descriptions, aliases, statementGroups, datatypeId);
	return document;
}

@Override
Expand Down
Expand Up @@ -21,7 +21,6 @@
*/

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

Expand All @@ -34,15 +33,12 @@
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.SiteLink;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.util.NestedIterator;

public class ItemDocumentImpl extends TermedDocumentImpl implements
public class ItemDocumentImpl extends TermedStatementDocumentImpl implements
ItemDocument {

final ItemIdValue itemId;
final List<StatementGroup> statementGroups;
final Map<String, SiteLink> siteLinks;

/**
Expand Down Expand Up @@ -70,24 +66,11 @@ public class ItemDocumentImpl extends TermedDocumentImpl implements
List<MonolingualTextValue> aliases,
List<StatementGroup> statementGroups,
Map<String, SiteLink> siteLinks) {
super(labels, descriptions, aliases);
super(itemIdValue, labels, descriptions, aliases, statementGroups);
Validate.notNull(itemIdValue, "item ID cannot be null");
Validate.notNull(statementGroups, "statement list cannot be null");
Validate.notNull(siteLinks, "site links cannot be null");

if (!statementGroups.isEmpty()) {
for (StatementGroup sg : statementGroups) {
if (!itemIdValue.equals(sg.getSubject())) {
throw new IllegalArgumentException(
"All statement groups in a document must have the same subject: found "
+ sg.getSubject() + " but expected "
+ itemIdValue);
}
}
}

this.itemId = itemIdValue;
this.statementGroups = statementGroups;
this.siteLinks = siteLinks;
}

Expand All @@ -101,16 +84,6 @@ public ItemIdValue getItemId() {
return itemId;
}

@Override
public List<StatementGroup> getStatementGroups() {
return Collections.unmodifiableList(statementGroups);
}

@Override
public Iterator<Statement> getAllStatements() {
return new NestedIterator<>(statementGroups);
}

@Override
public Map<String, SiteLink> getSiteLinks() {
return Collections.unmodifiableMap(siteLinks);
Expand Down
Expand Up @@ -31,15 +31,16 @@
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;

/**
* Implementation of {@link PropertyDocument}.
*
* @author Markus Kroetzsch
*
*/
public class PropertyDocumentImpl extends TermedDocumentImpl implements
PropertyDocument {
public class PropertyDocumentImpl extends TermedStatementDocumentImpl implements
PropertyDocument {

final PropertyIdValue propertyId;
final DatatypeIdValue datatypeId;
Expand All @@ -57,14 +58,18 @@ public class PropertyDocumentImpl extends TermedDocumentImpl implements
* description for each language code
* @param aliases
* the list of aliases of this property
* @param statementGroups
* the list of statement groups of this property; all of them
* must have the given propertyId as their subject
* @param datatypeId
* the datatype of that property
*/
PropertyDocumentImpl(PropertyIdValue propertyId,
List<MonolingualTextValue> labels,
List<MonolingualTextValue> descriptions,
List<MonolingualTextValue> aliases, DatatypeIdValue datatypeId) {
super(labels, descriptions, aliases);
List<MonolingualTextValue> aliases,
List<StatementGroup> statementGroups, DatatypeIdValue datatypeId) {
super(propertyId, labels, descriptions, aliases, statementGroups);
Validate.notNull(propertyId, "property ID cannot be null");
Validate.notNull(datatypeId, "datatype ID cannot be null");
this.propertyId = propertyId;
Expand Down
Expand Up @@ -9,9 +9,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand Down
Expand Up @@ -23,12 +23,18 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.Validate;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementDocument;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.TermedDocument;
import org.wikidata.wdtk.util.NestedIterator;

/**
* Implementation of {@link TermedDocument}. This abstract class defines the
Expand All @@ -37,15 +43,20 @@
* @author Markus Kroetzsch
*
*/
public abstract class TermedDocumentImpl implements TermedDocument {
public abstract class TermedStatementDocumentImpl implements TermedDocument,
StatementDocument {

final Map<String, MonolingualTextValue> labels;
final Map<String, MonolingualTextValue> descriptions;
final Map<String, List<MonolingualTextValue>> aliases;
final List<StatementGroup> statementGroups;

/**
* Constructor.
*
* @param entityIdValue
* the entity that this document refers to; used to validate
* statements
* @param labels
* the list of labels of this entity, with at most one label for
* each language code
Expand All @@ -54,13 +65,19 @@ public abstract class TermedDocumentImpl implements TermedDocument {
* description for each language code
* @param aliases
* the list of aliases of this entity
* @param statementGroups
* the list of statement groups of this entity; all of them must
* have the given entityIdValue as their subject
*/
TermedDocumentImpl(List<MonolingualTextValue> labels,
TermedStatementDocumentImpl(EntityIdValue entityIdValue,
List<MonolingualTextValue> labels,
List<MonolingualTextValue> descriptions,
List<MonolingualTextValue> aliases) {
List<MonolingualTextValue> aliases,
List<StatementGroup> statementGroups) {
Validate.notNull(labels, "list of labels cannot be null");
Validate.notNull(descriptions, "list of descriptions cannot be null");
Validate.notNull(aliases, "list of aliases cannot be null");
Validate.notNull(statementGroups, "statement list cannot be null");

this.labels = new HashMap<String, MonolingualTextValue>();
for (MonolingualTextValue label : labels) {
Expand Down Expand Up @@ -93,6 +110,19 @@ public abstract class TermedDocumentImpl implements TermedDocument {
this.aliases.put(alias.getLanguageCode(), aliasesForLanguage);
}
}

if (!statementGroups.isEmpty()) {
for (StatementGroup sg : statementGroups) {
if (!entityIdValue.equals(sg.getSubject())) {
throw new IllegalArgumentException(
"All statement groups in a document must have the same subject: found "
+ sg.getSubject() + " but expected "
+ entityIdValue);
}
}
}

this.statementGroups = statementGroups;
}

@Override
Expand All @@ -112,4 +142,14 @@ public Map<String, List<MonolingualTextValue>> getAliases() {
return Collections.unmodifiableMap(aliases);
}

@Override
public List<StatementGroup> getStatementGroups() {
	// Hand out a read-only view so callers cannot modify the list held by
	// this document through the returned reference.
	final List<StatementGroup> readOnlyView = Collections
			.unmodifiableList(this.statementGroups);
	return readOnlyView;
}

@Override
public Iterator<Statement> getAllStatements() {
	// NestedIterator is constructed over the statement groups; presumably it
	// walks the statements of each group in turn (verify against
	// NestedIterator).
	final Iterator<Statement> allStatements = new NestedIterator<>(
			this.statementGroups);
	return allStatements;
}

}

0 comments on commit c5df517

Please sign in to comment.