Skip to content

Commit

Permalink
Add property declarations and make value exports unique
Browse files Browse the repository at this point in the history
  • Loading branch information
mkroetzsch committed May 3, 2014
1 parent 8f3e48a commit 3a30478
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
*/

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

import org.openrdf.model.Resource;
import org.openrdf.rio.RDFHandlerException;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;

Expand All @@ -42,12 +44,16 @@
*/
public class RdfConversionBuffer {

List<QuantityValue> quantityValueQueue;
List<Resource> quantityValueSubjectQueue;
List<TimeValue> timeValueQueue;
List<Resource> timeValueSubjectQueue;
List<GlobeCoordinatesValue> coordinatesValueQueue;
List<Resource> coordinatesValueSubjectQueue;
final List<QuantityValue> quantityValueQueue;
final List<Resource> quantityValueSubjectQueue;
final List<TimeValue> timeValueQueue;
final List<Resource> timeValueSubjectQueue;
final List<GlobeCoordinatesValue> coordinatesValueQueue;
final List<Resource> coordinatesValueSubjectQueue;
final List<PropertyIdValue> objectPropertyQueue;
final List<PropertyIdValue> datatypePropertyQueue;
final HashSet<PropertyIdValue> declaredProperties;
final HashSet<Resource> declaredValues;

public RdfConversionBuffer() {
this.quantityValueQueue = new ArrayList<QuantityValue>();
Expand All @@ -56,6 +62,10 @@ public RdfConversionBuffer() {
this.timeValueSubjectQueue = new ArrayList<Resource>();
this.coordinatesValueQueue = new ArrayList<GlobeCoordinatesValue>();
this.coordinatesValueSubjectQueue = new ArrayList<Resource>();
this.objectPropertyQueue = new ArrayList<PropertyIdValue>();
this.datatypePropertyQueue = new ArrayList<PropertyIdValue>();
this.declaredProperties = new HashSet<PropertyIdValue>();
this.declaredValues = new HashSet<Resource>();
}

/**
Expand Down Expand Up @@ -101,11 +111,81 @@ public void addGlobeCoordinatesValue(
this.coordinatesValueSubjectQueue.add(resource);
}

/**
 * Queues the given property for declaration as an OWL object property.
 * Properties that are already recorded as declared are ignored. The
 * declaration itself is written later, when the property declarations of
 * this buffer are flushed.
 *
 * @param propertyIdValue
 *            the property to declare
 */
public void addObjectProperty(PropertyIdValue propertyIdValue) {
	if (this.declaredProperties.contains(propertyIdValue)) {
		// Already declared: nothing to queue.
		return;
	}
	this.objectPropertyQueue.add(propertyIdValue);
}

/**
 * Queues the given property for declaration as an OWL datatype property.
 * Properties that are already recorded as declared are ignored. The
 * declaration itself is written later, when the property declarations of
 * this buffer are flushed.
 *
 * @param propertyIdValue
 *            the property to declare
 */
public void addDatatypeProperty(PropertyIdValue propertyIdValue) {
	if (this.declaredProperties.contains(propertyIdValue)) {
		// Already declared: nothing to queue.
		return;
	}
	this.datatypePropertyQueue.add(propertyIdValue);
}

/**
 * Writes OWL type declarations for all properties that were queued as
 * object or datatype properties, and then empties both queues. Each
 * property is declared at most once over the lifetime of this buffer: a
 * property that is already contained in the set of declared properties is
 * skipped. Queued object properties are processed before queued datatype
 * properties, so a property found in both queues is declared as an object
 * property.
 *
 * @param rdfWriter
 *            the writer that receives the declaration triples
 * @throws RDFHandlerException
 *             if the writer fails to write a triple
 */
public void writePropertyDeclarations(RdfWriter rdfWriter)
		throws RDFHandlerException {
	for (PropertyIdValue propertyIdValue : this.objectPropertyQueue) {
		// add() returns false if the property was declared before.
		if (this.declaredProperties.add(propertyIdValue)) {
			writePropertyDeclaration(rdfWriter, propertyIdValue, true);
		}
	}
	this.objectPropertyQueue.clear();

	for (PropertyIdValue propertyIdValue : this.datatypePropertyQueue) {
		if (this.declaredProperties.add(propertyIdValue)) {
			writePropertyDeclaration(rdfWriter, propertyIdValue, false);
		}
	}
	this.datatypePropertyQueue.clear();
}

/**
 * Writes the rdf:type triples for the four context-specific URIs
 * (statement, value, qualifier, reference) of a single property.
 *
 * @param rdfWriter
 *            the writer that receives the declaration triples
 * @param propertyIdValue
 *            the property to declare
 * @param objectProperty
 *            true to type the value, qualifier and reference URIs as OWL
 *            object properties; false to type them as OWL datatype
 *            properties
 * @throws RDFHandlerException
 *             if the writer fails to write a triple
 */
private void writePropertyDeclaration(RdfWriter rdfWriter,
		PropertyIdValue propertyIdValue, boolean objectProperty)
		throws RDFHandlerException {
	// NOTE(review): the statement-context URI is typed as an object
	// property in both cases; presumably because it always points to a
	// statement node rather than to a literal -- confirm.
	rdfWriter.writeTripleUriObject(Vocabulary.getPropertyUri(
			propertyIdValue, PropertyContext.STATEMENT),
			Vocabulary.RDF_TYPE, Vocabulary.OWL_OBJECT_PROPERTY);
	rdfWriter.writeTripleUriObject(Vocabulary.getPropertyUri(
			propertyIdValue, PropertyContext.VALUE), Vocabulary.RDF_TYPE,
			objectProperty ? Vocabulary.OWL_OBJECT_PROPERTY
					: Vocabulary.OWL_DATATYPE_PROPERTY);
	rdfWriter.writeTripleUriObject(Vocabulary.getPropertyUri(
			propertyIdValue, PropertyContext.QUALIFIER),
			Vocabulary.RDF_TYPE,
			objectProperty ? Vocabulary.OWL_OBJECT_PROPERTY
					: Vocabulary.OWL_DATATYPE_PROPERTY);
	rdfWriter.writeTripleUriObject(Vocabulary.getPropertyUri(
			propertyIdValue, PropertyContext.REFERENCE),
			Vocabulary.RDF_TYPE,
			objectProperty ? Vocabulary.OWL_OBJECT_PROPERTY
					: Vocabulary.OWL_DATATYPE_PROPERTY);
}

public void writeValues(ValueRdfConverter valueRdfConverter)
throws RDFHandlerException {
Iterator<QuantityValue> quantitiyValueIterator = this.quantityValueQueue
.iterator();
for (Resource resource : this.quantityValueSubjectQueue) {
	// Fetch the value before the duplicate check: the value queue and the
	// subject queue are consumed in lockstep, so skipping next() for an
	// already written resource would pair every later subject with the
	// wrong value.
	QuantityValue quantityValue = quantitiyValueIterator.next();
	if (!this.declaredValues.add(resource)) {
		continue;
	}
	valueRdfConverter.writeQuantityValue(quantityValue, resource);
}
Expand All @@ -114,6 +194,9 @@ public void writeValues(ValueRdfConverter valueRdfConverter)

Iterator<TimeValue> timeValueIterator = this.timeValueQueue.iterator();
for (Resource resource : this.timeValueSubjectQueue) {
	// Fetch the value before the duplicate check: the value queue and the
	// subject queue are consumed in lockstep, so skipping next() for an
	// already written resource would pair every later subject with the
	// wrong value.
	TimeValue timeValue = timeValueIterator.next();
	if (!this.declaredValues.add(resource)) {
		continue;
	}
	valueRdfConverter.writeTimeValue(timeValue, resource);
}
Expand All @@ -123,6 +206,9 @@ public void writeValues(ValueRdfConverter valueRdfConverter)
Iterator<GlobeCoordinatesValue> globeCoordinatesValueIterator = this.coordinatesValueQueue
.iterator();
for (Resource resource : this.coordinatesValueSubjectQueue) {
if (!this.declaredValues.add(resource)) {
continue;
}
GlobeCoordinatesValue globeCoordinatesValue = globeCoordinatesValueIterator
.next();
valueRdfConverter.writeGlobeCoordinatesValue(globeCoordinatesValue,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public void writeItemDocument(ItemDocument document)
// TODO: add SiteLinks

this.rdfConversionBuffer.writeValues(this.valueRdfConverter);

this.rdfConversionBuffer.writePropertyDeclarations(this.writer);
}

public void writeSiteLinks(Map<String, SiteLink> siteLinks)
Expand Down Expand Up @@ -156,6 +156,9 @@ public void writePropertyDocument(PropertyDocument document)
writeDocumentTerms(document);

// TODO add datatype

this.rdfConversionBuffer.writeValues(this.valueRdfConverter);
this.rdfConversionBuffer.writePropertyDeclarations(this.writer);
}

void writeDocumentTerms(TermedDocument document) throws RDFHandlerException {
Expand All @@ -175,7 +178,7 @@ void writeTermTriples(String subjectUri, String predicateUri,
Collection<MonolingualTextValue> terms) throws RDFHandlerException {
for (MonolingualTextValue mtv : terms) {
this.writer.writeTripleValueObject(subjectUri, predicateUri,
mtv.accept(this.valueRdfConverter));
this.valueRdfConverter.getMonolingualTextValueLiteral(mtv));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,13 @@ public Value getRdfValueForWikidataValue(

@Override
public Value visit(DatatypeIdValue value) {
	// The value is exported as a URI, so the current property is queued
	// for declaration as an object property.
	this.rdfConversionBuffer.addObjectProperty(this.currentPropertyIdValue);
	String datatypeIri = value.getIri();
	return this.factory.createURI(datatypeIri);
}

@Override
public Value visit(EntityIdValue value) {
	// The value is exported as a URI, so the current property is queued
	// for declaration as an object property.
	this.rdfConversionBuffer.addObjectProperty(this.currentPropertyIdValue);
	String entityUri = Vocabulary.getEntityUri(value);
	return this.factory.createURI(entityUri);
}

Expand All @@ -165,13 +167,20 @@ public Value visit(GlobeCoordinatesValue value) {
URI valueUri = this.factory.createURI(Vocabulary.PREFIX_WIKIDATA
+ VALUE_PREFIX_GLOBECOORDS + hash);

this.rdfConversionBuffer.addObjectProperty(this.currentPropertyIdValue);
this.rdfConversionBuffer.addGlobeCoordinatesValue(value, valueUri);

return valueUri;
}

@Override
public Value visit(MonolingualTextValue value) {
	// The current property is queued for declaration as a datatype
	// property before the value is converted.
	this.rdfConversionBuffer.addDatatypeProperty(this.currentPropertyIdValue);
	Value literal = getMonolingualTextValueLiteral(value);
	return literal;
}

public Value getMonolingualTextValueLiteral(MonolingualTextValue value) {
String languageCode = WikimediaLanguageCodes.getLanguageCode(value
.getLanguageCode());
return factory.createLiteral(value.getText(), languageCode);
Expand All @@ -185,6 +194,7 @@ public Value visit(QuantityValue value) {
URI valueUri = this.factory.createURI(Vocabulary.PREFIX_WIKIDATA
+ VALUE_PREFIX_QUANTITY + hash);

this.rdfConversionBuffer.addObjectProperty(this.currentPropertyIdValue);
this.rdfConversionBuffer.addQuantityValue(value, valueUri);

return valueUri;
Expand All @@ -202,8 +212,12 @@ public Value visit(StringValue value) {

switch (datatype) {
case DatatypeIdValue.DT_STRING:
this.rdfConversionBuffer
.addDatatypeProperty(this.currentPropertyIdValue);
return factory.createLiteral(value.getString());
case DatatypeIdValue.DT_COMMONS_MEDIA:
this.rdfConversionBuffer
.addObjectProperty(this.currentPropertyIdValue);
// TODO use a smarter function to build those URLs
return factory.createURI("http://commons.wikimedia.org/wiki/File:"
+ value.getString().replace(' ', '_'));
Expand All @@ -220,6 +234,7 @@ public Value visit(TimeValue value) {
URI valueUri = this.factory.createURI(Vocabulary.PREFIX_WIKIDATA
+ VALUE_PREFIX_TIME + hash);

this.rdfConversionBuffer.addObjectProperty(this.currentPropertyIdValue);
this.rdfConversionBuffer.addTimeValue(value, valueUri);

return valueUri;
Expand Down

0 comments on commit 3a30478

Please sign in to comment.