From 753c66e8a6ed802a6a573fd52eff44c53d0405bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Fri, 30 Aug 2019 20:41:57 +0200 Subject: [PATCH 1/2] Expose RDFConverter building blocks to improve extensibility This patch factors code from RdfConverter into a new AbstractRdfConverter base class. The advantage of that approach is that extending the RDF export becomes easier: you can create your own subclass of the AbstractRdfConverter but still reuse all the `writeXXXX` parts to perform the RDF generation for property/item documents and their parts. The current RdfConverter now makes use of that to provide the previous interface, where control of the parts to export is guided by a set of simple flags. Using AbstractRdfConverter, much more complex filtering can be implemented by extending the class, for example exporting terms only for some entities. --- .../wdtk/rdf/AbstractRdfConverter.java | 420 +++++++++++++ .../org/wikidata/wdtk/rdf/RdfConverter.java | 590 ++++-------------- .../org/wikidata/wdtk/rdf/Vocabulary.java | 22 +- .../wikidata/wdtk/rdf/RdfConverterTest.java | 26 +- wdtk-rdf/src/test/resources/Statement.rdf | 2 +- wdtk-rdf/src/test/resources/StatementCplx.rdf | 2 +- .../src/test/resources/StatementNoValue.rdf | 2 +- .../resources/StatementRankTripleBest.rdf | 4 + 8 files changed, 570 insertions(+), 498 deletions(-) create mode 100644 wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/AbstractRdfConverter.java create mode 100644 wdtk-rdf/src/test/resources/StatementRankTripleBest.rdf diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/AbstractRdfConverter.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/AbstractRdfConverter.java new file mode 100644 index 000000000..a27a04814 --- /dev/null +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/AbstractRdfConverter.java @@ -0,0 +1,420 @@ +package org.wikidata.wdtk.rdf; + +/* + * #%L + * Wikidata Toolkit RDF + * %% + * Copyright (C) 2014 Wikidata Toolkit Developers + * %% + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.rio.RDFHandlerException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.wikidata.wdtk.datamodel.interfaces.*; +import org.wikidata.wdtk.rdf.values.AnyValueConverter; + +/** + * This class provides functions to convert objects of wdtk-datamodel into an RDF + * graph. 
+ * + * @author Michael Günther + * + */ +abstract public class AbstractRdfConverter { + + static final Logger logger = LoggerFactory.getLogger(AbstractRdfConverter.class); + + final RdfWriter rdfWriter; + final AnyValueConverter valueRdfConverter; + final SnakRdfConverter snakRdfConverter; + final OwlDeclarationBuffer owlDeclarationBuffer = new OwlDeclarationBuffer(); + final ReferenceRdfConverter referenceRdfConverter; + final PropertyRegister propertyRegister; + final Sites sites; + + public enum TermKind { + LABEL, + DESCRIPTION, + ALIAS + } + + public AbstractRdfConverter(RdfWriter rdfWriter, Sites sites, + PropertyRegister propertyRegister) { + this.sites = sites; + this.rdfWriter = rdfWriter; + this.propertyRegister = propertyRegister; + + this.valueRdfConverter = new AnyValueConverter(rdfWriter, + this.owlDeclarationBuffer, this.propertyRegister); + this.snakRdfConverter = new SnakRdfConverter(rdfWriter, + this.owlDeclarationBuffer, this.propertyRegister, + this.valueRdfConverter); + this.referenceRdfConverter = new ReferenceRdfConverter(rdfWriter, + this.snakRdfConverter, this.propertyRegister.siteUri); + } + + /** + * Writes OWL declarations for all basic vocabulary elements used in the + * dump. + * + * Example of the triples written by this method: + * {@code wikibase:propertyType rdf:type owl:ObjectProperty} + */ + public void writeBasicDeclarations() throws RDFHandlerException { + for (Map.Entry uriType : Vocabulary + .getKnownVocabularyTypes().entrySet()) { + this.rdfWriter.writeTripleUriObject(uriType.getKey(), + RdfWriter.RDF_TYPE, uriType.getValue()); + } + } + + /** + * Writes all namespace declarations used in the dump, for example {@code wikibase:} or {@code schema:}. 
+ */ + public void writeNamespaceDeclarations() throws RDFHandlerException { + this.rdfWriter.writeNamespaceDeclaration("wd", + this.propertyRegister.getUriPrefix()); + this.rdfWriter + .writeNamespaceDeclaration("wikibase", Vocabulary.PREFIX_WBONTO); + this.rdfWriter.writeNamespaceDeclaration("rdf", Vocabulary.PREFIX_RDF); + this.rdfWriter + .writeNamespaceDeclaration("rdfs", Vocabulary.PREFIX_RDFS); + this.rdfWriter.writeNamespaceDeclaration("owl", Vocabulary.PREFIX_OWL); + this.rdfWriter.writeNamespaceDeclaration("xsd", Vocabulary.PREFIX_XSD); + this.rdfWriter.writeNamespaceDeclaration("schema", + Vocabulary.PREFIX_SCHEMA); + this.rdfWriter + .writeNamespaceDeclaration("skos", Vocabulary.PREFIX_SKOS); + this.rdfWriter + .writeNamespaceDeclaration("prov", Vocabulary.PREFIX_PROV); + } + + /** + * Writes all buffered triples and finishes writing a document. + * + * This will take care of writing auxiliary triples that got buffered during serialization, + * such as OWL declarations, references and auxiliary triples for complex values. 
+ */ + public void finishDocument() throws RDFHandlerException { + this.snakRdfConverter.writeAuxiliaryTriples(); + this.writeOWLDeclarations(); + this.referenceRdfConverter.writeReferences(); + } + + public void writeOWLDeclarations() { + this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, true, true); + } + + public void writeDocumentType(Resource subject, IRI type) { + this.rdfWriter.writeTripleUriObject(subject, RdfWriter.RDF_TYPE, type.toString()); + } + + public void writeItemDocument(ItemDocument document) + throws RDFHandlerException { + final String subjectUri = document.getEntityId().getIri(); + final Resource subject = this.rdfWriter.getUri(subjectUri); + + writeDocumentType(subject, RdfWriter.WB_ITEM); + writeDocumentTerms(document); + writeStatements(document); + writeSiteLinks(subject, document.getSiteLinks()); + + finishDocument(); + } + + public void writePropertyDatatype(PropertyDocument document) { + this.rdfWriter.writeTripleValueObject( + this.rdfWriter.getUri(document.getEntityId().getIri()), + RdfWriter.WB_PROPERTY_TYPE, + this.rdfWriter.getUri(document.getDatatype().getIri())); + } + + public void writePropertyDocument(PropertyDocument document) + throws RDFHandlerException { + + propertyRegister.setPropertyType(document.getEntityId(), document + .getDatatype().getIri()); + + final String subjectUri = document.getEntityId().getIri(); + final Resource subject = this.rdfWriter.getUri(subjectUri); + + writeDocumentType(subject, RdfWriter.WB_PROPERTY); + writePropertyDatatype(document); + writeDocumentTerms(document); + writeStatements(document); + writeInterPropertyLinks(document); + + finishDocument(); + } + + /** + * Writes triples which connect properties with their corresponding rdf + * properties for statements, simple statements, qualifiers, reference + * attributes and values. 
+ */ + public void writeInterPropertyLinks(PropertyDocument document) + throws RDFHandlerException { + Resource subject = this.rdfWriter.getUri(document.getEntityId() + .getIri()); + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_DIRECT_CLAIM_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.DIRECT)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_CLAIM_PROP), Vocabulary.getPropertyUri( + document.getEntityId(), PropertyContext.STATEMENT)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_STATEMENT_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.VALUE_SIMPLE)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_STATEMENT_VALUE_PROP), + Vocabulary.getPropertyUri(document.getEntityId(), + PropertyContext.VALUE)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_QUALIFIER_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.QUALIFIER_SIMPLE)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_QUALIFIER_VALUE_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.QUALIFIER)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_REFERENCE_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.REFERENCE_SIMPLE)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_REFERENCE_VALUE_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.REFERENCE)); + + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + .getUri(Vocabulary.WB_NO_VALUE_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.NO_VALUE)); + this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter + 
.getUri(Vocabulary.WB_NO_QUALIFIER_VALUE_PROP), Vocabulary + .getPropertyUri(document.getEntityId(), + PropertyContext.NO_QUALIFIER_VALUE)); + // TODO something more with NO_VALUE + } + + public void writeDocumentTerms(TermedDocument document) + throws RDFHandlerException { + final Resource subject = this.rdfWriter.getUri(document.getEntityId().getIri()); + writeTermTriples(subject, TermKind.LABEL, document.getLabels().values()); + writeTermTriples(subject, TermKind.DESCRIPTION, document.getDescriptions().values()); + for (List aliases : document.getAliases().values()) { + writeTermTriples(subject, TermKind.ALIAS, aliases); + } + } + + public void writeTermTriples(Resource subject, TermKind kind, + Collection terms) throws RDFHandlerException { + final IRI predicate; + switch (kind) { + case LABEL: + predicate = RdfWriter.RDFS_LABEL; + break; + case DESCRIPTION: + predicate = RdfWriter.SCHEMA_DESCRIPTION; + break; + case ALIAS: + predicate = RdfWriter.SKOS_ALT_LABEL; + break; + default: + throw new IllegalArgumentException(); + } + for (MonolingualTextValue mtv : terms) { + this.rdfWriter.writeTripleValueObject(subject, predicate, + AbstractRdfConverter.getMonolingualTextValueLiteral(mtv, + this.rdfWriter)); + } + } + + public void writeStatements(StatementDocument statementDocument) + throws RDFHandlerException { + for (StatementGroup statementGroup : statementDocument.getStatementGroups()) { + // determine the rank of the best statement + final StatementGroup bestStatements = statementGroup.getBestStatements(); + final StatementRank bestRank; + if (statementGroup.getBestStatements() != null) { + bestRank = bestStatements.iterator().next().getRank(); + } else { + bestRank = null; + } + + for (Statement statement : statementGroup) { + writeStatement(statement, statement.getRank() == bestRank); + } + } + } + + public void writeStatement(Statement statement, boolean best) throws RDFHandlerException { + if (best) { + writeSimpleStatement(statement); + } + 
writeFullStatement(statement, best); + } + + public void writeFullStatement(Statement statement, boolean best) throws RDFHandlerException { + final Resource subject = this.rdfWriter.getUri(statement.getSubject().getIri()); + + String statementUri = Vocabulary.getStatementUri(statement); + Resource statementResource = this.rdfWriter.getUri(statementUri); + final IRI propertyIri = this.rdfWriter.getUri( + Vocabulary.getPropertyUri(statement.getMainSnak().getPropertyId(), PropertyContext.STATEMENT)); + + this.rdfWriter.writeTripleUriObject(subject, propertyIri, statementUri); + this.rdfWriter.writeTripleValueObject(statementResource, + RdfWriter.RDF_TYPE, RdfWriter.WB_STATEMENT); + writeClaim(statementResource, statement.getClaim()); + writeReferences(statementResource, statement.getReferences()); + writeStatementRankTriple(statementResource, statement.getRank(), best); + } + + public void writeSimpleStatement(Statement statement) { + final Resource subject = this.rdfWriter.getUri(statement.getSubject().getIri()); + + this.snakRdfConverter.setSnakContext(subject, PropertyContext.DIRECT); + statement.getMainSnak().accept(this.snakRdfConverter); + } + + /** + * Writes a triple for the {@link StatementRank} of a {@link Statement} to + * the dump. If this is a best-rank statement, also writes a best rank triple. 
+ * + * @param subject The IRI of the statement + * @param rank The rank of the statement + * @param best True if this statement is a best-rank statement + */ + public void writeStatementRankTriple(Resource subject, StatementRank rank, boolean best) { + try { + this.rdfWriter.writeTripleUriObject(subject, RdfWriter.WB_RANK, + Vocabulary.getStatementRankUri(rank)); + if (best) { + this.rdfWriter.writeTripleUriObject(subject, RdfWriter.RDF_TYPE, Vocabulary.WB_BEST_RANK); + } + } catch (RDFHandlerException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + public void writeReferences(Resource statementResource, + List references) throws RDFHandlerException { + for (Reference reference : references) { + Resource resource = this.referenceRdfConverter + .addReference(reference); + this.rdfWriter.writeTripleValueObject(statementResource, + RdfWriter.PROV_WAS_DERIVED_FROM, resource); + } + } + + public void writeClaim(Resource claimResource, Claim claim) { + // write main snak + this.snakRdfConverter.setSnakContext(claimResource, + PropertyContext.VALUE); + claim.getMainSnak().accept(this.snakRdfConverter); + this.snakRdfConverter.setSnakContext(claimResource, + PropertyContext.VALUE_SIMPLE); + claim.getMainSnak().accept(this.snakRdfConverter); + // write qualifier + this.snakRdfConverter.setSnakContext(claimResource, + PropertyContext.QUALIFIER); + for (SnakGroup snakGroup : claim.getQualifiers()) { + for (Snak snak : snakGroup) { + snak.accept(this.snakRdfConverter); + } + } + this.snakRdfConverter.setSnakContext(claimResource, + PropertyContext.QUALIFIER_SIMPLE); + for (SnakGroup snakGroup : claim.getQualifiers()) { + for (Snak snak : snakGroup) { + snak.accept(this.snakRdfConverter); + } + } + } + + public void writeSiteLinks(Resource subject, Map siteLinks) + throws RDFHandlerException { + + for (String key : siteLinks.keySet()) { + SiteLink siteLink = siteLinks.get(key); + String siteLinkUrl = this.sites.getSiteLinkUrl(siteLink); + if (siteLinkUrl != 
null) { + IRI siteLinkUri = this.rdfWriter.getUri(siteLinkUrl); + + this.rdfWriter.writeTripleValueObject(siteLinkUri, + RdfWriter.RDF_TYPE, RdfWriter.SCHEMA_ARTICLE); + this.rdfWriter.writeTripleValueObject(siteLinkUri, + RdfWriter.SCHEMA_ABOUT, subject); + + String siteLanguageCode = this.sites.getLanguageCode(siteLink.getSiteKey()); + this.rdfWriter.writeTripleStringObject(siteLinkUri, + RdfWriter.SCHEMA_IN_LANGUAGE, convertSiteLanguageCode(siteLanguageCode)); + + for(ItemIdValue badge : siteLink.getBadges()) { + this.rdfWriter.writeTripleUriObject(siteLinkUri, + RdfWriter.WB_BADGE, badge.getIri()); + } + } else { + logger.warn("Failed to find URL for page \"" + + siteLink.getPageTitle() + "\" on site \"" + + siteLink.getSiteKey() + "\""); + } + } + } + + private String convertSiteLanguageCode(String languageCode) { + try { + return WikimediaLanguageCodes.getLanguageCode(languageCode); + } catch (IllegalArgumentException e) { + logger.warn("Unknown Wikimedia language code \"" + + languageCode + + "\". Using this code in RDF now, but this might be wrong."); + return languageCode; + } + } + + public static Value getMonolingualTextValueLiteral( + MonolingualTextValue value, RdfWriter rdfWriter) { + String languageCode; + try { + languageCode = WikimediaLanguageCodes.getLanguageCode(value + .getLanguageCode()); + } catch (IllegalArgumentException e) { + languageCode = value.getLanguageCode(); + logger.warn("Unknown Wikimedia language code \"" + + languageCode + + "\". 
Using this code in RDF now, but this might be wrong."); + } + return rdfWriter.getLiteral(value.getText(), languageCode); + } +} diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java index 333d89be4..1170d0a39 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java @@ -1,10 +1,10 @@ package org.wikidata.wdtk.rdf; -/* +/*- * #%L * Wikidata Toolkit RDF * %% - * Copyright (C) 2014 Wikidata Toolkit Developers + * Copyright (C) 2014 - 2019 Wikidata Toolkit Developers * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,488 +20,116 @@ * #L% */ -import java.util.Collection; -import java.util.List; -import java.util.Map; - import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.rio.RDFHandlerException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.wikidata.wdtk.datamodel.interfaces.*; -import org.wikidata.wdtk.rdf.values.AnyValueConverter; - -/** - * This class provides functions to convert objects of wdtk-datamodel in a rdf - * graph. 
- * - * @author Michael Günther - * - */ -public class RdfConverter { - - static final Logger logger = LoggerFactory.getLogger(RdfConverter.class); - - final RdfWriter rdfWriter; - final AnyValueConverter valueRdfConverter; - final SnakRdfConverter snakRdfConverter; - final OwlDeclarationBuffer owlDeclarationBuffer = new OwlDeclarationBuffer(); - final ReferenceRdfConverter referenceRdfConverter; - final PropertyRegister propertyRegister; - final Sites sites; - final RankBuffer rankBuffer = new RankBuffer(); - - int tasks = RdfSerializer.TASK_ALL_ENTITIES - | RdfSerializer.TASK_ALL_EXACT_DATA; - - public RdfConverter(RdfWriter rdfWriter, Sites sites, - PropertyRegister propertyRegister) { - this.sites = sites; - this.rdfWriter = rdfWriter; - this.propertyRegister = propertyRegister; - - this.valueRdfConverter = new AnyValueConverter(rdfWriter, - this.owlDeclarationBuffer, this.propertyRegister); - this.snakRdfConverter = new SnakRdfConverter(rdfWriter, - this.owlDeclarationBuffer, this.propertyRegister, - this.valueRdfConverter); - this.referenceRdfConverter = new ReferenceRdfConverter(rdfWriter, - this.snakRdfConverter, this.propertyRegister.siteUri); - } - - /** - * Sets the tasks that should be performed during export. The value should - * be a combination of flags such as {@link RdfSerializer#TASK_STATEMENTS}. - * - * @param tasks - * the tasks to be performed - */ - public void setTasks(int tasks) { - this.tasks = tasks; - } - - /** - * Returns the tasks that should be performed during export. The value - * should be a combination of flags such as - * {@link RdfSerializer#TASK_STATEMENTS}. - * - * @return tasks to be performed - */ - public int getTasks() { - return this.tasks; - } - - /** - * Writes OWL declarations for all basic vocabulary elements used in the - * dump. 
- * - * @throws RDFHandlerException - */ - public void writeBasicDeclarations() throws RDFHandlerException { - for (Map.Entry uriType : Vocabulary - .getKnownVocabularyTypes().entrySet()) { - this.rdfWriter.writeTripleUriObject(uriType.getKey(), - RdfWriter.RDF_TYPE, uriType.getValue()); - } - } - - public void writeNamespaceDeclarations() throws RDFHandlerException { - this.rdfWriter.writeNamespaceDeclaration("wd", - this.propertyRegister.getUriPrefix()); - this.rdfWriter - .writeNamespaceDeclaration("wikibase", Vocabulary.PREFIX_WBONTO); - this.rdfWriter.writeNamespaceDeclaration("rdf", Vocabulary.PREFIX_RDF); - this.rdfWriter - .writeNamespaceDeclaration("rdfs", Vocabulary.PREFIX_RDFS); - this.rdfWriter.writeNamespaceDeclaration("owl", Vocabulary.PREFIX_OWL); - this.rdfWriter.writeNamespaceDeclaration("xsd", Vocabulary.PREFIX_XSD); - this.rdfWriter.writeNamespaceDeclaration("schema", - Vocabulary.PREFIX_SCHEMA); - this.rdfWriter - .writeNamespaceDeclaration("skos", Vocabulary.PREFIX_SKOS); - this.rdfWriter - .writeNamespaceDeclaration("prov", Vocabulary.PREFIX_PROV); - } - - public void writeItemDocument(ItemDocument document) - throws RDFHandlerException { - - if (!hasTask(RdfSerializer.TASK_ITEMS)) { - return; - } - - String subjectUri = document.getEntityId().getIri(); // probably - // construct the - // URI from - // Vocabulary - Resource subject = this.rdfWriter.getUri(subjectUri); - - if ((this.tasks & (RdfSerializer.TASK_ALL_EXACT_DATA | RdfSerializer.TASK_SIMPLE_STATEMENTS)) != 0) { - this.rdfWriter.writeTripleValueObject(subject, RdfWriter.RDF_TYPE, - RdfWriter.WB_ITEM); - } - - writeDocumentTerms(subject, document); - - if (hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)) { - writeSimpleStatements(subject, document); - } - - if (hasTask(RdfSerializer.TASK_STATEMENTS)) { - writeStatements(subject, document); - } - - writeSiteLinks(subject, document.getSiteLinks()); - - this.snakRdfConverter.writeAuxiliaryTriples(); - 
this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, - hasTask(RdfSerializer.TASK_STATEMENTS), - hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)); - this.referenceRdfConverter.writeReferences(); - } - - public void writePropertyDocument(PropertyDocument document) - throws RDFHandlerException { - - propertyRegister.setPropertyType(document.getEntityId(), document - .getDatatype().getIri()); - - if (!hasTask(RdfSerializer.TASK_PROPERTIES)) { - return; - } - - String propertyUri = document.getEntityId().getIri(); - Resource subject = this.rdfWriter.getUri(propertyUri); - - this.rdfWriter.writeTripleValueObject(subject, RdfWriter.RDF_TYPE, - RdfWriter.WB_PROPERTY); - - writeDocumentTerms(subject, document); - - if (hasTask(RdfSerializer.TASK_DATATYPES)) { - this.rdfWriter.writeTripleValueObject(subject, - RdfWriter.WB_PROPERTY_TYPE, - this.rdfWriter.getUri(document.getDatatype().getIri())); - } - - if (hasTask(RdfSerializer.TASK_STATEMENTS)) { - writeStatements(subject, document); - } - - if (hasTask(RdfSerializer.TASK_PROPERTY_LINKS)) { - writeInterPropertyLinks(document); - - } - - this.snakRdfConverter.writeAuxiliaryTriples(); - this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, - hasTask(RdfSerializer.TASK_STATEMENTS), - hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)); - this.referenceRdfConverter.writeReferences(); - } - - /** - * Writes triples which conect properties with there corresponding rdf - * properties for statements, simple statements, qualifiers, reference - * attributes and values. 
- * - * @param document - * @throws RDFHandlerException - */ - void writeInterPropertyLinks(PropertyDocument document) - throws RDFHandlerException { - Resource subject = this.rdfWriter.getUri(document.getEntityId() - .getIri()); - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_DIRECT_CLAIM_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.DIRECT)); - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_CLAIM_PROP), Vocabulary.getPropertyUri( - document.getEntityId(), PropertyContext.STATEMENT)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_STATEMENT_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.VALUE_SIMPLE)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_STATEMENT_VALUE_PROP), - Vocabulary.getPropertyUri(document.getEntityId(), - PropertyContext.VALUE)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_QUALIFIER_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.QUALIFIER_SIMPLE)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_QUALIFIER_VALUE_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.QUALIFIER)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_REFERENCE_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.REFERENCE_SIMPLE)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_REFERENCE_VALUE_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.REFERENCE)); - - this.rdfWriter.writeTripleUriObject(subject, this.rdfWriter - .getUri(Vocabulary.WB_NO_VALUE_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.NO_VALUE)); - this.rdfWriter.writeTripleUriObject(subject, 
this.rdfWriter - .getUri(Vocabulary.WB_NO_QUALIFIER_VALUE_PROP), Vocabulary - .getPropertyUri(document.getEntityId(), - PropertyContext.NO_QUALIFIER_VALUE)); - // TODO something more with NO_VALUE - } - - void writeStatements(Resource subject, StatementDocument statementDocument) - throws RDFHandlerException { - for (StatementGroup statementGroup : statementDocument - .getStatementGroups()) { - IRI property = this.rdfWriter.getUri(Vocabulary.getPropertyUri( - statementGroup.getProperty(), PropertyContext.STATEMENT)); - for (Statement statement : statementGroup) { - this.rdfWriter.writeTripleUriObject(subject, property, - Vocabulary.getStatementUri(statement)); - } - } - - for (StatementGroup statementGroup : statementDocument - .getStatementGroups()) { - for (Statement statement : statementGroup) { - writeStatement(statement); - } - writeBestRankTriples(); - } - } - - void writeSimpleStatements(Resource subject, - StatementDocument statementDocument) { - for (StatementGroup statementGroup : statementDocument - .getStatementGroups()) { - for (Statement statement : statementGroup) { - if (statement.getQualifiers().size() == 0) { - this.snakRdfConverter.setSnakContext(subject, - PropertyContext.DIRECT); - statement.getMainSnak() - .accept(this.snakRdfConverter); - } - } - } - } - - void writeDocumentTerms(Resource subject, TermedDocument document) - throws RDFHandlerException { - if (hasTask(RdfSerializer.TASK_LABELS)) { - writeTermTriples(subject, RdfWriter.RDFS_LABEL, document - .getLabels().values()); - } - if (hasTask(RdfSerializer.TASK_DESCRIPTIONS)) { - writeTermTriples(subject, RdfWriter.SCHEMA_DESCRIPTION, document - .getDescriptions().values()); - } - if (hasTask(RdfSerializer.TASK_ALIASES)) { - for (List aliases : document.getAliases() - .values()) { - writeTermTriples(subject, RdfWriter.SKOS_ALT_LABEL, aliases); - } - } - } - - void writeTermTriples(Resource subject, IRI predicate, - Collection terms) throws RDFHandlerException { - for (MonolingualTextValue 
mtv : terms) { - this.rdfWriter.writeTripleValueObject(subject, predicate, - RdfConverter.getMonolingualTextValueLiteral(mtv, - this.rdfWriter)); - } - } - - /** - * Writes a triple for the {@link StatementRank} of a {@link Statement} to - * the dump. - * - * @param subject - * @param rank - */ - void writeStatementRankTriple(Resource subject, StatementRank rank) { - try { - this.rdfWriter.writeTripleUriObject(subject, RdfWriter.WB_RANK, - getUriStringForRank(rank)); - this.rankBuffer.add(rank, subject); - - } catch (RDFHandlerException e) { - throw new RuntimeException(e.getMessage(), e); - } - } - - /** - * Writes triples to determine the statements with the highest rank. - */ - void writeBestRankTriples() { - for (Resource resource : this.rankBuffer.getBestRankedStatements()) { - try { - this.rdfWriter.writeTripleUriObject(resource, - RdfWriter.RDF_TYPE, RdfWriter.WB_BEST_RANK.toString()); - } catch (RDFHandlerException e) { - throw new RuntimeException(e.getMessage(), e); - } - } - this.rankBuffer.clear(); - } - - void writeStatement(Statement statement) throws RDFHandlerException { - String statementUri = Vocabulary.getStatementUri(statement); - Resource statementResource = this.rdfWriter.getUri(statementUri); - - this.rdfWriter.writeTripleValueObject(statementResource, - RdfWriter.RDF_TYPE, RdfWriter.WB_STATEMENT); - writeClaim(statementResource, statement.getClaim()); - - writeReferences(statementResource, statement.getReferences()); - - writeStatementRankTriple(statementResource, statement.getRank()); - - } - - void writeReferences(Resource statementResource, - List references) throws RDFHandlerException { - for (Reference reference : references) { - Resource resource = this.referenceRdfConverter - .addReference(reference); - this.rdfWriter.writeTripleValueObject(statementResource, - RdfWriter.PROV_WAS_DERIVED_FROM, resource); - } - } - - void writeClaim(Resource claimResource, Claim claim) { - // write main snak - 
this.snakRdfConverter.setSnakContext(claimResource, - PropertyContext.VALUE); - claim.getMainSnak().accept(this.snakRdfConverter); - this.snakRdfConverter.setSnakContext(claimResource, - PropertyContext.VALUE_SIMPLE); - claim.getMainSnak().accept(this.snakRdfConverter); - // write qualifier - this.snakRdfConverter.setSnakContext(claimResource, - PropertyContext.QUALIFIER); - for (SnakGroup snakGroup : claim.getQualifiers()) { - for (Snak snak : snakGroup) { - snak.accept(this.snakRdfConverter); - } - } - this.snakRdfConverter.setSnakContext(claimResource, - PropertyContext.QUALIFIER_SIMPLE); - for (SnakGroup snakGroup : claim.getQualifiers()) { - for (Snak snak : snakGroup) { - snak.accept(this.snakRdfConverter); - } - } - } - - void writeSiteLinks(Resource subject, Map siteLinks) - throws RDFHandlerException { - - if (!hasTask(RdfSerializer.TASK_SITELINKS)) { - return; - } - - for (String key : siteLinks.keySet()) { - SiteLink siteLink = siteLinks.get(key); - String siteLinkUrl = this.sites.getSiteLinkUrl(siteLink); - if (siteLinkUrl != null) { - IRI siteLinkUri = this.rdfWriter.getUri(siteLinkUrl); - - this.rdfWriter.writeTripleValueObject(siteLinkUri, - RdfWriter.RDF_TYPE, RdfWriter.SCHEMA_ARTICLE); - this.rdfWriter.writeTripleValueObject(siteLinkUri, - RdfWriter.SCHEMA_ABOUT, subject); - - String siteLanguageCode = this.sites.getLanguageCode(siteLink.getSiteKey()); - this.rdfWriter.writeTripleStringObject(siteLinkUri, - RdfWriter.SCHEMA_IN_LANGUAGE, convertSiteLanguageCode(siteLanguageCode)); - - for(ItemIdValue badge : siteLink.getBadges()) { - this.rdfWriter.writeTripleUriObject(siteLinkUri, - RdfWriter.WB_BADGE, badge.getIri()); - } - } else { - logger.warn("Failed to find URL for page \"" - + siteLink.getPageTitle() + "\" on site \"" - + siteLink.getSiteKey() + "\""); - } - } - } - - private String convertSiteLanguageCode(String languageCode) { - try { - return WikimediaLanguageCodes.getLanguageCode(languageCode); - } catch (IllegalArgumentException e) { - 
logger.warn("Unknown Wikimedia language code \"" - + languageCode - + "\". Using this code in RDF now, but this might be wrong."); - return languageCode; - } - } - - /** - * - * @param value - * @return - */ - public static Value getMonolingualTextValueLiteral( - MonolingualTextValue value, RdfWriter rdfWriter) { - String languageCode; - try { - languageCode = WikimediaLanguageCodes.getLanguageCode(value - .getLanguageCode()); - } catch (IllegalArgumentException e) { - languageCode = value.getLanguageCode(); - logger.warn("Unknown Wikimedia language code \"" - + languageCode - + "\". Using this code in RDF now, but this might be wrong."); - } - return rdfWriter.getLiteral(value.getText(), languageCode); - } - - /** - * Checks if the given task (or set of tasks) is to be performed. - * - * @param task - * the task (or set of tasks) to be checked - * @return true if the tasks include the given task - */ - boolean hasTask(int task) { - return ((this.tasks & task) == task); - } - - /** - * Returns an URI which represents the statement rank in a triple. - * - * @param rank - * @return - */ - String getUriStringForRank(StatementRank rank) { - switch (rank) { - case NORMAL: - return Vocabulary.WB_NORMAL_RANK; - case PREFERRED: - return Vocabulary.WB_PREFERRED_RANK; - case DEPRECATED: - return Vocabulary.WB_DEPRECATED_RANK; - default: - throw new IllegalArgumentException(); - } - } +import java.util.Collection; +import java.util.Map; +public class RdfConverter extends AbstractRdfConverter { + int tasks = RdfSerializer.TASK_ALL_ENTITIES + | RdfSerializer.TASK_ALL_EXACT_DATA; + + public RdfConverter(RdfWriter rdfWriter, Sites sites, PropertyRegister propertyRegister) { + super(rdfWriter, sites, propertyRegister); + } + + /** + * Sets the tasks that should be performed during export. The value should + * be a combination of flags such as {@link RdfSerializer#TASK_STATEMENTS}. 
+ * + * @param tasks + * the tasks to be performed + */ + public void setTasks(int tasks) { + this.tasks = tasks; + } + + /** + * Returns the tasks that should be performed during export. The value + * should be a combination of flags such as + * {@link RdfSerializer#TASK_STATEMENTS}. + * + * @return tasks to be performed + */ + public int getTasks() { + return this.tasks; + } + + /** + * Checks if the given task (or set of tasks) is to be performed. + * + * @param task + * the task (or set of tasks) to be checked + * @return true if the tasks include the given task + */ + boolean hasTask(int task) { + return ((this.tasks & task) == task); + } + + @Override + public void writeTermTriples(Resource subject, TermKind kind, Collection terms) throws RDFHandlerException { + switch (kind) { + case LABEL: + if (!hasTask(RdfSerializer.TASK_LABELS)) return; + break; + case DESCRIPTION: + if (!hasTask(RdfSerializer.TASK_DESCRIPTIONS)) return; + break; + case ALIAS: + if (!hasTask(RdfSerializer.TASK_ALIASES)) return; + break; + } + super.writeTermTriples(subject, kind, terms); + } + + @Override + public void writeSiteLinks(Resource subject, Map siteLinks) throws RDFHandlerException { + if (!hasTask(RdfSerializer.TASK_SITELINKS)) return; + super.writeSiteLinks(subject, siteLinks); + } + + @Override + public void writePropertyDatatype(PropertyDocument document) { + if (!hasTask(RdfSerializer.TASK_DATATYPES)) return; + super.writePropertyDatatype(document); + } + + @Override + public void writeInterPropertyLinks(PropertyDocument document) throws RDFHandlerException { + if (!hasTask(RdfSerializer.TASK_PROPERTY_LINKS)) return; + super.writeInterPropertyLinks(document); + } + + @Override + public void writeSimpleStatement(Statement statement) { + if (!hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)) return; + super.writeSimpleStatement(statement); + } + + @Override + public void writeFullStatement(Statement statement, boolean best) throws RDFHandlerException { + if 
(!hasTask(RdfSerializer.TASK_STATEMENTS)) return; + super.writeFullStatement(statement, best); + } + + @Override + public void writeItemDocument(ItemDocument document) throws RDFHandlerException { + if (!hasTask(RdfSerializer.TASK_ITEMS)) return; + super.writeItemDocument(document); + } + + @Override + public void writePropertyDocument(PropertyDocument document) throws RDFHandlerException { + if (!hasTask(RdfSerializer.TASK_PROPERTIES)) return; + super.writePropertyDocument(document); + } + + @Override + public void writeOWLDeclarations() { + this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, + this.hasTask(RdfSerializer.TASK_STATEMENTS), + this.hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)); + } } diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java index 2e3caf68c..6538148a8 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/Vocabulary.java @@ -28,14 +28,7 @@ import java.util.HashMap; import java.util.Map; -import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue; -import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; -import org.wikidata.wdtk.datamodel.interfaces.QuantityValue; -import org.wikidata.wdtk.datamodel.interfaces.Reference; -import org.wikidata.wdtk.datamodel.interfaces.Snak; -import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; -import org.wikidata.wdtk.datamodel.interfaces.Statement; -import org.wikidata.wdtk.datamodel.interfaces.TimeValue; +import org.wikidata.wdtk.datamodel.interfaces.*; /** * This class contains static methods and constants that define the various OWL @@ -596,6 +589,19 @@ public static String getQuantityValueUri(QuantityValue value) { return PREFIX_WIKIDATA_VALUE + bytesToHex(md.digest()); } + public static String getStatementRankUri(StatementRank rank) { + switch (rank) { + case NORMAL: + return Vocabulary.WB_NORMAL_RANK; + case 
PREFERRED: + return Vocabulary.WB_PREFERRED_RANK; + case DEPRECATED: + return Vocabulary.WB_DEPRECATED_RANK; + default: + throw new IllegalArgumentException(); + } + } + static ByteBuffer longByteBuffer = ByteBuffer.allocate(Long.SIZE / 8); static void updateMessageDigestWithLong(MessageDigest md, long x) { diff --git a/wdtk-rdf/src/test/java/org/wikidata/wdtk/rdf/RdfConverterTest.java b/wdtk-rdf/src/test/java/org/wikidata/wdtk/rdf/RdfConverterTest.java index 730397413..e652fb36d 100644 --- a/wdtk-rdf/src/test/java/org/wikidata/wdtk/rdf/RdfConverterTest.java +++ b/wdtk-rdf/src/test/java/org/wikidata/wdtk/rdf/RdfConverterTest.java @@ -126,18 +126,31 @@ public void testWriteStatementRankTriple() throws RDFHandlerException, StatementRank rank = StatementRank.DEPRECATED; Resource subject = this.rdfFactory .createIRI("http://www.wikidata.org/Q10Snone"); - this.rdfConverter.writeStatementRankTriple(subject, rank); + this.rdfConverter.writeStatementRankTriple(subject, rank, false); this.rdfWriter.finish(); Model model = RdfTestHelpers.parseRdf(this.out.toString()); assertEquals(RdfTestHelpers.parseRdf(RdfTestHelpers .getResourceFromFile("StatementRankTriple.rdf")), model); } + @Test + public void testWriteStatementRankTripleBest() throws RDFHandlerException, + RDFParseException, IOException { + StatementRank rank = StatementRank.NORMAL; + Resource subject = this.rdfFactory + .createIRI("http://www.wikidata.org/Q10Snone"); + this.rdfConverter.writeStatementRankTriple(subject, rank, true); + this.rdfWriter.finish(); + Model model = RdfTestHelpers.parseRdf(this.out.toString()); + assertEquals(RdfTestHelpers.parseRdf(RdfTestHelpers + .getResourceFromFile("StatementRankTripleBest.rdf")), model); + } + @Test public void testStatementSimpleValue() throws RDFHandlerException, RDFParseException, IOException { Statement statement = objectFactory.createStatement("Q100", "P227"); - this.rdfConverter.writeStatement(statement); + this.rdfConverter.writeFullStatement(statement, false); 
this.rdfWriter.finish(); Model model = RdfTestHelpers.parseRdf(this.out.toString()); assertEquals(model, RdfTestHelpers.parseRdf(RdfTestHelpers @@ -153,7 +166,7 @@ public void testStatementComplexValue() throws RDFHandlerException, Statement statement = StatementBuilder .forSubjectAndProperty(ItemIdValue.NULL, PropertyIdValue.NULL) .withValue(value).build(); - this.rdfConverter.writeStatement(statement); + this.rdfConverter.writeFullStatement(statement, false); this.rdfWriter.finish(); Model model = RdfTestHelpers.parseRdf(this.out.toString()); assertEquals(model, RdfTestHelpers.parseRdf(RdfTestHelpers @@ -167,7 +180,7 @@ public void testStatementNoValue() throws RDFHandlerException, Statement statement = StatementBuilder .forSubjectAndProperty(ItemIdValue.NULL, pid) .withNoValue().build(); - this.rdfConverter.writeStatement(statement); + this.rdfConverter.writeFullStatement(statement, false); this.rdfWriter.finish(); Model model = RdfTestHelpers.parseRdf(this.out.toString()); assertEquals(model, RdfTestHelpers.parseRdf(RdfTestHelpers @@ -316,12 +329,13 @@ private PropertyDocument createWrongTestPropertyDocument() { public void testWriteSimpleStatements() throws RDFHandlerException, RDFParseException, IOException { ItemDocument document = createTestItemDocument(); - this.rdfConverter.writeSimpleStatements(resource, document); + this.rdfConverter.setTasks(RdfSerializer.TASK_SIMPLE_STATEMENTS); + this.rdfConverter.writeStatements(document); this.rdfWriter.finish(); Model model = RdfTestHelpers.parseRdf(this.out.toString()); assertEquals( RdfTestHelpers - .parseRdf("\n ;\n" + .parseRdf("\n ;\n" + " .\n"), model); } diff --git a/wdtk-rdf/src/test/resources/Statement.rdf b/wdtk-rdf/src/test/resources/Statement.rdf index 23eadbc03..856b781ad 100644 --- a/wdtk-rdf/src/test/resources/Statement.rdf +++ b/wdtk-rdf/src/test/resources/Statement.rdf @@ -1,4 +1,4 @@ - + . 
a ; "TestString" ; ; diff --git a/wdtk-rdf/src/test/resources/StatementCplx.rdf b/wdtk-rdf/src/test/resources/StatementCplx.rdf index b0cf3fb3c..c637f09cd 100644 --- a/wdtk-rdf/src/test/resources/StatementCplx.rdf +++ b/wdtk-rdf/src/test/resources/StatementCplx.rdf @@ -1,4 +1,4 @@ - + . a ; ; "Point(51.0 13.0)"^^ ; diff --git a/wdtk-rdf/src/test/resources/StatementNoValue.rdf b/wdtk-rdf/src/test/resources/StatementNoValue.rdf index f8214964a..301d11486 100644 --- a/wdtk-rdf/src/test/resources/StatementNoValue.rdf +++ b/wdtk-rdf/src/test/resources/StatementNoValue.rdf @@ -1,3 +1,3 @@ - + . a , ; . diff --git a/wdtk-rdf/src/test/resources/StatementRankTripleBest.rdf b/wdtk-rdf/src/test/resources/StatementRankTripleBest.rdf new file mode 100644 index 000000000..b8eb3996a --- /dev/null +++ b/wdtk-rdf/src/test/resources/StatementRankTripleBest.rdf @@ -0,0 +1,4 @@ + + . + + . \ No newline at end of file From 03a824258efc0a043cf91c18c4b6d92363b50125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Wed, 4 Sep 2019 11:26:22 +0200 Subject: [PATCH 2/2] Export direct statements only if there are no qualifiers This got accidentally changed in the previous refactoring. Restore the previous behaviour for now.
--- .../src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java index 1170d0a39..397a595c7 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/RdfConverter.java @@ -105,7 +105,9 @@ public void writeInterPropertyLinks(PropertyDocument document) throws RDFHandler @Override public void writeSimpleStatement(Statement statement) { if (!hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)) return; - super.writeSimpleStatement(statement); + if (statement.getQualifiers().size() == 0) { + super.writeSimpleStatement(statement); + } } @Override