Skip to content

Commit

Permalink
apacheGH-2473: Accept rdf:parseType="Statements"
Browse files Browse the repository at this point in the history
  • Loading branch information
afs committed May 17, 2024
1 parent 918b041 commit 8f5f4b3
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import javax.xml.namespace.QName;

import org.apache.jena.atlas.io.IndentedWriter;
import org.apache.jena.atlas.logging.Log;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
Expand All @@ -38,7 +37,6 @@
import org.apache.jena.irix.IRIs;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
import org.apache.jena.riot.SysRIOT;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.out.NodeFmtLib;
import org.apache.jena.riot.system.FactoryRDF;
Expand Down Expand Up @@ -252,7 +250,10 @@ private static class Counter { int value = 1; }
/**
 * Mutable holder for a collection-item node. It records the node for the most
 * recent item added to the collection at this level; initially {@code null}.
 */
private static class NodeHolder {
    Node node = null;
}

/** rdf:parseType for objects, with a default "Lexical" case */
/**
* rdf:parseType for objects, with a default "Lexical" case - see
* {@link #objectParseType} for alternative, non-standard names
*/
private enum ObjectParseType { Literal, Collection, Resource,
// This is an extra parseType to indicate the "no ParseType" case
// which is a plain lexical or nested resource.
Expand Down Expand Up @@ -1042,15 +1043,21 @@ private String xmlLang(Attributes attributes, Position position) {
return langStr;
}


private ObjectParseType objectParseType(String parseTypeStr, Position position) {
if ( parseTypeStr == null )
return ObjectParseType.Plain;
try {
String parseTypeName = parseTypeStr;
if ( parseTypeName.equals("literal") ) {
Log.warn(SysRIOT.getLogger(), "Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'");
parseTypeName = "Literal";
switch(parseTypeName) {
case "literal" -> {
RDFXMLparseWarning("Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'", position);
parseTypeName = "Literal";
}
// CIM (Common Information Model) - see github issue 2473
case "Statements" -> {
RDFXMLparseWarning("Encountered rdf:parseType='Statements'. Treated as rdf:parseType='literal'", position);
parseTypeName = "Literal";
}
}
return ObjectParseType.valueOf(parseTypeName);
} catch (IllegalArgumentException ex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.io.IndentedWriter;
import org.apache.jena.atlas.logging.Log;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
Expand All @@ -40,7 +39,6 @@
import org.apache.jena.irix.IRIException;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
import org.apache.jena.riot.SysRIOT;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.ParserProfile;
Expand Down Expand Up @@ -200,10 +198,11 @@ private static class Counter { int value = 1; }
// whitespace characters inside elements. Skip it.
private static final QName xmlQNameSpace = new QName(XMLConstants.XML_NS_URI, "space");

private static final String parseTypeCollection = "Collection";
private static final String parseTypeLiteral = "Literal";
private static final String parseTypeLiteralAlt = "literal";
private static final String parseTypeResource = "Resource";
private static final String parseTypeCollection = "Collection";
private static final String parseTypeLiteral = "Literal";
private static final String parseTypeLiteralAlt = "literal";
private static final String parseTypeLiteralStmts = "Statements"; // CIM Github issue 2473
private static final String parseTypeResource = "Resource";
// This is a dummy parseType for when there is no given rdf:parseType.
private static final String parseTypePlain = "$$";

Expand Down Expand Up @@ -579,7 +578,7 @@ private void propertyElement(Node subject, StartElement startElt, Counter listEl
}

private XMLEvent propertyElementProcess(Node subject, StartElement startElt, Counter listElementCounter) {
Location location = startElt.getLocation();
final Location location = startElt.getLocation();
Node property;
if ( qNameMatches(rdfContainerItem, startElt.getName()) )
property = iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location);
Expand Down Expand Up @@ -638,9 +637,15 @@ private XMLEvent propertyElementProcess(Node subject, StartElement startElt, Cou
}

String parseTypeName = parseType;
if ( parseTypeName.equals(parseTypeLiteralAlt) ) {
Log.warn(SysRIOT.getLogger(), "Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'");
parseTypeName = "Literal";
switch( parseTypeName) {
case parseTypeLiteralAlt -> {
RDFXMLparseWarning("Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'", location);
parseTypeName = "Literal";
}
case parseTypeLiteralStmts -> {
RDFXMLparseWarning("Encountered rdf:parseType='Statements'. Treated as rdf:parseType='literal'", location);
parseTypeName = "Literal";
}
}

switch(parseTypeName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.io.IndentedWriter;
import org.apache.jena.atlas.logging.Log;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
Expand All @@ -44,7 +43,6 @@
import org.apache.jena.irix.IRIException;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
import org.apache.jena.riot.SysRIOT;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.ParserProfile;
Expand Down Expand Up @@ -201,10 +199,11 @@ private static class Counter { int value = 1; }
// whitespace characters inside elements. Skip it.
private static final QName xmlQNameSpace = new QName(XMLConstants.XML_NS_URI, "space");

private static final String parseTypeCollection = "Collection";
private static final String parseTypeLiteral = "Literal";
private static final String parseTypeLiteralAlt = "literal";
private static final String parseTypeResource = "Resource";
private static final String parseTypeCollection = "Collection";
private static final String parseTypeLiteral = "Literal";
private static final String parseTypeLiteralAlt = "literal";
private static final String parseTypeLiteralStmts = "Statements"; // CIM Github issue 2473
private static final String parseTypeResource = "Resource";
// This is a dummy parseType for when there is no given rdf:parseType.
private static final String parseTypePlain = "$$";

Expand Down Expand Up @@ -608,16 +607,21 @@ private int propertyElementProcess(Node subject, QName qName,
// Must be an empty element.
int event = nextEventAny();
if ( ! lookingAt(event, END_ELEMENT) )
throw RDFXMLparseError("Expecting end element tag when using rdf:resource or rdf:NodeId on a proeprty.");
throw RDFXMLparseError("Expecting end element tag when using rdf:resource or rdf:NodeId on a property.");
return event;
}

String parseTypeName = parseType;
if ( parseTypeName.equals(parseTypeLiteralAlt) ) {
Log.warn(SysRIOT.getLogger(), "Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'");
parseTypeName = "Literal";
switch( parseTypeName) {
case parseTypeLiteralAlt -> {
RDFXMLparseWarning("Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'", location());
parseTypeName = "Literal";
}
case parseTypeLiteralStmts -> {
RDFXMLparseWarning("Encountered rdf:parseType='Statements'. Treated as rdf:parseType='literal'", location());
parseTypeName = "Literal";
}
}

switch(parseTypeName) {
case parseTypeResource -> {
// Implicit <rdf:Description><rdf:Description> i.e. fresh blank node
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,18 +328,19 @@ static void runTestExpectFailure(String testLabel,
parseFile(testSubjectFactory, actualErrorHandler, filename);
output.printf("## Expected RiotExpection : %-4s : %s : %s", subjectLabel, testLabel, filename);
});
checkErrorHandler(testLabel, actualErrorHandler, -1, 1, -1);
checkErrorHandler(testLabel, actualErrorHandler, -1, 1, 0);
}

/** Run a test expecting the given number of warnings. */
static void runTestExpectWarning(String testLabel,
ReaderRIOTFactory testSubjectFactory, String subjectLabel,
int numWarnings,
String filename) {
ErrorHandlerCollector actualErrorHandler = new ErrorHandlerCollector();
LogCtl.withLevel(SysRIOT.getLogger(), "Error", ()->
parseFile(testSubjectFactory, actualErrorHandler, filename)
);
checkErrorHandler(testLabel, actualErrorHandler, 0, 0, 1);
checkErrorHandler(testLabel, actualErrorHandler, numWarnings, 0, 0);
}

/**
Expand Down Expand Up @@ -446,7 +447,7 @@ private static void checkErrorHandler(String testLabel, ErrorHandlerCollector er
/** Counts check of an error handler */
private static void checkErrorHandler(String testLabel, ErrorHandlerCollector errorHandler, int countWarnings, int countErrors, int countFatals) {
if ( countFatals >= 0 )
assertEquals("Fatal message counts different", countWarnings, errorHandler.fatals.size());
assertEquals("Fatal message counts different", countFatals, errorHandler.fatals.size());
if ( countErrors >= 0 )
assertEquals("Error message counts different", countErrors, errorHandler.errors.size());
if ( countWarnings >= 0 )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,19 @@ public TestRRX(String label, Lang lang) {
errorTest("error01.rdf");
}

@Test public void error02() {
@Test public void warn_literal() {
// Now valid. parseType="literal" -> parseType="Literal"
// because ARP behaved that way.
// Warning issued.
warningTest("warn01.rdf", 1);
}

@Test public void cim_statements01() {
// parseType="Statements"
// because ARP behaved that way.
//errorTest("error02.rdf");
// Warning issued.
warningTest("error02.rdf");
warningTest("cim_statements01.rdf", 2);
}

@Test public void noBase01() {
Expand Down Expand Up @@ -98,10 +105,10 @@ private void noBase(String filename) {
}
}

private void warningTest(String filename) {
private void warningTest(String filename, int warnings) {
ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
String fn = "testing/RIOT/rrx-files/"+filename;
RunTestRDFXML.runTestExpectWarning(filename, factory, label, fn);
RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings, fn);
}

private void errorTest(String filename) {
Expand Down
12 changes: 12 additions & 0 deletions jena-arq/testing/RIOT/rrx-files/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Extensions to RDF/XML


CIM - uses rdf:parseType="Statements" for rdf:parseType="Literal"
https://github.com/apache/jena/issues/2473
cim_statements01.rdf

Lower case "l" for parse type literal
A common mistake.
https://github.com/apache/jena/issues/2430
https://github.com/apache/jena/pull/2431/commits/a324fd4c1502c410fdb91c557ed2785795acbba3
warn01.rdf
27 changes: 27 additions & 0 deletions jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version='1.0'?>
<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 -->

<rdf:RDF
xmlns:cim="http://iec.ch/TC57/2014/CIM-schema-cim16#"
xmlns:dm="http://iec.ch/2002/schema/CIM_difference_model#"
xmlns:md="http://iec.ch/TC57/61970-552/ModelDescription/1#"
xmlns:meta="http://iec.ch/TC57/2014/CIM-schema-cim16#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://local/"
xmlns:ex="http://example/"
xml:base="http://base/">

<!-- parse type "Statements" -->
<dm:DifferenceModel rdf:about="#_248c809d-1d7b-397c-830f-6928007ae6d9">
<dm:forwardDifferences rdf:parseType="Statements">
<cim:A rdf:about="#_individual-A-1">
<cim:A-2-B rdf:resource="#_individual-B-1"/>
</cim:A>
<cim:B rdf:about="#_individual-B-1"/>
<cim:D rdf:about="#_individual-D-1"/>
</dm:forwardDifferences>
<dm:reverseDifferences rdf:parseType="Statements">
</dm:reverseDifferences>
</dm:DifferenceModel>

</rdf:RDF>
14 changes: 0 additions & 14 deletions jena-arq/testing/RIOT/rrx-files/error02.rdf

This file was deleted.

17 changes: 17 additions & 0 deletions jena-arq/testing/RIOT/rrx-files/warn01.rdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version='1.0'?>
<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 -->

<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://local/"
xmlns:ex="http://example/"
xml:base="http://base/">

<rdf:Description rdf:about="http://example.org/basket">
<!-- Lower case "literal" - it should be uppercase "Literal" -->
<ex:xmlliteral rdf:parseType="literal">
<innerTag>Inner Tag</innerTag>
</ex:xmlliteral>
</rdf:Description>

</rdf:RDF>

0 comments on commit 8f5f4b3

Please sign in to comment.