From 3c89e23b8cda5ab4b73141607900d82f72edc075 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Thu, 9 Feb 2017 13:00:58 +0000 Subject: [PATCH] JENA-1288: Remove use of Xerces when alternatives exist. --- .../java/org/apache/jena/riot/SysRIOT.java | 5 +- .../jena/riot/checker/CheckerLiterals.java | 82 ++++--------------- .../jena/datatypes/xsd/XSDbase64Binary.java | 5 +- .../jena/datatypes/xsd/XSDhexBinary.java | 5 +- .../xsd/impl/XSDBaseNumericType.java | 2 +- .../reasoner/rulesys/builtins/MakeSkolem.java | 7 +- .../jena/graph/test/TestTypedLiterals.java | 5 +- 7 files changed, 35 insertions(+), 76 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/SysRIOT.java b/jena-arq/src/main/java/org/apache/jena/riot/SysRIOT.java index 33c5b52a19e..e0d892421de 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/SysRIOT.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/SysRIOT.java @@ -30,7 +30,10 @@ public class SysRIOT public static final String riotLoggerName = "org.apache.jena.riot" ; private static Logger riotLogger = LoggerFactory.getLogger(riotLoggerName) ; + /** @deprecated Do not use - lexical forms are always strict. */ + @Deprecated public static boolean StrictXSDLexicialForms = false ; + public static boolean strictMode = false ; /** Some people argue that absolute URIs should not be normalized. 
@@ -47,7 +50,7 @@ public class SysRIOT public static void setStrictMode(boolean state) { SysRIOT.strictMode = state ; - SysRIOT.StrictXSDLexicialForms = state ; + //SysRIOT.StrictXSDLexicialForms = state ; //SysRIOT.AbsURINoNormalization = state ; } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java b/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java index 426c3108c71..8fc59b61e38 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java @@ -23,19 +23,10 @@ import org.apache.jena.JenaRuntime ; import org.apache.jena.datatypes.RDFDatatype ; -import org.apache.jena.datatypes.xsd.impl.XSDAbstractDateTimeType ; -import org.apache.jena.datatypes.xsd.impl.XSDBaseNumericType ; -import org.apache.jena.datatypes.xsd.impl.XSDDouble ; -import org.apache.jena.datatypes.xsd.impl.XSDFloat ; import org.apache.jena.graph.Node ; -import org.apache.jena.riot.SysRIOT ; import org.apache.jena.riot.system.ErrorHandler ; import org.apache.jena.sparql.graph.NodeConst ; -import org.apache.xerces.impl.dv.InvalidDatatypeValueException ; -import org.apache.xerces.impl.dv.ValidatedInfo ; -import org.apache.xerces.impl.dv.ValidationContext ; -import org.apache.xerces.impl.dv.XSSimpleType ; -import org.apache.xerces.impl.validation.ValidationState ; +import org.apache.jena.util.SplitIRI; public class CheckerLiterals implements NodeChecker { // A flag to enable the test suite to read bad data. @@ -105,80 +96,41 @@ public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype } return true ; } + + // Whitespace. + // XSD allows whitespace before and after the lexical forms of a literal but not inside. + // Jena handles this correctly. 
protected static boolean validateByDatatype(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) { - if ( SysRIOT.StrictXSDLexicialForms ) { - if ( datatype instanceof XSDBaseNumericType || datatype instanceof XSDFloat - || datatype instanceof XSDDouble ) - return validateByDatatypeNumeric(lexicalForm, datatype, handler, line, col) ; - if ( datatype instanceof XSDAbstractDateTimeType ) - return validateByDatatypeDateTime(lexicalForm, datatype, handler, line, col) ; - } +// if ( SysRIOT.StrictXSDLexicialForms ) +// checkWhitespace(lexicalForm, datatype, handler, line, col); return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ; } - protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, - long line, long col) { + protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) { if ( datatype.isValid(lexicalForm) ) return true ; - handler.warning("Lexical form '" + lexicalForm + "' not valid for datatype " + datatype.getURI(), line, col) ; + handler.warning("Lexical form '" + lexicalForm + "' not valid for datatype " + xsdDatatypeName(datatype), line, col) ; return false ; } - protected static boolean validateByDatatypeDateTime(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) { + protected static boolean checkWhitespace(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) { if ( lexicalForm.contains(" ") ) { - handler.warning("Whitespace in XSD date or time literal: '" + lexicalForm + "'", line, col) ; + handler.warning("Whitespace in "+xsdDatatypeName(datatype)+" literal: '" + lexicalForm + "'", line, col) ; return false ; } if ( lexicalForm.contains("\n") ) { - handler.warning("Newline in XSD date or time literal: '" + lexicalForm + "'", line, col) ; + handler.warning("Newline in "+xsdDatatypeName(datatype)+" literal: '" + 
lexicalForm + "'", line, col) ; return false ; } if ( lexicalForm.contains("\r") ) { - handler.warning("Newline in XSD date or time literal: '" + lexicalForm + "'", line, col) ; + handler.warning("Newline in "+xsdDatatypeName(datatype)+" literal: '" + lexicalForm + "'", line, col) ; return false ; } - // if ( ! StrictXSDLexicialForms ) - // Jena is already strict. - return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ; + return true ; } - - protected static boolean validateByDatatypeNumeric(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) { - // Do a white space check as well for numerics. - if ( lexicalForm.contains(" ") ) { - handler.warning("Whitespace in numeric XSD literal: '" + lexicalForm + "'", line, col) ; - return false ; - } - if ( lexicalForm.contains("\n") ) { - handler.warning("Newline in numeric XSD literal: '" + lexicalForm + "'", line, col) ; - return false ; - } - if ( lexicalForm.contains("\r") ) { - handler.warning("Carriage return in numeric XSD literal: '" + lexicalForm + "'", line, col) ; - return false ; - } - -// if ( lit.getDatatype() instanceof XSDAbstractDateTimeType ) -// { -// // Do a white space check as well for numerics. -// if ( lex.contains(" ") ) { handler.warning("Whitespace in XSD date or time literal: "+node, line, col) ; return false ; } -// if ( lex.contains("\n") ) { handler.warning("Newline in XSD date or time literal: "+node, line, col) ; return false ; } -// if ( lex.contains("\r") ) { handler.warning("Newline in XSD date or time literal: "+node, line, col) ; return false ; } -// } -// - if ( ! 
SysRIOT.StrictXSDLexicialForms ) - return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ; - - // From Jena 2.6.3, XSDDatatype.parse - XSSimpleType typeDeclaration = (XSSimpleType)datatype.extendedTypeDefinition() ; - try { - ValidationContext context = new ValidationState(); - ValidatedInfo resultInfo = new ValidatedInfo(); - Object result = typeDeclaration.validate(lexicalForm, context, resultInfo); - return true ; - } catch (InvalidDatatypeValueException e) { - handler.warning("Lexical form '"+lexicalForm+"' not valid for datatype "+datatype.getURI(), line, col) ; - return false ; - } + + private static String xsdDatatypeName(RDFDatatype datatype) { + return "XSD "+SplitIRI.localname(datatype.getURI()); } } diff --git a/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDbase64Binary.java b/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDbase64Binary.java index 19bb5e8a719..78a1541a3d4 100644 --- a/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDbase64Binary.java +++ b/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDbase64Binary.java @@ -18,8 +18,9 @@ package org.apache.jena.datatypes.xsd; +import javax.xml.bind.DatatypeConverter; + import org.apache.jena.datatypes.DatatypeFormatException ; -import org.apache.xerces.impl.dv.util.Base64 ; /** * Implement base64binary type. Most of the work is done in the superclass. 
@@ -38,7 +39,7 @@ public XSDbase64Binary(String typeName) { @Override public String unparse(Object value) { if (value instanceof byte[]) { - return Base64.encode((byte[])value); + return DatatypeConverter.printBase64Binary((byte[])value); } else { throw new DatatypeFormatException("base64 asked to encode an unwrapped byte array"); } diff --git a/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDhexBinary.java b/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDhexBinary.java index e289ec917e5..7ca77815db7 100644 --- a/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDhexBinary.java +++ b/jena-core/src/main/java/org/apache/jena/datatypes/xsd/XSDhexBinary.java @@ -18,8 +18,9 @@ package org.apache.jena.datatypes.xsd; +import javax.xml.bind.DatatypeConverter; + import org.apache.jena.datatypes.DatatypeFormatException ; -import org.apache.xerces.impl.dv.util.HexBin ; /** * Implement hexbinary type. Most of the work is done in the superclass. @@ -38,7 +39,7 @@ public XSDhexBinary(String typeName) { @Override public String unparse(Object value) { if (value instanceof byte[]) { - return HexBin.encode((byte[])value); + return DatatypeConverter.printHexBinary((byte[])value); } else { throw new DatatypeFormatException("hexBinary asked to encode a non-byte arrary"); } diff --git a/jena-core/src/main/java/org/apache/jena/datatypes/xsd/impl/XSDBaseNumericType.java b/jena-core/src/main/java/org/apache/jena/datatypes/xsd/impl/XSDBaseNumericType.java index abbab9d3f1c..22ba236a1de 100644 --- a/jena-core/src/main/java/org/apache/jena/datatypes/xsd/impl/XSDBaseNumericType.java +++ b/jena-core/src/main/java/org/apache/jena/datatypes/xsd/impl/XSDBaseNumericType.java @@ -57,7 +57,7 @@ public XSDBaseNumericType(String typeName, Class javaClass) { /** * Test whether the given LiteralLabel is a valid instance - * of this datatype. This takes into accound typing information + * of this datatype. 
This takes into account typing information * as well as lexical form - for example an xsd:string is * never considered valid as an xsd:integer (even if it is * lexically legal like "1"). diff --git a/jena-core/src/main/java/org/apache/jena/reasoner/rulesys/builtins/MakeSkolem.java b/jena-core/src/main/java/org/apache/jena/reasoner/rulesys/builtins/MakeSkolem.java index b5aedb99537..01491d50586 100644 --- a/jena-core/src/main/java/org/apache/jena/reasoner/rulesys/builtins/MakeSkolem.java +++ b/jena-core/src/main/java/org/apache/jena/reasoner/rulesys/builtins/MakeSkolem.java @@ -21,11 +21,12 @@ import java.security.MessageDigest ; import java.security.NoSuchAlgorithmException ; +import javax.xml.bind.DatatypeConverter; + import org.apache.jena.graph.Node ; import org.apache.jena.graph.NodeFactory ; import org.apache.jena.reasoner.rulesys.RuleContext ; import org.apache.jena.shared.JenaException ; -import org.apache.xerces.impl.dv.util.Base64 ; /** * Bind a blank node to the first argument. 
@@ -75,9 +76,9 @@ public boolean bodyCall(Node[] args, int length, RuleContext context) { MessageDigest digester = MessageDigest.getInstance("MD5"); digester.reset(); byte[] digest = digester.digest(key.toString().getBytes()); - Node skolem = NodeFactory.createBlankNode(Base64.encode(digest)); + String label = DatatypeConverter.printBase64Binary(digest); + Node skolem = NodeFactory.createBlankNode(label); return context.getEnv().bind(args[0], skolem); - } catch (NoSuchAlgorithmException e) { throw new JenaException(e); } diff --git a/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java b/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java index 7f5b6744e96..11c137cb2e9 100644 --- a/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java +++ b/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java @@ -25,6 +25,8 @@ import java.text.SimpleDateFormat ; import java.util.* ; +import javax.xml.bind.DatatypeConverter; + import junit.framework.TestCase ; import junit.framework.TestSuite ; import org.apache.jena.JenaRuntime ; @@ -43,7 +45,6 @@ import org.apache.jena.shared.impl.JenaParameters ; import org.apache.jena.vocabulary.RDF ; import org.apache.jena.vocabulary.XSD ; -import org.apache.xerces.impl.dv.util.HexBin ; import org.junit.Assert ; /** @@ -960,7 +961,7 @@ public void testBinary3() { Literal l = m.createTypedLiteral(data, XSDDatatype.XSDhexBinary); LiteralLabel ll = l.asNode().getLiteral(); assertEquals("binary test 1b", ll.getDatatype(), XSDDatatype.XSDhexBinary); - assertEquals("binary test 2b", HexBin.encode(data), ll.getLexicalForm()); + assertEquals("binary test 2b", DatatypeConverter.printHexBinary(data), ll.getLexicalForm()); // Check round tripping from value LiteralLabel l2 = m.createTypedLiteral(ll.getLexicalForm(), XSDDatatype.XSDhexBinary).asNode().getLiteral();