From 42a8c8b7b8e72f9ba7aece5f6e1fd4c4a9ca2d7c Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Sun, 17 May 2026 12:20:10 +0100 Subject: [PATCH] GH-3926: handle unrecognized syntax (riot) --- .../java/org/apache/jena/riot/RDFParser.java | 28 ++++++--- .../apache/jena/riot/RDFParserBuilder.java | 7 ++- .../src/main/java/riotcmd/CmdLangParse.java | 58 +++++++++++-------- 3 files changed, 58 insertions(+), 35 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java index 1f90e8fd1b4..adb4f47d9f2 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java @@ -314,12 +314,21 @@ public DatasetGraph toDatasetGraph() { return dataset; } + public static class SetupException extends RiotException { + public SetupException(String msg) { super(msg) ; } + public SetupException(String msg, Throwable th) { super(msg, th) ; } + } + + private static RiotException setupException(String msg) { + return new SetupException(msg); + } + /** * Parse the source, sending the results to a {@link StreamRDF}. */ public void parse(StreamRDF destination) { if ( !canUseThisParser ) - throw new RiotException("Parser has been used once and can not be used again"); + throw setupException("Parser has been used once and can not be used again"); // Consuming mode. canUseThisParser = (inputStream == null && javaReader == null); // FactoryRDF is stateful in the LabelToNode mapping. 
@@ -357,7 +366,7 @@ private void parseURI(StreamRDF destination) { if ( forceLang != null ) { ReaderRIOTFactory r = RDFParserRegistry.getFactory(forceLang); if ( r == null ) - throw new RiotException("No parser registered for language: " + forceLang); + throw setupException("No parser registered for language: " + forceLang); ct = forceLang.getContentType(); readerRiot = createReader(r, forceLang); } else { @@ -377,11 +386,16 @@ else if ( path != null ) else target = baseURI; ct = WebContent.determineCT(input.getContentType(), hintLang, target); - if ( ct == null ) - throw new RiotException("Failed to determine the content type: (URI=" + baseURI + " : stream=" + input.getContentType()+")"); + if ( ct == null ) { + String inputCT = input.getContentType(); + String msg = (inputCT != null) + ? "Failed to determine the content type for " + baseURI + " : stream=" + inputCT + : "Failed to determine the content type for " + baseURI; + throw setupException(msg); + } readerRiot = createReader(ct); if ( readerRiot == null ) - throw new RiotException("No parser registered for content type: " + ct.getContentTypeStr()); + throw setupException("No parser registered for content type: " + ct.getContentTypeStr()); } read(readerRiot, input, null, baseURI, context, ct, destination); } @@ -395,11 +409,11 @@ private void parseNotUri(StreamRDF destination) { lang = forceLang; ContentType ct = WebContent.determineCT(null, lang, baseURI); if ( ct == null ) - throw new RiotException("Failed to determine the RDF syntax (.lang or .base required)"); + throw setupException("Failed to determine the RDF syntax (.lang or .base required)"); ReaderRIOT readerRiot = createReader(ct); if ( readerRiot == null ) - throw new RiotException("No parser registered for content type: " + ct.getContentTypeStr()); + throw setupException("No parser registered for content type: " + ct.getContentTypeStr()); Reader jr = javaReader; if ( stringToParse != null ) jr = new StringReader(stringToParse); 
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFParserBuilder.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFParserBuilder.java index 1e1286543c4..3e6174a71d2 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/RDFParserBuilder.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFParserBuilder.java @@ -232,9 +232,10 @@ private void clearSource() { } /** - * Set the hint {@link Lang}. This is the RDF syntax used when there is no way to - * deduce the syntax (e.g. read from a InputStream, not recognized file extension, no - * recognized HTTP Content-Type provided). + * Set the hint {@link Lang}. This is the RDF syntax used when there is no other way to + * deduce the syntax (e.g. read from an InputStream, or the file extension is not recognized, + * or the HTTP Content-Type does not make sense). + * To force the choice of language, use {@link #forceLang(Lang)}. * * @param lang * @return this diff --git a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java index 55d8d195493..1323e8f0ebb 100644 --- a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java +++ b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java @@ -56,6 +56,7 @@ import org.apache.jena.sparql.core.DatasetGraphFactory; import org.apache.jena.sparql.core.Quad; import org.apache.jena.sys.JenaSystem; +import org.slf4j.Logger; /** Common framework for running RIOT parsers */ public abstract class CmdLangParse extends CmdMain { @@ -162,8 +163,7 @@ protected void exec() { try { exec$(); - } - finally { + } finally { SysRIOT.setStrictMode(oldStrictValue); } } @@ -265,8 +265,9 @@ protected void exec() { // pr.success is true if the indicates the parser completed it's run // (no failure-on-error or unexpected exceptions). 
for ( ParseRecord pr : outcomes ) { - if ( !pr.success || pr.errHandler.hadErrors() ) + if ( !pr.success || pr.errHandler.hadErrors() ) { throw new TerminationException(1); + } } } @@ -363,28 +364,29 @@ protected ParseRecord parseRIOT(RDFParserBuilder builder, String filenameLabel, // Build parser output additions. StreamRDFCounting parserOut; - { - StreamRDF s = parserOutputStream; - if ( setupRDFS != null ) { - // Remove literals as subjects - s = RDFSFactory.removeGeneralizedRDF(s); - // Generate RDFS (this feeds into the stream created above). - s = RDFSFactory.streamRDFS(s, setupRDFS); - // Parser sends data to RDFS, which goes to the filter, then to parserOutputStream - } - // If added here, count is quads and triples seen in the input. - if ( modLangParse.mergeQuads() ) - s = new QuadsToTriples(s); - parserOut = StreamRDFLib.count(s); - s = null; + StreamRDF s = parserOutputStream; + if ( setupRDFS != null ) { + // Remove literals as subjects + s = RDFSFactory.removeGeneralizedRDF(s); + // Generate RDFS (this feeds into the stream created above). + s = RDFSFactory.streamRDFS(s, setupRDFS); + // Parser sends data to RDFS, which goes to the filter, then to parserOutputStream } - - ErrorHandlerCLI errHandler = ErrorHandlerCLI.errorHandlerTracking(ErrorHandlerFactory.stdLogger, - passRelativeURIs, // Silent warnings if allowing relative URIs. - true, // Fail on error - stopOnWarnings, // Fail on warnings - ()->parserOut.finish() // Flush to align log messages - ); + // If added here, count is quads and triples seen in the input. + if ( modLangParse.mergeQuads() ) + s = new QuadsToTriples(s); + parserOut = StreamRDFLib.count(s); + // Not used beyond this point. + s = null; + + Logger logger = ErrorHandlerFactory.stdLogger; + ErrorHandlerCLI errHandler = ErrorHandlerCLI + .errorHandlerTracking(logger, + passRelativeURIs, // Silence warnings if allowing relative URIs. 
+ true, // Fail on error + stopOnWarnings, // Fail on warnings + ()->parserOut.finish() // Flush to align log messages + ); builder.errorHandler(errHandler); boolean successful = true; @@ -395,7 +397,13 @@ protected ParseRecord parseRIOT(RDFParserBuilder builder, String filenameLabel, parser.parse(parserOut); successful = true; } catch (RiotNotFoundException ex) { - errHandler.error(ex.getMessage(), -1, -1); + logger.error(ex.getMessage()); + successful = false; + } catch (RDFParser.SetupException ex) { + logger.error(ex.getMessage()); + successful = false; + } catch (RiotParseException ex) { + // Presumably already reported via errHandler before being thrown; is this reliable enough? successful = false; } catch (RiotException ex) { successful = false;