From bfc6e470e55a84d33854337755b8df0d3e9a86c1 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Mon, 22 May 2017 00:29:18 +0100 Subject: [PATCH 1/2] Change CSV_PREFIX so that it shows this is Jena-specific. --- .../src/main/java/org/apache/jena/riot/lang/ReaderRIOTCSV.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderRIOTCSV.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderRIOTCSV.java index b4b07ea3b06..db8034eaea5 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderRIOTCSV.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderRIOTCSV.java @@ -43,7 +43,7 @@ public ReaderRIOT create(Lang language, ParserProfile profile) { } } - public static final String CSV_PREFIX = "http://w3c/future-csv-vocab/"; + public static final String CSV_PREFIX = "http://jena.apache.org/csv/"; public static final String CSV_ROW = CSV_PREFIX + "row"; private InputStream input = null; From d286ea0ae635c121a68310405b6ecdc6306c0381 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Mon, 22 May 2017 00:30:23 +0100 Subject: [PATCH 2/2] JENA-1340: riot command to work by configuring an RDFParserBuilder. 
--- .../apache/jena/riot/RDFParserRegistry.java | 14 ++++- .../apache/jena/riot/lang/RiotParsers.java | 4 +- .../src/main/java/riotcmd/CmdLangParse.java | 61 +++++++++---------- jena-cmds/src/main/java/riotcmd/riot.java | 12 ++-- 4 files changed, 50 insertions(+), 41 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java index f19c78e2298..1a1458b6185 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java @@ -190,13 +190,23 @@ private static class ReaderRIOTLang implements ReaderRIOT @Override public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) { - LangRIOT parser = RiotParsers.createParser(in, lang, baseURI, output, parserProfile); + // Unnecessary - RDFParser did it and set it in the ParserProfile +// if ( baseURI != null ) { +// IRIResolver newResolver = IRIResolver.create(baseURI) ; +// parserProfile.setIRIResolver(newResolver); +// } + LangRIOT parser = RiotParsers.createParser(in, lang, output, parserProfile); parser.parse() ; } @Override public void read(Reader in, String baseURI, ContentType ct, StreamRDF output, Context context) { - LangRIOT parser = RiotParsers.createParser(in, lang, baseURI, output, parserProfile); + // Unnecessary - RDFParser did it and set it in the ParserProfile +// if ( baseURI != null ) { +// IRIResolver newResolver = IRIResolver.create(baseURI) ; +// parserProfile.setIRIResolver(newResolver); +// } + LangRIOT parser = RiotParsers.createParser(in, lang, output, parserProfile); parser.parse() ; } } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java index 054ec23b6d8..8e8df94406e 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java +++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java @@ -48,7 +48,7 @@ public class RiotParsers { private RiotParsers() {} /** InputStream input */ - public static LangRIOT createParser(InputStream input, Lang lang, String baseIRI, StreamRDF dest, ParserProfile profile) { + public static LangRIOT createParser(InputStream input, Lang lang, StreamRDF dest, ParserProfile profile) { if ( RDFLanguages.sameLang(RDFJSON, lang) ) { Tokenizer tokenizer = new TokenizerJSON(PeekReader.makeUTF8(input)); return createParserRdfJson(tokenizer, dest, profile); @@ -67,7 +67,7 @@ public static LangRIOT createParser(InputStream input, Lang lang, String baseIRI } /** Reader input */ - public static LangRIOT createParser(Reader input, Lang lang, String baseIRI, StreamRDF dest, ParserProfile profile) { + public static LangRIOT createParser(Reader input, Lang lang, StreamRDF dest, ParserProfile profile) { if ( RDFLanguages.sameLang(RDFJSON, lang) ) { Tokenizer tokenizer = new TokenizerJSON(PeekReader.make(input)); return createParserRdfJson(tokenizer, dest, profile); diff --git a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java index d247c52f23b..34a7dec60bf 100644 --- a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java +++ b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java @@ -37,7 +37,6 @@ import org.apache.jena.atlas.lib.InternalErrorException ; import org.apache.jena.atlas.lib.Pair ; import org.apache.jena.atlas.web.ContentType ; -import org.apache.jena.atlas.web.TypedInputStream ; import org.apache.jena.query.ARQ ; import org.apache.jena.riot.* ; import org.apache.jena.riot.lang.LabelToNode ; @@ -99,7 +98,6 @@ protected void processModulesAndArgs() { protected interface PostParseHandler { void postParse(); } static class ParseRecord { - final String baseURI; final String filename; final boolean success; final long timeMillis; @@ -107,10 +105,8 @@ static class ParseRecord { final long quads; final long tuples = 0; - 
public ParseRecord(String baseURI, String filename, - boolean successful, long timeMillis, + public ParseRecord(String filename, boolean successful, long timeMillis, long countTriples, long countQuads) { - this.baseURI = baseURI; this.filename = filename; this.success = successful; this.timeMillis = timeMillis; @@ -121,6 +117,9 @@ public ParseRecord(String baseURI, String filename, @Override protected void exec() { + if ( modLangParse.skipOnBadTerm() ) + throw new CmdException("Not supported : skip on bad term"); + boolean oldStrictValue = SysRIOT.isStrictMode() ; if ( modLangParse.strictMode() ) SysRIOT.setStrictMode(true) ; @@ -149,6 +148,7 @@ protected void exec() { } try { + // The actual parsing ... if ( super.getPositional().isEmpty() ) { ParseRecord parseRec = parseFile("-"); outcome(parseRec); @@ -162,10 +162,12 @@ protected void exec() { outcome(parseRec); } } + // ... parsing done. + if ( postParse != null ) postParse.postParse(); // Post parse information. - // Total if more then one file. + // Total if more than one file. if ( super.getPositional().size() > 1 && modTime.timingEnabled() ) { long totalMillis = 0; long totalTriples = 0; @@ -206,32 +208,33 @@ public void outcome(ParseRecord rtn) { public ParseRecord parseFile(String filename) { String baseURI = modLangParse.getBaseIRI() ; + RDFParserBuilder builder = RDFParser.create(); + if ( baseURI != null ) + builder.base(baseURI); + Lang lang = selectLang(null, null, RDFLanguages.NQUADS) ; + builder.lang(lang); + + // Set the source. 
if ( filename.equals("-") ) { - if ( baseURI == null ) + if ( baseURI == null ) { baseURI = "http://base/"; - TypedInputStream in = TypedInputStream.wrap(System.in) ; - return parseRIOT(baseURI, "stdin", in) ; - } else { - try ( TypedInputStream in = RDFDataMgr.open(filename) ) { - return parseRIOT(baseURI, filename, in) ; - } catch (RiotNotFoundException ex) { - System.err.println("Can't open '"+filename+"' "+ex.getMessage()) ; - return new ParseRecord(null, filename, false, -1, -1, -1); + builder.base(baseURI); } + builder.source(System.in); + } else { + builder.source(filename); } + return parseRIOT(builder, filename); } protected abstract Lang selectLang(String filename, ContentType contentType, Lang dftLang ) ; - protected ParseRecord parseRIOT(String baseURI, String filename, TypedInputStream in) { - ContentType ct = in.getMediaType() ; - - baseURI = SysRIOT.chooseBaseIRI(baseURI, filename) ; - - RDFParserBuilder builder = RDFParser.create(); + protected ParseRecord parseRIOT(RDFParserBuilder builder, /*Info for the ProcessOutcome*/ String filename) { boolean checking = true ; - if ( modLangParse.explicitChecking() ) checking = true ; - if ( modLangParse.explicitNoChecking() ) checking = false ; + if ( modLangParse.explicitChecking() ) + checking = true; + if ( modLangParse.explicitNoChecking() ) + checking = false; builder.checking(checking); ErrorHandler errHandler = ErrorHandlerFactory.errorHandlerWarn ; @@ -244,14 +247,9 @@ protected ParseRecord parseRIOT(String baseURI, String filename, TypedInputStrea } if ( modLangParse.skipOnBadTerm() ) { - // TODO skipOnBadterm + // skipOnBadterm - this needs collaboration from the parser. } - Lang lang = selectLang(filename, ct, RDFLanguages.NQUADS) ; - if ( ! RDFLanguages.isQuads(lang) && ! RDFLanguages.isTriples(lang) ) - throw new CmdException("Undefined language: "+lang) ; - builder.lang(lang); - // Make a flag. // Input and output subflags. 
// If input is "label, then output using NodeToLabel.createBNodeByLabelRaw() ; @@ -279,8 +277,6 @@ protected ParseRecord parseRIOT(String baseURI, String filename, TypedInputStrea modTime.startTimer() ; sink.start() ; - - builder.source(in); RDFParser parser = builder.build(); try { parser.parse(sink); @@ -290,7 +286,8 @@ protected ParseRecord parseRIOT(String baseURI, String filename, TypedInputStrea } sink.finish() ; long x = modTime.endTimer() ; - ParseRecord outcome = new ParseRecord(baseURI, filename, successful, x, sink.countTriples(), sink.countQuads()); + // TEMP + ParseRecord outcome = new ParseRecord(filename, successful, x, sink.countTriples(), sink.countQuads()); return outcome; } diff --git a/jena-cmds/src/main/java/riotcmd/riot.java b/jena-cmds/src/main/java/riotcmd/riot.java index 1bf8cf6b1f7..4ef6df863b2 100644 --- a/jena-cmds/src/main/java/riotcmd/riot.java +++ b/jena-cmds/src/main/java/riotcmd/riot.java @@ -47,11 +47,13 @@ protected Lang selectLang(String filename, ContentType contentType, Lang dftLang if ( contentType != null && ! WebContent.matchContentType(WebContent.ctTextPlain, contentType) ) return RDFLanguages.contentTypeToLang(contentType) ; - - Lang lang = RDFLanguages.filenameToLang(filename) ; - if ( lang == null ) - lang = dftLang ; - return lang ; + + if ( filename != null ) { + Lang lang = RDFLanguages.filenameToLang(filename) ; + if ( lang != null ) + return lang; + } + return dftLang; } @Override