Skip to content

Commit

Permalink
eclipse-rdf4jGH-2840 Implement an RDFParser and RDFWriter for Newline…
Browse files Browse the repository at this point in the history
… delimited JSON-LD

Introduce NDJSONLDParser, which extends JSONLDParser. It reads each JSON-LD line from the input, collects the parsed JSON objects into a single list (flattening any line that is itself a JSON array), and passes that list on to the JSON-LD API as one JSON document.

Introduce NDJSONLDWriter, which groups triples by subject and context and writes one JSON-LD line per group by delegating to JSONLDWriter.

Signed-off-by: desislava.hristova <desislava.hristova@ontotext.com>
  • Loading branch information
desislava-hristova-ontotext committed Feb 22, 2021
1 parent a72da24 commit c59993e
Show file tree
Hide file tree
Showing 14 changed files with 461 additions and 3 deletions.
11 changes: 11 additions & 0 deletions core/rio/api/src/main/java/org/eclipse/rdf4j/rio/RDFFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,17 @@ public class RDFFormat extends FileFormat {
SimpleValueFactory.getInstance().createIRI("http://www.w3.org/ns/formats/JSON-LD"), SUPPORTS_NAMESPACES,
SUPPORTS_CONTEXTS, NO_RDF_STAR);

/**
* The NDJSON-LD file format: Newline Delimited JSON-LD.
* <p>
* The file extension <code>.ndjsonld</code> is recommended for NDJSON-LD documents; <code>.jsonl</code> and
* <code>.ndjson</code> are registered as additional file extensions. The media type is
* <code>application/x-ld+ndjson</code> and the encoding is UTF-8.
* </p>
* <p>
* The format is declared as supporting namespaces and contexts, but not RDF-star. The argument that the JSON-LD
* format fills with an IRI (presumably the format's standard URI) is <code>null</code> here.
* </p>
*/
public static final RDFFormat NDJSONLD = new RDFFormat("NDJSON-LD", Arrays.asList("application/x-ld+ndjson"),
StandardCharsets.UTF_8, Arrays.asList("ndjsonld", "jsonl", "ndjson"), null, SUPPORTS_NAMESPACES,
SUPPORTS_CONTEXTS, NO_RDF_STAR);

/**
* The <a href="http://www.w3.org/TR/rdf-json/" >RDF/JSON</a> file format, an RDF serialization format that supports
* recording of named graphs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ public BufferedGroupingRDFHandler(int bufferSize, RDFHandler... handlers) {
this.contexts = new HashSet<>();
}

/**
 * Gives subclasses access to the statements that are currently buffered.
 * <p>
 * The returned model is the handler's own buffer, not a copy, so changes made through it are visible to this
 * handler. NOTE(review): {@code handleStatement} touches the buffer while holding {@code bufferLock}; callers
 * presumably need to hold that lock as well - confirm.
 * </p>
 *
 * @return the live buffer of statements
 */
protected Model getBufferedStatements() {
	return this.bufferedStatements;
}

@Override
public void handleStatement(Statement st) throws RDFHandlerException {
synchronized (bufferLock) {
Expand All @@ -76,7 +80,7 @@ public void handleStatement(Statement st) throws RDFHandlerException {
/*
* not synchronized, assumes calling method has obtained a lock on bufferLock
*/
private void processBuffer() throws RDFHandlerException {
protected void processBuffer() throws RDFHandlerException {
// primary grouping per context.
for (Resource context : contexts) {
Model contextData = bufferedStatements.filter(null, null, null, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ valueFactory, getParserConfig(), getParseErrorListener(), nodeID -> createNode(n
options.setDocumentLoader(loader);
}
JsonFactory factory = configureNewJsonFactory();
JsonParser nextParser = (in != null) ? factory.createParser(in) : factory.createParser(reader);
final Object parsedJson = JsonUtils.fromJsonParser(nextParser);
final Object parsedJson = getJSONObject(in, reader, factory);

JsonLdProcessor.toRDF(parsedJson, callback, options);
} catch (JsonLdError e) {
Expand All @@ -142,6 +141,11 @@ valueFactory, getParserConfig(), getParseErrorListener(), nodeID -> createNode(n
}
}

/**
 * Reads the JSON document that is subsequently converted to RDF. Subclasses may override this to assemble the
 * document differently.
 *
 * @param in      the byte source; used whenever it is not {@code null}
 * @param reader  the character source; used only when {@code in} is {@code null}
 * @param factory the factory used to create the JSON parser
 * @return the parsed JSON value
 * @throws IOException if the input cannot be read or parsed
 */
protected Object getJSONObject(InputStream in, Reader reader, JsonFactory factory) throws IOException {
	final JsonParser jsonParser;
	if (in == null) {
		jsonParser = factory.createParser(reader);
	} else {
		jsonParser = factory.createParser(in);
	}
	return JsonUtils.fromJsonParser(jsonParser);
}

/**
* Get an instance of JsonFactory configured using the settings from {@link #getParserConfig()}.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*******************************************************************************
* Copyright (c) 2021 Eclipse RDF4J contributors.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.rio.ndjsonld;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.io.input.BOMInputStream;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.rio.*;
import org.eclipse.rdf4j.rio.jsonld.JSONLDParser;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.github.jsonldjava.utils.JsonUtils;

/**
* Introduce a parser capable of parsing Newline Delimited JSON-LD, where each line is a serialized JSON-LD record. The
* format is inspired by Newline Delimited JSON format<a>http://ndjson.org/</a>. Even though each line is a separate
* JSON-LD document, the whole document is treated as a single RDF document, having one single BNodes context to
* preserve BNodes identifiers.
*
* @author Desislava Hristova
*/
public class NDJSONLDParser extends JSONLDParser implements RDFParser {

/**
* Default constructor
*/
public NDJSONLDParser() {
super();
}

/**
* Creates a RDF4J NDJSONLD Parser using the given {@link ValueFactory} to create new {@link Value}s.
*
* @param valueFactory The ValueFactory to use
*/
public NDJSONLDParser(final ValueFactory valueFactory) {
super(valueFactory);
}

@Override
public RDFFormat getRDFFormat() {
return RDFFormat.NDJSONLD;
}

@Override
protected Object getJSONObject(InputStream in, Reader reader, JsonFactory factory) throws IOException {
List<Object> arrayOfJSONLD = new LinkedList<>();
try (BufferedReader bufferedReader = new BufferedReader(reader)) {
String line;
while ((line = bufferedReader.readLine()) != null) {
if (!line.isEmpty()) {
JsonParser nextParser = factory.createParser(new ByteArrayInputStream(line.getBytes()));
Object singleJSONLD = JsonUtils.fromJsonParser(nextParser);
if (singleJSONLD instanceof List) {
arrayOfJSONLD.addAll((List) singleJSONLD);
} else {
arrayOfJSONLD.add(singleJSONLD);
}
}
}
return arrayOfJSONLD;
}
}

@Override
public void parse(InputStream in, String baseURI) throws RDFParseException, RDFHandlerException, IOException {
if (in == null) {
throw new IllegalArgumentException("Input stream must not be 'null'");
}

parse(new InputStreamReader(new BOMInputStream(in, false), StandardCharsets.UTF_8), baseURI);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*******************************************************************************
* Copyright (c) 2021 Eclipse RDF4J contributors.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.rio.ndjsonld;

import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.RDFParserFactory;

/**
 * {@link RDFParserFactory} implementation for the NDJSON-LD format; every call to {@link #getParser()} yields a
 * fresh {@link NDJSONLDParser}.
 */
public class NDJSONLDParserFactory implements RDFParserFactory {

	/**
	 * @return a newly created {@link NDJSONLDParser}
	 */
	@Override
	public RDFParser getParser() {
		final RDFParser parser = new NDJSONLDParser();
		return parser;
	}

	/**
	 * @return {@link RDFFormat#NDJSONLD}
	 */
	@Override
	public RDFFormat getRDFFormat() {
		final RDFFormat supported = RDFFormat.NDJSONLD;
		return supported;
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*******************************************************************************
* Copyright (c) 2021 Eclipse RDF4J contributors.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.rio.ndjsonld;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.rio.*;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFWriter;
import org.eclipse.rdf4j.rio.helpers.BasicWriterSettings;
import org.eclipse.rdf4j.rio.helpers.BufferedGroupingRDFHandler;
import org.eclipse.rdf4j.rio.helpers.JSONLDSettings;
import org.eclipse.rdf4j.rio.jsonld.JSONLDWriter;

/**
 * An {@link RDFWriter} for Newline Delimited JSON-LD. Statements are buffered, grouped by context and subject, and
 * every non-empty group is written as one JSON-LD document followed by a line separator. The actual JSON-LD
 * serialization is delegated to a {@link JSONLDWriter} created per group with pretty printing switched off.
 */
public class NDJSONLDWriter extends AbstractRDFWriter implements RDFWriter {

	private final BufferedGroupingRDFHandler bufferedGroupingRDFHandler;

	/** Namespaces reported so far, in reporting order; replayed to the delegate writer of every group. */
	private final Map<String, String> namespacesBuffer;

	/**
	 * Creates a new NDJSONLDWriter that will write to the supplied OutputStream.
	 *
	 * @param outputStream The OutputStream to write the NDJSONLD document to.
	 */
	public NDJSONLDWriter(OutputStream outputStream) {
		this(outputStream, null);
	}

	/**
	 * Creates a new NDJSONLDWriter that will write to the supplied Writer.
	 *
	 * @param writer The Writer to write the NDJSONLD document to.
	 */
	public NDJSONLDWriter(Writer writer) {
		this(writer, null);
	}

	/**
	 * Creates a new NDJSONLDWriter that will write UTF-8 encoded output to the supplied OutputStream.
	 *
	 * @param out     The OutputStream to write the NDJSONLD document to.
	 * @param baseURI The base URI handed to every delegate {@link JSONLDWriter}; may be {@code null}.
	 */
	public NDJSONLDWriter(OutputStream out, String baseURI) {
		this(new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)), baseURI);
	}

	/**
	 * Creates a new NDJSONLDWriter that will write to the supplied Writer.
	 *
	 * @param writer  The Writer to write the NDJSONLD document to.
	 * @param baseURI The base URI handed to every delegate {@link JSONLDWriter}; may be {@code null}.
	 */
	public NDJSONLDWriter(Writer writer, String baseURI) {
		namespacesBuffer = new LinkedHashMap<>();
		bufferedGroupingRDFHandler = new BufferedGroupingRDFHandler() {

			@Override
			protected void processBuffer() throws RDFHandlerException {
				for (Resource context : getBufferedStatements().contexts()) {
					for (Resource subject : getBufferedStatements().subjects()) {
						Iterator<Statement> group = getBufferedStatements()
								.getStatements(subject, null, null, context)
								.iterator();
						// Not every subject occurs in every context; an empty group must not produce an
						// (empty) JSON-LD line of its own.
						if (group.hasNext()) {
							writeGroup(group, writer, baseURI);
						}
					}
				}

				getBufferedStatements().clear();
			}
		};
	}

	/**
	 * Serializes one group of statements as a single JSON-LD document and terminates it with a line separator.
	 *
	 * @param group   the statements of the group; must have at least one element
	 * @param writer  the target the delegate writer is created on
	 * @param baseURI the base URI for the delegate writer; may be {@code null}
	 * @throws RDFHandlerException if the delegate fails or the line separator cannot be written
	 */
	private void writeGroup(Iterator<Statement> group, Writer writer, String baseURI) throws RDFHandlerException {
		JSONLDWriter jsonldWriter = getJsonldWriter(writer, baseURI);
		jsonldWriter.startRDF();
		for (Map.Entry<String, String> namespace : namespacesBuffer.entrySet()) {
			jsonldWriter.handleNamespace(namespace.getKey(), namespace.getValue());
		}
		while (group.hasNext()) {
			jsonldWriter.handleStatement(group.next());
		}
		jsonldWriter.endRDF();
		try {
			jsonldWriter.getWriter().write(System.lineSeparator());
			// The separator is written after the delegate has finished, so flush it explicitly; otherwise it
			// may stay behind in a buffering writer such as the one created by the OutputStream constructors.
			jsonldWriter.getWriter().flush();
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	/**
	 * Creates the delegate writer for one group. The delegate shares this writer's configuration object, and
	 * pretty printing is switched off on that shared object so that each document stays on a single line.
	 */
	private JSONLDWriter getJsonldWriter(Writer writer, String baseURI) {
		JSONLDWriter jsonldWriter = new JSONLDWriter(writer, baseURI);
		jsonldWriter.setWriterConfig(getWriterConfig());
		jsonldWriter.getWriterConfig().set(BasicWriterSettings.PRETTY_PRINT, false);
		return jsonldWriter;
	}

	@Override
	public void handleStatement(Statement st) throws RDFHandlerException {
		bufferedGroupingRDFHandler.handleStatement(st);
	}

	@Override
	public void startRDF() throws RDFHandlerException {
		bufferedGroupingRDFHandler.startRDF();
	}

	@Override
	public void endRDF() throws RDFHandlerException {
		bufferedGroupingRDFHandler.endRDF();
	}

	/**
	 * Remembers the namespace so that it can be passed to the delegate writer of every group written afterwards.
	 */
	@Override
	public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
		namespacesBuffer.put(prefix, uri);
	}

	@Override
	public void handleComment(String comment) throws RDFHandlerException {
		// comments are not handled by JSON-LD Writer, so do nothing
	}

	@Override
	public RDFFormat getRDFFormat() {
		return RDFFormat.NDJSONLD;
	}

	/**
	 * @return the settings this writer declares as supported
	 */
	@Override
	public Collection<RioSetting<?>> getSupportedSettings() {
		Collection<RioSetting<?>> result = new HashSet<>(Arrays.asList(
				BasicWriterSettings.BASE_DIRECTIVE,
				JSONLDSettings.COMPACT_ARRAYS,
				JSONLDSettings.HIERARCHICAL_VIEW,
				JSONLDSettings.JSONLD_MODE,
				JSONLDSettings.PRODUCE_GENERALIZED_RDF,
				JSONLDSettings.USE_RDF_TYPE,
				JSONLDSettings.USE_NATIVE_TYPES
		));
		return result;
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*******************************************************************************
* Copyright (c) 2021 Eclipse RDF4J contributors.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.rio.ndjsonld;

import java.io.OutputStream;
import java.io.Writer;

import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.RDFWriterFactory;

/**
 * An {@link RDFWriterFactory} that creates instances of {@link NDJSONLDWriter}.
 */
public class NDJSONLDWriterFactory implements RDFWriterFactory {

	/**
	 * @return {@link RDFFormat#NDJSONLD}
	 */
	@Override
	public RDFFormat getRDFFormat() {
		final RDFFormat supported = RDFFormat.NDJSONLD;
		return supported;
	}

	/**
	 * @param out the stream the new writer writes to
	 * @return a new {@link NDJSONLDWriter} targeting {@code out}
	 */
	@Override
	public RDFWriter getWriter(OutputStream out) {
		final RDFWriter created = new NDJSONLDWriter(out);
		return created;
	}

	/**
	 * @param out     the stream the new writer writes to
	 * @param baseURI the base URI passed to the new writer
	 * @return a new {@link NDJSONLDWriter} targeting {@code out}
	 */
	@Override
	public RDFWriter getWriter(OutputStream out, String baseURI) {
		final RDFWriter created = new NDJSONLDWriter(out, baseURI);
		return created;
	}

	/**
	 * @param writer the character sink the new writer writes to
	 * @return a new {@link NDJSONLDWriter} targeting {@code writer}
	 */
	@Override
	public RDFWriter getWriter(Writer writer) {
		final RDFWriter created = new NDJSONLDWriter(writer);
		return created;
	}

	/**
	 * @param writer  the character sink the new writer writes to
	 * @param baseURI the base URI passed to the new writer
	 * @return a new {@link NDJSONLDWriter} targeting {@code writer}
	 */
	@Override
	public RDFWriter getWriter(Writer writer, String baseURI) {
		final RDFWriter created = new NDJSONLDWriter(writer, baseURI);
		return created;
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package org.eclipse.rdf4j.rio.ndjsonld;
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
org.eclipse.rdf4j.rio.jsonld.JSONLDParserFactory
org.eclipse.rdf4j.rio.ndjsonld.NDJSONLDParserFactory

Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
org.eclipse.rdf4j.rio.jsonld.JSONLDWriterFactory
org.eclipse.rdf4j.rio.ndjsonld.NDJSONLDWriterFactory
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*******************************************************************************
* Copyright (c) 2021 Eclipse RDF4J contributors.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.rio.ndjsonld;

import java.io.OutputStream;

import org.eclipse.rdf4j.rio.AbstractParserHandlingTest;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.RDFWriter;

/**
 * Runs the shared {@link AbstractParserHandlingTest} suite against the NDJSON-LD parser, using the NDJSON-LD writer
 * to produce the test input.
 */
public class NDJSONLDParserHandlerTest extends AbstractParserHandlingTest {

	/**
	 * @param output the stream the writer under test writes to
	 * @return a new {@link NDJSONLDWriter} on {@code output}
	 */
	@Override
	protected RDFWriter createWriter(OutputStream output) {
		final RDFWriter writer = new NDJSONLDWriter(output);
		return writer;
	}

	/**
	 * @return a new {@link NDJSONLDParser}
	 */
	@Override
	protected RDFParser getParser() {
		final RDFParser parser = new NDJSONLDParser();
		return parser;
	}
}
Loading

0 comments on commit c59993e

Please sign in to comment.