From a182de7794287b06f416073b275a4b56069f8dfa Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 29 Oct 2025 15:05:33 +0100 Subject: [PATCH 01/14] init of parser with jsop as html parser --- build.gradle.kts | 3 ++ .../core/next/api/base/io/RDFFormat.java | 10 ++++-- .../next/impl/io/parser/rdfa/RDFaParser.java | 35 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java diff --git a/build.gradle.kts b/build.gradle.kts index 93fdfb7c1..348728b6d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -139,6 +139,9 @@ dependencies { implementation("com.typesafe.akka:akka-stream_2.13:2.6.20") // Akka Streams for reactive streams processing implementation("com.lightbend.akka:akka-stream-alpakka-xml_2.13:3.0.4") // Alpakka XML for XML processing with Akka Streams + // HTML parsing for RDFa + implementation("org.jsoup:jsoup:1.21.2") + // === Utilities === implementation("org.apache.commons:commons-text:1.13.1") // Text manipulation utilities (Commons Text) implementation("org.json:json:20250517") // JSON processing diff --git a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java index e16c3a92c..0b1568e3d 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java @@ -23,7 +23,6 @@ public class RDFFormat extends FileFormat { true, false); - public static final RDFFormat NTRIPLES = new RDFFormat( "N-Triples", List.of("nt"), @@ -66,6 +65,13 @@ public class RDFFormat extends FileFormat { false, true); + public static final RDFFormat RDFa = new RDFFormat( + "RDFa", + List.of("html"), + List.of("text/html", "application/xhtml+xml"), + true, + false); + /** * Constructs a new RDF format. 
* @@ -152,7 +158,7 @@ public static Optional byMimeType(String mimeType) { * @return An unmodifiable List of all RdfFormat constants. */ public static List all() { - return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG); + return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa); } @Override diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java new file mode 100644 index 000000000..08484ae67 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -0,0 +1,35 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; +import org.jsoup.Jsoup; + +import java.io.InputStream; +import java.io.Reader; + +public class RDFaParser extends AbstractRDFParser { + protected RDFaParser(Model model, ValueFactory factory) { + super(model, factory); + } + + protected RDFaParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + } + + @Override + public RDFFormat getRDFFormat() { + return RDFFormat.RDFa; + } + + @Override + public void parse(InputStream in, String baseURI) { + } + + @Override + public void parse(Reader reader, String baseURI) { + + } +} From 4d0b1e30829f3c7de71c36e52d0483d9b460fa90 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 31 Oct 2025 11:53:43 +0100 Subject: [PATCH 02/14] Basic structure based on official presentation --- .../rdfa/RDFaEvaluationContextHandler.java | 64 +++++++++++++ .../next/impl/io/parser/rdfa/RDFaParser.java | 35 ++++++- .../rdfa/model/RDFaIncompleteStatement.java | 94 +++++++++++++++++++ .../impl/io/parser/rdfa/RDFaParserTest.java | 26 
+++++ 4 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java create mode 100644 src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java new file mode 100644 index 000000000..7831ac01d --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java @@ -0,0 +1,64 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; + +import java.util.*; + +/** + * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. + * @see RDFa recommandation + */ +public class RDFaEvaluationContextHandler { + + private ValueFactory factory; + + /** + * This will usually be the URL of the document being processed, but it could be some other URL, set by some other mechanism, such as the XHTML base element. The important thing is that it establishes a URL against which relative paths can be resolved. + */ + private IRI baseIri; + + /** + * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. + */ + private Resource parentSubjectResource ; + + /** + * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. 
Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. + */ + private Resource parentObjectResource = null; + + /** + * An index of locally defined IRI prefixes + */ + private Map uriMappings = new HashMap<>(); + + /** + * Set of statement in the process of building. + */ + private Set incompleteStatement = new HashSet<>(); + + /** + * The language of the document. Note that there is no default language. + */ + private String language = null; + + private Statement incompleteStatementToStatement(RDFaIncompleteStatement incompleteStatement) { + Objects.requireNonNull(incompleteStatement.getSubject(), "Null subject, IncompleteStatement can only be converted if all its component are non-null."); + Objects.requireNonNull(incompleteStatement.getPredicate(), "Null predicate, IncompleteStatement can only be converted if all its component are non-null."); + Objects.requireNonNull(incompleteStatement.getObject(), "Null object, IncompleteStatement can only be converted if all its component are non-null."); + + return factory.createStatement(incompleteStatement.getSubject(), incompleteStatement.getPredicate(), incompleteStatement.getObject()); + } + + public boolean isRecursive() { + return recursive; + } + + public void setRecursive(boolean recursive) { + this.recursive = recursive; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 08484ae67..185c1a31f 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -1,14 +1,22 @@ package fr.inria.corese.core.next.impl.io.parser.rdfa; import fr.inria.corese.core.next.api.Model; +import 
fr.inria.corese.core.next.api.Value; import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; public class RDFaParser extends AbstractRDFParser { protected RDFaParser(Model model, ValueFactory factory) { @@ -26,10 +34,35 @@ public RDFFormat getRDFFormat() { @Override public void parse(InputStream in, String baseURI) { + try { + Document document = Jsoup.parse(in, null, baseURI); + + document.stream().iterator() + } catch (IOException e) { + throw new ParsingErrorException("Error during parsing of HTML document", e); + } + + + } + + /** + * + * @param element + * @param context + * @param recursive Processing generally continues recursively through the entire tree of elements available. However, if an author indicates that some branch of the tree should be treated as an XML literal, no further processing should take place on that branch, and setting this flag to false would have that effect. + * @param skipElement Flag thet indicates whether the [current element] can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. 
+ * @param newSubject A [new subject] value, which once calculated will set the [parent subject] property in an [evaluation context], as well as being used to complete any [incomplete triple]s + */ + private void processElement(Element element, RDFaEvaluationContextHandler context, boolean recursive, boolean skipElement, Value newSubject) { + + + } + + private void processElement(Element element, RDFaEvaluationContextHandler context) { + processElement(element, context, true, false, this.) } @Override public void parse(Reader reader, String baseURI) { - } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java new file mode 100644 index 000000000..bff3564a4 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java @@ -0,0 +1,94 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +/** + * This class represents triples in the process of creation during the chaining of element in an RDFa document. 
+ */ +public class RDFaIncompleteStatement { + + private Resource subject = null; + private IRI predicate = null; + private Value object = null; + + public RDFaIncompleteStatement() { + + } + + public RDFaIncompleteStatement(IRI predicate) { + + } + + public Resource getSubject() { + return subject; + } + + public void setSubject(Resource subject) { + this.subject = subject; + } + + public IRI getPredicate() { + return predicate; + } + + public void setPredicate(IRI predicate) { + this.predicate = predicate; + } + + public Value getObject() { + return object; + } + + public void setObject(Value object) { + this.object = object; + } + + public boolean hasSubject() { + return this.getSubject() != null; + } + + public boolean hasPredicate() { + return this.getPredicate() != null; + } + + public boolean hasObject() { + return this.getObject() != null; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + if(this.hasSubject()) { + sb.append(this.getSubject().toString()); + } else { + sb.append("?"); + } + sb.append(" "); + + if(this.hasPredicate()) { + sb.append(this.getPredicate().toString()); + } else { + sb.append("?"); + } + sb.append(" "); + + if(this.hasObject()) { + sb.append(this.getObject().toString()); + } else { + sb.append("?"); + } + + return sb.toString(); + } + + @Override + public int hashCode() { + int hash = 7; + hash = 31 * hash + (getSubject() == null ? 0 : getSubject().hashCode()); + hash = 31 * hash + (getPredicate() == null ? 0 : getPredicate().hashCode()); + hash = 31 * hash + (getObject() == null ? 
0 : getObject().hashCode()); + return hash; + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java new file mode 100644 index 000000000..dcb30eaea --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -0,0 +1,26 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import org.junit.jupiter.api.Test; + +public class RDFaParserTest { + + @Test + public void basicDocTest() { + String docString = """ + + + + + Test 0001 + + +

This photo was taken by Mark Birbeck.

+ +"""; + + /* + "Mark Birbeck" . + */ + } +} From 4964c7129965d8793fde20802ca6240a2f2d6539 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 31 Oct 2025 16:35:54 +0100 Subject: [PATCH 03/14] Better processing init --- .../rdfa/RDFaEvaluationContextHandler.java | 58 ++++++++++--- .../next/impl/io/parser/rdfa/RDFaParser.java | 82 +++++++++++++++---- .../io/parser/rdfa/RDFaParserOptions.java | 46 +++++++++++ 3 files changed, 158 insertions(+), 28 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java index 7831ac01d..48ed1e0f1 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java @@ -14,8 +14,6 @@ */ public class RDFaEvaluationContextHandler { - private ValueFactory factory; - /** * This will usually be the URL of the document being processed, but it could be some other URL, set by some other mechanism, such as the XHTML base element. The important thing is that it establishes a URL against which relative paths can be resolved. 
*/ @@ -46,19 +44,57 @@ public class RDFaEvaluationContextHandler { */ private String language = null; - private Statement incompleteStatementToStatement(RDFaIncompleteStatement incompleteStatement) { - Objects.requireNonNull(incompleteStatement.getSubject(), "Null subject, IncompleteStatement can only be converted if all its component are non-null."); - Objects.requireNonNull(incompleteStatement.getPredicate(), "Null predicate, IncompleteStatement can only be converted if all its component are non-null."); - Objects.requireNonNull(incompleteStatement.getObject(), "Null object, IncompleteStatement can only be converted if all its component are non-null."); + public RDFaEvaluationContextHandler(IRI baseIri) { + this.baseIri = baseIri; + this.parentSubjectResource = baseIri; + } + + public RDFaEvaluationContextHandler(IRI baseIri, IRI parentSubjectResource) { + this.baseIri = baseIri; + this.parentSubjectResource = parentSubjectResource; + } + + public IRI getBaseIri() { + return baseIri; + } + + public void setBaseIri(IRI baseIri) { + this.baseIri = baseIri; + } + + public Resource getParentSubjectResource() { + return parentSubjectResource; + } + + public void setParentSubjectResource(Resource parentSubjectResource) { + this.parentSubjectResource = parentSubjectResource; + } + + public Resource getParentObjectResource() { + return parentObjectResource; + } + + public void setParentObjectResource(Resource parentObjectResource) { + this.parentObjectResource = parentObjectResource; + } + + public Map getUriMappings() { + return uriMappings; + } - return factory.createStatement(incompleteStatement.getSubject(), incompleteStatement.getPredicate(), incompleteStatement.getObject()); + public void setUriMappings(Map uriMappings) { + this.uriMappings = uriMappings; } - public boolean isRecursive() { - return recursive; + /** + * @param prefix the prefix WITHOUT ":" + * @return the IRI associated to the prefix in this context + */ + public IRI getUriMapping(String prefix) { + 
return this.uriMappings.get(prefix); } - public void setRecursive(boolean recursive) { - this.recursive = recursive; + public void addUriMapping(String prefix, IRI prefixIri) { + this.uriMappings.put(prefix, prefixIri); } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 185c1a31f..b1758db9a 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -1,26 +1,26 @@ package fr.inria.corese.core.next.impl.io.parser.rdfa; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.api.Value; -import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; -import fr.inria.corese.core.next.api.io.parser.RDFParserBaseIRIOptions; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; import org.jsoup.Jsoup; +import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.Reader; -import java.nio.charset.StandardCharsets; +import java.util.Iterator; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicReference; public class RDFaParser extends AbstractRDFParser { protected RDFaParser(Model model, ValueFactory factory) { - super(model, factory); + this(model, factory, new RDFaParserOptions.Builder().build()); } protected RDFaParser(Model model, ValueFactory factory, IOOptions config) { @@ 
-33,36 +33,84 @@ public RDFFormat getRDFFormat() { } @Override - public void parse(InputStream in, String baseURI) { + public void parse(InputStream in, String baseURIString) { try { - Document document = Jsoup.parse(in, null, baseURI); + Document document = Jsoup.parse(in, null, baseURIString); - document.stream().iterator() - } catch (IOException e) { + IRI baseIri = getValueFactory().createIRI(baseURIString); + processDocument(document, baseIri); + } catch (Exception e) { throw new ParsingErrorException("Error during parsing of HTML document", e); } + } + + /** + * Intermediary function to configure the processing of a document using some basic HTML traversal to determine if a baseIri has been defined in the document. + * If the baseIri in argument is the Corese default base IRI, the value stored in the document is used instead. + * @param document Jsoup HTML document to be processed + * @param baseIri An IRI object + */ + private void processDocument(Document document, IRI baseIri) { + + // If the base Iri in argument is not the default baseIri, then we take it, else we use the one in the document + if(baseIri.stringValue().equals(ParserConstants.getDefaultBaseURI())) { + // Looking for the node in the document + IRI baseIriFromXml = baseIri; + Iterator baseElementIterator = document.stream().filter(element -> element.nameIs("base")).iterator(); + while(baseElementIterator.hasNext()) { + Element baseElement = baseElementIterator.next(); + Attribute baseElementHrefAttribute = baseElement.attribute("href"); + if(baseElementHrefAttribute != null) { + String baseIriString = baseElementHrefAttribute.getValue(); + baseIriFromXml = getValueFactory().createIRI(baseIriString); + } + }; + + baseIri = this.getValueFactory().createIRI(baseIriFromXml.stringValue()); + } + + Iterator elementIt = document.stream().iterator(); + while (elementIt.hasNext()) { + Element element = elementIt.next(); + processElement(element, new RDFaEvaluationContextHandler(baseIri), baseIri); + } 
} /** * - * @param element - * @param context + * @param element Current element + * @param context Active context * @param recursive Processing generally continues recursively through the entire tree of elements available. However, if an author indicates that some branch of the tree should be treated as an XML literal, no further processing should take place on that branch, and setting this flag to false would have that effect. * @param skipElement Flag thet indicates whether the [current element] can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. * @param newSubject A [new subject] value, which once calculated will set the [parent subject] property in an [evaluation context], as well as being used to complete any [incomplete triple]s + * @see
RDFa processing in details */ - private void processElement(Element element, RDFaEvaluationContextHandler context, boolean recursive, boolean skipElement, Value newSubject) { + private void processElement(Element element, RDFaEvaluationContextHandler context, boolean recursive, boolean skipElement, Resource newSubject, Value currentObject) { } - private void processElement(Element element, RDFaEvaluationContextHandler context) { - processElement(element, context, true, false, this.) + /** + * Surcharge function that initialize the flags and subject and objet to their initial values for processing + * @param element + * @param context + * @param newSubject + */ + private void processElement(Element element, RDFaEvaluationContextHandler context, Resource newSubject) { + processElement(element, context, true, false, newSubject, null); } @Override public void parse(Reader reader, String baseURI) { } + + private Statement incompleteStatementToStatement(RDFaIncompleteStatement incompleteStatement) { + Objects.requireNonNull(incompleteStatement.getSubject(), "Null subject, IncompleteStatement can only be converted if all its component are non-null."); + Objects.requireNonNull(incompleteStatement.getPredicate(), "Null predicate, IncompleteStatement can only be converted if all its component are non-null."); + Objects.requireNonNull(incompleteStatement.getObject(), "Null object, IncompleteStatement can only be converted if all its component are non-null."); + + return this.getValueFactory().createStatement(incompleteStatement.getSubject(), incompleteStatement.getPredicate(), incompleteStatement.getObject()); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java new file mode 100644 index 000000000..6ad4ae52f --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java @@ -0,0 +1,46 @@ +package 
fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.base.io.AbstractIOOptions; +import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; + +/** + * Configuration class for the parsing of RDFa HTML documents + */ +public class RDFaParserOptions extends AbstractIOOptions implements BaseIRIOptions { + + private final RDFaParserOptions.Builder builder; + private final String baseIRI; + + protected RDFaParserOptions(RDFaParserOptions.Builder builder) { + this.builder = builder; + this.baseIRI = this.builder.baseIRI; + } + + @Override + public String getBaseIRI() { + return this.baseIRI; + } + + public static class Builder extends AbstractIOOptions.Builder { + + protected String baseIRI = ParserConstants.getDefaultBaseURI(); + + @Override + public RDFaParserOptions build() { + return new RDFaParserOptions(this); + } + + /** + * Set the base IRI used for relative IRI processing + * + * @param baseIRI An IRI + * @return this + */ + public RDFaParserOptions.Builder baseIRI(String baseIRI) { + this.baseIRI = baseIRI; + return this; + } + + } +} \ No newline at end of file From 636eaebeb2bc1dd919368cf2c35db493ff3477b0 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Tue, 4 Nov 2025 15:21:46 +0100 Subject: [PATCH 04/14] Parsing up to step 5 --- build.gradle.kts | 3 + .../next/impl/io/parser/ParserFactory.java | 5 + .../io/parser/rdfa/RDFaEvaluationContext.java | 176 +++++++++++ .../rdfa/RDFaEvaluationContextHandler.java | 100 ------ .../next/impl/io/parser/rdfa/RDFaParser.java | 288 ++++++++++++++++-- .../core/next/api/base/io/RDFFormatTest.java | 4 +- .../impl/io/parser/rdfa/RDFaParserTest.java | 106 ++++++- 7 files changed, 553 insertions(+), 129 deletions(-) create mode 100644 src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java delete mode 100644 
src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java diff --git a/build.gradle.kts b/build.gradle.kts index 348728b6d..a7a028f5f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -153,6 +153,9 @@ dependencies { testRuntimeOnly("org.junit.platform:junit-platform-launcher:1.13.2") // JUnit platform launcher (runtime) testImplementation("org.mockito:mockito-core:5.18.0") // Mockito core for mocking in tests testImplementation("org.mockito:mockito-junit-jupiter:5.18.0") // Mockito integration with JUnit Jupiter + testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.0") // Log4j2 core for internal logging + testRuntimeOnly("org.apache.logging.log4j:log4j-slf4j2-impl:2.25.0") // SLF4J binding for Log4j2 (runtime) + } ///////////////////////// diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 6057b2026..29f5b11fb 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.NQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.NTriplesParser; +import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFaParser; import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser; import fr.inria.corese.core.next.impl.io.parser.turtle.TurtleParser; import fr.inria.corese.core.next.impl.io.parser.trig.TriGParser; @@ -52,6 +53,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new TriGParser(model, factory, config); } else if(format == RDFFormat.RDFC_1_0) { return new NQuadsParser(model, factory, config); + } else if (format == RDFFormat.RDFa) { + return new RDFaParser(model, factory, config); } throw new 
IllegalArgumentException("Unsupported format: " + format); } @@ -77,6 +80,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new RDFXMLParser(model, factory); } else if (format == RDFFormat.TRIG) { return new TriGParser(model, factory); + } else if (format == RDFFormat.RDFa) { + return new RDFaParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java new file mode 100644 index 000000000..088cf6c6a --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java @@ -0,0 +1,176 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; + +import java.util.*; + +/** + * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. + * @see RDFa recommandation + */ +public class RDFaEvaluationContext { + + /** + * This will usually be the URL of the document being processed, but it could be some other URL, set by some other mechanism, such as the XHTML base element. The important thing is that it establishes a URL against which relative paths can be resolved. + */ + private IRI baseIri; + + /** + * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. + */ + private Resource parentSubjectResource ; + + /** + * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. 
Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. + */ + private Resource parentObjectResource = null; + + /** + * An index of locally defined IRI prefixes + */ + private Map uriMappings = new HashMap<>(); + + /** + * Set of statement in the process of building. + */ + private Set incompleteStatement = new HashSet<>(); + + /** + * The language of the document. Note that there is no default language. + */ + private String language = null; + + public RDFaEvaluationContext(IRI baseIri) { + this.baseIri = baseIri; + this.parentSubjectResource = baseIri; + } + + public RDFaEvaluationContext(IRI baseIri, IRI parentSubjectResource) { + this.baseIri = baseIri; + this.parentSubjectResource = parentSubjectResource; + } + + public RDFaEvaluationContext(RDFaEvaluationContext context) { + this.baseIri = context.baseIri; + this.parentSubjectResource = context.parentSubjectResource; + this.parentObjectResource = context.parentObjectResource; + this.uriMappings = new HashMap<>(context.uriMappings); + this.incompleteStatement = new HashSet<>(context.incompleteStatement); + this.language = context.language; + } + + public IRI baseIri() { + return baseIri; + } + + public RDFaEvaluationContext baseIri(IRI baseIri) { + this.baseIri = baseIri; + return this; + } + + public RDFaEvaluationContext incompleteStatements(Set incompleteStatement) { + this.incompleteStatement = new HashSet<>(incompleteStatement); + return this; + } + + public Iterator getIncompleteStatementIterator() { + return this.incompleteStatement.iterator(); + } + + public RDFaEvaluationContext addStatementWithoutSubject(IRI property, Value object) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setObject(object); + this.incompleteStatement.add(newStatement); + return 
this; + } + + public RDFaEvaluationContext addStatementWithoutObject(Resource subject, IRI property) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setSubject(subject); + this.incompleteStatement.add(newStatement); + return this; + } + + public void clearIncompleteStatements() { + this.incompleteStatement.clear(); + } + + public Resource parentSubjectResource() { + return parentSubjectResource; + } + + public RDFaEvaluationContext parentSubjectResource(Resource parentSubjectResource) { + this.parentSubjectResource = parentSubjectResource; + return this; + } + + public Resource parentObjectResource() { + return parentObjectResource; + } + + public RDFaEvaluationContext parentObjectResource(Resource parentObjectResource) { + this.parentObjectResource = parentObjectResource; + return this; + } + + public Map uriMappings() { + return uriMappings; + } + + public RDFaEvaluationContext uriMappings(Map uriMappings) { + this.uriMappings = uriMappings; + return this; + } + + public boolean hasUriMapping(String prefix) { + return this.uriMappings.containsKey(prefix); + } + + /** + * @param prefix the prefix WITHOUT ":" + * @return the IRI associated to the prefix in this context + */ + public IRI uriMapping(String prefix) { + return this.uriMappings.get(prefix); + } + + public void addUriMapping(String prefix, IRI prefixIri) { + this.uriMappings.put(prefix, prefixIri); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("BaseURI: ").append(this.baseIri.stringValue()).append(" "); + sb.append("Mappings: ["); + this.uriMappings.forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); + sb.append("] "); + if(this.parentSubjectResource != null) { + sb.append("Subject:").append(this.parentSubjectResource.stringValue()).append(" "); + } else { + sb.append("Subject:").append((Object) null).append(" "); + } + 
if(this.parentObjectResource != null) { + sb.append("Object: ").append(this.parentObjectResource.stringValue()).append(" "); + } else { + sb.append("Object: ").append((Object) null).append(" "); + } + if(! this.incompleteStatement.isEmpty()) { + sb.append(this.incompleteStatement.size()).append(" incomplete statements."); + } + + return sb.toString(); + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java deleted file mode 100644 index 48ed1e0f1..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContextHandler.java +++ /dev/null @@ -1,100 +0,0 @@ -package fr.inria.corese.core.next.impl.io.parser.rdfa; - -import fr.inria.corese.core.next.api.IRI; -import fr.inria.corese.core.next.api.Resource; -import fr.inria.corese.core.next.api.Statement; -import fr.inria.corese.core.next.api.ValueFactory; -import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; - -import java.util.*; - -/** - * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. - * @see RDFa recommandation - */ -public class RDFaEvaluationContextHandler { - - /** - * This will usually be the URL of the document being processed, but it could be some other URL, set by some other mechanism, such as the XHTML base element. The important thing is that it establishes a URL against which relative paths can be resolved. - */ - private IRI baseIri; - - /** - * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. 
- */ - private Resource parentSubjectResource ; - - /** - * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. - */ - private Resource parentObjectResource = null; - - /** - * An index of locally defined IRI prefixes - */ - private Map uriMappings = new HashMap<>(); - - /** - * Set of statement in the process of building. - */ - private Set incompleteStatement = new HashSet<>(); - - /** - * The language of the document. Note that there is no default language. - */ - private String language = null; - - public RDFaEvaluationContextHandler(IRI baseIri) { - this.baseIri = baseIri; - this.parentSubjectResource = baseIri; - } - - public RDFaEvaluationContextHandler(IRI baseIri, IRI parentSubjectResource) { - this.baseIri = baseIri; - this.parentSubjectResource = parentSubjectResource; - } - - public IRI getBaseIri() { - return baseIri; - } - - public void setBaseIri(IRI baseIri) { - this.baseIri = baseIri; - } - - public Resource getParentSubjectResource() { - return parentSubjectResource; - } - - public void setParentSubjectResource(Resource parentSubjectResource) { - this.parentSubjectResource = parentSubjectResource; - } - - public Resource getParentObjectResource() { - return parentObjectResource; - } - - public void setParentObjectResource(Resource parentObjectResource) { - this.parentObjectResource = parentObjectResource; - } - - public Map getUriMappings() { - return uriMappings; - } - - public void setUriMappings(Map uriMappings) { - this.uriMappings = uriMappings; - } - - /** - * @param prefix the prefix WITHOUT ":" - * @return the IRI associated to the prefix in this context - */ - public IRI getUriMapping(String prefix) 
{ - return this.uriMappings.get(prefix); - } - - public void addUriMapping(String prefix, IRI prefixIri) { - this.uriMappings.put(prefix, prefixIri); - } -} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index b1758db9a..1c15416b1 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -4,6 +4,8 @@ import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.impl.common.util.IRIUtils; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; @@ -11,19 +13,36 @@ import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.InputStream; import java.io.Reader; -import java.util.Iterator; -import java.util.Objects; -import java.util.concurrent.atomic.AtomicReference; +import java.util.*; public class RDFaParser extends AbstractRDFParser { - protected RDFaParser(Model model, ValueFactory factory) { + + private static final Logger logger = LoggerFactory.getLogger(RDFaParser.class); + + private static final String REL_ATTR = "rel"; + private static final String REV_ATTR = "rev"; + private static final String CONTENT_ATTR = "content"; + private static final String HREF_ATTR = "href"; + private static final String SRC_ATTR = "src"; + private static final String ABOUT_ATTR = "about"; + private static final String PROPERTY_ATTR = "property"; + private static 
final String RESOURCE_ATTR = "resource"; + private static final String DATATYPE_ATTR = "datatype"; + private static final String TYPEOF_ATTR = "typeof"; + private static final String LANG_ATTR = "xml:lang"; + + private static final String XMLNS_PREFIX = "xmlns"; + + public RDFaParser(Model model, ValueFactory factory) { this(model, factory, new RDFaParserOptions.Builder().build()); } - protected RDFaParser(Model model, ValueFactory factory, IOOptions config) { + public RDFaParser(Model model, ValueFactory factory, IOOptions config) { super(model, factory, config); } @@ -47,59 +66,216 @@ public void parse(InputStream in, String baseURIString) { /** * Intermediary function to configure the processing of a document using some basic HTML traversal to determine if a baseIri has been defined in the document. * If the baseIri in argument is the Corese default base IRI, the value stored in the document is used instead. + * * @param document Jsoup HTML document to be processed - * @param baseIri An IRI object + * @param baseIri An IRI object */ private void processDocument(Document document, IRI baseIri) { - - - // If the base Iri in argument is not the default baseIri, then we take it, else we use the one in the document - if(baseIri.stringValue().equals(ParserConstants.getDefaultBaseURI())) { + if (baseIri.stringValue().equals(ParserConstants.getDefaultBaseURI())) { // Looking for the node in the document IRI baseIriFromXml = baseIri; Iterator baseElementIterator = document.stream().filter(element -> element.nameIs("base")).iterator(); - while(baseElementIterator.hasNext()) { + while (baseElementIterator.hasNext()) { Element baseElement = baseElementIterator.next(); Attribute baseElementHrefAttribute = baseElement.attribute("href"); - if(baseElementHrefAttribute != null) { + if (baseElementHrefAttribute != null) { String baseIriString = baseElementHrefAttribute.getValue(); baseIriFromXml = getValueFactory().createIRI(baseIriString); } - }; + } + ; baseIri = 
this.getValueFactory().createIRI(baseIriFromXml.stringValue()); } - Iterator elementIt = document.stream().iterator(); - while (elementIt.hasNext()) { - Element element = elementIt.next(); - processElement(element, new RDFaEvaluationContextHandler(baseIri), baseIri); + for (Element element : document.children()) { + processElement(element, new RDFaEvaluationContext(baseIri), baseIri); } } /** * - * @param element Current element - * @param context Active context - * @param recursive Processing generally continues recursively through the entire tree of elements available. However, if an author indicates that some branch of the tree should be treated as an XML literal, no further processing should take place on that branch, and setting this flag to false would have that effect. + * @param element Current element + * @param context Active context + * @param recursive Processing generally continues recursively through the entire tree of elements available. However, if an author indicates that some branch of the tree should be treated as an XML literal, no further processing should take place on that branch, and setting this flag to false would have that effect. * @param skipElement Flag thet indicates whether the [current element] can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. 
- * @param newSubject A [new subject] value, which once calculated will set the [parent subject] property in an [evaluation context], as well as being used to complete any [incomplete triple]s * @see RDFa processing in details */ - private void processElement(Element element, RDFaEvaluationContextHandler context, boolean recursive, boolean skipElement, Resource newSubject, Value currentObject) { + private void processElement(Element element, RDFaEvaluationContext context, boolean recursive, boolean skipElement) { + logger.debug("processElement({}, {}, ...)", element, context); + + Resource newSubject = null; + Value currentObject = null; + Map currentMappings = context.uriMappings(); + Set incompleteStatementSet = new HashSet<>(); + String language = context.getLanguage(); + // Looking for namespace declarations + // Namespace declaration are done using the XML namespace declaration mechanism, that can be seen as an attributes prefixed by "xmlns" and looks like this: "xmlns:prefix=namespace" + element.attributes().forEach(attribute -> { + logger.debug("Looking at attribute {}", attribute.getKey()); + if (attribute.getKey().startsWith(XMLNS_PREFIX)) { + String prefixName = attribute.localName(); + String prefixNamespace = attribute.getValue(); + logger.debug("Prefix found {} = {}", prefixName, prefixNamespace); + context.addUriMapping(prefixName, getValueFactory().createIRI(prefixNamespace)); + } + }); + + if (element.attribute(LANG_ATTR) != null) { + String langString = element.attr(LANG_ATTR); + language = langString; + } + if(element.attribute(REL_ATTR) == null && element.attribute(REV_ATTR) == null) { + // [new subject] is set to the URI obtained from the first match from the following rules: + if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); + if 
(newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + logger.debug("@about found: {}", newSubjectResource.get().stringValue()); + } + } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. + Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + logger.debug("@src found: {}", newSubjectResource.get().stringValue()); + } + } else if (element.attribute(RESOURCE_ATTR) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + logger.debug("@resource found: {}", newSubjectResource.get().stringValue()); + } + } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. + Optional newSubjectResource = getResourceFromElementAttribute(element, HREF_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + logger.debug("href found: {}", newSubjectResource.get()); + } + } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; + newSubject = context.baseIri(); + } else if (element.attribute(TYPEOF_ATTR) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. 
+ newSubject = this.getValueFactory().createBNode(); + } else if (context.parentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to the value of [parent object]. Additionally, if @property is not present then the [skip element] flag is set to 'true'; + newSubject = context.parentObjectResource(); + if(element.attribute(PROPERTY_ATTR) == null) { + skipElement = true; + } + } + } else { + // [new subject] is set to the URI obtained from the first match from the following rules: + if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + logger.debug("@about found: {}", newSubjectResource.get()); + } + } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. + Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + logger.debug("@src found: {}", newSubjectResource.get()); + } + } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; + newSubject = context.baseIri(); + } else if (element.attribute(TYPEOF_ATTR) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. + newSubject = this.getValueFactory().createBNode(); + } else if(context.parentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to that. 
+ newSubject = context.parentObjectResource(); + } + + // Then the [current object resource] is set to the URI obtained from the first match from the following rules: + if (element.attribute(RESOURCE_ATTR) != null) { // by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + if (newObjectResource.isPresent()) { + currentObject = newObjectResource.get(); + logger.debug("@resource found: {}", newObjectResource.get().stringValue()); + } + } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. + Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + if (newObjectResource.isPresent()) { + currentObject = newObjectResource.get(); + logger.debug("href found: {}", newObjectResource.get().stringValue()); + } + } + } + + if (newSubject != null) + logger.debug("New subject resolved to {}", newSubject.stringValue()); + if(currentObject != null) + logger.debug("New object resolved to {}", currentObject.stringValue()); + + // If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; + if(newSubject != null) { + if(element.attribute(TYPEOF_ATTR) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. 
If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: + Optional typeIri = getResourceFromElementAttribute(element, TYPEOF_ATTR, context); + if (typeIri.isPresent()) { + logger.debug("Typeof found: {}", typeIri.get()); + logger.debug("Type of resource resolved to {} {}", typeIri.get().stringValue(), context); + Statement stat = this.getValueFactory().createStatement(newSubject, RDF.type.getIRI(), typeIri.get()); + logger.debug("Statement added: {} {} {}", stat.getSubject().stringValue(), stat.getPredicate().stringValue(), stat.getObject().stringValue()); + this.getModel().add(stat); + } else { + throw new ParsingErrorException("Typeof statement uses unknown type " + element.attr(TYPEOF_ATTR)); + } + } + } + // If however [current object resource] was set to null, but there are predicates present, then they must be stored as [incomplete triple]s, pending the discovery of a subject that can be used as the object. 
Also, [current object resource] should be set to a newly created [bnode]; + if (currentObject == null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { + currentObject = getValueFactory().createBNode(); + if(element.attribute(REL_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI) propertyOpt.get(); + RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); + statement.setSubject(newSubject); + incompleteStatementSet.add(statement); + } + } + if(element.attribute(REV_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI) propertyOpt.get(); + RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); + statement.setObject(newSubject); + incompleteStatementSet.add(statement); + } + } + } + + if (element.attribute(TYPEOF_ATTR) != null) { + String typeIriString = element.attr(TYPEOF_ATTR); + logger.debug("Typeof found: {}", typeIriString); + if(context.parentSubjectResource().equals(context.baseIri())) { // Not current subjet was setup using about or src, so we are implicitly creating a blank node + context.parentSubjectResource(this.getValueFactory().createBNode()); + } + Optional typeIri = resolveStringResource(typeIriString, context); + if (typeIri.isPresent()) { + logger.debug("Type of resource resolved to {} {}", typeIri.get().stringValue(), context); + Statement stat = this.getValueFactory().createStatement(context.parentSubjectResource(), RDF.type.getIRI(), typeIri.get()); + logger.debug("Statement added: {} {} {}", stat.getSubject().stringValue(), stat.getPredicate().stringValue(), stat.getObject().stringValue()); + this.getModel().add(stat); + } else { + throw new ParsingErrorException("Typeof statement uses unknown type " + 
typeIriString); + } + } + + for (Element child : element.children()) { + processElement(child, context, recursive, skipElement); + } } /** * Surcharge function that initialize the flags and subject and objet to their initial values for processing + * * @param element * @param context * @param newSubject */ - private void processElement(Element element, RDFaEvaluationContextHandler context, Resource newSubject) { - processElement(element, context, true, false, newSubject, null); + private void processElement(Element element, RDFaEvaluationContext context, Resource newSubject) { + processElement(element, context, true, false); } @Override @@ -113,4 +289,68 @@ private Statement incompleteStatementToStatement(RDFaIncompleteStatement incompl return this.getValueFactory().createStatement(incompleteStatement.getSubject(), incompleteStatement.getPredicate(), incompleteStatement.getObject()); } + + /** + * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI + * + * @param stringResource the resource as stored in the attribute of the HTML element + * @param context the context of the element evalation + * @return the full IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise + */ + private Optional resolveStringResource(String stringResource, RDFaEvaluationContext context) { + logger.debug("Resolution of resource {}, {}", stringResource, context); + String resultString = stringResource; + if (resultString.startsWith("[") && resultString.endsWith("]")) { + resultString = resultString.replaceFirst("\\[", ""); + resultString = resultString.replaceFirst("]", ""); + } + + + if (stringUriIsCURIE(resultString)) { // CURIE + int colonIndex = resultString.indexOf(":"); + String prefixString = resultString.substring(0, colonIndex); + String localNameString = resultString.substring(colonIndex + 1); + logger.debug("CURIE with prefix: {} and local name: {}", prefixString, localNameString); + // Basic 
resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri + if (context.hasUriMapping(prefixString)) { + IRI namespaceIRI = context.uriMapping(prefixString); + + return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); + } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI + return Optional.of(this.getValueFactory().createIRI(context.baseIri().stringValue(), localNameString)); + } else { + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); + } + } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI + logger.debug("Standard IRI: {}", resultString); + return Optional.of(this.getValueFactory().createIRI(resultString)); + + } else if (resultString.startsWith("_:")) { // Blank Node + int colonIndex = resultString.indexOf(":"); + String localNameString = resultString.substring(colonIndex + 1); + logger.debug("Blank Node: _:{}", localNameString); + return Optional.of(this.getValueFactory().createBNode(localNameString)); + } + return Optional.empty(); + } + + /** + * Equivalent to test if it has a colon, and it is not a blank node + * + * @param stringIri + * @return + */ + private boolean stringUriIsCURIE(String stringIri) { + int colonIndex = stringIri.indexOf(":"); + return colonIndex > -1 && !stringIri.contains("://") && !stringIri.startsWith("_:") && !stringIri.startsWith("[_:"); + } + + private Optional getResourceFromElementAttribute(Element element, String attributeName, RDFaEvaluationContext context) { + if (element.attribute(attributeName) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + String newSubjectString = element.attr(attributeName); + return resolveStringResource(newSubjectString, context); + + } + return Optional.empty(); + } } diff --git a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java 
b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java index e5859edfa..4d4c1bfe3 100644 --- a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java @@ -298,7 +298,7 @@ void allFormats() { List allFormats = RDFFormat.all(); assertNotNull(allFormats, "List of all formats should not be null"); - assertEquals(6, allFormats.size(), "List should contain 5 predefined formats"); // TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG + assertEquals(8, allFormats.size(), "List should contain 5 predefined formats"); // TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG assertTrue(allFormats.contains(RDFFormat.TURTLE)); assertTrue(allFormats.contains(RDFFormat.NTRIPLES)); @@ -306,6 +306,8 @@ void allFormats() { assertTrue(allFormats.contains(RDFFormat.JSONLD)); assertTrue(allFormats.contains(RDFFormat.RDFXML)); assertTrue(allFormats.contains(RDFFormat.TRIG)); + assertTrue(allFormats.contains(RDFFormat.RDFa)); + assertTrue(allFormats.contains(RDFFormat.RDFC_1_0)); assertThrows(UnsupportedOperationException.class, () -> allFormats.add(RDFFormat.TURTLE), "The list returned by all() should be unmodifiable"); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index dcb30eaea..ddcadbe46 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -1,17 +1,33 @@ package fr.inria.corese.core.next.impl.io.parser.rdfa; +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.vocabulary.XSD; +import 
fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; +import java.io.ByteArrayInputStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + public class RDFaParserTest { + private static final ValueFactory factory = new CoreseAdaptedValueFactory(); + @Test public void basicDocTest() { - String docString = """ + String testDataString = """ + Test 0001 @@ -19,8 +35,90 @@ public void basicDocTest() { """; - /* - "Mark Birbeck" . - */ + Model testModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes())); + + IRI subject = factory.createIRI("http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/photo1.jpg"); + IRI predicate = factory.createIRI("http://purl.org/dc/elements/1.1/creator"); + Literal object = factory.createLiteral("Mark Birbeck"); + + assertTrue(testModel.contains(subject, predicate, object)); + } + + @Test + public void aboutTest() { + String testDataString = """ + + + + +

+ Hello, I'm Pierre. +

+ + """; + + Model testModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI subject = factory.createIRI("http://w3id.org/people/pierre-maillot"); + IRI object = factory.createIRI("http://xmlns.com/foaf/0.1/Person"); + + assertEquals(1, testModel.size()); + assertTrue(testModel.contains(subject, RDF.type.getIRI(), object)); + } + + @Test + public void basicChainTest() { + String testDataString = """ + + + + + +
+ Albert Einstein + 1879-03-14 +
+ Federal Republic of Germany +
+ + + """; + + Model testModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI dateOfBirth = factory.createIRI("http://dbpedia.org/property/dateOfBirth"); + IRI foafName = factory.createIRI("http://xmlns.com/foaf/0.1/name"); + IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); + IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); + IRI conventionalLongName = factory.createIRI("http://dbpedia.org/property/conventionalLongName"); + Literal aeName = factory.createLiteral("Albert Einstein"); + Literal aeDateOfBirth = factory.createLiteral("1879-03-14", XSD.xsdDate.getIRI()); + Literal gerLongName = factory.createLiteral("Federal Republic of Germany"); + + Statement aeNameStatement = factory.createStatement(albertEinstein, foafName, aeName); + Statement aeDateOfBirthStatement = factory.createStatement(albertEinstein, dateOfBirth, aeDateOfBirth); + Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); + Statement germanyNameStatement = factory.createStatement(germany, conventionalLongName, gerLongName); + + assertEquals(4, testModel.size()); + assertTrue(testModel.contains(aeNameStatement)); + assertTrue(testModel.contains(aeDateOfBirthStatement)); + assertTrue(testModel.contains(aeBirthPlaceStatement)); + assertTrue(testModel.contains(germanyNameStatement)); + } } From 8d7795d92077c9aee66b53eb4aeaf0eac16247b0 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 5 Nov 2025 15:05:07 +0100 Subject: [PATCH 05/14] Full algorithm implemented --- .../next/impl/io/parser/rdfa/RDFaParser.java | 132 ++++++++++++++---- .../rdfa/model/RDFaIncompleteStatement.java | 37 ++++- .../impl/io/parser/rdfa/RDFaParserTest.java | 3 +- 3 
files changed, 142 insertions(+), 30 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 1c15416b1..e47c26ff5 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -105,29 +105,35 @@ private void processDocument(Document document, IRI baseIri) { private void processElement(Element element, RDFaEvaluationContext context, boolean recursive, boolean skipElement) { logger.debug("processElement({}, {}, ...)", element, context); + // 1. First, the local values are initialized Resource newSubject = null; - Value currentObject = null; + Resource currentObject = null; + Literal currentObjectLiteral = null; Map currentMappings = context.uriMappings(); Set incompleteStatementSet = new HashSet<>(); String language = context.getLanguage(); + // 2. Next the [current element] is parsed for [URI mapping]s and these are added to the [local list of URI mappings]. 
Note that a [URI mapping] will simply overwrite any current mapping in the list that has the same name; // Looking for namespace declarations // Namespace declaration are done using the XML namespace declaration mechanism, that can be seen as an attributes prefixed by "xmlns" and looks like this: "xmlns:prefix=namespace" - element.attributes().forEach(attribute -> { - logger.debug("Looking at attribute {}", attribute.getKey()); + Iterator itAttribute = element.attributes().iterator(); + while(itAttribute.hasNext()) { + Attribute attribute = itAttribute.next(); if (attribute.getKey().startsWith(XMLNS_PREFIX)) { String prefixName = attribute.localName(); String prefixNamespace = attribute.getValue(); - logger.debug("Prefix found {} = {}", prefixName, prefixNamespace); + logger.debug("Mapping: {} = {}", prefixName, prefixNamespace); context.addUriMapping(prefixName, getValueFactory().createIRI(prefixNamespace)); } - }); + } + // 3. The [current element] is also parsed for any language information, and if present, [current language] is set accordingly; if (element.attribute(LANG_ATTR) != null) { String langString = element.attr(LANG_ATTR); language = langString; } + // 4. If the [current element] contains no @rel or @rev attribute, then the next step is to establish a value for [new subject]. 
Any of the attributes that can carry a resource can set [new subject]; if(element.attribute(REL_ATTR) == null && element.attribute(REV_ATTR) == null) { // [new subject] is set to the URI obtained from the first match from the following rules: if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; @@ -207,63 +213,133 @@ private void processElement(Element element, RDFaEvaluationContext context, bool if(currentObject != null) logger.debug("New object resolved to {}", currentObject.stringValue()); - // If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; + // 6. If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; if(newSubject != null) { if(element.attribute(TYPEOF_ATTR) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. 
If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: Optional typeIri = getResourceFromElementAttribute(element, TYPEOF_ATTR, context); if (typeIri.isPresent()) { - logger.debug("Typeof found: {}", typeIri.get()); - logger.debug("Type of resource resolved to {} {}", typeIri.get().stringValue(), context); Statement stat = this.getValueFactory().createStatement(newSubject, RDF.type.getIRI(), typeIri.get()); - logger.debug("Statement added: {} {} {}", stat.getSubject().stringValue(), stat.getPredicate().stringValue(), stat.getObject().stringValue()); this.getModel().add(stat); } else { throw new ParsingErrorException("Typeof statement uses unknown type " + element.attr(TYPEOF_ATTR)); } } } - // If however [current object resource] was set to null, but there are predicates present, then they must be stored as [incomplete triple]s, pending the discovery of a subject that can be used as the object. Also, [current object resource] should be set to a newly created [bnode]; - if (currentObject == null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { - currentObject = getValueFactory().createBNode(); + + // 7. 
If in any of the previous steps a [current object resource] was set to a non-null value, it is now used to generate triples: + if (currentObject != null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { if(element.attribute(REL_ATTR) != null) { Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { IRI property = (IRI) propertyOpt.get(); RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); statement.setSubject(newSubject); + statement.setObject(currentObject); incompleteStatementSet.add(statement); } } if(element.attribute(REV_ATTR) != null) { Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); - if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { IRI property = (IRI) propertyOpt.get(); RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); statement.setObject(newSubject); + statement.setSubject((Resource) currentObject); incompleteStatementSet.add(statement); } } } - if (element.attribute(TYPEOF_ATTR) != null) { - String typeIriString = element.attr(TYPEOF_ATTR); - logger.debug("Typeof found: {}", typeIriString); - if(context.parentSubjectResource().equals(context.baseIri())) { // Not current subjet was setup using about or src, so we are implicitly creating a blank node - context.parentSubjectResource(this.getValueFactory().createBNode()); + // 8. If however [current object resource] was set to null, but there are predicates present, then they must be stored as [incomplete triple]s, pending the discovery of a subject that can be used as the object. 
Also, [current object resource] should be set to a newly created [bnode]; + if (currentObject == null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { + currentObject = getValueFactory().createBNode(); + if(element.attribute(REL_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI) propertyOpt.get(); + RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); + incompleteStatementSet.add(statement); + } } - Optional typeIri = resolveStringResource(typeIriString, context); - if (typeIri.isPresent()) { - logger.debug("Type of resource resolved to {} {}", typeIri.get().stringValue(), context); - Statement stat = this.getValueFactory().createStatement(context.parentSubjectResource(), RDF.type.getIRI(), typeIri.get()); - logger.debug("Statement added: {} {} {}", stat.getSubject().stringValue(), stat.getPredicate().stringValue(), stat.getObject().stringValue()); - this.getModel().add(stat); - } else { - throw new ParsingErrorException("Typeof statement uses unknown type " + typeIriString); + if(element.attribute(REV_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { + IRI property = (IRI) propertyOpt.get(); + RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); + statement.setBackward(); + incompleteStatementSet.add(statement); + } + } + } + + // 9. The next step of the iteration is to establish any [current object literal]; + if(element.attribute(PROPERTY_ATTR) != null) { // Predicates for the [current object literal] can be set by using @property. 
If present, one or more URIs are obtained according to the section on CURIE and URI Processing, and then the actual literal value is obtained as follows: + Optional propertyOpt = getResourceFromElementAttribute(element, PROPERTY_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI)propertyOpt.get(); + logger.debug("Property found: {}", property.stringValue()); + + IRI datatype = null; + if(element.attribute(DATATYPE_ATTR) != null && ! element.attr(DATATYPE_ATTR).isEmpty()) { + Optional datatypeOpt = getResourceFromElementAttribute(element, DATATYPE_ATTR, context); + if(datatypeOpt.isPresent() && datatypeOpt.get().isIRI() && ! datatypeOpt.get().equals(RDF.XMLLiteral.getIRI())) { + datatype = (IRI) datatypeOpt.get(); + } + } + String value = element.text(); + if(element.attribute(CONTENT_ATTR) != null) { + value = element.attr(CONTENT_ATTR); + } + if(datatype != null) { + logger.debug("Literal value: {}, datatype: {}", value, datatype.stringValue()); + currentObjectLiteral = this.getValueFactory().createLiteral(value, datatype); + recursive = false; + } else if(language != null) { + logger.debug("Literal value: {}, language: {}", value, language); + currentObjectLiteral = this.getValueFactory().createLiteral(value, language); + } else { + logger.debug("Literal value: {}", value); + currentObjectLiteral = this.getValueFactory().createLiteral(value); + } + + this.getModel().add(newSubject, property, currentObjectLiteral); } } - for (Element child : element.children()) { - processElement(child, context, recursive, skipElement); + // 10. 
If the [skip element] flag is 'false', and [new subject] was set to a non-null value, then any [incomplete triple]s within the current context should be completed: + Iterator itStat = context.getIncompleteStatementIterator(); + while(itStat.hasNext()) { + RDFaIncompleteStatement statement = itStat.next(); + if(statement.isForward()) { + this.getModel().add(context.parentSubjectResource(), statement.getPredicate(), newSubject); + } else if (statement.isBackward()){ + this.getModel().add(newSubject, statement.getPredicate(), context.parentSubjectResource()); + } + } + + // 11. If the [recurse] flag is 'true', all elements that are children of the [current element] are processed using the rules described here, using a new [evaluation context], + if(recursive) { + if(skipElement) { + RDFaEvaluationContext newContext = new RDFaEvaluationContext(context); + newContext.setLanguage(language); + newContext.uriMappings(currentMappings); + context = newContext; + } else { + context = new RDFaEvaluationContext(context.baseIri()); + if(newSubject != null) { + context.parentObjectResource(newSubject); + } + if(currentObject != null) { + context.parentObjectResource(currentObject); + } + context.uriMappings(currentMappings); + context.incompleteStatements(incompleteStatementSet); + context.setLanguage(language); + } + + for (Element child : element.children()) { + processElement(child, context, recursive, skipElement); + } } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java index bff3564a4..0bc6f755d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java @@ -12,13 +12,48 @@ public class RDFaIncompleteStatement { private Resource subject = null; private IRI predicate = null; 
private Value object = null; + private Direction direction = Direction.FORWARD; - public RDFaIncompleteStatement() { + public enum Direction { + FORWARD, + BACKWARD + } + + private RDFaIncompleteStatement() { } public RDFaIncompleteStatement(IRI predicate) { + this.predicate = predicate; + } + + public RDFaIncompleteStatement(IRI predicate, Direction direction) { + this.predicate = predicate; + this.direction = direction; + } + + public boolean isForward() { + return this.direction == Direction.FORWARD; + } + + public boolean isBackward() { + return this.direction == Direction.BACKWARD; + } + + public Direction getDirection() { + return this.direction; + } + + public void setForward() { + this.direction = Direction.FORWARD; + } + + public void setBackward() { + this.direction = Direction.BACKWARD; + } + public void setDirection(Direction direction) { + this.direction = direction; } public Resource getSubject() { diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index ddcadbe46..005e58dce 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -46,6 +46,7 @@ public void basicDocTest() { Literal object = factory.createLiteral("Mark Birbeck"); assertTrue(testModel.contains(subject, predicate, object)); + assertEquals(1, testModel.size()); } @Test @@ -79,7 +80,7 @@ public void basicChainTest() { String testDataString = """ - +
From f0cddecf55680e0e00be05bb5ca8d5ac7a87284e Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Wed, 5 Nov 2025 17:45:16 +0100 Subject: [PATCH 06/14] Fixing IRIUtils for IRI without a localname --- .../corese/core/next/impl/common/util/IRIUtils.java | 11 +++++++++-- .../core/next/impl/common/util/IRIUtilsTest.java | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index 36c427849..0c7e47866 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -1,5 +1,8 @@ package fr.inria.corese.core.next.impl.common.util; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.net.URI; import java.net.URISyntaxException; import java.util.Set; @@ -14,7 +17,9 @@ */ public class IRIUtils { - private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?([\\w\\-_]+))?)?$"); + private static final Logger logger = LoggerFactory.getLogger(IRIUtils.class); + + private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(?(\\#))?(?([\\w\\-_]+))?)?$"); private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?"); private static final int MAX_IRI_LENGTH = 2048; private static final long REGEX_TIMEOUT_MS = 100; @@ -40,6 +45,7 @@ public static String guessNamespace(String iri) { if (matcher == null || !matcher.matches()) { return ""; } else if (matcher.matches()) { + logger.debug("namespace {} protocol {} dblSlashes {} domain {} path {} finalPath {} query 
{} anchor {} fragment {}", matcher.group("namespace"), matcher.group("protocol"), matcher.group("dblSlashes"), matcher.group("domain"), matcher.group("path"), matcher.group("finalPath"), matcher.group("query"), matcher.group("anchor"), matcher.group("fragment")); if (matcher.group("protocol") != null && matcher.group("protocol").equals("_")) { return ""; } @@ -52,9 +58,10 @@ public static String guessNamespace(String iri) { if(matcher.group("path") != null) { namespace.append(matcher.group("path")); } - if(matcher.group("fragment") != null && matcher.group("finalPath") != null) { + if((matcher.group("fragment") != null || matcher.group("anchor") != null) && matcher.group("finalPath") != null) { namespace.append(matcher.group("finalPath")).append("#"); } + return namespace.toString(); } else { throw new IllegalStateException("No namespace found for the given IRI: " + iri + "."); diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java index e7620f5ed..1c4aff026 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java @@ -4,6 +4,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import fr.inria.corese.core.next.impl.common.literal.XSD; import org.junit.jupiter.api.Test; public class IRIUtilsTest { @@ -39,6 +40,7 @@ public void guessNamespaceTest() { assertEquals("https://www.syuno-pit.biz/tezukayama-bandai-2.html#", IRIUtils.guessNamespace(uriToHTMLPageWithQueryAndFragment)); assertEquals("https://www.syuno-pit.biz/tezukayama-bandai-2.html#", IRIUtils.guessNamespace(uriToHTMLPageWithFragment)); assertEquals("", IRIUtils.guessNamespace(blankNode)); + assertEquals("http://www.w3.org/2001/XMLSchema#", IRIUtils.guessNamespace("http://www.w3.org/2001/XMLSchema#")); } @Test From 
fd8cabe004745a9e2402952ec158a66fc1fa158b Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Thu, 6 Nov 2025 11:37:01 +0100 Subject: [PATCH 07/14] fix step 7 --- .../core/next/impl/common/util/IRIUtils.java | 1 - .../next/impl/io/parser/rdfa/RDFaParser.java | 31 +++++-------------- .../impl/io/parser/rdfa/RDFaParserTest.java | 25 ++++++++++++--- 3 files changed, 28 insertions(+), 29 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index 0c7e47866..94cd4ba56 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -45,7 +45,6 @@ public static String guessNamespace(String iri) { if (matcher == null || !matcher.matches()) { return ""; } else if (matcher.matches()) { - logger.debug("namespace {} protocol {} dblSlashes {} domain {} path {} finalPath {} query {} anchor {} fragment {}", matcher.group("namespace"), matcher.group("protocol"), matcher.group("dblSlashes"), matcher.group("domain"), matcher.group("path"), matcher.group("finalPath"), matcher.group("query"), matcher.group("anchor"), matcher.group("fragment")); if (matcher.group("protocol") != null && matcher.group("protocol").equals("_")) { return ""; } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index e47c26ff5..43d29560c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -121,9 +121,9 @@ private void processElement(Element element, RDFaEvaluationContext context, bool Attribute attribute = itAttribute.next(); if (attribute.getKey().startsWith(XMLNS_PREFIX)) { String prefixName = attribute.localName(); - String prefixNamespace = 
attribute.getValue(); - logger.debug("Mapping: {} = {}", prefixName, prefixNamespace); - context.addUriMapping(prefixName, getValueFactory().createIRI(prefixNamespace)); + IRI prefixNamespace = getValueFactory().createIRI(attribute.getValue(), ""); + logger.debug("Mapping: {} = {}", prefixName, prefixNamespace.stringValue()); + context.addUriMapping(prefixName, prefixNamespace); } } @@ -211,7 +211,7 @@ private void processElement(Element element, RDFaEvaluationContext context, bool if (newSubject != null) logger.debug("New subject resolved to {}", newSubject.stringValue()); if(currentObject != null) - logger.debug("New object resolved to {}", currentObject.stringValue()); + logger.debug("Current object resolved to {}", currentObject.stringValue()); // 6. If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; if(newSubject != null) { @@ -232,20 +232,14 @@ private void processElement(Element element, RDFaEvaluationContext context, bool Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { IRI property = (IRI) propertyOpt.get(); - RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); - statement.setSubject(newSubject); - statement.setObject(currentObject); - incompleteStatementSet.add(statement); + this.getModel().add(newSubject, property, currentObject); } } if(element.attribute(REV_ATTR) != null) { Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { IRI property = (IRI) propertyOpt.get(); - RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); - statement.setObject(newSubject); - statement.setSubject((Resource) currentObject); - incompleteStatementSet.add(statement); + this.getModel().add(currentObject, property, newSubject); } } } @@ -302,6 
+296,7 @@ private void processElement(Element element, RDFaEvaluationContext context, bool currentObjectLiteral = this.getValueFactory().createLiteral(value); } + logger.debug("Adding {} {} {} {}", newSubject.stringValue(), property.stringValue(), currentObjectLiteral.getLabel(), currentObjectLiteral.getDatatype().stringValue()); this.getModel().add(newSubject, property, currentObjectLiteral); } } @@ -358,14 +353,6 @@ private void processElement(Element element, RDFaEvaluationContext context, Reso public void parse(Reader reader, String baseURI) { } - private Statement incompleteStatementToStatement(RDFaIncompleteStatement incompleteStatement) { - Objects.requireNonNull(incompleteStatement.getSubject(), "Null subject, IncompleteStatement can only be converted if all its component are non-null."); - Objects.requireNonNull(incompleteStatement.getPredicate(), "Null predicate, IncompleteStatement can only be converted if all its component are non-null."); - Objects.requireNonNull(incompleteStatement.getObject(), "Null object, IncompleteStatement can only be converted if all its component are non-null."); - - return this.getValueFactory().createStatement(incompleteStatement.getSubject(), incompleteStatement.getPredicate(), incompleteStatement.getObject()); - } - /** * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI * @@ -374,7 +361,6 @@ private Statement incompleteStatementToStatement(RDFaIncompleteStatement incompl * @return the full IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise */ private Optional resolveStringResource(String stringResource, RDFaEvaluationContext context) { - logger.debug("Resolution of resource {}, {}", stringResource, context); String resultString = stringResource; if (resultString.startsWith("[") && resultString.endsWith("]")) { resultString = resultString.replaceFirst("\\[", ""); @@ -386,7 +372,6 @@ private Optional resolveStringResource(String 
stringResource, RDFaEval int colonIndex = resultString.indexOf(":"); String prefixString = resultString.substring(0, colonIndex); String localNameString = resultString.substring(colonIndex + 1); - logger.debug("CURIE with prefix: {} and local name: {}", prefixString, localNameString); // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri if (context.hasUriMapping(prefixString)) { IRI namespaceIRI = context.uriMapping(prefixString); @@ -398,13 +383,11 @@ private Optional resolveStringResource(String stringResource, RDFaEval throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); } } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI - logger.debug("Standard IRI: {}", resultString); return Optional.of(this.getValueFactory().createIRI(resultString)); } else if (resultString.startsWith("_:")) { // Blank Node int colonIndex = resultString.indexOf(":"); String localNameString = resultString.substring(colonIndex + 1); - logger.debug("Blank Node: _:{}", localNameString); return Optional.of(this.getValueFactory().createBNode(localNameString)); } return Optional.empty(); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index 005e58dce..bd242a0eb 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -3,20 +3,28 @@ import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.common.vocabulary.XSD; import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import 
fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.ntriples.NTriplesSerializerOptions; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; +import java.io.StringWriter; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; public class RDFaParserTest { + private static final Logger logger = LoggerFactory.getLogger(RDFaParserTest.class); + private static final ValueFactory factory = new CoreseAdaptedValueFactory(); @Test @@ -95,6 +103,7 @@ public void basicChainTest() { """; Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); @@ -115,11 +124,19 @@ public void basicChainTest() { Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); Statement germanyNameStatement = factory.createStatement(germany, conventionalLongName, gerLongName); + referenceModel.add(aeNameStatement); + referenceModel.add(aeDateOfBirthStatement); + referenceModel.add(aeBirthPlaceStatement); + referenceModel.add(germanyNameStatement); + + DefaultSerializerFactory serializerFactory = new DefaultSerializerFactory(); + RDFSerializer serializer = serializerFactory.createSerializer(RDFFormat.NTRIPLES, testModel, new NTriplesSerializerOptions.Builder().build()); + StringWriter debugWriter = new StringWriter(); + serializer.write(debugWriter); + logger.debug(debugWriter.toString()); + assertEquals(4, testModel.size()); - assertTrue(testModel.contains(aeNameStatement)); - assertTrue(testModel.contains(aeDateOfBirthStatement)); - assertTrue(testModel.contains(aeBirthPlaceStatement)); - 
assertTrue(testModel.contains(germanyNameStatement)); + assertEquals(referenceModel, testModel); } } From 03a6df6c65beb84acb12a7b38174c65c9f6ed489 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 14 Nov 2025 12:38:44 +0100 Subject: [PATCH 08/14] Minor fixes for literals, model and IRIs comparison --- .../core/next/api/base/model/AbstractIRI.java | 5 +++++ .../next/api/base/model/AbstractModel.java | 5 +++++ .../base/model/literal/AbstractLiteral.java | 22 +++++++++++++++++++ .../next/impl/temp/literal/CoreseDecimal.java | 6 +++++ .../impl/temp/literal/CoreseDuration.java | 2 ++ .../next/impl/temp/literal/CoreseInteger.java | 6 +++++ 6 files changed, 46 insertions(+) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java index ba4e58153..13bb6f43f 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java @@ -86,4 +86,9 @@ public int hashCode() { hash = 31 * hash + (this.localName == null ?
0 : this.localName.hashCode()); return hash; } + + @Override + public String toString() { + return this.stringValue(); + } } diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java index 34b52a249..dd16108a8 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java @@ -344,6 +344,11 @@ public boolean containsAll(Collection collection) { } } + @Override + public boolean equals(Object o) { + return o instanceof Model && this.size() == ((Model) o).size() && ((Model) o).containsAll(this); + } + @Override public boolean addAll(Collection collection) { boolean modified = false; diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java b/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java index 350a5c30f..f1d206f83 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java @@ -165,4 +165,26 @@ public TemporalAmount temporalAmountValue() { public XMLGregorianCalendar calendarValue() { throw new IncorrectOperationException("Cannot convert to XML calendar"); } + + /** + * Check if two literals are equal.
+ * @param obj the object to compare with + * @return true if obj is this literal or a Literal with the same label and datatype, false otherwise + */ + @Override + public boolean equals(Object obj) { + if(obj == this) { + return true; + } + if(!(obj instanceof Literal)) { + return false; + } + + return ((Literal) obj).getLabel().equals(this.getLabel()) && ((Literal) obj).getDatatype().equals(this.datatype); + } + + @Override + public String toString() { + return this.stringValue(); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java index 5ccbad9c4..06968d08c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java @@ -2,6 +2,7 @@ import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.model.literal.AbstractLiteral; +import fr.inria.corese.core.next.api.base.model.literal.AbstractNumber; import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.api.literal.CoreDatatype; import fr.inria.corese.core.next.impl.exception.IncorrectDatatypeException; @@ -156,4 +157,9 @@ public BigInteger integerValue() { public BigDecimal decimalValue() { return BigDecimal.valueOf(this.doubleValue()); } + + @Override + public int compareTo(AbstractNumber abstractNumber) { + return (int) (this.doubleValue() - abstractNumber.doubleValue()); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java index cad28d27b..f19213b47 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java @@ -107,6 +107,8 @@ public CoreDatatype getCoreDatatype() { public boolean equals(Object obj) { if (obj instanceof CoreseDuration)
{ return this.coreseObject.equals(((CoreseDuration) obj).coreseObject); + } else if (obj instanceof AbstractDuration) { + return super.equals(obj); } return false; } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java index a63868c27..c108d9e39 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java @@ -2,6 +2,7 @@ import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.model.literal.AbstractLiteral; +import fr.inria.corese.core.next.api.base.model.literal.AbstractNumber; import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.api.literal.CoreDatatype; import fr.inria.corese.core.next.impl.exception.IncorrectDatatypeException; @@ -154,4 +155,9 @@ public BigInteger integerValue() { public BigDecimal decimalValue() { return BigDecimal.valueOf(this.coreseObject.longValue()); } + + @Override + public int compareTo(AbstractNumber abstractNumber) { + return Math.toIntExact(this.longValue() - abstractNumber.longValue()); + } } From 9859322402fbf39fcbc2d518b37ea7e34d1ca2c9 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Fri, 14 Nov 2025 14:43:10 +0100 Subject: [PATCH 09/14] Fixing base declaration --- .../corese/core/next/impl/io/parser/rdfa/RDFaParser.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 43d29560c..4a88c7d0a 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -351,6 +351,8 @@ private void processElement(Element element, RDFaEvaluationContext context, Reso @Override public void 
parse(Reader reader, String baseURI) { + InputStream inputStream = new ReaderInputStream(reader, StandardCharsets.UTF_8); + parse(inputStream , baseURI); } /** @@ -389,6 +391,9 @@ private Optional resolveStringResource(String stringResource, RDFaEval int colonIndex = resultString.indexOf(":"); String localNameString = resultString.substring(colonIndex + 1); return Optional.of(this.getValueFactory().createBNode(localNameString)); + } else if (IRIUtils.isStandardIRI(context.baseIri().stringValue() + resultString)) { + String concatenatedRelativeUri = context.baseIri().stringValue() + resultString; + return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); } return Optional.empty(); } From 36efed706cd444d9016522f80084be49947232b5 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Mon, 17 Nov 2025 14:14:20 +0100 Subject: [PATCH 10/14] fixing corese literal generation --- .../core/next/api/base/model/AbstractIRI.java | 8 +++----- .../temp/CoreseAdaptedValueFactoryTest.java | 20 ++++++++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java index 13bb6f43f..c5b519008 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java @@ -4,12 +4,15 @@ import fr.inria.corese.core.next.impl.common.util.IRIUtils; import fr.inria.corese.core.next.impl.exception.IncorrectFormatException; +import java.io.Serial; + /** * Base class for IRI implementations. Includes base functionality for IRI * handling. 
*/ public abstract class AbstractIRI implements IRI, Comparable { + @Serial private static final long serialVersionUID = -1005683238501772511L; private final String namespace; @@ -44,11 +47,6 @@ protected AbstractIRI(String namespace, String localName) { this.localName = localName; } - @Override - public boolean isIRI() { - return true; - } - @Override public String getNamespace() { return this.namespace; diff --git a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java index 9e249cae9..627c351af 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java @@ -1,12 +1,8 @@ package fr.inria.corese.core.next.impl.temp; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - import java.time.Duration; +import fr.inria.corese.core.next.impl.temp.literal.CoreseDate; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -19,6 +15,8 @@ import fr.inria.corese.core.next.impl.temp.literal.CoreseLanguageTaggedStringLiteral; import fr.inria.corese.core.next.impl.temp.literal.CoreseTyped; +import static org.junit.jupiter.api.Assertions.*; + public class CoreseAdaptedValueFactoryTest extends ValueFactoryTest { private String stringTestValue; @@ -135,4 +133,16 @@ public void testCreateFOAFURI() { assertNotNull(foaf); assertEquals("http://xmlns.com/foaf/0.1/", foaf.stringValue()); } + + @Test + public void testDateCreation() { + IRI xsdDate = valueFactory.createIRI("http://www.w3.org/2001/XMLSchema#date"); + String literalStringValue = "2025-11-20"; + Literal date = valueFactory.createLiteral(literalStringValue, xsdDate); + + 
assertNotNull(date); + assertEquals(fr.inria.corese.core.next.impl.common.vocabulary.XSD.xsdDate.getIRI().stringValue(), date.getDatatype().stringValue()); + assertEquals(literalStringValue, date.getLabel()); + assertInstanceOf(fr.inria.corese.core.sparql.datatype.CoreseDate.class, ((CoreseNodeAdapter) date).getCoreseNode()); + } } From 5bec1c3bf4b5d56e982238610dbe00c2e926e749 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Mon, 17 Nov 2025 14:14:59 +0100 Subject: [PATCH 11/14] fixing inheritance --- .../core/next/impl/temp/literal/AbstractCoreseNumber.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java index 14eef1d00..bf1d26be4 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java @@ -3,6 +3,7 @@ import fr.inria.corese.core.kgram.api.core.Node; import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.model.literal.AbstractLiteral; +import fr.inria.corese.core.next.api.base.model.literal.AbstractNumber; import fr.inria.corese.core.sparql.api.IDatatype; import fr.inria.corese.core.sparql.datatype.CoreseNumber; @@ -12,7 +13,7 @@ /** * Super class for all the numeric based literal in the XSD datatype hierarchy. 
*/ -public abstract class AbstractCoreseNumber extends AbstractLiteral implements CoreseDatatypeAdapter { +public abstract class AbstractCoreseNumber extends AbstractNumber implements CoreseDatatypeAdapter { protected final CoreseNumber coreseObject; @@ -77,8 +78,8 @@ public String stringValue() { @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof AbstractCoreseNumber)) return false; - AbstractCoreseNumber that = (AbstractCoreseNumber) o; + if (!(o instanceof AbstractNumber)) return false; + if (!(o instanceof AbstractCoreseNumber that)) return super.equals(o); return this.coreseObject.equals(that.coreseObject); } From bf6500f6897c8d44af52870441eb5a5d73b8bd89 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Mon, 17 Nov 2025 14:22:32 +0100 Subject: [PATCH 12/14] RDFa parser finalized --- .../next/api/base/model/AbstractModel.java | 3 +- .../next/impl/io/parser/rdfa/RDFaParser.java | 51 +++--- .../core/next/impl/temp/CoreseModel.java | 1 - .../impl/io/parser/rdfa/RDFaParserTest.java | 168 ++++++++++++++---- 4 files changed, 161 insertions(+), 62 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java index dd16108a8..123e2884f 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java @@ -334,7 +334,8 @@ public boolean containsAll(Collection collection) { Iterator iterator = collection.iterator(); try { while (iterator.hasNext()) { - if (!contains(iterator.next())) { + Object currentObject = iterator.next(); + if (! (currentObject instanceof Statement) && ! 
this.contains(currentObject)) { return false; } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java index 4a88c7d0a..9b275cdd3 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -4,26 +4,29 @@ import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; import fr.inria.corese.core.next.impl.common.util.IRIUtils; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; +import org.apache.commons.io.input.ReaderInputStream; import org.jsoup.Jsoup; import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.*; +/** + * RDFa parser. This parser will load the RDF data stored as RDFa in an HTML page. Its inner implementation is based on the jsoup library. It loads the html page as DOM and process it following the recommended algorithm in the RDFa recommendation. 
+ */ public class RDFaParser extends AbstractRDFParser { - private static final Logger logger = LoggerFactory.getLogger(RDFaParser.class); - private static final String REL_ATTR = "rel"; private static final String REV_ATTR = "rev"; private static final String CONTENT_ATTR = "content"; @@ -51,6 +54,16 @@ public RDFFormat getRDFFormat() { return RDFFormat.RDFa; } + @Override + public void parse(InputStream in) { + if(getConfig() instanceof BaseIRIOptions baseIRIOptions) { + String baseIRI = baseIRIOptions.getBaseIRI(); + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseIRI); + } else { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), null); + } + } + @Override public void parse(InputStream in, String baseURIString) { try { @@ -90,7 +103,7 @@ private void processDocument(Document document, IRI baseIri) { } for (Element element : document.children()) { - processElement(element, new RDFaEvaluationContext(baseIri), baseIri); + processElement(element, new RDFaEvaluationContext(baseIri)); } } @@ -103,7 +116,6 @@ private void processDocument(Document document, IRI baseIri) { * @see RDFa processing in details */ private void processElement(Element element, RDFaEvaluationContext context, boolean recursive, boolean skipElement) { - logger.debug("processElement({}, {}, ...)", element, context); // 1. 
First, the local values are initialized Resource newSubject = null; @@ -122,7 +134,6 @@ private void processElement(Element element, RDFaEvaluationContext context, bool if (attribute.getKey().startsWith(XMLNS_PREFIX)) { String prefixName = attribute.localName(); IRI prefixNamespace = getValueFactory().createIRI(attribute.getValue(), ""); - logger.debug("Mapping: {} = {}", prefixName, prefixNamespace.stringValue()); context.addUriMapping(prefixName, prefixNamespace); } } @@ -140,25 +151,21 @@ private void processElement(Element element, RDFaEvaluationContext context, bool Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); - logger.debug("@about found: {}", newSubjectResource.get().stringValue()); } } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); - logger.debug("@src found: {}", newSubjectResource.get().stringValue()); } } else if (element.attribute(RESOURCE_ATTR) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; Optional newSubjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); - logger.debug("@resource found: {}", newSubjectResource.get().stringValue()); } } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. 
Optional newSubjectResource = getResourceFromElementAttribute(element, HREF_ATTR, context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); - logger.debug("href found: {}", newSubjectResource.get()); } } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; newSubject = context.baseIri(); @@ -176,13 +183,11 @@ private void processElement(Element element, RDFaEvaluationContext context, bool Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); - logger.debug("@about found: {}", newSubjectResource.get()); } } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); if (newSubjectResource.isPresent()) { newSubject = newSubjectResource.get(); - logger.debug("@src found: {}", newSubjectResource.get()); } } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; newSubject = context.baseIri(); @@ -197,22 +202,15 @@ private void processElement(Element element, RDFaEvaluationContext context, bool Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); if (newObjectResource.isPresent()) { currentObject = newObjectResource.get(); - logger.debug("@resource found: {}", newObjectResource.get().stringValue()); } } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. 
Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); if (newObjectResource.isPresent()) { currentObject = newObjectResource.get(); - logger.debug("href found: {}", newObjectResource.get().stringValue()); } } } - if (newSubject != null) - logger.debug("New subject resolved to {}", newSubject.stringValue()); - if(currentObject != null) - logger.debug("Current object resolved to {}", currentObject.stringValue()); - // 6. If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; if(newSubject != null) { if(element.attribute(TYPEOF_ATTR) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: @@ -271,7 +269,6 @@ private void processElement(Element element, RDFaEvaluationContext context, bool Optional propertyOpt = getResourceFromElementAttribute(element, PROPERTY_ATTR, context); if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { IRI property = (IRI)propertyOpt.get(); - logger.debug("Property found: {}", property.stringValue()); IRI datatype = null; if(element.attribute(DATATYPE_ATTR) != null && ! 
element.attr(DATATYPE_ATTR).isEmpty()) { @@ -285,18 +282,14 @@ private void processElement(Element element, RDFaEvaluationContext context, bool value = element.attr(CONTENT_ATTR); } if(datatype != null) { - logger.debug("Literal value: {}, datatype: {}", value, datatype.stringValue()); currentObjectLiteral = this.getValueFactory().createLiteral(value, datatype); recursive = false; } else if(language != null) { - logger.debug("Literal value: {}, language: {}", value, language); currentObjectLiteral = this.getValueFactory().createLiteral(value, language); } else { - logger.debug("Literal value: {}", value); currentObjectLiteral = this.getValueFactory().createLiteral(value); } - logger.debug("Adding {} {} {} {}", newSubject.stringValue(), property.stringValue(), currentObjectLiteral.getLabel(), currentObjectLiteral.getDatatype().stringValue()); this.getModel().add(newSubject, property, currentObjectLiteral); } } @@ -341,11 +334,10 @@ private void processElement(Element element, RDFaEvaluationContext context, bool /** * Surcharge function that initialize the flags and subject and objet to their initial values for processing * - * @param element - * @param context - * @param newSubject + * @param element HTML element + * @param context current evaluation context */ - private void processElement(Element element, RDFaEvaluationContext context, Resource newSubject) { + private void processElement(Element element, RDFaEvaluationContext context) { processElement(element, context, true, false); } @@ -413,7 +405,6 @@ private Optional getResourceFromElementAttribute(Element element, Stri if (element.attribute(attributeName) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; String newSubjectString = element.attr(attributeName); return resolveStringResource(newSubjectString, context); - } return Optional.empty(); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java 
b/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java index e91c7c009..def34c0e7 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java @@ -148,7 +148,6 @@ public boolean add(Resource subject, IRI predicate, Value object, Resource... co @Override public boolean contains(Resource subject, IRI predicate, Value object, Resource... contexts) { - Node subjectNode = converter.toCoreseNode(subject); Node predicateNode = converter.toCoreseNode(predicate); Node objectNode = converter.toCoreseNode(object); diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java index bd242a0eb..2458067d9 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -3,32 +3,25 @@ import fr.inria.corese.core.next.api.*; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.io.parser.RDFParser; -import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; import fr.inria.corese.core.next.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.impl.common.vocabulary.XSD; import fr.inria.corese.core.next.impl.io.parser.ParserFactory; -import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory; -import fr.inria.corese.core.next.impl.io.serialization.ntriples.NTriplesSerializerOptions; import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import fr.inria.corese.core.next.impl.temp.CoreseModel; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; -import java.io.StringWriter; +import java.util.Iterator; import static org.junit.jupiter.api.Assertions.assertEquals; import static 
org.junit.jupiter.api.Assertions.assertTrue; public class RDFaParserTest { - private static final Logger logger = LoggerFactory.getLogger(RDFaParserTest.class); - private static final ValueFactory factory = new CoreseAdaptedValueFactory(); @Test - public void basicDocTest() { + public void basicBaseTest() { String testDataString = """ @@ -44,17 +37,31 @@ public void basicDocTest() { """; Model testModel = new CoreseModel(); - - RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); - - parser.parse(new ByteArrayInputStream(testDataString.getBytes())); + Model referenceModel = new CoreseModel(); IRI subject = factory.createIRI("http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/photo1.jpg"); IRI predicate = factory.createIRI("http://purl.org/dc/elements/1.1/creator"); Literal object = factory.createLiteral("Mark Birbeck"); + referenceModel.add(subject, predicate, object); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes())); + + assertEquals(RDFFormat.RDFa, parser.getRDFFormat()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } assertTrue(testModel.contains(subject, predicate, object)); - assertEquals(1, testModel.size()); } @Test @@ -84,7 +91,42 @@ public void aboutTest() { } @Test - public void basicChainTest() { + public void 
basicIRItoIRITest() { + String testDataString = """ + + + + + +
+
+
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); + IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); + + Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); + + referenceModel.add(aeBirthPlaceStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + } + + @Test + public void basicIRItoStringTest() { String testDataString = """ @@ -93,7 +135,86 @@ public void basicChainTest() {
Albert Einstein +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI foafName = factory.createIRI("http://xmlns.com/foaf/0.1/name"); + Literal aeName = factory.createLiteral("Albert Einstein"); + + Statement aeNameStatement = factory.createStatement(albertEinstein, foafName, aeName); + + referenceModel.add(aeNameStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + + } + + @Test + public void basicIRItoTypedLiteralTest() { + String testDataString = """ + + + + + +
1879-03-14 +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI dateOfBirth = factory.createIRI("http://dbpedia.org/property/dateOfBirth"); + Literal aeDateOfBirth = factory.createLiteral("1879-03-14", XSD.xsdDate.getIRI()); + + Statement aeDateOfBirthStatement = factory.createStatement(albertEinstein, dateOfBirth, aeDateOfBirth); + + referenceModel.add(aeDateOfBirthStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + assertTrue(referenceModel.containsAll(testModel)); + } + + @Test + public void basicChainTest() { + String testDataString = """ + + + + + +
Federal Republic of Germany @@ -110,33 +231,20 @@ public void basicChainTest() { parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); - IRI dateOfBirth = factory.createIRI("http://dbpedia.org/property/dateOfBirth"); - IRI foafName = factory.createIRI("http://xmlns.com/foaf/0.1/name"); IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); IRI conventionalLongName = factory.createIRI("http://dbpedia.org/property/conventionalLongName"); - Literal aeName = factory.createLiteral("Albert Einstein"); - Literal aeDateOfBirth = factory.createLiteral("1879-03-14", XSD.xsdDate.getIRI()); Literal gerLongName = factory.createLiteral("Federal Republic of Germany"); - Statement aeNameStatement = factory.createStatement(albertEinstein, foafName, aeName); - Statement aeDateOfBirthStatement = factory.createStatement(albertEinstein, dateOfBirth, aeDateOfBirth); Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); Statement germanyNameStatement = factory.createStatement(germany, conventionalLongName, gerLongName); - referenceModel.add(aeNameStatement); - referenceModel.add(aeDateOfBirthStatement); referenceModel.add(aeBirthPlaceStatement); referenceModel.add(germanyNameStatement); - DefaultSerializerFactory serializerFactory = new DefaultSerializerFactory(); - RDFSerializer serializer = serializerFactory.createSerializer(RDFFormat.NTRIPLES, testModel, new NTriplesSerializerOptions.Builder().build()); - StringWriter debugWriter = new StringWriter(); - serializer.write(debugWriter); - logger.debug(debugWriter.toString()); - - assertEquals(4, testModel.size()); + assertEquals(2, testModel.size()); assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); } } From 
5ad95076699c8c6dadd17be6f25046bd2a8c7fe1 Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Mon, 17 Nov 2025 14:28:03 +0100 Subject: [PATCH 13/14] cleaning --- .../fr/inria/corese/core/next/impl/common/util/IRIUtils.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index 94cd4ba56..f7d851b75 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -1,8 +1,5 @@ package fr.inria.corese.core.next.impl.common.util; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.net.URI; import java.net.URISyntaxException; import java.util.Set; @@ -17,8 +14,6 @@ */ public class IRIUtils { - private static final Logger logger = LoggerFactory.getLogger(IRIUtils.class); - private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(?(\\#))?(?([\\w\\-_]+))?)?$"); private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?"); private static final int MAX_IRI_LENGTH = 2048; From da7b123ba3851781d38d1af519b2def2a23d435c Mon Sep 17 00:00:00 2001 From: Pierre Maillot Date: Mon, 17 Nov 2025 16:45:09 +0100 Subject: [PATCH 14/14] fix file format --- .../java/fr/inria/corese/core/next/api/base/io/RDFFormat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java index 0b1568e3d..14970afa1 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java @@ -67,7 +67,7 @@ 
public class RDFFormat extends FileFormat { public static final RDFFormat RDFa = new RDFFormat( "RDFa", - List.of("html"), + List.of("html", "xhtml"), List.of("text/html", "application/xhtml+xml"), true, false);