diff --git a/build.gradle.kts b/build.gradle.kts index 93fdfb7c1..a7a028f5f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -139,6 +139,9 @@ dependencies { implementation("com.typesafe.akka:akka-stream_2.13:2.6.20") // Akka Streams for reactive streams processing implementation("com.lightbend.akka:akka-stream-alpakka-xml_2.13:3.0.4") // Alpakka XML for XML processing with Akka Streams + // HTML parsing for RDFa + implementation("org.jsoup:jsoup:1.21.2") + // === Utilities === implementation("org.apache.commons:commons-text:1.13.1") // Text manipulation utilities (Commons Text) implementation("org.json:json:20250517") // JSON processing @@ -150,6 +153,9 @@ dependencies { testRuntimeOnly("org.junit.platform:junit-platform-launcher:1.13.2") // JUnit platform launcher (runtime) testImplementation("org.mockito:mockito-core:5.18.0") // Mockito core for mocking in tests testImplementation("org.mockito:mockito-junit-jupiter:5.18.0") // Mockito integration with JUnit Jupiter + testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.0") // Log4j2 core for internal logging + testRuntimeOnly("org.apache.logging.log4j:log4j-slf4j2-impl:2.25.0") // SLF4J binding for Log4j2 (runtime) + } ///////////////////////// diff --git a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java index e16c3a92c..14970afa1 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java @@ -23,7 +23,6 @@ public class RDFFormat extends FileFormat { true, false); - public static final RDFFormat NTRIPLES = new RDFFormat( "N-Triples", List.of("nt"), @@ -66,6 +65,13 @@ public class RDFFormat extends FileFormat { false, true); + public static final RDFFormat RDFa = new RDFFormat( + "RDFa", + List.of("html", "xhtml"), + List.of("text/html", "application/xhtml+xml"), + true, + false); + /** * Constructs a new RDF format. 
* @@ -152,7 +158,7 @@ public static Optional byMimeType(String mimeType) { * @return An unmodifiable List of all RdfFormat constants. */ public static List all() { - return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG); + return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa); } @Override diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java index ba4e58153..c5b519008 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java @@ -4,12 +4,15 @@ import fr.inria.corese.core.next.impl.common.util.IRIUtils; import fr.inria.corese.core.next.impl.exception.IncorrectFormatException; +import java.io.Serial; + /** * Base class for IRI implementations. Includes base functionality for IRI * handling. */ public abstract class AbstractIRI implements IRI, Comparable { + @Serial private static final long serialVersionUID = -1005683238501772511L; private final String namespace; @@ -44,11 +47,6 @@ protected AbstractIRI(String namespace, String localName) { this.localName = localName; } - @Override - public boolean isIRI() { - return true; - } - @Override public String getNamespace() { return this.namespace; @@ -86,4 +84,9 @@ public int hashCode() { hash = 31 * hash + (this.localName == null ? 
0 : this.localName.hashCode()); return hash; } + + @Override + public String toString() { + return this.stringValue(); + } } diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java index 34b52a249..123e2884f 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java @@ -334,7 +334,8 @@ public boolean containsAll(Collection collection) { Iterator iterator = collection.iterator(); try { while (iterator.hasNext()) { - if (!contains(iterator.next())) { + Object currentObject = iterator.next(); // an element that is not a Statement, or is not in this model, fails the check + if (! (currentObject instanceof Statement) || ! this.contains(currentObject)) { return false; } } @@ -344,6 +345,11 @@ public boolean containsAll(Collection collection) { } } + @Override + public boolean equals(Object o) { + return o instanceof Model && this.size() == ((Model) o).size() && ((Model) o).containsAll(this); + } + @Override public boolean addAll(Collection collection) { boolean modified = false; diff --git a/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java b/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java index 350a5c30f..f1d206f83 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractLiteral.java @@ -165,4 +165,26 @@ public TemporalAmount temporalAmountValue() { public XMLGregorianCalendar calendarValue() { throw new IncorrectOperationException("Cannot convert to XML calendar"); } + + /** + * Check if two literals are equal (same label and same datatype).
+ * @param obj the object to compare with + * @return true if obj is a Literal with the same label and the same datatype as this literal, false otherwise + */ + @Override + public boolean equals(Object obj) { + if(obj == this) { + return true; + } + if(!(obj instanceof Literal)) { + return false; + } + + return ((Literal) obj).getLabel().equals(this.getLabel()) && ((Literal) obj).getDatatype().equals(this.datatype); + } + + @Override + public String toString() { + return this.stringValue(); + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java index 36c427849..f7d851b75 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java @@ -14,7 +14,7 @@ */ public class IRIUtils { - private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?([\\w\\-_]+))?)?$"); + private static final Pattern IRI_PATTERN = Pattern.compile("^(?(?[\\w\\-]+):(?\\/\\/)?(?([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?\\/([\\w\\-\\._\\:]+\\/)*)(?[\\w\\-\\._\\:]+)?(?\\?[\\w\\-_\\:\\?\\=]+)?(?(\\#))?(?([\\w\\-_]+))?)?$"); private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?"); private static final int MAX_IRI_LENGTH = 2048; private static final long REGEX_TIMEOUT_MS = 100; @@ -52,9 +52,10 @@ public static String guessNamespace(String iri) { if(matcher.group("path") != null) { namespace.append(matcher.group("path")); } - if(matcher.group("fragment") != null && matcher.group("finalPath") != null) { + if((matcher.group("fragment") != null || matcher.group("anchor") != null) && matcher.group("finalPath") != null) { namespace.append(matcher.group("finalPath")).append("#"); } + return namespace.toString(); } else { throw new
IllegalStateException("No namespace found for the given IRI: " + iri + "."); diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java index 6057b2026..29f5b11fb 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java @@ -9,6 +9,7 @@ import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser; import fr.inria.corese.core.next.impl.io.parser.nquads.NQuadsParser; import fr.inria.corese.core.next.impl.io.parser.ntriples.NTriplesParser; +import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFaParser; import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser; import fr.inria.corese.core.next.impl.io.parser.turtle.TurtleParser; import fr.inria.corese.core.next.impl.io.parser.trig.TriGParser; @@ -52,6 +53,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new TriGParser(model, factory, config); } else if(format == RDFFormat.RDFC_1_0) { return new NQuadsParser(model, factory, config); + } else if (format == RDFFormat.RDFa) { + return new RDFaParser(model, factory, config); } throw new IllegalArgumentException("Unsupported format: " + format); } @@ -77,6 +80,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac return new RDFXMLParser(model, factory); } else if (format == RDFFormat.TRIG) { return new TriGParser(model, factory); + } else if (format == RDFFormat.RDFa) { + return new RDFaParser(model, factory); } throw new IllegalArgumentException("Unsupported format: " + format); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java new file mode 100644 index 000000000..088cf6c6a --- /dev/null +++ 
b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaEvaluationContext.java @@ -0,0 +1,176 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; + +import java.util.*; + +/** + * This class is to be used during the evaluation of an HTML file to generate triples during the DOM traversal. + * @see RDFa recommendation + */ +public class RDFaEvaluationContext { + + /** + * This will usually be the URL of the document being processed, but it could be some other URL, set by some other mechanism, such as the XHTML base element. The important thing is that it establishes a URL against which relative paths can be resolved. + */ + private IRI baseIri; + + /** + * The initial value will be the same as the initial value of [base], but it will usually change during the course of processing. + */ + private Resource parentSubjectResource ; + + /** + * In some situations the object of a statement becomes the subject of any nested statements, and this property is used to convey this value. Note that this value may be a bnode, since in some situations a number of nested statements are grouped together on one bnode. This means that the bnode must be set in the containing statement and passed down, and this property is used to convey this value. + */ + private Resource parentObjectResource = null; + + /** + * An index of locally defined IRI prefixes + */ + private Map uriMappings = new HashMap<>(); + + /** + * Set of statements that are still being built (their subject or object is not known yet). + */ + private Set incompleteStatement = new HashSet<>(); + + /** + * The language of the document. Note that there is no default language.
+ */ + private String language = null; + + public RDFaEvaluationContext(IRI baseIri) { + this.baseIri = baseIri; + this.parentSubjectResource = baseIri; + } + + public RDFaEvaluationContext(IRI baseIri, IRI parentSubjectResource) { + this.baseIri = baseIri; + this.parentSubjectResource = parentSubjectResource; + } + + public RDFaEvaluationContext(RDFaEvaluationContext context) { + this.baseIri = context.baseIri; + this.parentSubjectResource = context.parentSubjectResource; + this.parentObjectResource = context.parentObjectResource; + this.uriMappings = new HashMap<>(context.uriMappings); + this.incompleteStatement = new HashSet<>(context.incompleteStatement); + this.language = context.language; + } + + public IRI baseIri() { + return baseIri; + } + + public RDFaEvaluationContext baseIri(IRI baseIri) { + this.baseIri = baseIri; + return this; + } + + public RDFaEvaluationContext incompleteStatements(Set incompleteStatement) { + this.incompleteStatement = new HashSet<>(incompleteStatement); + return this; + } + + public Iterator getIncompleteStatementIterator() { + return this.incompleteStatement.iterator(); + } + + public RDFaEvaluationContext addStatementWithoutSubject(IRI property, Value object) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setObject(object); + this.incompleteStatement.add(newStatement); + return this; + } + + public RDFaEvaluationContext addStatementWithoutObject(Resource subject, IRI property) { + RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property); + newStatement.setSubject(subject); + this.incompleteStatement.add(newStatement); + return this; + } + + public void clearIncompleteStatements() { + this.incompleteStatement.clear(); + } + + public Resource parentSubjectResource() { + return parentSubjectResource; + } + + public RDFaEvaluationContext parentSubjectResource(Resource parentSubjectResource) { + this.parentSubjectResource = parentSubjectResource; + return this; + 
} + + public Resource parentObjectResource() { + return parentObjectResource; + } + + public RDFaEvaluationContext parentObjectResource(Resource parentObjectResource) { + this.parentObjectResource = parentObjectResource; + return this; + } + + public Map uriMappings() { + return uriMappings; + } + + public RDFaEvaluationContext uriMappings(Map uriMappings) { + this.uriMappings = uriMappings; + return this; + } + + public boolean hasUriMapping(String prefix) { + return this.uriMappings.containsKey(prefix); + } + + /** + * @param prefix the prefix WITHOUT ":" + * @return the IRI associated to the prefix in this context + */ + public IRI uriMapping(String prefix) { + return this.uriMappings.get(prefix); + } + + public void addUriMapping(String prefix, IRI prefixIri) { + this.uriMappings.put(prefix, prefixIri); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("BaseURI: ").append(this.baseIri.stringValue()).append(" "); + sb.append("Mappings: ["); + this.uriMappings.forEach((key, value) -> sb.append("(").append(key).append(", ").append(value.stringValue()).append(") ")); + sb.append("] "); + if(this.parentSubjectResource != null) { + sb.append("Subject:").append(this.parentSubjectResource.stringValue()).append(" "); + } else { + sb.append("Subject:").append((Object) null).append(" "); + } + if(this.parentObjectResource != null) { + sb.append("Object: ").append(this.parentObjectResource.stringValue()).append(" "); + } else { + sb.append("Object: ").append((Object) null).append(" "); + } + if(! 
this.incompleteStatement.isEmpty()) { + sb.append(this.incompleteStatement.size()).append(" incomplete statements."); + } + + return sb.toString(); + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java new file mode 100644 index 000000000..9b275cdd3 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java @@ -0,0 +1,411 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser; +import fr.inria.corese.core.next.api.io.IOOptions; +import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; +import fr.inria.corese.core.next.impl.common.util.IRIUtils; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.exception.ParsingErrorException; +import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; +import org.apache.commons.io.input.ReaderInputStream; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Attribute; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.util.*; + +/** + * RDFa parser. This parser will load the RDF data stored as RDFa in an HTML page. Its inner implementation is based on the jsoup library. It loads the html page as DOM and process it following the recommended algorithm in the RDFa recommendation. 
+ */ +public class RDFaParser extends AbstractRDFParser { + + private static final String REL_ATTR = "rel"; + private static final String REV_ATTR = "rev"; + private static final String CONTENT_ATTR = "content"; + private static final String HREF_ATTR = "href"; + private static final String SRC_ATTR = "src"; + private static final String ABOUT_ATTR = "about"; + private static final String PROPERTY_ATTR = "property"; + private static final String RESOURCE_ATTR = "resource"; + private static final String DATATYPE_ATTR = "datatype"; + private static final String TYPEOF_ATTR = "typeof"; + private static final String LANG_ATTR = "xml:lang"; + + private static final String XMLNS_PREFIX = "xmlns"; + + public RDFaParser(Model model, ValueFactory factory) { + this(model, factory, new RDFaParserOptions.Builder().build()); + } + + public RDFaParser(Model model, ValueFactory factory, IOOptions config) { + super(model, factory, config); + } + + @Override + public RDFFormat getRDFFormat() { + return RDFFormat.RDFa; + } + + @Override + public void parse(InputStream in) { + if(getConfig() instanceof BaseIRIOptions baseIRIOptions) { + String baseIRI = baseIRIOptions.getBaseIRI(); + parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseIRI); + } else { + parse(new InputStreamReader(in, StandardCharsets.UTF_8), ParserConstants.getDefaultBaseURI()); // no base IRI configured: use the default one instead of null, which cannot be turned into an IRI + } + } + + @Override + public void parse(InputStream in, String baseURIString) { + try { + Document document = Jsoup.parse(in, null, baseURIString); + + IRI baseIri = getValueFactory().createIRI(baseURIString); + processDocument(document, baseIri); + } catch (Exception e) { + throw new ParsingErrorException("Error during parsing of HTML document", e); + } + } + + /** + * Intermediary function to configure the processing of a document using some basic HTML traversal to determine if a baseIri has been defined in the document. + * If the baseIri in argument is the Corese default base IRI, the value stored in the document is used instead.
+ * + * @param document Jsoup HTML document to be processed + * @param baseIri the base IRI to use; if it equals the default base IRI, the href of the last base element found in the document (if any) is used instead + */ + private void processDocument(Document document, IRI baseIri) { + // If the base Iri in argument is not the default baseIri, then we take it, else we use the one in the document + if (baseIri.stringValue().equals(ParserConstants.getDefaultBaseURI())) { + // Looking for the base node in the document + IRI baseIriFromXml = baseIri; + Iterator baseElementIterator = document.stream().filter(element -> element.nameIs("base")).iterator(); + while (baseElementIterator.hasNext()) { + Element baseElement = baseElementIterator.next(); + Attribute baseElementHrefAttribute = baseElement.attribute("href"); + if (baseElementHrefAttribute != null) { + String baseIriString = baseElementHrefAttribute.getValue(); + baseIriFromXml = getValueFactory().createIRI(baseIriString); + } + } + ; + + baseIri = this.getValueFactory().createIRI(baseIriFromXml.stringValue()); + } + + for (Element element : document.children()) { + processElement(element, new RDFaEvaluationContext(baseIri)); + } + } + + /** + * Processes one element: adds the statements carried by its RDFa attributes to the model, then processes its child elements when the recursive flag is true. + * @param element Current element + * @param context Active context + * @param recursive Processing generally continues recursively through the entire tree of elements available. However, if an author indicates that some branch of the tree should be treated as an XML literal, no further processing should take place on that branch, and setting this flag to false would have that effect. + * @param skipElement Flag that indicates whether the [current element] can safely be ignored since it has no relevant RDFa attributes. Note that descendant elements will still be processed. + * @see RDFa processing in details + */ + private void processElement(Element element, RDFaEvaluationContext context, boolean recursive, boolean skipElement) { + + // 1.
First, the local values are initialized + Resource newSubject = null; + Resource currentObject = null; + Literal currentObjectLiteral = null; + Map currentMappings = context.uriMappings(); + Set incompleteStatementSet = new HashSet<>(); + String language = context.getLanguage(); + + // 2. Next the [current element] is parsed for [URI mapping]s and these are added to the [local list of URI mappings]. Note that a [URI mapping] will simply overwrite any current mapping in the list that has the same name; + // Looking for namespace declarations + // Namespace declaration are done using the XML namespace declaration mechanism, that can be seen as an attributes prefixed by "xmlns" and looks like this: "xmlns:prefix=namespace" + Iterator itAttribute = element.attributes().iterator(); + while(itAttribute.hasNext()) { + Attribute attribute = itAttribute.next(); + if (attribute.getKey().startsWith(XMLNS_PREFIX)) { + String prefixName = attribute.localName(); + IRI prefixNamespace = getValueFactory().createIRI(attribute.getValue(), ""); + context.addUriMapping(prefixName, prefixNamespace); + } + } + + // 3. The [current element] is also parsed for any language information, and if present, [current language] is set accordingly; + if (element.attribute(LANG_ATTR) != null) { + String langString = element.attr(LANG_ATTR); + language = langString; + } + + // 4. If the [current element] contains no @rel or @rev attribute, then the next step is to establish a value for [new subject]. 
Any of the attributes that can carry a resource can set [new subject]; + if(element.attribute(REL_ATTR) == null && element.attribute(REV_ATTR) == null) { + // [new subject] is set to the URI obtained from the first match from the following rules: + if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + } + } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. + Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + } + } else if (element.attribute(RESOURCE_ATTR) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + } + } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. 
+ Optional newSubjectResource = getResourceFromElementAttribute(element, HREF_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + } + } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; + newSubject = context.baseIri(); + } else if (element.attribute(TYPEOF_ATTR) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. + newSubject = this.getValueFactory().createBNode(); + } else if (context.parentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to the value of [parent object]. Additionally, if @property is not present then the [skip element] flag is set to 'true'; + newSubject = context.parentObjectResource(); + if(element.attribute(PROPERTY_ATTR) == null) { + skipElement = true; + } + } + } else { + // [new subject] is set to the URI obtained from the first match from the following rules: + if (element.attribute(ABOUT_ATTR) != null) { // by using the URI from @about, if present, obtained according to the section on CURIE and URI Processing; + Optional newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + } + } else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing. 
+ Optional newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context); + if (newSubjectResource.isPresent()) { + newSubject = newSubjectResource.get(); + } + } else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above; + newSubject = context.baseIri(); + } else if (element.attribute(TYPEOF_ATTR) != null) { // if @typeof is present, obtained according to the section on CURIE and URI Processing, then [new subject] is set to be a newly created [bnode]. + newSubject = this.getValueFactory().createBNode(); + } else if(context.parentObjectResource() != null) { // otherwise, if [parent object] is present, [new subject] is set to that. + newSubject = context.parentObjectResource(); + } + + // Then the [current object resource] is set to the URI obtained from the first match from the following rules: + if (element.attribute(RESOURCE_ATTR) != null) { // by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing; + Optional newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context); + if (newObjectResource.isPresent()) { + currentObject = newObjectResource.get(); + } + } else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing. + Optional newObjectResource = getResourceFromElementAttribute(element, HREF_ATTR, context); // read @href, the attribute tested by this branch (@resource is absent here) + if (newObjectResource.isPresent()) { + currentObject = newObjectResource.get(); + } + } + } + + // 6. If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values; + if(newSubject != null) { + if(element.attribute(TYPEOF_ATTR) != null) { // One or more 'types' for the [new subject] can be set by using @typeof.
If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows: + Optional typeIri = getResourceFromElementAttribute(element, TYPEOF_ATTR, context); + if (typeIri.isPresent()) { + Statement stat = this.getValueFactory().createStatement(newSubject, RDF.type.getIRI(), typeIri.get()); + this.getModel().add(stat); + } else { + throw new ParsingErrorException("Typeof statement uses unknown type " + element.attr(TYPEOF_ATTR)); + } + } + } + + // 7. If in any of the previous steps a [current object resource] was set to a non-null value, it is now used to generate triples: + if (currentObject != null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { + if(element.attribute(REL_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI) propertyOpt.get(); + this.getModel().add(newSubject, property, currentObject); + } + } + if(element.attribute(REV_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REV_ATTR, context); // the predicate of the reversed triple comes from @rev, not @rel + if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { + IRI property = (IRI) propertyOpt.get(); + this.getModel().add(currentObject, property, newSubject); + } + } + } + + // 8. If however [current object resource] was set to null, but there are predicates present, then they must be stored as [incomplete triple]s, pending the discovery of a subject that can be used as the object.
Also, [current object resource] should be set to a newly created [bnode]; + if (currentObject == null && (element.attribute(REL_ATTR) != null || element.attribute(REV_ATTR) != null)) { + currentObject = getValueFactory().createBNode(); + if(element.attribute(REL_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REL_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI) propertyOpt.get(); + RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); + incompleteStatementSet.add(statement); + } + } + if(element.attribute(REV_ATTR) != null) { + Optional propertyOpt = getResourceFromElementAttribute(element, REV_ATTR, context); // the predicate of the backward incomplete triple comes from @rev, not @rel + if(propertyOpt.isPresent() && propertyOpt.get().isIRI() && currentObject.isResource()) { + IRI property = (IRI) propertyOpt.get(); + RDFaIncompleteStatement statement = new RDFaIncompleteStatement(property); + statement.setBackward(); + incompleteStatementSet.add(statement); + } + } + } + + // 9. The next step of the iteration is to establish any [current object literal]; + if(element.attribute(PROPERTY_ATTR) != null) { // Predicates for the [current object literal] can be set by using @property. If present, one or more URIs are obtained according to the section on CURIE and URI Processing, and then the actual literal value is obtained as follows: + Optional propertyOpt = getResourceFromElementAttribute(element, PROPERTY_ATTR, context); + if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) { + IRI property = (IRI)propertyOpt.get(); + + IRI datatype = null; + if(element.attribute(DATATYPE_ATTR) != null && ! element.attr(DATATYPE_ATTR).isEmpty()) { + Optional datatypeOpt = getResourceFromElementAttribute(element, DATATYPE_ATTR, context); + if(datatypeOpt.isPresent() && datatypeOpt.get().isIRI() && !
datatypeOpt.get().equals(RDF.XMLLiteral.getIRI())) { + datatype = (IRI) datatypeOpt.get(); + } + } + String value = element.text(); + if(element.attribute(CONTENT_ATTR) != null) { + value = element.attr(CONTENT_ATTR); + } + if(datatype != null) { + currentObjectLiteral = this.getValueFactory().createLiteral(value, datatype); + recursive = false; + } else if(language != null) { + currentObjectLiteral = this.getValueFactory().createLiteral(value, language); + } else { + currentObjectLiteral = this.getValueFactory().createLiteral(value); + } + + this.getModel().add(newSubject, property, currentObjectLiteral); + } + } + + // 10. If the [skip element] flag is 'false', and [new subject] was set to a non-null value, then any [incomplete triple]s within the current context should be completed: + Iterator itStat = context.getIncompleteStatementIterator(); + while(!skipElement && newSubject != null && itStat.hasNext()) { // nothing is completed on a skipped element or when no subject was established + RDFaIncompleteStatement statement = itStat.next(); + if(statement.isForward()) { + this.getModel().add(context.parentSubjectResource(), statement.getPredicate(), newSubject); + } else if (statement.isBackward()){ + this.getModel().add(newSubject, statement.getPredicate(), context.parentSubjectResource()); + } + } + + // 11.
If the [recurse] flag is 'true', all elements that are children of the [current element] are processed using the rules described here, using a new [evaluation context], + if(recursive) { + if(skipElement) { + RDFaEvaluationContext newContext = new RDFaEvaluationContext(context); + newContext.setLanguage(language); + newContext.uriMappings(currentMappings); + context = newContext; + } else { + context = new RDFaEvaluationContext(context.baseIri()); + if(newSubject != null) { + context.parentSubjectResource(newSubject).parentObjectResource(newSubject); // [new subject] becomes the parent subject and the default parent object of the children + } + if(currentObject != null) { + context.parentObjectResource(currentObject); + } + context.uriMappings(currentMappings); + context.incompleteStatements(incompleteStatementSet); + context.setLanguage(language); + } + + for (Element child : element.children()) { + processElement(child, context, recursive, false); // [skip element] is local to each element and starts at 'false' + } + } + } + + /** + * Overload that initializes the recursive and skipElement flags to their initial values (true and false) for processing + * + * @param element HTML element + * @param context current evaluation context + */ + private void processElement(Element element, RDFaEvaluationContext context) { + processElement(element, context, true, false); + } + + @Override + public void parse(Reader reader, String baseURI) { + InputStream inputStream = new ReaderInputStream(reader, StandardCharsets.UTF_8); + parse(inputStream , baseURI); + } + + /** + * Resolves the string representation of a resource found in attributes of an element, be it an IRI, CURIE or relative URI + * + * @param stringResource the resource as stored in the attribute of the HTML element + * @param context the context of the element evaluation + * @return the full IRI if it is a relative IRI, full IRI or CURIE, nothing otherwise + */ + private Optional resolveStringResource(String stringResource, RDFaEvaluationContext context) { + String resultString = stringResource; + if (resultString.startsWith("[") && resultString.endsWith("]")) { + resultString =
resultString.replaceFirst("\\[", ""); + resultString = resultString.replaceFirst("]", ""); + } + + + if (stringUriIsCURIE(resultString)) { // CURIE + int colonIndex = resultString.indexOf(":"); + String prefixString = resultString.substring(0, colonIndex); + String localNameString = resultString.substring(colonIndex + 1); + // Basic resolution following https://www.w3.org/TR/rdfa-syntax/#s_convertingcurietouri + if (context.hasUriMapping(prefixString)) { + IRI namespaceIRI = context.uriMapping(prefixString); + + return Optional.of(this.getValueFactory().createIRI(namespaceIRI.stringValue(), localNameString)); + } else if (prefixString.isEmpty()) { // CURIE is relative to the base URI + return Optional.of(this.getValueFactory().createIRI(context.baseIri().stringValue(), localNameString)); + } else { + throw new ParsingErrorException("CURIE " + stringResource + " uses unknown prefix"); + } + } else if (IRIUtils.isStandardIRI(resultString)) { // Full IRI + return Optional.of(this.getValueFactory().createIRI(resultString)); + + } else if (resultString.startsWith("_:")) { // Blank Node + int colonIndex = resultString.indexOf(":"); + String localNameString = resultString.substring(colonIndex + 1); + return Optional.of(this.getValueFactory().createBNode(localNameString)); + } else if (IRIUtils.isStandardIRI(context.baseIri().stringValue() + resultString)) { + String concatenatedRelativeUri = context.baseIri().stringValue() + resultString; + return Optional.of(getValueFactory().createIRI(concatenatedRelativeUri)); + } + return Optional.empty(); + } + + /** + * Tests whether a string is treated as a CURIE: it contains a colon, does not contain {@code ://}, and does not start with {@code _:} or {@code [_:} (blank node) + * + * @param stringIri the attribute value to test + * @return true if the value is treated as a CURIE, false otherwise + */ + private boolean stringUriIsCURIE(String stringIri) { + int colonIndex = stringIri.indexOf(":"); + return colonIndex > -1 && !stringIri.contains("://") && !stringIri.startsWith("_:") && !stringIri.startsWith("[_:"); + } + + private Optional getResourceFromElementAttribute(Element element, String attributeName, 
RDFaEvaluationContext context) { + if (element.attribute(attributeName) != null) { // resolve the value of the requested attribute, if present, according to the section on CURIE and URI Processing + String newSubjectString = element.attr(attributeName); + return resolveStringResource(newSubjectString, context); + } + return Optional.empty(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java new file mode 100644 index 000000000..6ad4ae52f --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserOptions.java @@ -0,0 +1,46 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.base.io.AbstractIOOptions; +import fr.inria.corese.core.next.api.io.common.BaseIRIOptions; +import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants; + +/** + * Options for parsing RDFa HTML documents; the base IRI is copied from the builder at construction + */ +public class RDFaParserOptions extends AbstractIOOptions implements BaseIRIOptions { + + private final RDFaParserOptions.Builder builder; + private final String baseIRI; + + protected RDFaParserOptions(RDFaParserOptions.Builder builder) { + this.builder = builder; + this.baseIRI = this.builder.baseIRI; + } + + @Override + public String getBaseIRI() { + return this.baseIRI; + } + + public static class Builder extends AbstractIOOptions.Builder { + + protected String baseIRI = ParserConstants.getDefaultBaseURI(); + + @Override + public RDFaParserOptions build() { + return new RDFaParserOptions(this); + } + + /** + * Sets the base IRI used for relative IRI processing + * + * @param baseIRI the base IRI, as a string + * @return this builder, to allow call chaining + */ + public RDFaParserOptions.Builder baseIRI(String baseIRI) { + this.baseIRI = baseIRI; + return this; + } + + } +} \ No newline at end of file diff --git 
a/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java new file mode 100644 index 000000000..0bc6f755d --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/model/RDFaIncompleteStatement.java @@ -0,0 +1,129 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa.model; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Value; + +/** + * Represents a triple still under construction while chaining elements in an RDFa document. NOTE(review): hashCode() is overridden but equals() is not, so hash-based collections still compare instances by identity; confirm this is intended. + */ +public class RDFaIncompleteStatement { + + private Resource subject = null; + private IRI predicate = null; + private Value object = null; + private Direction direction = Direction.FORWARD; + + public enum Direction { + FORWARD, + BACKWARD + } + + private RDFaIncompleteStatement() { + + } + + public RDFaIncompleteStatement(IRI predicate) { + this.predicate = predicate; + } + + public RDFaIncompleteStatement(IRI predicate, Direction direction) { + this.predicate = predicate; + this.direction = direction; + } + + public boolean isForward() { + return this.direction == Direction.FORWARD; + } + + public boolean isBackward() { + return this.direction == Direction.BACKWARD; + } + + public Direction getDirection() { + return this.direction; + } + + public void setForward() { + this.direction = Direction.FORWARD; + } + + public void setBackward() { + this.direction = Direction.BACKWARD; + } + + public void setDirection(Direction direction) { + this.direction = direction; + } + + public Resource getSubject() { + return subject; + } + + public void setSubject(Resource subject) { + this.subject = subject; + } + + public IRI getPredicate() { + return predicate; + } + + public void setPredicate(IRI predicate) { + this.predicate = predicate; + } + + public Value getObject() { + return 
object; + } + + public void setObject(Value object) { + this.object = object; + } + + public boolean hasSubject() { + return this.getSubject() != null; + } + + public boolean hasPredicate() { + return this.getPredicate() != null; + } + + public boolean hasObject() { + return this.getObject() != null; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + if(this.hasSubject()) { + sb.append(this.getSubject().toString()); + } else { + sb.append("?"); + } + sb.append(" "); + + if(this.hasPredicate()) { + sb.append(this.getPredicate().toString()); + } else { + sb.append("?"); + } + sb.append(" "); + + if(this.hasObject()) { + sb.append(this.getObject().toString()); + } else { + sb.append("?"); + } + + return sb.toString(); + } + + @Override + public int hashCode() { + int hash = 7; + hash = 31 * hash + (getSubject() == null ? 0 : getSubject().hashCode()); + hash = 31 * hash + (getPredicate() == null ? 0 : getPredicate().hashCode()); + hash = 31 * hash + (getObject() == null ? 0 : getObject().hashCode()); + return hash; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java b/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java index e91c7c009..def34c0e7 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java @@ -148,7 +148,6 @@ public boolean add(Resource subject, IRI predicate, Value object, Resource... co @Override public boolean contains(Resource subject, IRI predicate, Value object, Resource... 
contexts) { - Node subjectNode = converter.toCoreseNode(subject); Node predicateNode = converter.toCoreseNode(predicate); Node objectNode = converter.toCoreseNode(object); diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java index 14eef1d00..bf1d26be4 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/AbstractCoreseNumber.java @@ -3,6 +3,7 @@ import fr.inria.corese.core.kgram.api.core.Node; import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.model.literal.AbstractLiteral; +import fr.inria.corese.core.next.api.base.model.literal.AbstractNumber; import fr.inria.corese.core.sparql.api.IDatatype; import fr.inria.corese.core.sparql.datatype.CoreseNumber; @@ -12,7 +13,7 @@ /** * Super class for all the numeric based literal in the XSD datatype hierarchy. 
*/ -public abstract class AbstractCoreseNumber extends AbstractLiteral implements CoreseDatatypeAdapter { +public abstract class AbstractCoreseNumber extends AbstractNumber implements CoreseDatatypeAdapter { protected final CoreseNumber coreseObject; @@ -77,8 +78,8 @@ public String stringValue() { @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof AbstractCoreseNumber)) return false; - AbstractCoreseNumber that = (AbstractCoreseNumber) o; + if (!(o instanceof AbstractNumber)) return false; + if (!(o instanceof AbstractCoreseNumber that)) return super.equals(o); return this.coreseObject.equals(that.coreseObject); } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java index 5ccbad9c4..06968d08c 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDecimal.java @@ -2,6 +2,7 @@ import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.model.literal.AbstractLiteral; +import fr.inria.corese.core.next.api.base.model.literal.AbstractNumber; import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.api.literal.CoreDatatype; import fr.inria.corese.core.next.impl.exception.IncorrectDatatypeException; @@ -156,4 +157,9 @@ public BigInteger integerValue() { public BigDecimal decimalValue() { return BigDecimal.valueOf(this.doubleValue()); } + + @Override + public int compareTo(AbstractNumber abstractNumber) { + return Double.compare(this.doubleValue(), abstractNumber.doubleValue()); // sign-only comparison: casting the difference to int reported values less than 1 apart as equal + } } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java index cad28d27b..f19213b47 100644 --- 
a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseDuration.java @@ -107,6 +107,8 @@ public CoreDatatype getCoreDatatype() { public boolean equals(Object obj) { if (obj instanceof CoreseDuration) { return this.coreseObject.equals(((CoreseDuration) obj).coreseObject); + } else if (obj instanceof AbstractDuration) { + return super.equals(obj); } return false; } diff --git a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java index a63868c27..c108d9e39 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java +++ b/src/main/java/fr/inria/corese/core/next/impl/temp/literal/CoreseInteger.java @@ -2,6 +2,7 @@ import fr.inria.corese.core.next.api.IRI; import fr.inria.corese.core.next.api.base.model.literal.AbstractLiteral; +import fr.inria.corese.core.next.api.base.model.literal.AbstractNumber; import fr.inria.corese.core.next.impl.common.literal.XSD; import fr.inria.corese.core.next.api.literal.CoreDatatype; import fr.inria.corese.core.next.impl.exception.IncorrectDatatypeException; @@ -154,4 +155,9 @@ public BigInteger integerValue() { public BigDecimal decimalValue() { return BigDecimal.valueOf(this.coreseObject.longValue()); } + + @Override + public int compareTo(AbstractNumber abstractNumber) { + return Long.compare(this.longValue(), abstractNumber.longValue()); // sign-only comparison: Math.toIntExact on the difference threw ArithmeticException beyond the int range, and the long subtraction itself could wrap + } } diff --git a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java index e5859edfa..4d4c1bfe3 100644 --- a/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/api/base/io/RDFFormatTest.java @@ -298,7 +298,7 @@ void allFormats() { List allFormats = RDFFormat.all(); assertNotNull(allFormats, "List of all formats should 
not be null"); - assertEquals(6, allFormats.size(), "List should contain 5 predefined formats"); // TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG + assertEquals(8, allFormats.size(), "List should contain 8 predefined formats"); // TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa assertTrue(allFormats.contains(RDFFormat.TURTLE)); assertTrue(allFormats.contains(RDFFormat.NTRIPLES)); @@ -306,6 +306,8 @@ void allFormats() { assertTrue(allFormats.contains(RDFFormat.JSONLD)); assertTrue(allFormats.contains(RDFFormat.RDFXML)); assertTrue(allFormats.contains(RDFFormat.TRIG)); + assertTrue(allFormats.contains(RDFFormat.RDFa)); + assertTrue(allFormats.contains(RDFFormat.RDFC_1_0)); assertThrows(UnsupportedOperationException.class, () -> allFormats.add(RDFFormat.TURTLE), "The list returned by all() should be unmodifiable"); diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java index e7620f5ed..1c4aff026 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/common/util/IRIUtilsTest.java @@ -4,6 +4,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import fr.inria.corese.core.next.impl.common.literal.XSD; import org.junit.jupiter.api.Test; public class IRIUtilsTest { @@ -39,6 +40,7 @@ public void guessNamespaceTest() { assertEquals("https://www.syuno-pit.biz/tezukayama-bandai-2.html#", IRIUtils.guessNamespace(uriToHTMLPageWithQueryAndFragment)); assertEquals("https://www.syuno-pit.biz/tezukayama-bandai-2.html#", IRIUtils.guessNamespace(uriToHTMLPageWithFragment)); assertEquals("", IRIUtils.guessNamespace(blankNode)); + assertEquals("http://www.w3.org/2001/XMLSchema#", IRIUtils.guessNamespace("http://www.w3.org/2001/XMLSchema#")); } @Test diff --git 
a/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java new file mode 100644 index 000000000..2458067d9 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParserTest.java @@ -0,0 +1,250 @@ +package fr.inria.corese.core.next.impl.io.parser.rdfa; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.api.base.io.RDFFormat; +import fr.inria.corese.core.next.api.io.parser.RDFParser; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.vocabulary.XSD; +import fr.inria.corese.core.next.impl.io.parser.ParserFactory; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; +import fr.inria.corese.core.next.impl.temp.CoreseModel; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class RDFaParserTest { + + private static final ValueFactory factory = new CoreseAdaptedValueFactory(); + + @Test + public void basicBaseTest() { + String testDataString = """ + + + + + + Test 0001 + + +

This photo was taken by Mark Birbeck.

+ +"""; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + IRI subject = factory.createIRI("http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/photo1.jpg"); + IRI predicate = factory.createIRI("http://purl.org/dc/elements/1.1/creator"); + Literal object = factory.createLiteral("Mark Birbeck"); + + referenceModel.add(subject, predicate, object); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes())); + + assertEquals(RDFFormat.RDFa, parser.getRDFFormat()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + assertTrue(testModel.contains(subject, predicate, object)); + } + + @Test + public void aboutTest() { + String testDataString = """ + + + + +

+ Hello, I'm Pierre. +

+ + """; + + Model testModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI subject = factory.createIRI("http://w3id.org/people/pierre-maillot"); + IRI object = factory.createIRI("http://xmlns.com/foaf/0.1/Person"); + + assertEquals(1, testModel.size()); + assertTrue(testModel.contains(subject, RDF.type.getIRI(), object)); + } + + @Test + public void basicIRItoIRITest() { + String testDataString = """ + + + + + +
+
+
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); + IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); + + Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); + + referenceModel.add(aeBirthPlaceStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + } + + @Test + public void basicIRItoStringTest() { + String testDataString = """ + + + + + +
+ Albert Einstein +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI foafName = factory.createIRI("http://xmlns.com/foaf/0.1/name"); + Literal aeName = factory.createLiteral("Albert Einstein"); + + Statement aeNameStatement = factory.createStatement(albertEinstein, foafName, aeName); + + referenceModel.add(aeNameStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + + } + + @Test + public void basicIRItoTypedLiteralTest() { + String testDataString = """ + + + + + +
+ 1879-03-14 +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI dateOfBirth = factory.createIRI("http://dbpedia.org/property/dateOfBirth"); + Literal aeDateOfBirth = factory.createLiteral("1879-03-14", XSD.xsdDate.getIRI()); + + Statement aeDateOfBirthStatement = factory.createStatement(albertEinstein, dateOfBirth, aeDateOfBirth); + + referenceModel.add(aeDateOfBirthStatement); + + assertEquals(1, testModel.size()); + assertEquals(referenceModel.size(), testModel.size()); + Iterator itStatementRef = referenceModel.iterator(); + Iterator itStatementTest = testModel.iterator(); + while(itStatementRef.hasNext() && itStatementTest.hasNext()) { + Statement statementRef = itStatementRef.next(); + Statement statementTest = itStatementTest.next(); + assertEquals(statementRef.getSubject(), statementTest.getSubject()); + assertEquals(statementRef.getPredicate(), statementTest.getPredicate()); + assertEquals(statementRef.getObject(), statementTest.getObject()); + assertEquals(statementRef.getContext(), statementTest.getContext()); + } + assertTrue(referenceModel.containsAll(testModel)); + } + + @Test + public void basicChainTest() { + String testDataString = """ + + + + + +
+
+ Federal Republic of Germany +
+ + + """; + + Model testModel = new CoreseModel(); + Model referenceModel = new CoreseModel(); + + RDFParser parser = new ParserFactory().createRDFParser(RDFFormat.RDFa, testModel, factory); + + parser.parse(new ByteArrayInputStream(testDataString.getBytes()), "http://not.the.right.base.uri"); + + IRI albertEinstein = factory.createIRI("http://dbpedia.org/resource/Albert_Einstein"); + IRI birthPlace = factory.createIRI("http://dbpedia.org/property/birthPlace"); + IRI germany = factory.createIRI("http://dbpedia.org/resource/Germany"); + IRI conventionalLongName = factory.createIRI("http://dbpedia.org/property/conventionalLongName"); + Literal gerLongName = factory.createLiteral("Federal Republic of Germany"); + + Statement aeBirthPlaceStatement = factory.createStatement(albertEinstein, birthPlace, germany); + Statement germanyNameStatement = factory.createStatement(germany, conventionalLongName, gerLongName); + + referenceModel.add(aeBirthPlaceStatement); + referenceModel.add(germanyNameStatement); + + assertEquals(2, testModel.size()); + assertEquals(referenceModel, testModel); + assertTrue(referenceModel.containsAll(testModel)); + + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java index 9e249cae9..627c351af 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/temp/CoreseAdaptedValueFactoryTest.java @@ -1,12 +1,8 @@ package fr.inria.corese.core.next.impl.temp; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - import java.time.Duration; +import fr.inria.corese.core.next.impl.temp.literal.CoreseDate; import org.junit.jupiter.api.BeforeEach; 
import org.junit.jupiter.api.Test; @@ -19,6 +15,8 @@ import fr.inria.corese.core.next.impl.temp.literal.CoreseLanguageTaggedStringLiteral; import fr.inria.corese.core.next.impl.temp.literal.CoreseTyped; +import static org.junit.jupiter.api.Assertions.*; + public class CoreseAdaptedValueFactoryTest extends ValueFactoryTest { private String stringTestValue; @@ -135,4 +133,16 @@ public void testCreateFOAFURI() { assertNotNull(foaf); assertEquals("http://xmlns.com/foaf/0.1/", foaf.stringValue()); } + + @Test + public void testDateCreation() { + IRI xsdDate = valueFactory.createIRI("http://www.w3.org/2001/XMLSchema#date"); + String literalStringValue = "2025-11-20"; + Literal date = valueFactory.createLiteral(literalStringValue, xsdDate); + + assertNotNull(date); + assertEquals(fr.inria.corese.core.next.impl.common.vocabulary.XSD.xsdDate.getIRI().stringValue(), date.getDatatype().stringValue()); + assertEquals(literalStringValue, date.getLabel()); + assertInstanceOf(fr.inria.corese.core.sparql.datatype.CoreseDate.class, ((CoreseNodeAdapter) date).getCoreseNode()); + } }