Skip to content
Open
6 changes: 6 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ dependencies {
implementation("com.typesafe.akka:akka-stream_2.13:2.6.20") // Akka Streams for reactive streams processing
implementation("com.lightbend.akka:akka-stream-alpakka-xml_2.13:3.0.4") // Alpakka XML for XML processing with Akka Streams

// HTML parsing for RDFa
implementation("org.jsoup:jsoup:1.21.2")

// === Utilities ===
implementation("org.apache.commons:commons-text:1.13.1") // Text manipulation utilities (Commons Text)
implementation("org.json:json:20250517") // JSON processing
Expand All @@ -150,6 +153,9 @@ dependencies {
testRuntimeOnly("org.junit.platform:junit-platform-launcher:1.13.2") // JUnit platform launcher (runtime)
testImplementation("org.mockito:mockito-core:5.18.0") // Mockito core for mocking in tests
testImplementation("org.mockito:mockito-junit-jupiter:5.18.0") // Mockito integration with JUnit Jupiter
testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.0") // Log4j2 core for internal logging
testRuntimeOnly("org.apache.logging.log4j:log4j-slf4j2-impl:2.25.0") // SLF4J binding for Log4j2 (runtime)

}

/////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ public class RDFFormat extends FileFormat {
true,
false);


public static final RDFFormat NTRIPLES = new RDFFormat(
"N-Triples",
List.of("nt"),
Expand Down Expand Up @@ -66,6 +65,13 @@ public class RDFFormat extends FileFormat {
false,
true);

/**
 * RDFa format constant.
 * <p>
 * Declared with the display name {@code "RDFa"}, the file extensions {@code html} and
 * {@code xhtml}, and the MIME types {@code text/html} and {@code application/xhtml+xml}.
 * <p>
 * NOTE(review): the meaning of the two trailing boolean flags ({@code true}, {@code false})
 * is defined by the {@code RDFFormat} constructor, which is not visible here; confirm
 * against its Javadoc.
 */
public static final RDFFormat RDFa = new RDFFormat(
"RDFa",
List.of("html", "xhtml"),
List.of("text/html", "application/xhtml+xml"),
true,
false);

/**
* Constructs a new RDF format.
*
Expand Down Expand Up @@ -152,7 +158,7 @@ public static Optional<RDFFormat> byMimeType(String mimeType) {
* @return An unmodifiable List of all RdfFormat constants.
*/
public static List<RDFFormat> all() {
    // A single return statement: a second, unreachable return does not compile in Java.
    // The list includes RDFC_1_0 and RDFa so that every format the parser factory
    // dispatches on is also discoverable through this method.
    return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG, RDFC_1_0, RDFa);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import fr.inria.corese.core.next.impl.common.util.IRIUtils;
import fr.inria.corese.core.next.impl.exception.IncorrectFormatException;

import java.io.Serial;

/**
* Base class for IRI implementations. Includes base functionality for IRI
* handling.
*/
public abstract class AbstractIRI implements IRI, Comparable<IRI> {

@Serial
private static final long serialVersionUID = -1005683238501772511L;

private final String namespace;
Expand Down Expand Up @@ -44,11 +47,6 @@ protected AbstractIRI(String namespace, String localName) {
this.localName = localName;
}

/**
 * Indicates that this value is an IRI.
 *
 * @return always {@code true}
 */
@Override
public boolean isIRI() {
return true;
}

@Override
public String getNamespace() {
return this.namespace;
Expand Down Expand Up @@ -86,4 +84,9 @@ public int hashCode() {
hash = 31 * hash + (this.localName == null ? 0 : this.localName.hashCode());
return hash;
}

/**
 * Returns the textual form of this IRI.
 *
 * @return the result of {@code stringValue()}
 */
@Override
public String toString() {
    return stringValue();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,8 @@ public boolean containsAll(Collection<?> collection) {
Iterator<?> iterator = collection.iterator();
try {
while (iterator.hasNext()) {
if (!contains(iterator.next())) {
Object currentObject = iterator.next();
if (! (currentObject instanceof Statement) && ! this.contains(currentObject)) {
return false;
}
}
Expand All @@ -344,6 +345,11 @@ public boolean containsAll(Collection<?> collection) {
}
}

/**
 * Compares this model with another object.
 * <p>
 * The other object is considered equal when it is a {@code Model} of the same size
 * that reports containing every element of this model.
 * <p>
 * NOTE(review): confirm that {@code hashCode()} is overridden consistently with this
 * definition; it is not visible from here.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is a {@code Model} with the same size whose
 *         {@code containsAll(this)} returns {@code true}; {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof Model)) {
        return false;
    }
    Model other = (Model) o;
    // Size is compared first so the containment check only runs on same-sized models.
    return this.size() == other.size() && other.containsAll(this);
}

@Override
public boolean addAll(Collection<? extends V> collection) {
boolean modified = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,26 @@ public TemporalAmount temporalAmountValue() {
public XMLGregorianCalendar calendarValue() {
throw new IncorrectOperationException("Cannot convert to XML calendar");
}

/**
 * Checks whether this temporal literal is equal to another object.
 * <p>
 * Equality is based on the lexical label and the datatype, not on {@code compareTo}:
 * the other object must be a {@link Literal} whose label equals this literal's label
 * and whose datatype equals this literal's datatype.
 * <p>
 * NOTE(review): confirm that {@code hashCode()} is derived from the same two
 * components; it is not visible from here.
 *
 * @param obj the object to compare with
 * @return {@code true} if {@code obj} is this instance, or a {@link Literal} with an
 *         equal label and an equal datatype; {@code false} otherwise
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj instanceof Literal) {
        Literal other = (Literal) obj;
        return other.getLabel().equals(this.getLabel())
                && other.getDatatype().equals(this.datatype);
    }
    return false;
}

/**
 * Returns the textual form of this literal.
 *
 * @return the result of {@code stringValue()}
 */
@Override
public String toString() {
    return stringValue();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
*/
public class IRIUtils {

private static final Pattern IRI_PATTERN = Pattern.compile("^(?<namespace>(?<protocol>[\\w\\-]+):(?<dblSlashes>\\/\\/)?(?<domain>([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?<path>\\/([\\w\\-\\._\\:]+\\/)*)(?<finalPath>[\\w\\-\\._\\:]+)?(?<query>\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?<fragment>([\\w\\-_]+))?)?$");
private static final Pattern IRI_PATTERN = Pattern.compile("^(?<namespace>(?<protocol>[\\w\\-]+):(?<dblSlashes>\\/\\/)?(?<domain>([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?<path>\\/([\\w\\-\\._\\:]+\\/)*)(?<finalPath>[\\w\\-\\._\\:]+)?(?<query>\\?[\\w\\-_\\:\\?\\=]+)?(?<anchor>(\\#))?(?<fragment>([\\w\\-_]+))?)?$");
private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?");
private static final int MAX_IRI_LENGTH = 2048;
private static final long REGEX_TIMEOUT_MS = 100;
Expand Down Expand Up @@ -52,9 +52,10 @@ public static String guessNamespace(String iri) {
if(matcher.group("path") != null) {
namespace.append(matcher.group("path"));
}
if(matcher.group("fragment") != null && matcher.group("finalPath") != null) {
if((matcher.group("fragment") != null || matcher.group("anchor") != null) && matcher.group("finalPath") != null) {
namespace.append(matcher.group("finalPath")).append("#");
}

return namespace.toString();
} else {
throw new IllegalStateException("No namespace found for the given IRI: " + iri + ".");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser;
import fr.inria.corese.core.next.impl.io.parser.nquads.NQuadsParser;
import fr.inria.corese.core.next.impl.io.parser.ntriples.NTriplesParser;
import fr.inria.corese.core.next.impl.io.parser.rdfa.RDFaParser;
import fr.inria.corese.core.next.impl.io.parser.rdfxml.RDFXMLParser;
import fr.inria.corese.core.next.impl.io.parser.turtle.TurtleParser;
import fr.inria.corese.core.next.impl.io.parser.trig.TriGParser;
Expand Down Expand Up @@ -52,6 +53,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac
return new TriGParser(model, factory, config);
} else if(format == RDFFormat.RDFC_1_0) {
return new NQuadsParser(model, factory, config);
} else if (format == RDFFormat.RDFa) {
return new RDFaParser(model, factory, config);
}
throw new IllegalArgumentException("Unsupported format: " + format);
}
Expand All @@ -77,6 +80,8 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac
return new RDFXMLParser(model, factory);
} else if (format == RDFFormat.TRIG) {
return new TriGParser(model, factory);
} else if (format == RDFFormat.RDFa) {
return new RDFaParser(model, factory);
}
throw new IllegalArgumentException("Unsupported format: " + format);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package fr.inria.corese.core.next.impl.io.parser.rdfa;

import fr.inria.corese.core.next.api.IRI;
import fr.inria.corese.core.next.api.Resource;
import fr.inria.corese.core.next.api.Value;
import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement;

import java.util.*;

/**
 * Evaluation context used while traversing the DOM of an HTML document to generate RDFa triples.
 * <p>
 * A context groups the base IRI, the current parent subject and parent object, the prefix
 * mappings in scope, the statements that are still being built and the current language.
 * Setters named after their property return {@code this} so that calls can be chained.
 *
 * @see <a href="https://www.w3.org/TR/rdfa-syntax/#sec_5.2.">RDFa recommendation</a>
 */
public class RDFaEvaluationContext {

    /**
     * This will usually be the URL of the document being processed, but it could be some other URL,
     * set by some other mechanism, such as the XHTML base element. The important thing is that it
     * establishes a URL against which relative paths can be resolved.
     */
    private IRI baseIri;

    /**
     * The initial value will be the same as the initial value of [base], but it will usually change
     * during the course of processing.
     */
    private Resource parentSubjectResource;

    /**
     * In some situations the object of a statement becomes the subject of any nested statements, and
     * this property is used to convey this value. Note that this value may be a bnode, since in some
     * situations a number of nested statements are grouped together on one bnode. This means that the
     * bnode must be set in the containing statement and passed down, and this property is used to
     * convey this value.
     */
    private Resource parentObjectResource = null;

    /**
     * An index of locally defined IRI prefixes.
     */
    private Map<String, IRI> uriMappings = new HashMap<>();

    /**
     * Set of statements in the process of being built.
     */
    private Set<RDFaIncompleteStatement> incompleteStatements = new HashSet<>();

    /**
     * The language of the document. Note that there is no default language.
     */
    private String language = null;

    /**
     * Creates a context whose parent subject is the base IRI itself.
     *
     * @param baseIri the base IRI, also used as the initial parent subject
     */
    public RDFaEvaluationContext(IRI baseIri) {
        this.baseIri = baseIri;
        this.parentSubjectResource = baseIri;
    }

    /**
     * Creates a context with an explicit parent subject.
     *
     * @param baseIri               the base IRI
     * @param parentSubjectResource the initial parent subject
     */
    public RDFaEvaluationContext(IRI baseIri, IRI parentSubjectResource) {
        this.baseIri = baseIri;
        this.parentSubjectResource = parentSubjectResource;
    }

    /**
     * Creates a copy of another context.
     * <p>
     * The prefix mappings and the set of incomplete statements are copied into new collections,
     * so adding to or removing from the copy does not affect {@code context}. The elements
     * themselves are shared between both contexts.
     *
     * @param context the context to copy
     */
    public RDFaEvaluationContext(RDFaEvaluationContext context) {
        this.baseIri = context.baseIri;
        this.parentSubjectResource = context.parentSubjectResource;
        this.parentObjectResource = context.parentObjectResource;
        this.uriMappings = new HashMap<>(context.uriMappings);
        this.incompleteStatements = new HashSet<>(context.incompleteStatements);
        this.language = context.language;
    }

    /**
     * @return the base IRI of this context
     */
    public IRI baseIri() {
        return baseIri;
    }

    /**
     * Sets the base IRI.
     *
     * @param baseIri the new base IRI
     * @return this context
     */
    public RDFaEvaluationContext baseIri(IRI baseIri) {
        this.baseIri = baseIri;
        return this;
    }

    /**
     * Replaces the incomplete statements with a copy of the given set.
     *
     * @param incompleteStatement the statements to copy into this context
     * @return this context
     */
    public RDFaEvaluationContext incompleteStatements(Set<RDFaIncompleteStatement> incompleteStatement) {
        this.incompleteStatements = new HashSet<>(incompleteStatement);
        return this;
    }

    /**
     * @return an iterator over the incomplete statements held by this context
     */
    public Iterator<RDFaIncompleteStatement> getIncompleteStatementIterator() {
        return this.incompleteStatements.iterator();
    }

    /**
     * Registers an incomplete statement that has a property and an object but no subject yet.
     *
     * @param property the property of the statement
     * @param object   the object of the statement
     * @return this context
     */
    public RDFaEvaluationContext addStatementWithoutSubject(IRI property, Value object) {
        RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property);
        newStatement.setObject(object);
        this.incompleteStatements.add(newStatement);
        return this;
    }

    /**
     * Registers an incomplete statement that has a subject and a property but no object yet.
     *
     * @param subject  the subject of the statement
     * @param property the property of the statement
     * @return this context
     */
    public RDFaEvaluationContext addStatementWithoutObject(Resource subject, IRI property) {
        RDFaIncompleteStatement newStatement = new RDFaIncompleteStatement(property);
        newStatement.setSubject(subject);
        this.incompleteStatements.add(newStatement);
        return this;
    }

    /**
     * Removes every incomplete statement from this context.
     */
    public void clearIncompleteStatements() {
        this.incompleteStatements.clear();
    }

    /**
     * @return the current parent subject, possibly {@code null}
     */
    public Resource parentSubjectResource() {
        return parentSubjectResource;
    }

    /**
     * Sets the parent subject.
     *
     * @param parentSubjectResource the new parent subject
     * @return this context
     */
    public RDFaEvaluationContext parentSubjectResource(Resource parentSubjectResource) {
        this.parentSubjectResource = parentSubjectResource;
        return this;
    }

    /**
     * @return the current parent object, possibly {@code null}
     */
    public Resource parentObjectResource() {
        return parentObjectResource;
    }

    /**
     * Sets the parent object.
     *
     * @param parentObjectResource the new parent object
     * @return this context
     */
    public RDFaEvaluationContext parentObjectResource(Resource parentObjectResource) {
        this.parentObjectResource = parentObjectResource;
        return this;
    }

    /**
     * @return the prefix mappings held by this context (the internal map, not a copy)
     */
    public Map<String, IRI> uriMappings() {
        return uriMappings;
    }

    /**
     * Replaces the prefix mappings with a copy of the given map.
     * <p>
     * The map is copied, as {@link #incompleteStatements(Set)} and the copy constructor do, so
     * that later calls to {@link #addUriMapping(String, IRI)} neither modify the caller's map nor
     * fail when the caller passed an unmodifiable one.
     *
     * @param uriMappings the mappings to copy; {@code null} is treated as no mappings
     * @return this context
     */
    public RDFaEvaluationContext uriMappings(Map<String, IRI> uriMappings) {
        this.uriMappings = uriMappings == null ? new HashMap<>() : new HashMap<>(uriMappings);
        return this;
    }

    /**
     * @param prefix the prefix WITHOUT ":"
     * @return {@code true} if a mapping exists for the prefix in this context
     */
    public boolean hasUriMapping(String prefix) {
        return this.uriMappings.containsKey(prefix);
    }

    /**
     * @param prefix the prefix WITHOUT ":"
     * @return the IRI associated to the prefix in this context, or {@code null} if there is none
     */
    public IRI uriMapping(String prefix) {
        return this.uriMappings.get(prefix);
    }

    /**
     * Adds or replaces the mapping of a prefix.
     *
     * @param prefix    the prefix
     * @param prefixIri the IRI associated to the prefix
     */
    public void addUriMapping(String prefix, IRI prefixIri) {
        this.uriMappings.put(prefix, prefixIri);
    }

    /**
     * Builds a one-line description of this context: base IRI, prefix mappings, parent subject,
     * parent object and, when there are any, the number of incomplete statements.
     * Missing values are printed as {@code null} instead of raising an exception.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        sb.append("BaseURI: ").append(describe(this.baseIri)).append(" ");
        sb.append("Mappings: [");
        this.uriMappings.forEach((prefix, iri) ->
                sb.append("(").append(prefix).append(", ").append(describe(iri)).append(") "));
        sb.append("] ");
        sb.append("Subject:").append(describe(this.parentSubjectResource)).append(" ");
        sb.append("Object: ").append(describe(this.parentObjectResource)).append(" ");
        if (!this.incompleteStatements.isEmpty()) {
            sb.append(this.incompleteStatements.size()).append(" incomplete statements.");
        }

        return sb.toString();
    }

    /**
     * Null-safe rendering of a resource for {@link #toString()}.
     */
    private static String describe(Resource resource) {
        return resource == null ? "null" : resource.stringValue();
    }

    /**
     * @return the current language, or {@code null} if none is set
     */
    public String getLanguage() {
        return language;
    }

    /**
     * Sets the current language.
     *
     * @param language the language, or {@code null} for none
     */
    public void setLanguage(String language) {
        this.language = language;
    }
}
Loading