diff --git a/src/main/java/fr/inria/corese/core/next/data/impl/common/prefix/PrefixHandler.java b/src/main/java/fr/inria/corese/core/next/data/impl/common/prefix/PrefixHandler.java index 70601ab79..813a03b80 100644 --- a/src/main/java/fr/inria/corese/core/next/data/impl/common/prefix/PrefixHandler.java +++ b/src/main/java/fr/inria/corese/core/next/data/impl/common/prefix/PrefixHandler.java @@ -29,6 +29,13 @@ public class PrefixHandler implements IPrefixHandler, Cloneable { */ private String defaultNamespace; + /** + * Creates a new PrefixHandler. + */ + public PrefixHandler() { + this(false); + } + /** * Creates a new PrefixHandler. * @@ -418,6 +425,21 @@ public String toString() { return sb.toString(); } + @Override + public boolean equals(Object other) { + if(! (other instanceof IPrefixHandler)) { + return false; + } + return Objects.equals(this.getDefaultNamespace(), ((IPrefixHandler) other).getDefaultNamespace()) + && Objects.equals(this.getNamespaceMap(), ((IPrefixHandler) other).getNamespaceMap()) + && Objects.equals(this.getPrefixMap(), ((IPrefixHandler) other).getPrefixMap()); + } + + @Override + public int hashCode() { + return Objects.hash(defaultNamespace, prefixToNamespace, namespaceToPrefix); + } + /** * Simple immutable implementation of Namespace interface. * Used internally to create Namespace objects from prefix-URI pairs. 
diff --git a/src/main/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtils.java b/src/main/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtils.java index fe2abff8f..00a245a59 100644 --- a/src/main/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtils.java +++ b/src/main/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtils.java @@ -1,5 +1,7 @@ package fr.inria.corese.core.next.data.impl.common.util; +import fr.inria.corese.core.next.data.impl.io.parser.util.ParserConstants; + import java.net.URI; import java.net.URISyntaxException; import java.util.Set; @@ -27,6 +29,13 @@ public class IRIUtils { "(?[\\w\\-_]+)?" + // line1 "$" ); + private static final Pattern ABSOLUTE_IRI_PATTERN = Pattern.compile("^(?(" + + "?[\\w\\-]+)" + + ":(\\/\\/)?" + + "(?([\\S\\-\\._\\:]+[\\/\\.\\:\\@\\-])?)+" + + "(?\\?[\\S\\-\\\"\\'_\\:\\?\\=]+)?)" + + "(?[\\S\\-_]+)?" + + "$"); private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?"); private static final int MAX_IRI_LENGTH = 2048; private static final long REGEX_TIMEOUT_MS = 100; @@ -115,6 +124,20 @@ public static String guessLocalName(String iri) { } } + /** + * Detects if an IRI is absolute according to the REGEX given in the recommendation RFC3987 + * @param iri any uri (expecting to be the content between < and > + * @return true if it is compliant with RFC3987. May accept the prefixed for of uri, as there is no way to + * distinguish a prefix from a protocol + */ + public static boolean isAbsoluteIRI(String iri) { + Matcher matcher = matchWithTimeout(ABSOLUTE_IRI_PATTERN, iri); + if (matcher == null || !matcher.matches()) { + return false; + } + return matcher.matches(); + } + /** * Checks if the given string is a valid IRI using a regex pattern extracted from the W3C standards. * @param iriString The string to be checked. 
@@ -260,79 +283,276 @@ public static boolean isInvalidIRICharacter(char c) { }; } + public static String resolveIRIAgainstBase(String baseIri, String relativePath) { + + if (relativePath.isEmpty()) { + return baseIri; + } + + try { + URI baseUri = new URI(baseIri); + String baseScheme = baseUri.getScheme(); + String baseAuthority = baseUri.getAuthority(); + String basePath = baseUri.getPath(); + String baseQuery = baseUri.getQuery(); + + String[] refParts = parseReference(relativePath); + String refScheme = refParts[0]; + String refAuthority = refParts[1]; + String refPath = refParts[2]; + String refQuery = refParts[3]; + String refFragment = refParts[4]; + + String targetScheme, targetAuthority, targetPath, targetQuery, targetFragment; + + // RFC 3986 Section 5.2.2 - Reference Resolution Algorithm + if (refScheme != null) { + targetScheme = refScheme; + targetAuthority = refAuthority; + targetPath = removeDotSegments(refPath); + targetQuery = refQuery; + } else { + if (refAuthority != null) { + targetScheme = baseScheme; + targetAuthority = refAuthority; + targetPath = removeDotSegments(refPath); + targetQuery = refQuery; + } else { + targetScheme = baseScheme; + targetAuthority = baseAuthority; + if (refPath.isEmpty()) { + targetPath = basePath; + targetQuery = refQuery != null ? refQuery : baseQuery; + } else { + if (refPath.startsWith(ParserConstants.SLASH)) { + targetPath = removeDotSegments(refPath); + } else { + targetPath = removeDotSegments(mergePaths(basePath, refPath)); + } + targetQuery = refQuery; + } + } + } + targetFragment = refFragment; + + return buildURI(targetScheme, targetAuthority, targetPath, targetQuery, targetFragment); + + } catch (URISyntaxException e) { + return performSimpleFallback(baseIri, relativePath); + } + } + + /** + * Constructs a URI from its components. 
+ * + * @param scheme URI scheme (e.g., "http", "file") + * @param authority authority component (host, port, userinfo) + * @param path path component + * @param query query component + * @param fragment fragment identifier + * @return normalized URI string + */ + private static String buildURI(String scheme, String authority, String path, String query, String fragment) { + StringBuilder result = new StringBuilder(); + if (scheme != null) { + result.append(scheme).append(ParserConstants.COLON); + } + if (authority != null) { + result.append(ParserConstants.DOUBLE_SLASH).append(authority); + } + if (path != null) { + result.append(path); + } + if (query != null) { + result.append(ParserConstants.QUERY_MARK).append(query); + } + if (fragment != null) { + result.append(ParserConstants.HASH).append(fragment); + } + return normalizeURI(result.toString()); + } + /** - * Returns a human-readable description of a character for error messages. + * Parses a URI reference into its five components. 
* - * @param c the character to describe - * @return human-readable description + * @param ref URI reference to parse + * @return array containing [scheme, authority, path, query, fragment] */ - public static String getCharacterDescription(char c) { - switch (c) { - case 0x00: - return "null character"; - case 0x09: - return "tab"; - case 0x0A: - return "line feed"; - case 0x0D: - return "carriage return"; - case 0x20: - return "space"; - case 0x7F: - return "delete"; - case '<': - return "less than"; - case '>': - return "greater than"; - case '{': - return "left curly bracket"; - case '}': - return "right curly bracket"; - case '\\': - return "backslash"; - case '^': - return "circumflex"; - case '`': - return "grave accent"; - case '|': - return "pipe"; - case '"': - return "quotation mark"; - default: - if (c < 0x20) { - return "control character"; - } else if (c >= 0x80 && c <= 0x9F) { - return "high control character"; + private static String[] parseReference(String ref) { + String[] parts = new String[5]; + String remaining = ref; + + int fragmentIndex = remaining.indexOf('#'); + if (fragmentIndex >= 0) { + parts[4] = remaining.substring(fragmentIndex + 1); + remaining = remaining.substring(0, fragmentIndex); + } + + int queryIndex = remaining.indexOf('?'); + if (queryIndex >= 0) { + parts[3] = remaining.substring(queryIndex + 1); + remaining = remaining.substring(0, queryIndex); + } + + int colonIndex = remaining.indexOf(':'); + if (colonIndex > 0 && isValidScheme(remaining.substring(0, colonIndex))) { + parts[0] = remaining.substring(0, colonIndex); + remaining = remaining.substring(colonIndex + 1); + } + + if (remaining.startsWith(ParserConstants.DOUBLE_SLASH)) { + int authorityEnd = remaining.indexOf('/', 2); + if (authorityEnd < 0) { + authorityEnd = remaining.length(); + } + parts[1] = remaining.substring(2, authorityEnd); + remaining = remaining.substring(authorityEnd); + } + + parts[2] = remaining; + return parts; + } + + /** + * Merges a base path 
with a relative path. + * + * @param basePath base path from base URI + * @param refPath relative path from reference + * @return merged path + */ + private static String mergePaths(String basePath, String refPath) { + if (basePath == null || basePath.isEmpty()) { + return ParserConstants.SLASH + refPath; + } + int lastSlash = basePath.lastIndexOf('/'); + return lastSlash >= 0 ? basePath.substring(0, lastSlash + 1) + refPath : refPath; + } + + /** + * Removes dot segments from a path (RFC 3986 Section 5.2.4). + * Processes ".." and "." segments according to the normalization algorithm. + * + * @param path path to normalize + * @return normalized path without dot segments + */ + private static String removeDotSegments(String path) { + if (path == null || path.isEmpty()) { + return ParserConstants.EMPTY_STRING; + } + + String input = path; + StringBuilder output = new StringBuilder(); + + while (!input.isEmpty()) { + if (input.startsWith(ParserConstants.DOUBLE_DOT + ParserConstants.SLASH)) { + input = input.substring(3); + } else if (input.startsWith(ParserConstants.DOT + ParserConstants.SLASH)) { + input = input.substring(2); + } else if (input.startsWith(ParserConstants.SLASH + ParserConstants.DOT + ParserConstants.SLASH)) { + input = ParserConstants.SLASH + input.substring(3); + } else if (input.equals(ParserConstants.SLASH + ParserConstants.DOT)) { + input = ParserConstants.SLASH; + } else if (input.startsWith(ParserConstants.SLASH + ParserConstants.DOUBLE_DOT + ParserConstants.SLASH)) { + input = ParserConstants.SLASH + input.substring(4); + removeLastSegment(output); + } else if (input.equals(ParserConstants.SLASH + ParserConstants.DOUBLE_DOT)) { + input = ParserConstants.SLASH; + removeLastSegment(output); + } else if (input.equals(ParserConstants.POINT) || input.equals(ParserConstants.DOUBLE_DOT)) { + input = ParserConstants.EMPTY_STRING; + } else { + int nextSlash; + if (input.startsWith(ParserConstants.SLASH)) { + nextSlash = 
input.indexOf(ParserConstants.SLASH, 1); + if (nextSlash >= 0) { + output.append(input, 0, nextSlash); + input = input.substring(nextSlash); + } else { + output.append(input); + input = ParserConstants.EMPTY_STRING; + } } else { - return String.format("character '%c'", c); + nextSlash = input.indexOf(ParserConstants.SLASH); + if (nextSlash >= 0) { + output.append(input, 0, nextSlash); + input = input.substring(nextSlash); + } else { + output.append(input); + input = ParserConstants.EMPTY_STRING; + } } + } } + + return output.toString(); } /** - * Escapes characters in a string for display in error messages. + * Removes the last path segment from the output buffer. + * Used during dot segment removal when processing ".." segments. * - * @param iri the IRI to escape for display - * @return escaped version suitable for error messages + * @param output string builder containing the path being constructed */ - public static String escapeForDisplay(String iri) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < iri.length(); i++) { - char c = iri.charAt(i); - if (c < 0x20 || (c >= 0x7F && c <= 0x9F)) { - // Display control characters as Unicode escapes - sb.append(String.format("\\u%04X", (int) c)); - } else if (c > 0x7E) { - // Display non-ASCII as Unicode escapes for clarity - sb.append(String.format("\\u%04X", (int) c)); - } else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"') { - // Display reserved characters with backslash escape - sb.append('\\').append(c); - } else { - // Display normal ASCII characters as-is - sb.append(c); + private static void removeLastSegment(StringBuilder output) { + String outputStr = output.toString(); + int lastSlash = outputStr.lastIndexOf(ParserConstants.SLASH); + output.setLength(Math.max(lastSlash, 0)); + } + + /** + * Provides a fallback resolution mechanism when RFC 3986 parsing fails. 
+ * + * @param base base URI + * @param relative relative IRI reference + * @return resolved IRI using simple concatenation rules + */ + private static String performSimpleFallback(String base, String relative) { + if (relative.isEmpty()) { + return base; + } + if (base.endsWith(ParserConstants.SLASH)) { + return base + relative; + } + int lastSlash = base.lastIndexOf('/'); + return lastSlash >= 0 ? base.substring(0, lastSlash + 1) + relative : base + ParserConstants.SLASH + relative; + } + + /** + * Normalizes URI strings, ensuring proper format for file:// URIs. + * + * @param uri URI to normalize + * @return normalized URI string + */ + public static String normalizeURI(String uri) { + if (uri == null) { + return null; + } + if (uri.startsWith(ParserConstants.FILE_PROTOCOL_SIMPLE) && !uri.startsWith(ParserConstants.FILE_PROTOCOL_TRIPLE_SLASH)) { + if (!uri.startsWith(ParserConstants.FILE_PROTOCOL)) { + uri = uri.replace(ParserConstants.FILE_PROTOCOL_SIMPLE, ParserConstants.FILE_PROTOCOL_TRIPLE_SLASH); } } - return sb.toString(); + return uri; } + + /** + * Validates a URI scheme according to RFC 3986. + * A valid scheme must start with a letter and contain only letters, digits, '+', '-', or '.'. 
+ * + * @param scheme scheme to validate + * @return true if the scheme is valid, false otherwise + */ + public static boolean isValidScheme(String scheme) { + if (scheme == null || scheme.isEmpty() || !Character.isLetter(scheme.charAt(0))) { + return false; + } + for (int i = 1; i < scheme.length(); i++) { + char c = scheme.charAt(i); + if (!Character.isLetterOrDigit(c) && c != '+' && c != '-' && c != '.') { + return false; + } + } + return true; + } + } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/data/impl/io/parser/common/AbstractTurtleTriGListener.java b/src/main/java/fr/inria/corese/core/next/data/impl/io/parser/common/AbstractTurtleTriGListener.java index 9e7d120e2..b96e49364 100644 --- a/src/main/java/fr/inria/corese/core/next/data/impl/io/parser/common/AbstractTurtleTriGListener.java +++ b/src/main/java/fr/inria/corese/core/next/data/impl/io/parser/common/AbstractTurtleTriGListener.java @@ -7,10 +7,14 @@ import fr.inria.corese.core.next.data.impl.common.vocabulary.RDF; import fr.inria.corese.core.next.data.impl.exception.ParsingErrorException; import fr.inria.corese.core.next.data.impl.io.parser.util.ParserConstants; +import fr.inria.corese.core.next.util.StringUtils; import java.net.URI; import java.net.URISyntaxException; +import static fr.inria.corese.core.next.data.impl.common.util.IRIUtils.isAbsoluteIRI; +import static fr.inria.corese.core.next.data.impl.common.util.IRIUtils.normalizeURI; + /** * Base class for RDF parsers (Turtle, TriG) providing common functionality. 
* Implements IRI resolution according to RFC 3986, Unicode escape handling, @@ -158,292 +162,7 @@ public String resolveIRIAgainstBase(String iri) { if (isAbsoluteIRI(iri)) { return iri; } - - if (iri.isEmpty()) { - return effectiveBase; - } - - try { - URI baseUri = new URI(effectiveBase); - String baseScheme = baseUri.getScheme(); - String baseAuthority = baseUri.getAuthority(); - String basePath = baseUri.getPath(); - String baseQuery = baseUri.getQuery(); - - String[] refParts = parseReference(iri); - String refScheme = refParts[0]; - String refAuthority = refParts[1]; - String refPath = refParts[2]; - String refQuery = refParts[3]; - String refFragment = refParts[4]; - - String targetScheme, targetAuthority, targetPath, targetQuery, targetFragment; - - // RFC 3986 Section 5.2.2 - Reference Resolution Algorithm - if (refScheme != null) { - targetScheme = refScheme; - targetAuthority = refAuthority; - targetPath = removeDotSegments(refPath); - targetQuery = refQuery; - } else { - if (refAuthority != null) { - targetScheme = baseScheme; - targetAuthority = refAuthority; - targetPath = removeDotSegments(refPath); - targetQuery = refQuery; - } else { - targetScheme = baseScheme; - targetAuthority = baseAuthority; - if (refPath.isEmpty()) { - targetPath = basePath; - targetQuery = refQuery != null ? refQuery : baseQuery; - } else { - if (refPath.startsWith(ParserConstants.SLASH)) { - targetPath = removeDotSegments(refPath); - } else { - targetPath = removeDotSegments(mergePaths(basePath, refPath)); - } - targetQuery = refQuery; - } - } - } - targetFragment = refFragment; - - return buildURI(targetScheme, targetAuthority, targetPath, targetQuery, targetFragment); - - } catch (URISyntaxException e) { - return performSimpleFallback(effectiveBase, iri); - } - } - - /** - * Constructs a URI from its components. 
- * - * @param scheme URI scheme (e.g., "http", "file") - * @param authority authority component (host, port, userinfo) - * @param path path component - * @param query query component - * @param fragment fragment identifier - * @return normalized URI string - */ - public String buildURI(String scheme, String authority, String path, String query, String fragment) { - StringBuilder result = new StringBuilder(); - if (scheme != null) { - result.append(scheme).append(ParserConstants.COLON); - } - if (authority != null) { - result.append(ParserConstants.DOUBLE_SLASH).append(authority); - } - if (path != null) { - result.append(path); - } - if (query != null) { - result.append(ParserConstants.QUERY_MARK).append(query); - } - if (fragment != null) { - result.append(ParserConstants.HASH).append(fragment); - } - return normalizeURI(result.toString()); - } - - /** - * Parses a URI reference into its five components. - * - * @param ref URI reference to parse - * @return array containing [scheme, authority, path, query, fragment] - */ - public String[] parseReference(String ref) { - String[] parts = new String[5]; - String remaining = ref; - - int fragmentIndex = remaining.indexOf('#'); - if (fragmentIndex >= 0) { - parts[4] = remaining.substring(fragmentIndex + 1); - remaining = remaining.substring(0, fragmentIndex); - } - - int queryIndex = remaining.indexOf('?'); - if (queryIndex >= 0) { - parts[3] = remaining.substring(queryIndex + 1); - remaining = remaining.substring(0, queryIndex); - } - - int colonIndex = remaining.indexOf(':'); - if (colonIndex > 0 && isValidScheme(remaining.substring(0, colonIndex))) { - parts[0] = remaining.substring(0, colonIndex); - remaining = remaining.substring(colonIndex + 1); - } - - if (remaining.startsWith(ParserConstants.DOUBLE_SLASH)) { - int authorityEnd = remaining.indexOf('/', 2); - if (authorityEnd < 0) { - authorityEnd = remaining.length(); - } - parts[1] = remaining.substring(2, authorityEnd); - remaining = 
remaining.substring(authorityEnd); - } - - parts[2] = remaining; - return parts; - } - - /** - * Merges a base path with a relative path. - * - * @param basePath base path from base URI - * @param refPath relative path from reference - * @return merged path - */ - public String mergePaths(String basePath, String refPath) { - if (basePath == null || basePath.isEmpty()) { - return ParserConstants.SLASH + refPath; - } - int lastSlash = basePath.lastIndexOf('/'); - return lastSlash >= 0 ? basePath.substring(0, lastSlash + 1) + refPath : refPath; - } - - /** - * Removes dot segments from a path (RFC 3986 Section 5.2.4). - * Processes ".." and "." segments according to the normalization algorithm. - * - * @param path path to normalize - * @return normalized path without dot segments - */ - public String removeDotSegments(String path) { - if (path == null || path.isEmpty()) { - return ParserConstants.EMPTY_STRING; - } - - String input = path; - StringBuilder output = new StringBuilder(); - - while (!input.isEmpty()) { - if (input.startsWith(ParserConstants.DOUBLE_DOT + ParserConstants.SLASH)) { - input = input.substring(3); - } else if (input.startsWith(ParserConstants.DOT + ParserConstants.SLASH)) { - input = input.substring(2); - } else if (input.startsWith(ParserConstants.SLASH + ParserConstants.DOT + ParserConstants.SLASH)) { - input = ParserConstants.SLASH + input.substring(3); - } else if (input.equals(ParserConstants.SLASH + ParserConstants.DOT)) { - input = ParserConstants.SLASH; - } else if (input.startsWith(ParserConstants.SLASH + ParserConstants.DOUBLE_DOT + ParserConstants.SLASH)) { - input = ParserConstants.SLASH + input.substring(4); - removeLastSegment(output); - } else if (input.equals(ParserConstants.SLASH + ParserConstants.DOUBLE_DOT)) { - input = ParserConstants.SLASH; - removeLastSegment(output); - } else if (input.equals(ParserConstants.POINT) || input.equals(ParserConstants.DOUBLE_DOT)) { - input = ParserConstants.EMPTY_STRING; - } else { - int 
nextSlash; - if (input.startsWith(ParserConstants.SLASH)) { - nextSlash = input.indexOf(ParserConstants.SLASH, 1); - if (nextSlash >= 0) { - output.append(input, 0, nextSlash); - input = input.substring(nextSlash); - } else { - output.append(input); - input = ParserConstants.EMPTY_STRING; - } - } else { - nextSlash = input.indexOf(ParserConstants.SLASH); - if (nextSlash >= 0) { - output.append(input, 0, nextSlash); - input = input.substring(nextSlash); - } else { - output.append(input); - input = ParserConstants.EMPTY_STRING; - } - } - } - } - - return output.toString(); - } - - /** - * Removes the last path segment from the output buffer. - * Used during dot segment removal when processing ".." segments. - * - * @param output string builder containing the path being constructed - */ - public void removeLastSegment(StringBuilder output) { - String outputStr = output.toString(); - int lastSlash = outputStr.lastIndexOf(ParserConstants.SLASH); - output.setLength(Math.max(lastSlash, 0)); - } - - /** - * Provides a fallback resolution mechanism when RFC 3986 parsing fails. - * - * @param base base URI - * @param relative relative IRI reference - * @return resolved IRI using simple concatenation rules - */ - public String performSimpleFallback(String base, String relative) { - if (relative.isEmpty()) { - return base; - } - if (base.endsWith(ParserConstants.SLASH)) { - return base + relative; - } - int lastSlash = base.lastIndexOf('/'); - return lastSlash >= 0 ? base.substring(0, lastSlash + 1) + relative : base + ParserConstants.SLASH + relative; - } - - /** - * Normalizes URI strings, ensuring proper format for file:// URIs. 
- * - * @param uri URI to normalize - * @return normalized URI string - */ - public String normalizeURI(String uri) { - if (uri == null) { - return null; - } - if (uri.startsWith(ParserConstants.FILE_PROTOCOL_SIMPLE) && !uri.startsWith(ParserConstants.FILE_PROTOCOL_TRIPLE_SLASH)) { - if (!uri.startsWith(ParserConstants.FILE_PROTOCOL)) { - uri = uri.replace(ParserConstants.FILE_PROTOCOL_SIMPLE, ParserConstants.FILE_PROTOCOL_TRIPLE_SLASH); - } - } - return uri; - } - - /** - * Determines whether an IRI is absolute (contains a valid scheme). - * - * @param iri IRI to check - * @return true if the IRI is absolute, false otherwise - */ - public boolean isAbsoluteIRI(String iri) { - if (iri == null || iri.isEmpty()) { - return false; - } - int colonIndex = iri.indexOf(':'); - if (colonIndex == -1 || colonIndex == 0) { - return false; - } - return isValidScheme(iri.substring(0, colonIndex)); - } - - /** - * Validates a URI scheme according to RFC 3986. - * A valid scheme must start with a letter and contain only letters, digits, '+', '-', or '.'. 
- * - * @param scheme scheme to validate - * @return true if the scheme is valid, false otherwise - */ - public boolean isValidScheme(String scheme) { - if (scheme == null || scheme.isEmpty() || !Character.isLetter(scheme.charAt(0))) { - return false; - } - for (int i = 1; i < scheme.length(); i++) { - char c = scheme.charAt(i); - if (!Character.isLetterOrDigit(c) && c != '+' && c != '-' && c != '.') { - return false; - } - } - return true; + return IRIUtils.resolveIRIAgainstBase(effectiveBase, iri); } /** @@ -652,8 +371,8 @@ private void validateIRI(String iri) throws ParsingErrorException { // Check for forbidden characters if (IRIUtils.isInvalidIRICharacter(c)) { String codePoint = String.format("U+%04X", (int) c); - String charDesc = IRIUtils.getCharacterDescription(c); - String displayIRI = IRIUtils.escapeForDisplay(iri); + String charDesc = StringUtils.getCharacterDescription(c); + String displayIRI = StringUtils.escapeForDisplay(iri); throw new ParsingErrorException( "Invalid character in IRI: " + codePoint + " (" + charDesc + ") " + diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilder.java b/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilder.java index 49eff7f1a..90475b32d 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilder.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilder.java @@ -117,12 +117,39 @@ public final class SparqlAstBuilder { */ private final VariableScopeAnalyzer variableScopeAnalyzer = new VariableScopeAnalyzer(); + /** + * Effective base URI after prologue (parser options, then possibly {@code BASE}). + */ + private String baseUri; + /** + * Prefix declarations in source order (including redeclarations). 
/**
 * Set once a BASE declaration has been recorded, so that a second declaration is
 * detected even when the first one equals the base IRI supplied by the parser options.
 */
private boolean baseDeclared;

/**
 * Creates a builder whose effective base URI starts as the base IRI of the parser options.
 *
 * @param options parser options; their base IRI (possibly {@code null}) seeds the base URI
 */
public SparqlAstBuilder(SparqlParserOptions options) {
    this.options = options;
    this.baseUri = options.getBaseIRI();
}

// --- Construction entry points (called by listener) ---

/**
 * Records the base URI declared in the query prologue, replacing the one taken from
 * the parser options.
 *
 * @param uri declared base URI
 * @throws QuerySyntaxException if a base URI has already been declared through this method
 */
public void setBaseUri(String uri) {
    if (this.baseDeclared) {
        throw new QuerySyntaxException("Base URI already set, multiple BASE declarations are forbidden.");
    }
    this.baseDeclared = true;
    this.baseUri = uri;
}

/**
 * Records a prefix declaration from the query prologue.
 *
 * @param prefix declared prefix
 * @param uri    namespace IRI bound to the prefix
 * @throws QuerySyntaxException if a declaration with an equal prefix was already recorded
 */
public void addPrefix(String prefix, String uri) {
    PrefixDeclarationAst declarationAst = new PrefixDeclarationAst(prefix, new IriAst(uri));
    for (PrefixDeclarationAst existing : this.prefixDeclarations) {
        if (Objects.equals(existing.prefix(), declarationAst.prefix())) {
            throw new QuerySyntaxException("Prefix " + prefix + " has already been declared");
        }
    }
    this.prefixDeclarations.add(declarationAst);
}
during parsing"); }; } @@ -370,36 +398,37 @@ private void ensureNoOpenBgp() { /** * Builds the AST for ASK queries. */ - private AskQueryAst buildAskQueryAst(DatasetClauseAst datasetClauseAst) { - return new AskQueryAst(datasetClauseAst, whereClause); + private AskQueryAst buildAskQueryAst(DatasetClauseAst datasetClauseAst, QueryPrologueAst prologue) { + return new AskQueryAst(datasetClauseAst, whereClause, prologue); } /** * Builds the AST for SELECT queries. */ - private SelectQueryAst buildSelectQueryAst(DatasetClauseAst datasetClauseAst) { + private SelectQueryAst buildSelectQueryAst(DatasetClauseAst datasetClauseAst, QueryPrologueAst prologue) { validateSelectQueryScope(); - return new SelectQueryAst(projection, datasetClauseAst, whereClause, buildSolutionModifier()); + return new SelectQueryAst(projection, datasetClauseAst, whereClause, buildSolutionModifier(), prologue); } /** * Builds the AST for DESCRIBE queries. */ - private DescribeQueryAst buildDescribeQueryAst(DatasetClauseAst datasetClauseAst) { + private DescribeQueryAst buildDescribeQueryAst(DatasetClauseAst datasetClauseAst, QueryPrologueAst prologue) { // TODO #306: validate variable scope for DESCRIBE modifiers when DescribeQueryAst carries them. - return new DescribeQueryAst(datasetClauseAst, describeResources, whereClause); + return new DescribeQueryAst(datasetClauseAst, describeResources, whereClause, prologue); } /** * Builds the AST for CONSTRUCT queries. */ - private ConstructQueryAst buildConstructQueryAst(DatasetClauseAst datasetClauseAst) { + private ConstructQueryAst buildConstructQueryAst(DatasetClauseAst datasetClauseAst, QueryPrologueAst prologue) { // TODO #306: validate variable scope for CONSTRUCT modifiers when ConstructQueryAst carries them. return new ConstructQueryAst( constructTemplate != null ? 
constructTemplate : new ConstructTemplateAst(List.of()), datasetClauseAst, whereClause, - buildSolutionModifier()); + buildSolutionModifier(), + prologue); } /** diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlListener.java b/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlListener.java index 5c3230487..470f4c7a7 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlListener.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlListener.java @@ -163,4 +163,20 @@ public void exitDefaultGraphClause(SparqlParser.DefaultGraphClauseContext ctx) { public void exitNamedGraphClause(SparqlParser.NamedGraphClauseContext ctx) { for (var d : delegates) d.exitNamedGraphClause(ctx); } + + @Override public void enterBaseDecl(SparqlParser.BaseDeclContext ctx) { + for (var d : delegates) d.enterBaseDecl(ctx); + } + + @Override public void exitBaseDecl(SparqlParser.BaseDeclContext ctx) { + for (var d : delegates) d.exitBaseDecl(ctx); + } + + @Override public void enterPrefixDecl(SparqlParser.PrefixDeclContext ctx) { + for (var d : delegates) d.enterPrefixDecl(ctx); + } + + @Override public void exitPrefixDecl(SparqlParser.PrefixDeclContext ctx) { + for (var d : delegates) d.exitPrefixDecl(ctx); + } } diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlParser.java b/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlParser.java index a10944d2d..097fd9b79 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlParser.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/parser/SparqlParser.java @@ -7,6 +7,7 @@ import java.nio.charset.StandardCharsets; import java.util.List; +import fr.inria.corese.core.next.query.impl.parser.listener.*; import org.antlr.v4.runtime.BailErrorStrategy; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStreams; @@ -26,15 +27,6 @@ import 
fr.inria.corese.core.next.query.api.exception.QueryValidationException; import fr.inria.corese.core.next.query.api.io.parser.QueryOptions; import fr.inria.corese.core.next.query.api.sparql.options.BaseIRIOptions; -import fr.inria.corese.core.next.query.impl.parser.listener.AskQueryFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.BgpFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.ConstructQueryFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.DatasetClauseFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.DescribeQueryFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.FilterFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.SelectQueryFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.SolutionModifierFeature; -import fr.inria.corese.core.next.query.impl.parser.listener.UnionFeature; import fr.inria.corese.core.next.query.impl.sparql.ast.QueryAst; public class SparqlParser extends AbstractQueryParser { @@ -125,7 +117,8 @@ public QueryAst parse(Reader reader, String baseIRI) { new FilterFeature(builder), new UnionFeature(builder), new DescribeQueryFeature(builder), - new DatasetClauseFeature(builder) + new DatasetClauseFeature(builder), + new PrologueFeature(builder) )); walker.walk(listener, tree); diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/parser/listener/PrologueFeature.java b/src/main/java/fr/inria/corese/core/next/query/impl/parser/listener/PrologueFeature.java new file mode 100644 index 000000000..e35f73c20 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/query/impl/parser/listener/PrologueFeature.java @@ -0,0 +1,21 @@ +package fr.inria.corese.core.next.query.impl.parser.listener; + +import fr.inria.corese.core.next.impl.parser.antlr.SparqlParser; +import fr.inria.corese.core.next.query.impl.parser.SparqlAstBuilder; +import fr.inria.corese.core.next.util.StringUtils; + +public class 
PrologueFeature extends AbstractSparqlFeature { + public PrologueFeature(SparqlAstBuilder builder) { + super(builder); + } + + @Override + public void exitBaseDecl(SparqlParser.BaseDeclContext ctx) { + builder().setBaseUri(StringUtils.trimChevronIRIs(ctx.IRI_REF().getText())); + } + + @Override + public void exitPrefixDecl(SparqlParser.PrefixDeclContext ctx) { + builder().addPrefix(ctx.PNAME_NS().getText(), ctx.IRI_REF().getText()); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/AskQueryAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/AskQueryAst.java index f7f51ed00..ade193d8c 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/AskQueryAst.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/AskQueryAst.java @@ -15,13 +15,23 @@ * } * } */ -public record AskQueryAst(DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause) implements QueryAst { +public record AskQueryAst(DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause, QueryPrologueAst prologue) implements QueryAst { + /** + * constructor with default prefix handler + */ + public AskQueryAst(DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause) { + this(datasetClause, whereClause, null); + } + public AskQueryAst { if (whereClause == null) { whereClause = new GroupGraphPatternAst(List.of()); } - if(datasetClause == null) { + if (datasetClause == null) { datasetClause = DatasetClauseAst.none(); } + if (prologue == null) { + prologue = QueryPrologueAst.empty(); + } } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/ConstructQueryAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/ConstructQueryAst.java index eb9acb30d..9c30bca4d 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/ConstructQueryAst.java +++ 
b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/ConstructQueryAst.java @@ -34,8 +34,9 @@ public record ConstructQueryAst( ConstructTemplateAst constructTemplate, DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause, - SolutionModifierAst solutionModifier -) implements QueryAst { + SolutionModifierAst solutionModifier, + QueryPrologueAst prologue + ) implements QueryAst { public ConstructQueryAst { if (constructTemplate == null) { constructTemplate = new ConstructTemplateAst(List.of()); @@ -49,9 +50,22 @@ public record ConstructQueryAst( if (solutionModifier == null) { solutionModifier = SolutionModifierAst.empty(); } + if (prologue == null) { + prologue = QueryPrologueAst.empty(); + } + } + + /** + * constructor with default prefix handler + */ + public ConstructQueryAst(ConstructTemplateAst template, DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause, SolutionModifierAst solutionModifier) { + this(template, datasetClause, whereClause, solutionModifier, null); } + /** + * constructor with default prefix handler and default solution modifier + */ public ConstructQueryAst(ConstructTemplateAst template, GroupGraphPatternAst whereClause) { - this(template, DatasetClauseAst.none(), whereClause, SolutionModifierAst.empty()); + this(template, DatasetClauseAst.none(), whereClause, SolutionModifierAst.empty(), null); } } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/DescribeQueryAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/DescribeQueryAst.java index e70fa98e3..6043ba473 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/DescribeQueryAst.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/DescribeQueryAst.java @@ -21,7 +21,7 @@ * } * } */ -public record DescribeQueryAst(DatasetClauseAst datasetClause, List described, GroupGraphPatternAst whereClause) implements QueryAst { +public record 
DescribeQueryAst(DatasetClauseAst datasetClause, List described, GroupGraphPatternAst whereClause, QueryPrologueAst prologue) implements QueryAst { public DescribeQueryAst { described = described != null ? List.copyOf(described) : List.of(); if (whereClause == null) { @@ -30,6 +30,16 @@ public record DescribeQueryAst(DatasetClauseAst datasetClause, List des if(datasetClause == null) { datasetClause = DatasetClauseAst.none(); } + if(prologue == null) { + prologue = QueryPrologueAst.empty(); + } + } + + /** + * constructor with default prefix handler + */ + public DescribeQueryAst(DatasetClauseAst datasetClause, List described, GroupGraphPatternAst whereClause) { + this(datasetClause, described, whereClause, null); } /** diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/PrefixDeclarationAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/PrefixDeclarationAst.java new file mode 100644 index 000000000..8957ffb5c --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/PrefixDeclarationAst.java @@ -0,0 +1,22 @@ +package fr.inria.corese.core.next.query.impl.sparql.ast; + +import java.util.Objects; + +import static fr.inria.corese.core.next.util.StringUtils.trimChevronIRIs; +import static fr.inria.corese.core.next.util.StringUtils.trimPrefixWithColon; + +/** + * A {@code PREFIX p: <ns>} declaration from the SPARQL prologue ({@code p} without trailing colon). + */ +public record PrefixDeclarationAst(String prefix, IriAst namespace) { + public PrefixDeclarationAst { + if (prefix == null ) { + throw new IllegalArgumentException("prefix must be non-null"); + } + prefix = trimPrefixWithColon(prefix); + namespace = Objects.requireNonNull(namespace, "namespace"); + if(! 
namespace.raw().isEmpty()) { + namespace = new IriAst(trimChevronIRIs(namespace.raw())); + } + } +} diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryAst.java index 03e29ba1e..6df83c7bd 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryAst.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryAst.java @@ -7,4 +7,5 @@ public sealed interface QueryAst permits AskQueryAst, ConstructQueryAst, DescribeQueryAst, SelectQueryAst { DatasetClauseAst datasetClause(); GroupGraphPatternAst whereClause(); + QueryPrologueAst prologue(); } \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryPrologueAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryPrologueAst.java new file mode 100644 index 000000000..c6700b411 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/QueryPrologueAst.java @@ -0,0 +1,43 @@ +package fr.inria.corese.core.next.query.impl.sparql.ast; + +import fr.inria.corese.core.next.data.impl.common.util.IRIUtils; +import fr.inria.corese.core.next.data.impl.io.common.IOConstants; +import fr.inria.corese.core.next.query.api.exception.QuerySyntaxException; + +import java.util.List; + +import static fr.inria.corese.core.next.util.StringUtils.trimChevronIRIs; + +/** + * Snapshot of the SPARQL prologue: prefix declarations in source order and the effective base IRI + * after the prologue (parser options initial base, possibly overridden by {@code BASE}). + *

+ * This type is attached to every query form ({@code SELECT}, {@code ASK}, {@code CONSTRUCT}, + * {@code DESCRIBE}) and is exposed through {@link QueryAst#prologue()}. + */ +public record QueryPrologueAst(List prefixDeclarations, IriAst baseIri) { + + public QueryPrologueAst { + prefixDeclarations = prefixDeclarations != null ? List.copyOf(prefixDeclarations) : List.of(); + if (baseIri == null) { + baseIri = new IriAst(IOConstants.getDefaultBaseURI()); + } else { + baseIri = new IriAst(trimChevronIRIs(baseIri.raw())); + } + if (!IRIUtils.isAbsoluteIRI(baseIri.raw())) { + throw new QuerySyntaxException("Base IRI should be absolute, got " + baseIri.raw()); + } + // resolving relative namespaces in prefix declarations + IriAst finalBaseIri = baseIri; + prefixDeclarations = prefixDeclarations.stream().map(prefixDecl -> { + if(IRIUtils.isAbsoluteIRI(prefixDecl.namespace().raw())) { + return prefixDecl; + } + return new PrefixDeclarationAst(prefixDecl.prefix(), new IriAst(IRIUtils.resolveIRIAgainstBase(finalBaseIri.raw(), prefixDecl.namespace().raw()))); + }).toList(); + } + + public static QueryPrologueAst empty() { + return new QueryPrologueAst(List.of(), new IriAst(IOConstants.getDefaultBaseURI())); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/SelectQueryAst.java b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/SelectQueryAst.java index 6f8fc3cb7..fbcf02164 100644 --- a/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/SelectQueryAst.java +++ b/src/main/java/fr/inria/corese/core/next/query/impl/sparql/ast/SelectQueryAst.java @@ -5,24 +5,32 @@ /** * Abstract Syntax Tree (AST) representation of a SPARQL {@code SELECT} query. * Holds the projection (SELECT * or SELECT ?v1 ?v2 ...) and the WHERE clause. + *

+ * {@link #prologue()} captures the PREFIX/BASE declarations of the query; a {@code null} prologue + * is replaced by {@link QueryPrologueAst#empty()}. */ -public record SelectQueryAst(ProjectionAst projection, DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause, SolutionModifierAst solutionModifier) implements QueryAst { +public record SelectQueryAst(ProjectionAst projection, DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause, SolutionModifierAst solutionModifier, QueryPrologueAst prologue) implements QueryAst { /** Constructor with default projection SELECT *. */ public SelectQueryAst(GroupGraphPatternAst whereClause) { this(ProjectionAsts.selectAll(), DatasetClauseAst.none(), whereClause); } - /** Constructor with default solution modifier (no DISTINCT/REDUCED/ORDER BY/LIMIT/OFFSET). */ + /** Constructor with default solution modifier (no DISTINCT/REDUCED/ORDER BY/LIMIT/OFFSET) and default prologue. */ public SelectQueryAst(ProjectionAst projection, DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause) { - this(projection, datasetClause, whereClause, null); + this(projection, datasetClause, whereClause, null, null); + } + + /** Constructor with default prologue */ + public SelectQueryAst(ProjectionAst projection, DatasetClauseAst datasetClause, GroupGraphPatternAst whereClause, SolutionModifierAst solutionModifier) { + this(projection, datasetClause, whereClause, solutionModifier, null); } public SelectQueryAst { if (projection == null) { projection = ProjectionAsts.selectAll(); } - if(datasetClause == null) { + if (datasetClause == null) { datasetClause = DatasetClauseAst.none(); } if (whereClause == null) { @@ -31,5 +39,8 @@ public SelectQueryAst(ProjectionAst projection, DatasetClauseAst datasetClause, if (solutionModifier == null) { solutionModifier = SolutionModifierAst.empty(); } + if (prologue == null) { + prologue = QueryPrologueAst.empty(); + } } -} \ No newline at end of file +} diff --git
a/src/main/java/fr/inria/corese/core/next/util/StringUtils.java b/src/main/java/fr/inria/corese/core/next/util/StringUtils.java new file mode 100644 index 000000000..1cf3b3601 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/util/StringUtils.java @@ -0,0 +1,97 @@ +package fr.inria.corese.core.next.util; + +public class StringUtils { + + public static String trimChevronIRIs(String uri) { + uri = uri.trim(); + if(uri.startsWith("<") && uri.endsWith(">")) { + uri = uri.substring(0, uri.lastIndexOf(">")); + uri = uri.substring(uri.indexOf("<") +1); + } + return uri; + } + + public static String trimPrefixWithColon(String prefix) { + prefix = prefix.trim(); + if(prefix.endsWith(":")) { + prefix = prefix.substring(0, prefix.lastIndexOf(":")); + } + return prefix; + } + + /** + * Returns a human-readable description of a character for error messages. + * + * @param c the character to describe + * @return human-readable description + */ + public static String getCharacterDescription(char c) { + switch (c) { + case 0x00: + return "null character"; + case 0x09: + return "tab"; + case 0x0A: + return "line feed"; + case 0x0D: + return "carriage return"; + case 0x20: + return "space"; + case 0x7F: + return "delete"; + case '<': + return "less than"; + case '>': + return "greater than"; + case '{': + return "left curly bracket"; + case '}': + return "right curly bracket"; + case '\\': + return "backslash"; + case '^': + return "circumflex"; + case '`': + return "grave accent"; + case '|': + return "pipe"; + case '"': + return "quotation mark"; + default: + if (c < 0x20) { + return "control character"; + } else if (c >= 0x80 && c <= 0x9F) { + return "high control character"; + } else { + return String.format("character '%c'", c); + } + } + } + + /** + * Escapes characters in a string for display in error messages. 
+ * + * @param iri the IRI to escape for display + * @return escaped version suitable for error messages + */ + public static String escapeForDisplay(String iri) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < iri.length(); i++) { + char c = iri.charAt(i); + if (c < 0x20 || (c >= 0x7F && c <= 0x9F)) { + // Display control characters as Unicode escapes + sb.append(String.format("\\u%04X", (int) c)); + } else if (c > 0x7E) { + // Display non-ASCII as Unicode escapes for clarity + sb.append(String.format("\\u%04X", (int) c)); + } else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"') { + // Display reserved characters with backslash escape + sb.append('\\').append(c); + } else { + // Display normal ASCII characters as-is + sb.append(c); + } + } + return sb.toString(); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtilsTest.java b/src/test/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtilsTest.java index 59041d4a8..89005dbae 100644 --- a/src/test/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtilsTest.java +++ b/src/test/java/fr/inria/corese/core/next/data/impl/common/util/IRIUtilsTest.java @@ -75,6 +75,23 @@ public void isStandardIRITest() { } } + @Test + public void isAbsoluteIRITest() { + assertTrue(IRIUtils.isAbsoluteIRI("mailto://user@example.com")); + assertTrue(IRIUtils.isAbsoluteIRI("mongodb://user:password@127.0.0.1:3307")); + assertTrue(IRIUtils.isAbsoluteIRI("https://laconsole.dev")); + assertTrue(IRIUtils.isAbsoluteIRI("http://127.0.0.1:3000"));; + assertTrue(IRIUtils.isAbsoluteIRI("urn:isbn:978-2-7654-0912-0")); + assertTrue(IRIUtils.isAbsoluteIRI("urn:uuid:f81d4fae-7dec-11d0-a765-00a0c91e6bf6")); + assertTrue(IRIUtils.isAbsoluteIRI("urn:ietf:rfc:2648")); + assertTrue(IRIUtils.isAbsoluteIRI("https://www.w3.org/TR/rdf-sparql-query/#iriRefs")); + assertTrue(IRIUtils.isAbsoluteIRI("https://ns.inria.fr/otherTest1/#")); + 
assertTrue(IRIUtils.isAbsoluteIRI("https://www.w3.org/TR/rdf-sparql-query/#iriRefs")); + assertTrue(IRIUtils.isAbsoluteIRI("http://xmlns.com/foaf/0.1/")); + assertFalse(IRIUtils.isAbsoluteIRI("child/password@127.0.0.1:3307")); + assertFalse(IRIUtils.isAbsoluteIRI("child/otherChild/otherotherchild/#patate")); + } + /** * Helper method to escape strings for display in test failure messages */ diff --git a/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilderTest.java b/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilderTest.java index 2c133d33f..3b09b7692 100644 --- a/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilderTest.java +++ b/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlAstBuilderTest.java @@ -1,6 +1,7 @@ package fr.inria.corese.core.next.query.impl.parser; +import fr.inria.corese.core.next.data.impl.io.common.IOConstants; import fr.inria.corese.core.next.query.impl.sparql.ast.*; import org.junit.jupiter.api.Test; @@ -43,6 +44,7 @@ void shouldBuildEmptyWhereGroupWhenNoTriples() { void shouldBuildSingleBgpWithOneTriple() { SparqlAstBuilder b = newBuilder(); + b.setBaseUri(IOConstants.getDefaultBaseURI()); b.enterSelectQuery(); b.enterGroup(); b.enterBgp(); diff --git a/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlParserPrologueTest.java b/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlParserPrologueTest.java new file mode 100644 index 000000000..97f59b355 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/query/impl/parser/SparqlParserPrologueTest.java @@ -0,0 +1,261 @@ +package fr.inria.corese.core.next.query.impl.parser; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.DisplayName; +import 
org.junit.jupiter.api.Test; + +import fr.inria.corese.core.next.query.api.exception.QuerySyntaxException; +import fr.inria.corese.core.next.query.impl.sparql.ast.QueryAst; +import fr.inria.corese.core.next.query.impl.sparql.ast.SelectQueryAst; + +@SuppressWarnings("java:S5976") +class SparqlParserPrologueTest extends AbstractSparqlParserFeatureTest { + + @Test + @DisplayName("Basic Ask with base") + void askWithBase() { + String query = """ + BASE + ASK { + ?s ?p ?o . + } + """; + + SparqlParser parser = newParserDefault(); + + QueryAst ast = parser.parse(query); + assertEquals("http://ns.inria.fr/test/", ast.prologue().baseIri().raw()); + } + + @Test + @DisplayName("Basic Construct with base") + void constructWithBase() { + String query = """ + BASE + CONSTRUCT { + ?o ?p ?s . + } + { + ?s ?p ?o . + } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + QueryAst ast = parser.parse(query); + assertEquals("http://ns.inria.fr/test/", ast.prologue().baseIri().raw()); + } + + @Test + @DisplayName("Basic Select with base") + void describeWithBase() { + String query = """ + BASE + DESCRIBE ?s { + ?s ?p ?o . + } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + QueryAst ast = parser.parse(query); + assertEquals("http://ns.inria.fr/test/", ast.prologue().baseIri().raw()); + } + + @Test + @DisplayName("Basic Select with base") + void selectWithBase() { + String query = """ + BASE + SELECT * { + ?s ?p ?o . + } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + SelectQueryAst ast = (SelectQueryAst) parser.parse(query); + assertEquals("http://ns.inria.fr/test/", ast.prologue().baseIri().raw()); + } + + @Test + @DisplayName("Basic Select with base and one prefix") + void selectWithBaseAndOnePrefix() { + String query = """ + BASE + PREFIX test: + SELECT * { + ?s ?p ?o . 
+ } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + SelectQueryAst ast = (SelectQueryAst) parser.parse(query); + + assertEquals("http://ns.inria.fr/test/", ast.prologue().baseIri().raw()); + assertTrue(ast.prologue().prefixDeclarations().stream() + .anyMatch(prefixDecl -> prefixDecl.prefix().equals("test"))); + assertTrue( + ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().equals("test") + && prefixDecl.namespace().raw().equals("https://ns.inria.fr/otherTest/#"))); + } + + @Test + @DisplayName("Basic Select with base and multiple prefix") + void selectWithBaseAndMultiplePrefix() { + String query = """ + BASE + PREFIX test1: + PREFIX test2: + PREFIX test3: + SELECT * { + ?s ?p ?o . + } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + SelectQueryAst ast = (SelectQueryAst) parser.parse(query); + assertEquals("http://ns.inria.fr/test/", ast.prologue().baseIri().raw()); + assertTrue(ast.prologue().prefixDeclarations().stream() + .anyMatch(prefixDecl -> prefixDecl.prefix().equals("test1"))); + assertTrue( + ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().equals("test1") + && prefixDecl.namespace().raw().equals("https://ns.inria.fr/otherTest1/#"))); + assertTrue(ast.prologue().prefixDeclarations().stream() + .anyMatch(prefixDecl -> prefixDecl.prefix().equals("test2"))); + assertTrue( + ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().equals("test2") + && prefixDecl.namespace().raw().equals("https://ns.inria.fr/otherTest2/#"))); + assertTrue(ast.prologue().prefixDeclarations().stream() + .anyMatch(prefixDecl -> prefixDecl.prefix().equals("test3"))); + assertTrue( + ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().equals("test3") + && prefixDecl.namespace().raw().equals("https://ns.inria.fr/otherTest3/#"))); + } + + @Test + @DisplayName("Basic Select with base and multiple prefix with 
overlap") + void selectWithBaseAndMultiplePrefixWithOverlap() { + String query = """ + BASE + PREFIX test1: + PREFIX test2: + PREFIX test1: + SELECT * { + ?s ?p ?o . + } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + assertThrows(QuerySyntaxException.class, () -> { + parser.parse(query); + }); + } + + @Test + @DisplayName("Basic Select with multiple base should throw") + void selectWithMultipleBase() { + String query = """ + BASE + BASE + SELECT * { + ?s ?p ?o . + } LIMIT 10 + """; + + SparqlParser parser = newParserDefault(); + + assertThrows(QuerySyntaxException.class, () -> { + parser.parse(query); + }); + } + + @Test + @DisplayName("PREFIX with empty prefix label should be accepted") + void selectWithDefaultPrefixDeclaration() { + String query = """ + PREFIX : + SELECT * { + ?s :p ?o . + } + """; + + SparqlParser parser = newParserDefault(); + + SelectQueryAst ast = assertDoesNotThrow(() -> (SelectQueryAst) parser.parse(query)); + assertTrue(ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().isEmpty())); + assertTrue(ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().isEmpty() + && prefixDecl.namespace().raw().equals("https://ns.inria.fr/default/#"))); + } + + @Test + @DisplayName("Relative BASE should be rejected because BASE must be absolute") + void relativeBaseShouldBeRejected() { + String query = """ + BASE + SELECT * { + ?s ?p ?o . + } + """; + + SparqlParser parser = newParserDefault(); + + assertThrows(QuerySyntaxException.class, () -> parser.parse(query)); + } + + @Test + @DisplayName("Relative PREFIX IRI should be resolved against effective base") + void relativePrefixShouldBeResolvedAgainstEffectiveBase() { + String query = """ + BASE + PREFIX ex: + SELECT * { + ?s ex:p ?o . 
+ } + """; + + SparqlParser parser = newParserDefault(); + + SelectQueryAst ast = assertDoesNotThrow(() -> (SelectQueryAst) parser.parse(query)); + + assertTrue( + ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().equals("ex")), + "ex: is in the prologue"); + assertTrue( + ast.prologue().prefixDeclarations().stream() + .anyMatch(prefixDecl -> prefixDecl.prefix().equals("ex") + && prefixDecl.namespace().raw().equals("http://example.org/root/ns/")), + "the IRI of ex: in http://example.org/root/ns/"); + } + + @Test + @DisplayName("Relative PREFIX IRI should use RFC3986 resolution, not string concatenation") + void relativePrefixShouldUseRfc3986Resolution() { + String query = """ + BASE + PREFIX ex: + SELECT * { + ?s ex:p ?o . + } + """; + + SparqlParser parser = newParserDefault(); + + SelectQueryAst ast = assertDoesNotThrow(() -> (SelectQueryAst) parser.parse(query)); + assertTrue( + ast.prologue().prefixDeclarations().stream().anyMatch(prefixDecl -> prefixDecl.prefix().equals("ex")), + "ex: is in the prologue"); + assertTrue( + ast.prologue().prefixDeclarations().stream() + .anyMatch(prefixDecl -> prefixDecl.prefix().equals("ex") + && prefixDecl.namespace().raw().equals("http://example.org/ns/")), + "the IRI of ex: in http://example.org/ns/"); + } +}