Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,39 +84,7 @@ public class TestIRIxRIOT {
@Test public void irix_uuid_1_nt_check() { testLang(urnuuid01, Lang.NT, UNSET, TRUE, 0, 0); }
@Test public void irix_uuid_1_ttl() { testDft (urnuuid01, Lang.TTL, 0, 0); }

// urn:uuid -- IRI3986 answers
//
// private static String urnuuid02 = "<urn:uuid:bad>";
// @Test public void irix_uuid_2_nt() { testDft (urnuuid02, Lang.NT, 0, 0); }
// @Test public void irix_uuid_2_nt_check() { testLang(urnuuid02, Lang.NT, UNSET, TRUE, 0, 1); }
// @Test public void irix_uuid_2_ttl() { testDft (urnuuid02, Lang.TTL, 0, 1); }
//
// private static String uuid03 = "<uuid:bad>";
// @Test public void irix_uuid_3_nt() { testDft (uuid03, Lang.NT, 0, 0); }
// @Test public void irix_uuid_3_nt_check() { testLang(uuid03, Lang.NT, UNSET, TRUE, 0, 1); }
// @Test public void irix_uuid_3_ttl() { testDft (uuid03, Lang.TTL, 0, 1); }
//
// private static String urnuuid04 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query>";
// @Test public void irix_uuid_4_nt() { testDft (urnuuid04, Lang.NT, 0, 0); }
// @Test public void irix_uuid_4_nt_check() { testLang(urnuuid04, Lang.NT, UNSET, TRUE, 0, 1); }
// @Test public void irix_uuid_4_ttl() { testDft (urnuuid04, Lang.TTL, 0, 1); }
//
// private static String uruuidurn05 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>";
// @Test public void irix_uuid_5_nt() { testDft (uruuidurn05, Lang.NT, 0, 0); }
// @Test public void irix_uuid_5_nt_check() { testLang(uruuidurn05, Lang.NT, UNSET, TRUE, 0, 1); }
// @Test public void irix_uuid_5_ttl() { testDft (uruuidurn05, Lang.TTL, 0, 1); }
//
// private static String urnuuid06 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>";
// @Test public void irix_uuid_6_nt() { testDft (urnuuid06, Lang.NT, 0, 0); }
// @Test public void irix_uuid_6_nt_check() { testLang(urnuuid06, Lang.NT, UNSET, TRUE, 0, 2); }
// @Test public void irix_uuid_6_ttl() { testDft (urnuuid06, Lang.TTL, 0, 2); }
//
// private static String uuid07 = "<uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>";
// @Test public void irix_uuid_7_nt() { testDft (uuid07, Lang.NT, 0, 0); }
// @Test public void irix_uuid_7_nt_check() { testLang(uuid07, Lang.NT, UNSET, TRUE, 0, 2); }
// @Test public void irix_uuid_7_ttl() { testDft (uuid07, Lang.TTL, 0, 2); }

// -- urn:uuid -- jena-iri answers
// -- uuid: & urn:uuid -- jena-iri answers
// The warning on bad UUIDs is from IRIProviderJenaIRI, not jena-iri, and so it isn't check/no check sensitive.
private static String urnuuid02 = "<urn:uuid:bad>";
@Test public void irix_uuid_2_nt() { testDft (urnuuid02, Lang.NT, 0, 1); }
Expand All @@ -133,10 +101,10 @@ public class TestIRIxRIOT {
@Test public void irix_uuid_4_nt_check() { testLang(urnuuid04, Lang.NT, UNSET, TRUE, 0, 1); }
@Test public void irix_uuid_4_ttl() { testDft (urnuuid04, Lang.TTL, 0, 1); }

private static String uruuidurn05 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>";
@Test public void irix_uuid_5_nt() { testDft (uruuidurn05, Lang.NT, 0, 1); }
@Test public void irix_uuid_5_nt_check() { testLang(uruuidurn05, Lang.NT, UNSET, TRUE, 0, 1); }
@Test public void irix_uuid_5_ttl() { testDft (uruuidurn05, Lang.TTL, 0, 1); }
private static String urnuuid05 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>";
@Test public void irix_uuid_5_nt() { testDft (urnuuid05, Lang.NT, 0, 0); }
@Test public void irix_uuid_5_nt_check() { testLang(urnuuid05, Lang.NT, UNSET, TRUE, 0, 0); }
@Test public void irix_uuid_5_ttl() { testDft (urnuuid05, Lang.TTL, 0, 0); }

private static String urnuuid06 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>";
@Test public void irix_uuid_6_nt() { testDft (urnuuid06, Lang.NT, 0, 0); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ protected IRIxString(String string) {
@Override public boolean isAbsolute() { return true; }
@Override public boolean isRelative() { return false; }
@Override public boolean hasScheme(String scheme) { return str().startsWith(scheme); }
@Override public String scheme() { return IRIs.scheme(str()); }

@Override
public boolean isReference() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,15 @@ public boolean isReference() {

@Override
public boolean hasScheme(String scheme) {
String iriScheme = javaURI.getScheme();
String iriScheme = scheme();
if ( iriScheme == null )
return false;
return iriScheme.startsWith(scheme);
return iriScheme.equalsIgnoreCase(scheme);
}

/**
 * Return the scheme of this IRI as reported by {@code javaURI.getScheme()}.
 * The value is passed through unchanged; {@code hasScheme} treats a
 * {@code null} result as "no scheme".
 */
@Override
public String scheme() {
final String schemeName = javaURI.getScheme();
return schemeName;
}

@Override
Expand Down
115 changes: 103 additions & 12 deletions jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,15 @@ public boolean isReference() {

@Override
public boolean hasScheme(String scheme) {
if ( jenaIRI.getScheme() == null )
String iriScheme = scheme();
if ( iriScheme == null )
return false;
return jenaIRI.getScheme().startsWith(scheme);
return iriScheme.equalsIgnoreCase(scheme);
}

/**
 * Return the scheme of this IRI as reported by {@code jenaIRI.getScheme()}.
 * The value is passed through unchanged; {@code hasScheme} treats a
 * {@code null} result as "no scheme".
 */
@Override
public String scheme() {
final String schemeName = jenaIRI.getScheme();
return schemeName;
}

@Override
Expand Down Expand Up @@ -224,11 +230,12 @@ private static IRI exceptions(IRI iri, String iriStr) {
if ( STRICT_FILE && isFILE(iri) ) {
if ( iriStr.startsWith("file://" ) && ! iriStr.startsWith("file:///") )
throw new IRIException("file: URLs should start file:///: <"+iriStr+">");
}

if ( isUUID(iri, iriStr) ) {
} else if ( isUUID(iri, iriStr) ) {
checkUUID(iri, iriStr);
} else if ( isURNUUID(iri, iriStr) ) {
checkURNUUID(iri, iriStr);
}

if (!showExceptions)
return iri;
if (!iri.hasViolation(includeWarnings))
Expand Down Expand Up @@ -277,25 +284,109 @@ private static boolean isHTTP(IRI iri) {
private static boolean isURN(IRI iri) { return "urn".equalsIgnoreCase(iri.getScheme()); }
private static boolean isFILE(IRI iri) { return "file".equalsIgnoreCase(iri.getScheme()); }

private static String UUID_REGEXP = "^(?:urn:uuid|uuid):[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$";
private static Pattern UUID_PATTERN = Pattern.compile(UUID_REGEXP, Pattern.CASE_INSENSITIVE);

private static boolean isUUID(IRI iri, String iriStr) {
return iriStr.regionMatches(true, 0, "urn:uuid:", 0, "urn:uuid:".length())
|| iriStr.regionMatches(true, 0, "uuid:", 0, "uuid:".length());
// Ignore case
return iriStr.regionMatches(true, 0, "uuid:", 0, "uuid:".length());
}

/**
 * Test whether the IRI string begins with {@code urn:uuid:}, ignoring case.
 * Only {@code iriStr} is examined; the {@code iri} argument is not used.
 */
private static boolean isURNUUID(IRI iri, String iriStr) {
final String prefix = "urn:uuid:";
final boolean ignoreCase = true;
return iriStr.regionMatches(ignoreCase, 0, prefix, 0, prefix.length());
}

// ---- uuid:
// UUID match, no anchors or URI scheme.
private static String UUID_BASE = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
private static String UUID_REGEXP = "^uuid:"+UUID_BASE+"$";
private static Pattern UUID_PATTERN = Pattern.compile(UUID_REGEXP, Pattern.CASE_INSENSITIVE);

private static void checkUUID(IRI iriObj, String original) {
if ( iriObj.hasViolation(true) )
// Already has problems.
return;
// jena-iri does not have UUID checks.
// Unfortunately, these tests are check/no-check sensitive.
if ( iriObj.getRawFragment() != null )
throw new IRIException("Fragment used with UUID");
throw new IRIException("Fragment used with uuid:");
if ( iriObj.getRawQuery() != null )
throw new IRIException("Query used with UUID");
throw new IRIException("Query used with uuid:");
boolean matches = UUID_PATTERN.matcher(original).matches();
if ( !matches )
throw new IRIException("Not a valid UUID string: "+original);
}


// ---- urn:uuid:
// RFC 8141 added the possibility for r-component, q-component (combined
// into the URI query string) and f-component (restricted fragment). This
// regexp has a weak test for r/q/f. It does not check the character
// limitations to ASCII on r/q/f

//private static String A2Z = "[0-9a-z]";

// Non-strict regexp: Any order r- and q-components, UCSchars.
private static String URN_UUID_REGEXP_LAX = "^urn:uuid:"+UUID_BASE+"(?:(?:\\?\\+.|\\?=.|#).*)?$";

// Strict regex for urn:uuid
// Only ASCII.
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// pct-encoded = "%" HEXDIG HEXDIG
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
// iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
// reserved = gen-delims / sub-delims
// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="
// Not:
// ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@"
// = ipchar / ucschar

// ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
// / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
// / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
// / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
// / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
// / %xD0000-DFFFD / %xE1000-EFFFD

// "(?: )" is a non-binding group.
// Percent-encoded octet: "%" HEXDIG HEXDIG.
// HEXDIG includes the digits 0-9 as well as a-f; upper case letters are
// covered by Pattern.CASE_INSENSITIVE when the pattern is compiled.
private static String PCT = "(?:%[0-9a-f]{2})";

// As contents of "[]" used in PCHAR
private static String UNRESERVED = "-0-9a-z._~";
// Or use \p{IsAlphabetic}
private static String UCSCHAR = "\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
/*
/ %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
/ %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
/ %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
/ %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
/ %xD0000-DFFFD / %xE1000-EFFFD
*/
// private = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
//private static String IPRIVATE
private static String IUNRESERVED = UNRESERVED+UCSCHAR;

//private static String GEN_DELIMS = ":/\\?#\\[\\]@";
private static String SUB_DELIMS = "!\\$&'\\(\\)\\*\\+,;=";
// Switch IUNRESERVED / UNRESERVED
// Contents of a "[]" character class: the single characters allowed by pchar.
private static String PCHARS1 = UNRESERVED+SUB_DELIMS+":"+"@";
// One pchar: either one allowed character or one complete percent-encoded octet.
// This is a group (an alternation), so it must never be placed inside "[]".
private static String PCHAR = "(?:["+PCHARS1+"]|"+PCT+")";

// Characters allowed in r-, q- and f-components in addition to pchar.
private static String URN_COMP_X = "/\\?";
// One character of an r-, q- or f-component: pchar / "/" / "?".
// Built as a complete group: the extra characters are added to the character
// class and the percent-encoded form stays a separate alternative. Splicing
// PCHAR into "[...]" would turn its group syntax into literal characters and
// stop percent-escapes being validated.
private static String URN_RQ_COMP_CHAR = "(?:["+PCHARS1+URN_COMP_X+"]|"+PCT+")";
// "?+" r-component and "?=" q-component, each optional and non-empty when present.
private static String URN_R_COMP = "(?:\\?\\+"+URN_RQ_COMP_CHAR+"+)?";
private static String URN_Q_COMP = "(?:\\?="+URN_RQ_COMP_CHAR+"+)?";
// f-component is an RFC 3986 fragment: *( pchar / "/" / "?" ), possibly empty.
private static String URN_F_COMP = "(?:#"+URN_RQ_COMP_CHAR+"*)?";
private static String URN_UUID_REGEXP = "^urn:uuid:"+UUID_BASE+URN_R_COMP+URN_Q_COMP+URN_F_COMP+"$";

private static Pattern URN_UUID_PATTERN = Pattern.compile(URN_UUID_REGEXP, Pattern.CASE_INSENSITIVE);

/**
 * Check the syntax of a {@code urn:uuid:} IRI against {@code URN_UUID_PATTERN}.
 * <p>
 * The check is skipped when {@code iriObj} already reports a violation
 * (warnings included), so an existing problem is not masked by this one.
 *
 * @param iriObj   the parsed IRI, consulted only for existing violations
 * @param original the IRI string that is matched against the pattern
 * @throws IRIException if no violation is already present and {@code original}
 *                      does not match the pattern
 */
private static void checkURNUUID(IRI iriObj, String original) {
final boolean alreadyHasProblems = iriObj.hasViolation(true);
if ( ! alreadyHasProblems ) {
if ( ! URN_UUID_PATTERN.matcher(original).matches() )
throw new IRIException("Not a valid UUID string: "+original);
}
}
}
2 changes: 2 additions & 0 deletions jena-core/src/main/java/org/apache/jena/irix/IRIs.java
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ public static String resolve(String baseStr, String iriStr) {
* </pre>
*/
public static String scheme(String str) {
if ( str == null )
return null;
int idx = scheme(str, 0);
if ( idx <= 0 || idx > str.length())
return null;
Expand Down
9 changes: 8 additions & 1 deletion jena-core/src/main/java/org/apache/jena/irix/IRIx.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static public IRIx create(String iri) throws IRIException {
* It returns a IRIx holder and does no checking whatsoever.
* Whether the IRI "works" is down to care by the application.
*/
static public IRIx createAny(String iri) throws IRIException {
static public IRIx createAny(String iri) {
Objects.requireNonNull(iri);
return IRIProviderAny.stringProvider().create(iri);
}
Expand Down Expand Up @@ -110,6 +110,13 @@ protected IRIx(String string) {
*/
public abstract boolean hasScheme(String scheme);

/**
* Return the IRI scheme, if known.
* <p>
* Returns null for "no scheme" (relative IRI).
*/
public abstract String scheme();

/**
* An <em>RDF Reference</em> is an URI which has scheme.
* If it is hierarchical, it should have a non-empty host authority.
Expand Down
3 changes: 2 additions & 1 deletion jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
@RunWith(Suite.class)
@Suite.SuiteClasses( {
// IRIx tests with matrix of providers.
TestIRIx.class,
TestIRIxSyntax.class,
TestIRIxOps.class,
TestRFC3986.class,
TestResolve.class,
TestNormalize.class,
Expand Down
Loading