Skip to content
Permalink
Browse files
COMMONSRDF-51: compare language tags in lower case
  • Loading branch information
stain committed Feb 8, 2017
2 parents 6d4e333 + 202a8c6 commit 4c69d42f40028a2faa613be04ff0d8a0d20b6ec3
Show file tree
Hide file tree
Showing 10 changed files with 515 additions and 36 deletions.
@@ -18,6 +18,7 @@
package org.apache.commons.rdf.api;

import java.io.Serializable;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;

@@ -72,31 +73,43 @@ public interface Literal extends RDFTerm {
* <a href="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
* >http://www.w3.org/1999/02/22-rdf-syntax-ns#langString</a>, this method
* must return {@link Optional#empty()}.
*
* <p>
* The value space of language tags is always in lower case; although
* RDF implementations MAY convert all language tags to lower case,
* safe comparisons of language tags should be done using
* {@link String#toLowerCase(Locale)} with the locale
* {@link Locale#ROOT}.
* <p>
* Implementation note: If your application requires {@link Serializable}
* objects, it is best not to store an {@link Optional} in a field. It is
* recommended to use {@link Optional#ofNullable(Object)} to create the
* return value for this method.
*
* @return The {@link Optional} language tag for this literal. If
* {@link Optional#isPresent()} returns true, the value returned by
* {@link Optional#get()} must be a non-empty string conforming to
* BCP47.
* {@link Optional#get()} must be a non-empty language tag string
* conforming to BCP47.
* @see <a href=
* "http://www.w3.org/TR/rdf11-concepts/#dfn-language-tag">RDF-1.1
* Literal language tag</a>
*/
Optional<String> getLanguageTag();

/**
* Check if this Literal is equal to another Literal. <blockquote>
* Check if this Literal is equal to another Literal.
* <blockquote>
* <a href="http://www.w3.org/TR/rdf11-concepts/#dfn-literal-term">Literal
* term equality</a>: Two literals are term-equal (the same RDF literal) if
* term equality</a>:
* Two literals are term-equal (the same RDF literal) if
* and only if the two lexical forms, the two datatype IRIs, and the two
* language tags (if any) compare equal, character by character. Thus, two
* literals can have the same value without being the same RDF term.
* </blockquote>
*
* As the value space for language tags is lower case, if they are present,
* they MUST be compared character by character
* using the equivalent of {@link String#toLowerCase(java.util.Locale)} with
* the locale {@link Locale#ROOT}.
* <p>
* Implementations MUST also override {@link #hashCode()} so that two equal
* Literals produce the same hash code.
*
@@ -114,7 +127,7 @@ public interface Literal extends RDFTerm {
* The returned hash code MUST be equal to the result of
* {@link Objects#hash(Object...)} with the arguments
* {@link #getLexicalForm()}, {@link #getDatatype()},
* {@link #getLanguageTag()}.
* {@link #getLanguageTag()}<code>.map(s-&gt;s.toLowerCase(Locale.ROOT))</code>.
* <p>
* This method MUST be implemented in conjunction with
* {@link #equals(Object)} so that two equal Literals produce the same hash
@@ -23,6 +23,7 @@
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
@@ -587,7 +588,145 @@ public void getGraph2() throws Exception {
}


@Test
public void containsLanguageTagsCaseInsensitive() {
    // COMMONSRDF-51: a literal stored with one casing of its language tag
    // must be found again when looked up with any other casing of that tag.
    final Literal lowerCased = factory.createLiteral("Hello", "en-gb");
    final Literal upperCased = factory.createLiteral("Hello", "EN-GB");
    final Literal mixedCased = factory.createLiteral("Hello", "en-GB");

    final IRI subject = factory.createIRI("http://example.com/s1");
    final IRI predicate = factory.createIRI("http://example.com/greeting");

    // Only the upper-case variant is ever added to the dataset.
    dataset.add(null, subject, predicate, upperCased);

    // Probed in the order: upper, lower, mixed.
    final Literal[] probes = { upperCased, lowerCased, mixedCased };

    // Lookup with a fully specified quad
    for (final Literal probe : probes) {
        assertTrue(dataset.contains(factory.createQuad(null, subject, predicate, probe)));
    }

    // Lookup with a pattern that only constrains the object
    for (final Literal probe : probes) {
        assertTrue(dataset.contains(null, null, null, probe));
    }
}

@Test
public void containsLanguageTagsCaseInsensitiveTurkish() {
// COMMONSRDF-51: Special test for the Turkish locale issue, where
// lower-casing with the default locale does not give the ASCII result,
// e.g. "FI".toLowerCase() is not equal to "fi" (checked by the
// assumption further down).
// See also:
// https://garygregory.wordpress.com/2015/11/03/java-lowercase-conversion-turkey/

// This is similar to the test in AbstractRDFTest, but on a graph
// Remember the JVM-wide default locale so the finally block can restore it.
Locale defaultLocale = Locale.getDefault();
try {
// Phase 1: create literals and add one quad while the default locale is ROOT.
Locale.setDefault(Locale.ROOT);
final Literal lowerROOT = factory.createLiteral("moi", "fi");
final Literal upperROOT = factory.createLiteral("moi", "FI");
final Literal mixedROOT = factory.createLiteral("moi", "fI");
final IRI exampleROOT = factory.createIRI("http://example.com/s1");
final IRI greeting = factory.createIRI("http://example.com/greeting");
dataset.add(null, exampleROOT, greeting, mixedROOT);

// Phase 2: switch the default locale to Turkish for everything below.
Locale turkish = Locale.forLanguageTag("TR");
Locale.setDefault(turkish);
// If the assumption below does not hold, then the Turkish
// locale no longer has the peculiarity that we want to test,
// and the remaining assertions would not be meaningful.
Assume.assumeFalse("FI".toLowerCase().equals("fi"));

// Below is pretty much the same as in
// containsLanguageTagsCaseInsensitive(), but with literals
// created while the Turkish locale is the default.
final Literal lower = factory.createLiteral("moi", "fi");
final Literal upper = factory.createLiteral("moi", "FI");
final Literal mixed = factory.createLiteral("moi", "fI");

// A quad added under the Turkish locale must match literals created
// under either locale, both as a full quad and as a pattern.
final IRI exampleTR = factory.createIRI("http://example.com/s2");
dataset.add(null, exampleTR, greeting, upper);
assertTrue(dataset.contains(factory.createQuad(null, exampleTR, greeting, upper)));
assertTrue(dataset.contains(factory.createQuad(null, exampleTR, greeting, upperROOT)));
assertTrue(dataset.contains(factory.createQuad(null, exampleTR, greeting, lower)));
assertTrue(dataset.contains(factory.createQuad(null, exampleTR, greeting, lowerROOT)));
assertTrue(dataset.contains(factory.createQuad(null, exampleTR, greeting, mixed)));
assertTrue(dataset.contains(factory.createQuad(null, exampleTR, greeting, mixedROOT)));
assertTrue(dataset.contains(null, exampleTR, null, upper));
assertTrue(dataset.contains(null, exampleTR, null, upperROOT));
assertTrue(dataset.contains(null, exampleTR, null, lower));
assertTrue(dataset.contains(null, exampleTR, null, lowerROOT));
assertTrue(dataset.contains(null, exampleTR, null, mixed));
assertTrue(dataset.contains(null, exampleTR, null, mixedROOT));

// What about the quad we added while in ROOT locale?
// It must still match literals created under the Turkish locale.
assertTrue(dataset.contains(factory.createQuad(null, exampleROOT, greeting, upper)));
assertTrue(dataset.contains(factory.createQuad(null, exampleROOT, greeting, lower)));
assertTrue(dataset.contains(factory.createQuad(null, exampleROOT, greeting, mixed)));
assertTrue(dataset.contains(null, exampleROOT, null, upper));
assertTrue(dataset.contains(null, exampleROOT, null, lower));
assertTrue(dataset.contains(null, exampleROOT, null, mixed));
} finally {
// Always restore the original default locale, even if an assertion
// or the assumption above threw.
Locale.setDefault(defaultLocale);
}
}


@Test
public void removeLanguageTagsCaseInsensitive() {
    // COMMONSRDF-51: Ensure we can remove with any casing
    // of literal language tag
    final Literal lower = factory.createLiteral("Hello", "en-gb");
    final Literal upper = factory.createLiteral("Hello", "EN-GB");
    final Literal mixed = factory.createLiteral("Hello", "en-GB");

    final IRI example1 = factory.createIRI("http://example.com/s1");
    final IRI greeting = factory.createIRI("http://example.com/greeting");

    // Added as upper case, removed with a mixed-case pattern
    dataset.add(null, example1, greeting, upper);
    dataset.remove(null, example1, null, mixed);
    assertFalse(dataset.contains(null, null, greeting, null));

    // Added as lower case, removed with an upper-case pattern.
    // This removal must be verified on its own; otherwise a failure here
    // would be hidden by the quad-based removal that follows.
    dataset.add(null, example1, greeting, lower);
    dataset.remove(null, example1, null, upper);
    assertFalse(dataset.contains(null, null, greeting, null));

    // Check with a Quad: added as mixed case, removed as upper case
    dataset.add(factory.createQuad(null, example1, greeting, mixed));
    dataset.remove(factory.createQuad(null, example1, greeting, upper));
    assertFalse(dataset.contains(null, null, greeting, null));
}

/**
 * Take any one element from the given stream, then close the stream.
 *
 * @param stream
 *            the stream of quads to consume; it is closed before this
 *            method returns
 * @return the result of {@link Stream#findAny()} on the stream
 */
private static Optional<? extends Quad> closableFindAny(Stream<? extends Quad> stream) {
    try (Stream<? extends Quad> closing = stream) {
        final Optional<? extends Quad> anyQuad = closing.findAny();
        return anyQuad;
    }
}

@Test
public void streamLanguageTagsCaseInsensitive() {
    // COMMONSRDF-51: streaming with a literal as the object pattern must
    // match regardless of the casing of the literal's language tag.
    final Literal lowerCased = factory.createLiteral("Hello", "en-gb");
    final Literal upperCased = factory.createLiteral("Hello", "EN-GB");
    final Literal mixedCased = factory.createLiteral("Hello", "en-GB");

    final IRI subject = factory.createIRI("http://example.com/s1");
    final IRI predicate = factory.createIRI("http://example.com/greeting");

    // Only the upper-case variant is added.
    dataset.add(null, subject, predicate, upperCased);

    // Every casing, used as a pattern, must yield at least one quad.
    for (final Literal probe : new Literal[] { upperCased, lowerCased, mixedCased }) {
        final Optional<? extends Quad> hit = closableFindAny(dataset.stream(null, null, null, probe));
        assertTrue(hit.isPresent());
    }

    // The streamed quad must equal a freshly created quad whose literal
    // uses yet another casing of the language tag.
    final Optional<? extends Quad> found = closableFindAny(dataset.stream(null, null, null, lowerCased));
    final Quad streamed = found.get();
    final Quad freshQuad = factory.createQuad(null, subject, predicate, mixedCased);
    assertEquals(streamed, freshQuad);
}

/**
* An attempt to use the Java 8 streams to look up a more complicated query.
* <p>

0 comments on commit 4c69d42

Please sign in to comment.