Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JENA-1313: compare using a Collator when both literals are tagged with same language #237

Merged
merged 5 commits into from Jun 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 15 additions & 2 deletions jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
Expand Up @@ -243,9 +243,12 @@ public static NodeValue makeFloat(float f)
/**
 * Factory for a double-valued node value.
 *
 * @param d the double to hold
 * @return a new {@code NodeValueDouble} built from {@code d}
 */
public static NodeValue makeDouble(double d) {
    return new NodeValueDouble(d);
}

public static NodeValue makeString(String s)
public static NodeValue makeString(String s)
{ return new NodeValueString(s) ; }

/**
 * Factory for a sort-key node value: a string paired with a collation.
 *
 * @param s the string to hold
 * @param collation the collation passed through to {@code NodeValueSortKey}
 * @return a new {@code NodeValueSortKey} built from {@code s} and {@code collation}
 */
public static NodeValue makeSortKey(String s, String collation) {
    return new NodeValueSortKey(s, collation);
}

/**
 * Factory for a language-tagged string node value.
 *
 * @param s the string to hold
 * @param lang the language passed through to {@code NodeValueLang}
 * @return a new {@code NodeValueLang} built from {@code s} and {@code lang}
 */
public static NodeValue makeLangString(String s, String lang) {
    return new NodeValueLang(s, lang);
}

Expand Down Expand Up @@ -730,6 +733,7 @@ private static int compare(NodeValue nv1, NodeValue nv2, boolean sortOrderingCom
case VSPACE_NODE :
case VSPACE_NUM :
case VSPACE_STRING :
case VSPACE_SORTKEY :
case VSPACE_UNKNOWN :
// Drop through.
}
Expand Down Expand Up @@ -766,6 +770,13 @@ private static int compare(NodeValue nv1, NodeValue nv2, boolean sortOrderingCom
return Expr.CMP_GREATER ;
return Expr.CMP_EQUAL; // Both plain or both xsd:string.
}
case VSPACE_SORTKEY :
{
if (!(nv1 instanceof NodeValueSortKey) || !(nv2 instanceof NodeValueSortKey)) {
raise(new ExprNotComparableException("Can't compare (not node value sort keys) "+nv1+" and "+nv2)) ;
}
return ((NodeValueSortKey) nv1).compareTo((NodeValueSortKey) nv2);
}
case VSPACE_BOOLEAN: return XSDFuncOp.compareBoolean(nv1, nv2) ;

case VSPACE_LANG:
Expand Down Expand Up @@ -867,6 +878,7 @@ private static ValueSpaceClassification classifyValueSpace(NodeValue nv)
return VSPACE_DATE ;

if ( nv.isString()) return VSPACE_STRING ;
if ( nv.isSortKey()) return VSPACE_SORTKEY ;
if ( nv.isBoolean()) return VSPACE_BOOLEAN ;

if ( ! nv.isLiteral() ) return VSPACE_NODE ;
Expand Down Expand Up @@ -910,6 +922,7 @@ public final Node asNode()
/** Default implementation: this node value is not reported as a boolean. */
public boolean isBoolean() {
    return false;
}
/** Default implementation: this node value is not reported as a string. */
public boolean isString() {
    return false;
}
/** Default implementation: this node value is not reported as a language string. */
public boolean isLangString() {
    return false;
}
/** Default implementation: this node value is not reported as a sort key. */
public boolean isSortKey() {
    return false;
}

/** Default implementation: this node value is not reported as a number. */
public boolean isNumber() {
    return false;
}
/** Default implementation: this node value is not reported as an integer. */
public boolean isInteger() {
    return false;
}
Expand Down Expand Up @@ -954,7 +967,7 @@ public boolean isDayTimeDuration()
/**
 * Default implementation: reports an {@code ExprEvalTypeException} through {@code raise}.
 *
 * @return {@code false} if {@code raise} returns normally (it is expected to throw)
 */
public boolean getBoolean() {
    raise(new ExprEvalTypeException("Not a boolean: " + this));
    return false;
}
/**
 * Default implementation: reports an {@code ExprEvalTypeException} through {@code raise}.
 *
 * @return {@code null} if {@code raise} returns normally (it is expected to throw)
 */
public String getString() {
    raise(new ExprEvalTypeException("Not a string: " + this));
    return null;
}
/**
 * Default implementation: reports an {@code ExprEvalTypeException} through {@code raise}.
 * The message text is the same one used by {@code getString()}.
 *
 * @return {@code null} if {@code raise} returns normally (it is expected to throw)
 */
public String getLang() {
    raise(new ExprEvalTypeException("Not a string: " + this));
    return null;
}

/**
 * Default implementation: reports an {@code ExprEvalTypeException} through {@code raise}.
 *
 * @return {@code null} if {@code raise} returns normally (it is expected to throw)
 */
public BigInteger getInteger() {
    raise(new ExprEvalTypeException("Not an integer: " + this));
    return null;
}
/**
 * Default implementation: reports an {@code ExprEvalTypeException} through {@code raise}.
 *
 * @return {@code null} if {@code raise} returns normally (it is expected to throw)
 */
public BigDecimal getDecimal() {
    raise(new ExprEvalTypeException("Not a decimal: " + this));
    return null;
}
/**
 * Default implementation: reports an {@code ExprEvalTypeException} through {@code raise}.
 *
 * @return {@code Float.NaN} if {@code raise} returns normally (it is expected to throw)
 */
public float getFloat() {
    raise(new ExprEvalTypeException("Not a float: " + this));
    return Float.NaN;
}
Expand Down
Expand Up @@ -33,7 +33,7 @@ public enum ValueSpaceClassification {
VSPACE_G_MONTH,
VSPACE_G_DAY,

VSPACE_STRING, VSPACE_LANG,
VSPACE_STRING, VSPACE_LANG, VSPACE_SORTKEY,
VSPACE_BOOLEAN,
VSPACE_UNKNOWN,
VSPACE_DIFFERENT
Expand Down
Expand Up @@ -202,6 +202,12 @@ public static String str(Node node) {
return "[undef]" ;
}

// -------- sort key (collation)

/**
 * Builds a sort-key node value from the string form of {@code nv} and a collation.
 *
 * @param nv the node value whose node is converted with {@code str(Node)}
 * @param collation the collation to attach to the resulting sort key
 * @return the result of {@code NodeValue.makeSortKey} for that string and collation
 */
public static NodeValue sortKey(NodeValue nv, String collation) {
    final String text = str(nv.asNode());
    return NodeValue.makeSortKey(text, collation);
}

// -------- datatype
public static NodeValue datatype(NodeValue nv) {
return NodeValue.makeNode(datatype(nv.asNode())) ;
Expand Down
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.jena.sparql.expr.nodevalue;

import java.text.Collator;
import java.util.Locale;

import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Node_Literal;
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.util.FmtUtils;

/**
* A {@link NodeValue} that supports collation value for a string. This allows query values
* to be sorted following rules for a specific collation.
*/
public final class NodeValueSortKey extends NodeValue implements Comparable<NodeValueSortKey> {

/**
* Node value text.
*/
private final String string;
/**
* Node value collation language tag (e.g. fi, pt-BR, en, en-CA, etc).
*/
private final String collation;

public NodeValueSortKey(final String string, final String collation) {
this.string = string;
this.collation = collation;
}

public NodeValueSortKey(final String string, final String collation, Node n) {
super(n);
this.string = string;
this.collation = collation;
}

@Override
public boolean isSortKey() {
return Boolean.TRUE;
}

@Override
public String getString() {
return string;
}

@Override
public String asString() {
return string;
}

public String getCollation() {
return collation;
}

/**
* The node created by a NodeValueSortKey is a {@link Node_Literal}. This is used to represent
* the node value internally for comparison, and should no be expected to work in other cases.
* Users are not expected to extend it, or use in other functions.
*/
@Override
protected Node makeNode() {
return NodeFactory.createLiteral(string);
}

kinow marked this conversation as resolved.
Show resolved Hide resolved
@Override
public void visit(NodeValueVisitor visitor) {
visitor.visit(this);
}

@Override
public String toString()
{
if (getNode() != null) {
return FmtUtils.stringForNode(getNode()) ;
}
return "'"+getString()+"'";
}

@Override
public int compareTo(NodeValueSortKey other) {
int cmp = 0;
if (other != null) {
String c1 = this.getCollation();
String c2 = other.getCollation();
if (c1 != null && c2 != null && c1.equals(c2)) {
// locales are parsed. Here we could think about caching if necessary
Locale desiredLocale = Locale.forLanguageTag(c1);
// collators are already stored in a concurrent map by the JVM, with <locale, softref<collator>>
Collator collator = Collator.getInstance(desiredLocale);
cmp = collator.compare(this.getString(), other.getString());
} else {
cmp = XSDFuncOp.compareString(this, other) ;
}
}
return cmp;
}

}
Expand Up @@ -30,6 +30,7 @@ public interface NodeValueVisitor
public void visit(NodeValueNode nv) ;
public void visit(NodeValueLang nv) ;
public void visit(NodeValueString nv) ;
public void visit(NodeValueSortKey nv) ;
public void visit(NodeValueDT nv) ;
// public void visit(NodeValueTime nv) ;
public void visit(NodeValueDuration nodeValueDuration);
Expand Down
Expand Up @@ -95,6 +95,9 @@ public static void loadStdDefs(FunctionRegistry registry) {
addCastTemporal(registry, XSDDatatype.XSDgMonthDay) ;
addCastTemporal(registry, XSDDatatype.XSDgDay) ;

// Using ARQ prefix http://jena.apache.org/ARQ/function#
add(registry, ARQConstants.ARQFunctionLibraryURI+"collation", FN_Collation.class) ;

//TODO op:numeric-greater-than etc.
//TODO sparql:* for all the SPARQL builtins.

Expand Down
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.jena.sparql.function.library;

import java.text.Collator;
import java.text.RuleBasedCollator;
import java.util.Locale;

import org.apache.jena.sparql.expr.Expr;
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.expr.nodevalue.NodeFunctions;
import org.apache.jena.sparql.expr.nodevalue.NodeValueLang;
import org.apache.jena.sparql.expr.nodevalue.NodeValueSortKey;
import org.apache.jena.sparql.function.FunctionBase2;

/**
 * Collation function taking two arguments: the collation first, then the value to sort by,
 * which may come from any {@link Expr} (ExprVar, ExprFunctionN, NodeValue, etc).
 *
 * <p>Called with a prefix {@code p}, e.g. {@code ORDER BY p:collation("fi", ?label)}.
 * The first argument (here "fi") is converted to its string form and stored as the collation
 * of the returned sort key. It is later meant to be resolved to a {@link Locale} and then to
 * a {@link Collator}. Per the original notes for this function, a tag that matches no known
 * collator ends up with a {@link RuleBasedCollator} without specific rules, i.e. natural
 * order (JDK behaviour, not enforced in this class).</p>
 *
 * <p>The second argument is reduced to its string form via {@link NodeFunctions}. For a
 * {@link NodeValueLang} (e.g. rendered from "Casa"@pt) this is intended to drop the language
 * tag, so that only the literal string value (i.e. Casa) takes part in the ordering.</p>
 *
 * @see NodeValueSortKey
 */
public class FN_Collation extends FunctionBase2 {

    public FN_Collation() {
    }

    /**
     * Wraps the string form of {@code v2} in a sort key that uses {@code v1} as collation.
     *
     * @param v1 the collation argument
     * @param v2 the value whose string form becomes the sort key text
     * @return the result of {@code NodeFunctions.sortKey} for {@code v2} and the collation
     */
    @Override
    public NodeValue exec(NodeValue v1, NodeValue v2) {
        final String collationTag = NodeFunctions.str(v1.asNode());
        return NodeFunctions.sortKey(v2, collationTag);
    }

}