Skip to content

Commit

Permalink
Merge #32500
Browse files Browse the repository at this point in the history
32500: opt: support COLLATE expressions r=justinj a=justinj

This commit adds support for COLLATE expressions. In particular, this
allows doing comparisons against existing collated string indexes, which
was not possible before.

Release note (sql change): Queries involving COLLATE expressions are now
supported by the cost-based optimizer.

Co-authored-by: Justin Jaffray <justin@cockroachlabs.com>
  • Loading branch information
craig[bot] and Justin Jaffray committed Nov 21, 2018
2 parents 293e3fa + 722a9e9 commit ae59524
Show file tree
Hide file tree
Showing 13 changed files with 181 additions and 7 deletions.
10 changes: 10 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func init() {
opt.AnyOp: (*Builder).buildAny,
opt.AnyScalarOp: (*Builder).buildAnyScalar,
opt.IndirectionOp: (*Builder).buildIndirection,
opt.CollateOp: (*Builder).buildCollate,
opt.UnsupportedExprOp: (*Builder).buildUnsupportedExpr,

// Item operators.
Expand Down Expand Up @@ -402,6 +403,15 @@ func (b *Builder) buildIndirection(
return tree.NewTypedIndirectionExpr(expr, index), nil
}

// buildCollate converts a Collate scalar expression into a typed COLLATE tree
// expression. It builds the operand (child 0) and wraps the result together
// with the locale stored on the memo expression. An error from building the
// operand is returned unchanged.
func (b *Builder) buildCollate(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.TypedExpr, error) {
	input := scalar.Child(0).(opt.ScalarExpr)
	operand, err := b.buildScalar(ctx, input)
	if err != nil {
		return nil, err
	}

	locale := scalar.(*memo.CollateExpr).Locale
	return tree.NewTypedCollateExpr(operand, locale), nil
}

func (b *Builder) buildUnsupportedExpr(
ctx *buildScalarCtx, scalar opt.ScalarExpr,
) (tree.TypedExpr, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/expr_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ func (f *ExprFmtCtx) FormatScalarProps(scalar opt.ScalarExpr) {
func (f *ExprFmtCtx) formatScalarPrivate(scalar opt.ScalarExpr) {
var private interface{}
switch t := scalar.(type) {
case *NullExpr, *TupleExpr:
case *NullExpr, *TupleExpr, *CollateExpr:
// Private is redundant with logical type property.
private = nil

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/typing.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ func init() {
typingFuncMap[opt.SubqueryOp] = typeSubquery
typingFuncMap[opt.ColumnAccessOp] = typeColumnAccess
typingFuncMap[opt.IndirectionOp] = typeIndirection
typingFuncMap[opt.CollateOp] = typeCollate

// Override default typeAsAggregate behavior for aggregate functions with
// a large number of possible overloads or where ReturnType depends on
Expand Down Expand Up @@ -177,6 +178,11 @@ func typeIndirection(e opt.ScalarExpr) types.T {
return types.UnwrapType(e.Child(0).(opt.ScalarExpr).DataType()).(types.TArray).Typ
}

// typeCollate computes the type of a Collate expression: a collated string
// type carrying the locale stored on the expression.
func typeCollate(e opt.ScalarExpr) types.T {
	collate := e.(*CollateExpr)
	return types.TCollatedString{Locale: collate.Locale}
}

// typeAsFirstArg returns the type of the expression's 0th argument.
func typeAsFirstArg(e opt.ScalarExpr) types.T {
return e.Child(0).(opt.ScalarExpr).DataType()
Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/opt/norm/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,22 @@ func (c *CustomFuncs) ConvertConstArrayToTuple(scalar opt.ScalarExpr) opt.Scalar
return c.f.ConstructTuple(elems, types.TTuple{Types: ts})
}

// CastToCollatedString builds a collated string constant with the given locale
// from a constant operand. The operand must be a Const expression whose value
// is either a plain string or a collated string; for a collated string, its
// contents are reused under the new locale. Any other datum type panics.
func (c *CustomFuncs) CastToCollatedString(str opt.ScalarExpr, locale string) opt.ScalarExpr {
	datum := str.(*memo.ConstExpr).Value

	var contents string
	switch d := datum.(type) {
	case *tree.DCollatedString:
		contents = d.Contents
	case *tree.DString:
		contents = string(*d)
	default:
		panic(fmt.Sprintf("unexpected type for COLLATE: %T", datum))
	}

	collated := tree.NewDCollatedString(contents, locale, &c.f.evalCtx.CollationEnv)
	return c.f.ConstructConst(collated)
}

// ----------------------------------------------------------------------
//
// Numeric Rules
Expand Down
10 changes: 10 additions & 0 deletions pkg/sql/opt/norm/rules/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,13 @@
(ConvertConstArrayToTuple $ary)
$cmp
)

# FoldCollate converts a Collate expr over a constant string (collated or not)
# into a collated string constant with the requested locale.
[FoldCollate, Normalize]
(Collate
$input:(Const)
$locale:*
)
=>
(CastToCollatedString $input $locale)
59 changes: 59 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/scalar
Original file line number Diff line number Diff line change
Expand Up @@ -1109,3 +1109,62 @@ select
│ └── key: (1)
└── filters
└── k = ANY '{1,2,3}'::INT[] [type=bool, outer=(1)]

# --------------------------------------------------
# FoldCollate
# --------------------------------------------------

norm expect=FoldCollate
SELECT 'hello' COLLATE en_u_ks_level1
----
project
├── columns: "?column?":1(collatedstring{en_u_ks_level1}!null)
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(1)
├── values
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── tuple [type=tuple]
└── projections
└── const: 'hello' COLLATE en_u_ks_level1 [type=collatedstring{en_u_ks_level1}]

norm expect=FoldCollate
SELECT ('hello' COLLATE en_u_ks_level1) COLLATE en_u_ks_level1
----
project
├── columns: "?column?":1(collatedstring{en_u_ks_level1}!null)
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(1)
├── values
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── tuple [type=tuple]
└── projections
└── const: 'hello' COLLATE en_u_ks_level1 [type=collatedstring{en_u_ks_level1}]

norm expect=FoldCollate
SELECT ('hello' COLLATE en) COLLATE en_u_ks_level1
----
project
├── columns: "?column?":1(collatedstring{en_u_ks_level1}!null)
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(1)
├── values
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── tuple [type=tuple]
└── projections
└── const: 'hello' COLLATE en_u_ks_level1 [type=collatedstring{en_u_ks_level1}]

norm expect-not=FoldCollate
SELECT s COLLATE en_u_ks_level1 FROM a
----
project
├── columns: s:7(collatedstring{en_u_ks_level1})
├── scan a
│ └── columns: a.s:4(string)
└── projections
└── a.s COLLATE en_u_ks_level1 [type=collatedstring{en_u_ks_level1}, outer=(4)]
12 changes: 12 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,18 @@ define FunctionPrivate {
Overload FuncOverload
}

# Collate is an expression of the form
#
# x COLLATE y
#
# Where x is a "string type" (meaning either a normal string or a collated string),
# and y is a locale. It evaluates to the string collated to the given locale.
[Scalar]
define Collate {
Input ScalarExpr
Locale string
}

[Scalar]
define Coalesce {
Args ScalarListExpr
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/opt/optbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ func (b *Builder) buildScalar(
}
out = b.factory.ConstructArray(els, arrayType)

case *tree.CollateExpr:
in := b.buildScalar(t.Expr.(tree.TypedExpr), inScope, nil, nil, colRefs)
out = b.factory.ConstructCollate(in, t.Locale)

case *tree.ArrayFlatten:
if b.AllowUnsupportedExpr {
out = b.factory.ConstructUnsupportedExpr(t)
Expand Down
9 changes: 8 additions & 1 deletion pkg/sql/opt/optbuilder/testdata/scalar
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,8 @@ concat [type=jsonb]
build-scalar allow-unsupported
'hello' COLLATE en
----
unsupported-expr: 'hello' COLLATE en [type=collatedstring{en}]
collate [type=collatedstring{en}]
└── const: 'hello' [type=string]

build-scalar
random()
Expand Down Expand Up @@ -1055,3 +1056,9 @@ tuple [type=tuple{tuple{bool, bool, bool}, tuple{bool, bool, bool}, tuple{bool,
├── false [type=bool]
├── true [type=bool]
└── false [type=bool]

build-scalar vars=(string)
@1 COLLATE en
----
collate [type=collatedstring{en}]
└── variable: @1 [type=string]
40 changes: 40 additions & 0 deletions pkg/sql/opt/xform/testdata/rules/scan
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,43 @@ memo (optimized, ~5KB, required=[presentation: i:2,k:1])
├── G6: (ge G7 G8)
├── G7: (variable s)
└── G8: (const 'foo')

# Collated strings are treated properly.
exec-ddl
CREATE TABLE x (s STRING COLLATE en_u_ks_level1 PRIMARY KEY)
----
TABLE x
├── s collatedstring{en_u_ks_level1} not null
└── INDEX primary
└── s collatedstring{en_u_ks_level1} not null

opt
SELECT s FROM x WHERE s < 'hello' COLLATE en_u_ks_level1
----
scan x
├── columns: s:1(collatedstring{en_u_ks_level1}!null)
├── constraint: /1: [ - /'hello' COLLATE en_u_ks_level1)
└── key: (1)

opt
SELECT s FROM x WHERE s = 'hello' COLLATE en_u_ks_level1
----
scan x
├── columns: s:1(collatedstring{en_u_ks_level1}!null)
├── constraint: /1: [/'hello' COLLATE en_u_ks_level1 - /'hello' COLLATE en_u_ks_level1]
├── cardinality: [0 - 1]
├── key: ()
└── fd: ()-->(1)

# Can't generate spans for other collations.
opt
SELECT s FROM x WHERE s COLLATE en = 'hello' COLLATE en
----
select
├── columns: s:1(collatedstring{en_u_ks_level1}!null)
├── key: (1)
├── scan x
│ ├── columns: s:1(collatedstring{en_u_ks_level1}!null)
│ └── key: (1)
└── filters
└── s COLLATE en = 'hello' COLLATE en [type=bool, outer=(1)]
8 changes: 4 additions & 4 deletions pkg/sql/sem/tree/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -2491,7 +2491,7 @@ type EvalContext struct {
// EXPLAIN(TYPES[, NORMALIZE]).
SkipNormalize bool

collationEnv CollationEnvironment
CollationEnv CollationEnvironment

TestingKnobs EvalContextTestingKnobs

Expand Down Expand Up @@ -3124,7 +3124,7 @@ func PerformCast(ctx *EvalContext, d Datum, t coltypes.CastTargetType) (Datum, e
if c.N > 0 && c.N < uint(len(s)) {
s = s[:c.N]
}
return NewDCollatedString(s, c.Locale, &ctx.collationEnv), nil
return NewDCollatedString(s, c.Locale, &ctx.CollationEnv), nil
case *coltypes.TName:
return NewDName(s), nil
}
Expand Down Expand Up @@ -3460,9 +3460,9 @@ func (expr *CollateExpr) Eval(ctx *EvalContext) (Datum, error) {
}
switch d := unwrapped.(type) {
case *DString:
return NewDCollatedString(string(*d), expr.Locale, &ctx.collationEnv), nil
return NewDCollatedString(string(*d), expr.Locale, &ctx.CollationEnv), nil
case *DCollatedString:
return NewDCollatedString(d.Contents, expr.Locale, &ctx.collationEnv), nil
return NewDCollatedString(d.Contents, expr.Locale, &ctx.CollationEnv), nil
default:
return nil, pgerror.NewErrorf(pgerror.CodeDatatypeMismatchError, "incompatible type for COLLATE: %s", d)
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/sql/sem/tree/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,16 @@ func NewTypedIndirectionExpr(expr, index TypedExpr) *IndirectionExpr {
return node
}

// NewTypedCollateExpr constructs a CollateExpr over the given typed expression
// and locale. The result type is set directly to the collated string type for
// that locale; nothing is checked here.
func NewTypedCollateExpr(expr TypedExpr, locale string) *CollateExpr {
	collate := new(CollateExpr)
	collate.Expr = expr
	collate.Locale = locale
	collate.typ = types.TCollatedString{Locale: locale}
	return collate
}

func (node *ComparisonExpr) memoizeFn() {
fOp, fLeft, fRight, _, _ := foldComparisonExpr(node.Operator, node.Left, node.Right)
leftRet, rightRet := fLeft.(TypedExpr).ResolvedType(), fRight.(TypedExpr).ResolvedType()
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/parse_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func ParseStringAs(t types.T, s string, evalCtx *EvalContext) (Datum, error) {
return nil, err
}
case types.TCollatedString:
d = NewDCollatedString(s, t.Locale, &evalCtx.collationEnv)
d = NewDCollatedString(s, t.Locale, &evalCtx.CollationEnv)
default:
d, err = parseStringAs(t, s, evalCtx)
if d == nil && err == nil {
Expand Down

0 comments on commit ae59524

Please sign in to comment.