From fe7ded3d45f907a3ea0215d622fd1c352f499f7c Mon Sep 17 00:00:00 2001 From: Andrew Kimball Date: Tue, 7 May 2019 16:40:10 -0700 Subject: [PATCH] sql: Add locality() builtin function There is currently no easy way to programmatically inspect the locality of the current node. It appears in crdb_internal.gossip_nodes, but it's hard to work with because it's JSON, not constant-foldable, and keyed by node id. This commit adds a new locality builtin function that: Returns the hierarchical location of the current node as a tuple of labeled values, ordered from most inclusive to least inclusive. For example: `region=east,datacenter=us-east-1`. When building geo-distributed applications, this enables a very nice way to automatically assign the partition key, as illustrated below: CREATE TABLE charges ( region STRING NOT NULL DEFAULT (locality()).region, id UUID NOT NULL DEFAULT gen_random_uuid(), ... The DEFAULT expression for the region column automatically inserts the region value from the current node's locality. It also enables queries like this, that only touch rows in the current region (which is necessary to avoid cross-region hops): SELECT * FROM charges WHERE region = (locality()).region AND id = $1 The locality is constant, so the optimizer is able to fold column access to a constant value, which can then be used to select an optimal index. Resolves #37310 Release note (sql change): Adds a new locality builtin function that returns the hierarchical location of the current node as a tuple of labeled values, ordered from most inclusive to least inclusive. --- docs/generated/sql/functions.md | 3 ++ pkg/sql/conn_executor.go | 1 + .../logictest/testdata/logic_test/locality | 19 +++++++ pkg/sql/opt/memo/typing.go | 4 +- pkg/sql/opt/memo/typing_test.go | 2 +- pkg/sql/opt/norm/custom_funcs.go | 1 + pkg/sql/opt/optbuilder/testdata/scalar | 26 ++++++++++ pkg/sql/opt/testutils/opttester/opt_tester.go | 1 + pkg/sql/pg_catalog.go | 2 +- pkg/sql/planner.go | 1 + pkg/sql/sem/builtins/aggregate_builtins.go | 2 +- pkg/sql/sem/builtins/builtins.go | 49 +++++++++++++++++++ pkg/sql/sem/builtins/generator_builtins.go | 4 +- pkg/sql/sem/tree/overload.go | 12 ++--- pkg/sql/sem/tree/type_check.go | 19 +++++-- 15 files changed, 128 insertions(+), 18 deletions(-) create mode 100644 pkg/sql/logictest/testdata/logic_test/locality diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md index 6cf96ce88c52..08d0973dbeb4 100644 --- a/docs/generated/sql/functions.md +++ b/docs/generated/sql/functions.md @@ -991,6 +991,9 @@ SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)

current_user() → string

Returns the current user. This function is provided for compatibility with PostgreSQL.

+locality() → tuple

Returns the hierarchical location of the current node as a tuple of labeled values, ordered from most inclusive to least inclusive.

+

For example: region=east,datacenter=us-east-1.

+
version() → string

Returns the node’s version of CockroachDB.

diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index dbcbb59e6509..30630a44b2a6 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -1877,6 +1877,7 @@ func (ex *connExecutor) resetPlanner( p.semaCtx.Location = &ex.sessionData.DataConversion.Location p.semaCtx.SearchPath = ex.sessionData.SearchPath p.semaCtx.AsOfTimestamp = nil + p.semaCtx.Locality = ex.server.cfg.Locality ex.resetEvalCtx(&p.extendedEvalCtx, txn, stmtTS) diff --git a/pkg/sql/logictest/testdata/logic_test/locality b/pkg/sql/logictest/testdata/logic_test/locality new file mode 100644 index 000000000000..49854a96701d --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/locality @@ -0,0 +1,19 @@ +# LogicTest: 5node-dist 5node-dist-opt + +query T +SELECT locality() +---- +(test,dc1) + +query T +SELECT (locality()).region +---- +test + +query T +SELECT (locality()).dc +---- +dc1 + +statement error could not identify column "unk" in tuple{string AS region, string AS dc} +SELECT (locality()).unk diff --git a/pkg/sql/opt/memo/typing.go b/pkg/sql/opt/memo/typing.go index a1643980564f..4d60ff4d1d56 100644 --- a/pkg/sql/opt/memo/typing.go +++ b/pkg/sql/opt/memo/typing.go @@ -296,7 +296,7 @@ func typeAsAggregate(e opt.ScalarExpr) *types.T { // types (i.e. pass nil to the ReturnTyper). Aggregates with return types // that depend on argument types are handled separately. _, overload := FindAggregateOverload(e) - t := overload.ReturnType(nil) + t := overload.ReturnType(nil /* ctx */, nil /* args */) if t == tree.UnknownReturnType { panic(pgerror.AssertionFailedf("unknown aggregate return type. e:\n%s", e)) } @@ -307,7 +307,7 @@ func typeAsAggregate(e opt.ScalarExpr) *types.T { // typeAsAggregate. func typeAsWindow(e opt.ScalarExpr) *types.T { _, overload := FindWindowOverload(e) - t := overload.ReturnType(nil) + t := overload.ReturnType(nil /* ctx */, nil /* args */) if t == tree.UnknownReturnType { panic(pgerror.AssertionFailedf("unknown window return type. e:\n%s", e)) } diff --git a/pkg/sql/opt/memo/typing_test.go b/pkg/sql/opt/memo/typing_test.go index b36d4a46da92..b8ad88e894c9 100644 --- a/pkg/sql/opt/memo/typing_test.go +++ b/pkg/sql/opt/memo/typing_test.go @@ -205,7 +205,7 @@ func TestTypingAggregateAssumptions(t *testing.T) { } // Check for fixed return types. - retType := overload.ReturnType(nil) + retType := overload.ReturnType(nil /* ctx */, nil /* args */) if retType == tree.UnknownReturnType { t.Errorf("return type is not fixed for %s: %+v", name, overload.Types.Types()) } diff --git a/pkg/sql/opt/norm/custom_funcs.go b/pkg/sql/opt/norm/custom_funcs.go index 12c65bd4174d..cdc7bdd75239 100644 --- a/pkg/sql/opt/norm/custom_funcs.go +++ b/pkg/sql/opt/norm/custom_funcs.go @@ -1837,4 +1837,5 @@ var FoldFunctionWhitelist = map[string]struct{}{ "jsonb_strip_nulls": {}, "json_array_length": {}, "jsonb_array_length": {}, + "locality": {}, } diff --git a/pkg/sql/opt/optbuilder/testdata/scalar b/pkg/sql/opt/optbuilder/testdata/scalar index 5b5347f25950..66e605ad213a 100644 --- a/pkg/sql/opt/optbuilder/testdata/scalar +++ b/pkg/sql/opt/optbuilder/testdata/scalar @@ -1175,3 +1175,29 @@ if-err [type=decimal] │ └── variable: @1 [type=decimal] └── err-code └── const: '10000' [type=string] + +build-scalar +locality() +---- +function: locality [type=tuple] + +build locality=(region=east,dc=east1-b) +SELECT (locality()).dc +---- +project + ├── columns: dc:1(string) + ├── values + │ └── tuple [type=tuple] + └── projections + └── column-access: 1 [type=string] + └── function: locality [type=tuple{string AS region, string AS dc}] + +build +SELECT (locality()).unk +---- +error (42804): could not identify column "unk" in tuple + +build locality=(region=east,dc=east1-b) +SELECT (locality()).unk +---- +error (42804): could not identify column "unk" in tuple{string AS region, string AS dc} diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go index 4586186d3d6a..b140047f1d4d 100644 --- a/pkg/sql/opt/testutils/opttester/opt_tester.go +++ b/pkg/sql/opt/testutils/opttester/opt_tester.go @@ -257,6 +257,7 @@ func (ot *OptTester) RunCommand(tb testing.TB, d *datadriven.TestData) string { ot.Flags.Verbose = testing.Verbose() ot.evalCtx.TestingKnobs.OptimizerCostPerturbation = ot.Flags.PerturbCost ot.evalCtx.Locality = ot.Flags.Locality + ot.semaCtx.Locality = ot.Flags.Locality switch d.Cmd { case "exec-ddl": diff --git a/pkg/sql/pg_catalog.go b/pkg/sql/pg_catalog.go index 4653abfc84fc..e083effb6713 100644 --- a/pkg/sql/pg_catalog.go +++ b/pkg/sql/pg_catalog.go @@ -1510,7 +1510,7 @@ CREATE TABLE pg_catalog.pg_operator ( panic(fmt.Sprintf("Unexpected operator %s with %d params", opName, params.Length())) } - returnType := tree.NewDOid(tree.DInt(returnTyper(nil).Oid())) + returnType := tree.NewDOid(tree.DInt(returnTyper(nil /* ctx */, nil /* args */).Oid())) err := addRow( h.OperatorOid(opName, leftType, rightType, returnType), // oid diff --git a/pkg/sql/planner.go b/pkg/sql/planner.go index 3ad6fb35b7f0..eae705e76226 100644 --- a/pkg/sql/planner.go +++ b/pkg/sql/planner.go @@ -252,6 +252,7 @@ func newInternalPlanner( p.semaCtx = tree.MakeSemaContext() p.semaCtx.Location = &sd.DataConversion.Location p.semaCtx.SearchPath = sd.SearchPath + p.semaCtx.Locality = execCfg.Locality plannerMon := mon.MakeUnlimitedMonitor(ctx, fmt.Sprintf("internal-planner.%s.%s", user, opName), diff --git a/pkg/sql/sem/builtins/aggregate_builtins.go b/pkg/sql/sem/builtins/aggregate_builtins.go index e92b190dcd69..d48d7785cf12 100644 --- a/pkg/sql/sem/builtins/aggregate_builtins.go +++ b/pkg/sql/sem/builtins/aggregate_builtins.go @@ -107,7 +107,7 @@ var aggregates = map[string]builtinDefinition{ arrayBuiltin(func(t *types.T) tree.Overload { return makeAggOverloadWithReturnType( []*types.T{t}, - func(args []tree.TypedExpr) *types.T { + func(ctx *tree.SemaContext, args []tree.TypedExpr) *types.T { if len(args) == 0 { return types.MakeArray(t) } diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 7f71a818b805..f05e556fdd92 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -2756,6 +2756,31 @@ may increase either contention or retry errors, or both.`, }, ), + "locality": makeBuiltin( + tree.FunctionProperties{Category: categorySystemInfo}, + tree.Overload{ + Types: tree.ArgTypes{}, + ReturnType: func(ctx *tree.SemaContext, _ []tree.TypedExpr) *types.T { + if ctx == nil { + // Use simplified tuple type for signature (e.g. for docs). + return types.AnyTuple + } + return localityReturnType(&ctx.Locality) + }, + Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + datums := make(tree.Datums, len(ctx.Locality.Tiers)) + for i := range ctx.Locality.Tiers { + datums[i] = tree.NewDString(ctx.Locality.Tiers[i].Value) + } + typ := localityReturnType(&ctx.Locality) + return tree.NewDTuple(typ, datums...), nil + }, + Info: "Returns the hierarchical location of the current node as a tuple " + + "of labeled values, ordered from most inclusive to least inclusive. \n\n" + + "For example: `region=east,datacenter=us-east-1`.", + }, + ), + "crdb_internal.node_executable_version": makeBuiltin( tree.FunctionProperties{Category: categorySystemInfo}, tree.Overload{ @@ -4628,3 +4653,27 @@ func recentTimestamp(ctx *tree.EvalContext) (time.Time, error) { } return ctx.StmtTimestamp.Add(offset), nil } + +// localityReturnType returns the type that the locality() function will return. +// This is a tuple type with labeled string fields, one for each locality tier: +// +// tuple{string AS , string AS , ...} +// +// For example, consider this locality: +// +// region=east,datacenter=us-east-1 +// +// The tuple type will be: +// +// tuple{string AS region, string AS datacenter} +// +func localityReturnType(locality *roachpb.Locality) *types.T { + contents := make([]types.T, len(locality.Tiers)) + labels := make([]string, len(locality.Tiers)) + for i := range locality.Tiers { + tier := &locality.Tiers[i] + contents[i] = *types.String + labels[i] = tier.Key + } + return types.MakeLabeledTuple(contents, labels) +} diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go index 79f0297908f1..0947cb9081f8 100644 --- a/pkg/sql/sem/builtins/generator_builtins.go +++ b/pkg/sql/sem/builtins/generator_builtins.go @@ -109,7 +109,7 @@ var generators = map[string]builtinDefinition{ // See https://www.postgresql.org/docs/current/static/functions-array.html makeGeneratorOverloadWithReturnType( tree.ArgTypes{{"input", types.AnyArray}}, - func(args []tree.TypedExpr) *types.T { + func(ctx *tree.SemaContext, args []tree.TypedExpr) *types.T { if len(args) == 0 || args[0].ResolvedType().Family() == types.UnknownFamily { return tree.UnknownReturnType } @@ -123,7 +123,7 @@ var generators = map[string]builtinDefinition{ "information_schema._pg_expandarray": makeBuiltin(genProps(expandArrayValueGeneratorLabels), makeGeneratorOverloadWithReturnType( tree.ArgTypes{{"input", types.AnyArray}}, - func(args []tree.TypedExpr) *types.T { + func(ctx *tree.SemaContext, args []tree.TypedExpr) *types.T { if len(args) == 0 || args[0].ResolvedType().Family() == types.UnknownFamily { return tree.UnknownReturnType } diff --git a/pkg/sql/sem/tree/overload.go b/pkg/sql/sem/tree/overload.go index b9faeebc17ce..0fc431d119cf 100644 --- a/pkg/sql/sem/tree/overload.go +++ b/pkg/sql/sem/tree/overload.go @@ -327,17 +327,17 @@ var UnknownReturnType *types.T // ReturnTyper defines the type-level function in which a builtin function's return type // is determined. ReturnTypers should make sure to return unknownReturnType when necessary. -type ReturnTyper func(args []TypedExpr) *types.T +type ReturnTyper func(ctx *SemaContext, args []TypedExpr) *types.T // FixedReturnType functions simply return a fixed type, independent of argument types. func FixedReturnType(typ *types.T) ReturnTyper { - return func(args []TypedExpr) *types.T { return typ } + return func(_ *SemaContext, args []TypedExpr) *types.T { return typ } } // IdentityReturnType creates a returnType that is a projection of the idx'th // argument type. func IdentityReturnType(idx int) ReturnTyper { - return func(args []TypedExpr) *types.T { + return func(_ *SemaContext, args []TypedExpr) *types.T { if len(args) == 0 { return UnknownReturnType } @@ -351,7 +351,7 @@ func IdentityReturnType(idx int) ReturnTyper { // with HomogeneousType functions, in which all arguments have been checked to // have the same type (or be null). func FirstNonNullReturnType() ReturnTyper { - return func(args []TypedExpr) *types.T { + return func(_ *SemaContext, args []TypedExpr) *types.T { if len(args) == 0 { return UnknownReturnType } @@ -365,7 +365,7 @@ func FirstNonNullReturnType() ReturnTyper { } func returnTypeToFixedType(s ReturnTyper) *types.T { - if t := s(nil); t != UnknownReturnType { + if t := s(nil, nil); t != UnknownReturnType { return t } return types.Any @@ -498,7 +498,7 @@ func typeCheckOverloadedExprs( // fixed return types. This could be improved, but is not currently // critical because we have no cases of functions with multiple // overloads that do not all expose FixedReturnTypes. - if t := o.returnType()(nil); t != UnknownReturnType { + if t := o.returnType()(ctx, nil); t != UnknownReturnType { return t.Equivalent(desired) } return true diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go index b18ab6977931..21391aee20fb 100644 --- a/pkg/sql/sem/tree/type_check.go +++ b/pkg/sql/sem/tree/type_check.go @@ -19,6 +19,7 @@ import ( "strings" "time" + "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/sql/lex" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -58,6 +59,14 @@ type SemaContext struct { // globally for the entire txn and this field would not be needed. AsOfTimestamp *hlc.Timestamp + // Locality contains the location of the current node as a set of user-defined + // key/value pairs, ordered from most inclusive to least inclusive. If there + // are no tiers, then the node's location is not known. Example: + // + // [region=us,dc=east] + // + Locality roachpb.Locality + Properties SemaProperties } @@ -357,7 +366,7 @@ func (expr *BinaryExpr) TypeCheck(ctx *SemaContext, desired *types.T) (TypedExpr expr.Left, expr.Right = leftTyped, rightTyped expr.fn = binOp - expr.typ = binOp.returnType()(typedSubExprs) + expr.typ = binOp.returnType()(ctx, typedSubExprs) return expr, nil } @@ -559,7 +568,7 @@ func (expr *TupleStar) TypeCheck(ctx *SemaContext, desired *types.T) (TypedExpr, // Alghough we're going to elide the tuple star, we need to ensure // the expression is indeed a labeled tuple first. - if resolvedType.Family() != types.TupleFamily || len(resolvedType.TupleLabels()) == 0 { + if resolvedType.Family() != types.TupleFamily || resolvedType.TupleLabels() == nil { return nil, NewTypeIsNotCompositeError(resolvedType) } @@ -585,7 +594,7 @@ func (expr *ColumnAccessExpr) TypeCheck(ctx *SemaContext, desired *types.T) (Typ expr.Expr = subExpr resolvedType := subExpr.ResolvedType() - if resolvedType.Family() != types.TupleFamily || len(resolvedType.TupleLabels()) == 0 { + if resolvedType.Family() != types.TupleFamily || resolvedType.TupleLabels() == nil { return nil, NewTypeIsNotCompositeError(resolvedType) } @@ -922,7 +931,7 @@ func (expr *FuncExpr) TypeCheck(ctx *SemaContext, desired *types.T) (TypedExpr, } expr.fn = overloadImpl expr.fnProps = &def.FunctionProperties - expr.typ = overloadImpl.returnType()(typedSubExprs) + expr.typ = overloadImpl.returnType()(ctx, typedSubExprs) if expr.typ == UnknownReturnType { typeNames := make([]string, 0, len(expr.Exprs)) for _, expr := range typedSubExprs { @@ -1213,7 +1222,7 @@ func (expr *UnaryExpr) TypeCheck(ctx *SemaContext, desired *types.T) (TypedExpr, expr.Expr = exprTyped expr.fn = unaryOp - expr.typ = unaryOp.returnType()(typedSubExprs) + expr.typ = unaryOp.returnType()(ctx, typedSubExprs) return expr, nil }