sql: enable referring to columns symbolically

This patch introduces a new semantic concept that is useful for pretty-printing the AST back to valid SQL and again back to AST; this will be useful during development work on optimizations. The motivation is detailed below. A side effect is that it also introduces a user-visible feature. Because it is visible to user it will need to be documented, but some caveats apply. More details are given below too. **Motivation** During optimizations, column references (IndexedVar) can move around and be substituted in ways that may lose track of the original name used in the query to name the column. More specifically, we can move a reference to a position where there is no name known for the column *yet*. For example we want to go from: `SELECT * FROM (SELECT * FROM (VALUES (1), (2))) AS a(x) WHERE x > 2` to: `SELECT * FROM (SELECT * FROM ... WHERE ??? > 2) AS a(x)` Now although we can move the IndexedVar to the position in the `???` it is not so clear which *name* to give this indexed var if we pretty-print the resulting tree -- in this example the name "x" does not exist at that position. This patch makes it possible to side-step this issue, by introducing a new SQL syntax that refers to columns at the "current level" by ordinal position: "@n" where N is an integer literal. With this patch the example above can then be reliably serialized to: `SELECT * FROM (SELECT * FROM ... WHERE @1 > 2) AS a(x)` **User-visible changes** SQL already has a traditional, limited way to do refer to column numerically in some syntactic positions, specifically ORDER BY and GROUP BY. For example, in `SELECT a + b FROM foo ORDER BY 1`, "1" refers to the first value rendered, i.e. `a + b`. These are called "column ordinals"; they are supported in *some* SQL engines, sometimes for backward compatibility, sometimes because of historical reasons. The feature added in this patch complements and extends this mechanism. **However, the use of column ordinals by client applications is also customarily strongly discouraged.** The use of the new column ordinal references added in this patch should be equally discouraged. The reason why is that they are not robust against schema updates. Say, a table is initially created with columns `a, b, c` in this order. Then a query is designed to refer to column `a` by position, with number 1. Then later, independently a DB admin changes the schema and removes column `a`, and adds a new version of column `a` with e.g. a different type. Now the schema is `b, c, a`, and all the queries that expect to refer to `a` by position 1 are now broken. The new feature in this patch is also subject to this limitation. It is intended primarily for use during development when the schema updates are tightly controlled by the operator manipulating the query. Meanwhile, since the feature is visible to users it should still be (minimally) documented. The salient aspects that should be communicated are: 1) don't use this feature in client applications unless you 100% understand the limitation described above. 2) **the @ notation refers to a column number in the data source, not in the rendered columns**. The data source is the thing named after FROM. For example, suppose a table `foo` has columns `a` and `b` in this order. Then the query `SELECT b, a FROM foo WHERE @2 = 123` is equivalent to `SELECT b, a FROM foo WHERE b = 123`. 3) point 2 above means that there is a difference between the new column ordinal references and the traditional SQL ordinals, which can be illustrated as follows. With SQL ordinals, the query `SELECT b, a FROM foo ORDER BY 1` sorts with column `b`, because this is the first value rendered (columns after SELECT); whereas `SELECT b, a FROM foo ORDER BY @1` sorts with column `a`, because this is the first column in the data source (columns after FROM).
cockroachdb · Nov 22, 2016 · 146344b · 146344b
1 parent a83c960
commit 146344b
Show file tree

Hide file tree

Showing 10 changed files with 2,877 additions and 2,694 deletions.
diff --git a/pkg/sql/group.go b/pkg/sql/group.go
@@ -378,6 +378,19 @@ func (n *groupNode) ExplainPlan(_ bool) (name, description string, children []pl
 		}
 		f.Format(&buf, parser.FmtSimple)
 	}
+	if n.plan != nil {
+		sourceColumns := n.plan.Columns()
+		if len(sourceColumns) > len(n.funcs) {
+			buf.WriteString(" GROUP BY (")
+			for i := len(n.funcs); i < len(sourceColumns); i++ {
+				if i > len(n.funcs) {
+					buf.WriteString(", ")
+				}
+				fmt.Fprintf(&buf, "@%d", i+1)
+			}
+			buf.WriteByte(')')
+		}
+	}
 
 	subplans := []planNode{n.plan}
 	for _, e := range n.render {

diff --git a/pkg/sql/ordering.go b/pkg/sql/ordering.go
@@ -90,10 +90,10 @@ func (ord orderingInfo) Format(buf *bytes.Buffer, columns ResultColumns) {
 			prefix = '-'
 		}
 		buf.WriteByte(prefix)
-		if columns == nil || o.ColIdx >= len(columns) {
-			fmt.Fprintf(buf, "%d", o.ColIdx)
-		} else {
+		if columns != nil && o.ColIdx < len(columns) {
 			parser.Name(columns[o.ColIdx].Name).Format(buf, parser.FmtSimple)
+		} else {
+			fmt.Fprintf(buf, "@%d", o.ColIdx+1)
 		}
 	}
 

diff --git a/pkg/sql/parser/eval.go b/pkg/sql/parser/eval.go
@@ -2270,13 +2270,6 @@ func (expr *RangeCond) Eval(_ *EvalContext) (Datum, error) {
 	return nil, errors.Errorf("unhandled type %T", expr)
 }
 
-// Eval implements the TypedExpr interface.
-func (expr *Subquery) Eval(_ *EvalContext) (Datum, error) {
-	// Subquery expressions are handled during subquery expansion.
-	log.Errorf(context.TODO(), "unhandled type %T passed to Eval", expr)
-	return nil, errors.Errorf("unhandled type %T", expr)
-}
-
 // Eval implements the TypedExpr interface.
 func (expr *UnaryExpr) Eval(ctx *EvalContext) (Datum, error) {
 	d, err := expr.Expr.(TypedExpr).Eval(ctx)

diff --git a/pkg/sql/parser/expr.go b/pkg/sql/parser/expr.go
@@ -654,11 +654,6 @@ func (node *Subquery) Format(buf *bytes.Buffer, f FmtFlags) {
 	FormatNode(buf, f, node.Select)
 }
 
-// ResolvedType implements the TypedExpr interface.
-func (*Subquery) ResolvedType() Type {
-	return TypeNull
-}
-
 // BinaryOperator represents a binary operator.
 type BinaryOperator int
 

diff --git a/pkg/sql/parser/indexed_vars.go b/pkg/sql/parser/indexed_vars.go
@@ -19,10 +19,15 @@ package parser
 import (
 	"bytes"
 	"fmt"
+
+	"github.com/pkg/errors"
 )
 
 // IndexedVarContainer provides the implementation of TypeCheck, Eval, and
 // String for IndexedVars.
+// If an object that wishes to implement this interface has lost the
+// textual name that an IndexedVar originates from, it can use the
+// ordinal column reference syntax: fmt.Fprintf(buf, "@%d", idx)
 type IndexedVarContainer interface {
 	IndexedVarEval(idx int, ctx *EvalContext) (Datum, error)
 	IndexedVarResolvedType(idx int) Type
@@ -55,29 +60,58 @@ func (v *IndexedVar) TypeCheck(_ *SemaContext, desired Type) (TypedExpr, error)
 
 // Eval is part of the TypedExpr interface.
 func (v *IndexedVar) Eval(ctx *EvalContext) (Datum, error) {
+	if v.container == nil {
+		panic("indexed var must be bound to a container before evaluation")
+	}
 	return v.container.IndexedVarEval(v.Idx, ctx)
 }
 
 // ResolvedType is part of the TypedExpr interface.
 func (v *IndexedVar) ResolvedType() Type {
+	if v.container == nil {
+		panic("indexed var must be bound to a container before type resolution")
+	}
 	return v.container.IndexedVarResolvedType(v.Idx)
 }
 
 // Format implements the NodeFormatter interface.
 func (v *IndexedVar) Format(buf *bytes.Buffer, f FmtFlags) {
-	if f.symbolicVars {
+	if f.symbolicVars || v.container == nil {
 		fmt.Fprintf(buf, "@%d", v.Idx+1)
 		return
 	}
 	v.container.IndexedVarFormat(buf, f, v.Idx)
 }
 
-// IndexedVarHelper is a structure that helps with initialization of IndexVars.
+// NewOrdinalReference is a helper routine to create a standalone
+// IndexedVar with the given index value. This needs to undergo
+// BindIfUnbound() below before it can be fully used.
+func NewOrdinalReference(r int) *IndexedVar {
+	return &IndexedVar{Idx: r, container: nil}
+}
+
+// IndexedVarHelper is a structure that helps with initialization of IndexedVars.
 type IndexedVarHelper struct {
 	vars      []IndexedVar
 	container IndexedVarContainer
 }
 
+// BindIfUnbound attaches an IndexedVar to an existing container.
+// This is needed for standalone column ordinals created during parsing.
+func (h *IndexedVarHelper) BindIfUnbound(ivar *IndexedVar) error {
+	if ivar.container != nil {
+		return nil
+	}
+	if ivar.Idx < 0 || ivar.Idx >= len(h.vars) {
+		return errors.Errorf("invalid column ordinal: @%d", ivar.Idx+1)
+	}
+	// This container must also remember it has "seen" the variable
+	// so that IndexedVarUsed() below returns the right results.
+	// The IndexedVar() method ensures this.
+	*ivar = *h.IndexedVar(ivar.Idx)
+	return nil
+}
+
 // MakeIndexedVarHelper initializes an IndexedVarHelper structure.
 func MakeIndexedVarHelper(container IndexedVarContainer, numVars int) IndexedVarHelper {
 	return IndexedVarHelper{vars: make([]IndexedVar, numVars), container: container}