Skip to content
Permalink
Browse files

Merge #40469 #40471 #40482

40469: opt: fix panic when building indirection exprs r=justinj a=justinj

Previously, we would ask a built array datum to tell us its type when
building an IndirectionExpr. This was problematic when the datum was
NULL, since while the optimizer-level NULLs track their inferred type,
built DNulls do not, so we ended up not knowing the element type.

This is fixed by grabbing the type from the opt expression, rather than
the built datum.

Fixes #40404.
Fixes #37794.

Release note (bug fix): fixed an optimizer panic when building array
access expressions.

40471: exec: add projections of AndExpr r=jordanlewis a=jordanlewis

This adds support for TPCH q19, which sees a 3x speedup with vectorized.

Release note: None

40482: importccl: replace the undocumented data format MYSQLOUTFILE with DELIMITED r=spaskob a=spaskob

MYSQLOUTFILE was originally used to help a client import data that was
not in proper CSV format. This turns out to be useful for other users, so
we call the new format DELIMITED. It's a very fast and simple format for importing
delimited data, disregarding issues with quoting. For example, the CSV format forbids
`field1,fieldsth2`, since fields that contain quotes have to be enclosed in
quotes themselves. The format is fully described at
https://dev.mysql.com/doc/refman/8.0/en/load-data.html.

Fixes #39820.

Release note (cli change): add a new IMPORT DATA format DELIMITED.

Co-authored-by: Justin Jaffray <justin@cockroachlabs.com>
Co-authored-by: Jordan Lewis <jordanthelewis@gmail.com>
Co-authored-by: Spas Bojanov <spas@cockroachlabs.com>
  • Loading branch information...
4 people committed Sep 4, 2019
4 parents 0443c26 + 99e4e58 + be5143e + 3c3cece commit d8c99dcf5fbf5f772f3350edfe429a9236057a56
@@ -243,7 +243,7 @@ func importPlanHook(
}
format.Csv.Skip = uint32(skip)
}
case "MYSQLOUTFILE":
case "DELIMITED":
telemetry.Count("import.format.mysqlout")
format.Format = roachpb.IOFileFormat_MysqlOutfile
format.MysqlOut = roachpb.MySQLOutfileOptions{
@@ -239,38 +239,38 @@ d
{
name: "too many imported columns",
create: `i int8`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "1\t2",
err: "row 1: too many columns, expected 1",
},
{
name: "unexpected number of columns",
create: `a string, b string`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "1,2",
err: "row 1: unexpected number of columns, expected 2 got 1",
},
{
name: "unmatched field enclosure",
create: `i int8`,
with: `WITH fields_enclosed_by = '"'`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "\"1",
err: "row 1: unmatched field enclosure",
},
{
name: "unmatched literal",
create: `i int8`,
with: `WITH fields_escaped_by = '\'`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: `\`,
err: "row 1: unmatched literal",
},
{
name: "weird escape char",
create: `s STRING`,
with: `WITH fields_escaped_by = '@'`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "@N\nN@@\nNULL",
query: map[string][][]string{
`SELECT COALESCE(s, '(null)') from t`: {{"(null)"}, {"N@"}, {"NULL"}},
@@ -280,7 +280,7 @@ d
name: `null and \N with escape`,
create: `s STRING`,
with: `WITH fields_escaped_by = '\'`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "\\N\n\\\\N\nNULL",
query: map[string][][]string{
`SELECT COALESCE(s, '(null)') from t`: {{"(null)"}, {`\N`}, {"NULL"}},
@@ -290,22 +290,22 @@ d
name: `\N with trailing char`,
create: `s STRING`,
with: `WITH fields_escaped_by = '\'`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "\\N1",
err: "row 1: unexpected data after null encoding",
},
{
name: `double null`,
create: `s STRING`,
with: `WITH fields_escaped_by = '\'`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "\\N\\N",
err: "row 1: unexpected null encoding",
},
{
name: `null and \N without escape`,
create: `s STRING`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: "\\N\n\\\\N\nNULL",
query: map[string][][]string{
`SELECT COALESCE(s, '(null)') from t`: {{`\N`}, {`\\N`}, {"(null)"}},
@@ -314,7 +314,7 @@ d
{
name: `bytes with escape`,
create: `b BYTES`,
typ: "MYSQLOUTFILE",
typ: "DELIMITED",
data: `\x`,
query: map[string][][]string{
`SELECT * from t`: {{`\x`}},
@@ -712,7 +712,7 @@ COPY t (a, b, c) FROM stdin;
t.Run("mysqlout multiple", func(t *testing.T) {
sqlDB.Exec(t, `CREATE DATABASE mysqlout; USE mysqlout`)
dataString = "1"
sqlDB.Exec(t, `IMPORT TABLE t (s STRING) MYSQLOUTFILE DATA ($1, $1)`, srv.URL)
sqlDB.Exec(t, `IMPORT TABLE t (s STRING) DELIMITED DATA ($1, $1)`, srv.URL)
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1"}, {"1"}})
})
}
@@ -2683,7 +2683,7 @@ func TestImportMysqlOutfile(t *testing.T) {
t.Run(cfg.name, func(t *testing.T) {
var opts []interface{}

cmd := fmt.Sprintf(`IMPORT TABLE test%d (i INT8 PRIMARY KEY, s text, b bytea) MYSQLOUTFILE DATA ($1)`, i)
cmd := fmt.Sprintf(`IMPORT TABLE test%d (i INT8 PRIMARY KEY, s text, b bytea) DELIMITED DATA ($1)`, i)
opts = append(opts, fmt.Sprintf("nodelocal://%s", strings.TrimPrefix(cfg.filename, baseDir)))

var flags []string
@@ -887,6 +887,22 @@ func planProjectionOperators(
op := exec.NewCaseOp(buffer, caseOps, elseOp, thenIdxs, caseOutputIdx, caseOutputType)

return op, caseOutputIdx, ct, memUsed, nil
case *tree.AndExpr:
var leftOp, rightOp exec.Operator
var leftIdx, rightIdx, lMemUsed, rMemUsed int
leftOp, leftIdx, ct, lMemUsed, err = planProjectionOperators(ctx, t.TypedLeft(), columnTypes, input)
if err != nil {
return nil, resultIdx, ct, 0, err
}
rightOp, rightIdx, ct, rMemUsed, err = planProjectionOperators(ctx, t.TypedRight(), ct, leftOp)
if err != nil {
return nil, resultIdx, ct, 0, err
}
// Add a new boolean column that ands the two output columns.
resultIdx = len(ct)
ct = append(ct, *t.ResolvedType())
andOp := exec.NewAndOp(rightOp, leftIdx, rightIdx, resultIdx)
return andOp, resultIdx, ct, lMemUsed + rMemUsed, nil
case *tree.OrExpr:
// Rewrite the OR expression as an equivalent CASE expression.
// "a OR b" becomes "CASE WHEN a THEN true WHEN b THEN true ELSE false END".
@@ -0,0 +1,68 @@
// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package exec

import (
"context"

"github.com/cockroachdb/cockroach/pkg/col/coldata"
"github.com/cockroachdb/cockroach/pkg/col/coltypes"
)

// andOp is an operator that writes the logical AND of two boolean columns of
// each input batch into a third column of that batch.
type andOp struct {
	OneInputNode

	// leftIdx and rightIdx are the positions of the two boolean operand
	// columns; outputIdx is the position of the column receiving the result.
	leftIdx, rightIdx, outputIdx int
}

// NewAndOp constructs an operator that reads batches from input, computes the
// logical AND of the boolean columns at leftIdx and rightIdx, and stores the
// result in the column at outputIdx.
func NewAndOp(input Operator, leftIdx, rightIdx, outputIdx int) Operator {
	op := &andOp{OneInputNode: NewOneInputNode(input)}
	op.leftIdx, op.rightIdx, op.outputIdx = leftIdx, rightIdx, outputIdx
	return op
}

// Init prepares the operator for use. andOp has nothing of its own to set up
// here, so this only forwards the call to its input operator.
func (a *andOp) Init() {
a.input.Init()
}

// Next pulls the next batch from the input and fills the output column with
// the logical AND of the left and right boolean columns.
//
// A zero-length batch is returned untouched. If outputIdx equals the batch's
// current width, a new Bool column is appended first to hold the result. When
// the batch carries a selection vector, only the first Length() selected
// positions are written; otherwise positions [0, Length()) are written.
//
// NOTE(review): only the boolean values are combined here; no null
// information of either operand column is consulted or written for the output
// column. Confirm whether SQL NULL semantics need to be handled for this
// operator.
func (a *andOp) Next(ctx context.Context) coldata.Batch {
	b := a.input.Next(ctx)
	length := b.Length()
	if length == 0 {
		return b
	}
	if b.Width() == a.outputIdx {
		b.AppendCol(coltypes.Bool)
	}
	lhs := b.ColVec(a.leftIdx).Bool()
	rhs := b.ColVec(a.rightIdx).Bool()
	out := b.ColVec(a.outputIdx).Bool()

	sel := b.Selection()
	if sel == nil {
		// Touch the last element of each slice up front; presumably this is a
		// hint that lets the compiler drop per-iteration bounds checks.
		_ = rhs[length-1]
		_ = out[length-1]
		for i := range lhs[:length] {
			out[i] = lhs[i] && rhs[i]
		}
		return b
	}
	for _, idx := range sel[:length] {
		out[idx] = lhs[idx] && rhs[idx]
	}
	return b
}
@@ -279,11 +279,11 @@ statement ok
RESET vectorize

# AND expressions.
query II
SELECT a, b FROM a WHERE a < 2 AND b > 0 AND a * b != 3
query IIBB
SELECT a, b, a < 2 AND b > 0 AND a * b != 3, a < 2 AND b < 2 FROM a WHERE a < 2 AND b > 0 AND a * b != 3
----
0 1
1 2
0 1 true true
1 2 true false

statement ok
CREATE TABLE b (a INT, b STRING, PRIMARY KEY (b,a))
@@ -408,7 +408,7 @@ func (b *Builder) buildIndirection(
return nil, err
}

return tree.NewTypedIndirectionExpr(expr, index), nil
return tree.NewTypedIndirectionExpr(expr, index, scalar.DataType()), nil
}

func (b *Builder) buildCollate(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.TypedExpr, error) {
@@ -252,7 +252,7 @@ func (c *CustomFuncs) FoldIndirection(input, index opt.ScalarExpr) opt.ScalarExp
// Case 2: The input is a constant DArray.
if memo.CanExtractConstDatum(input) {
inputD := memo.ExtractConstDatum(input)
texpr := tree.NewTypedIndirectionExpr(inputD, indexD)
texpr := tree.NewTypedIndirectionExpr(inputD, indexD, input.DataType().ArrayContents())
result, err := texpr.Eval(c.f.evalCtx)
if err == nil {
return c.f.ConstructConstVal(result, texpr.ResolvedType())
@@ -837,6 +837,24 @@ project
└── projections
└── a.arr[0] [type=int, outer=(6)]

# Regression test for #40404.
norm expect=FoldIndirection
SELECT (SELECT x[1]) FROM (VALUES(null::oid[])) v(x)
----
values
├── columns: x:3(oid)
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(3)
└── tuple [type=tuple{oid}]
└── subquery [type=oid]
└── values
├── columns: x:2(oid)
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(2)
└── (NULL,) [type=tuple{oid}]

# --------------------------------------------------
# FoldColumnAccess
# --------------------------------------------------
@@ -1314,7 +1314,7 @@ func TestParse(t *testing.T) {

{`IMPORT TABLE foo CREATE USING 'nodelocal:///some/file' CSV DATA ('path/to/some/file', $1) WITH temp = 'path/to/temp'`},
{`EXPLAIN IMPORT TABLE foo CREATE USING 'nodelocal:///some/file' CSV DATA ('path/to/some/file', $1) WITH temp = 'path/to/temp'`},
{`IMPORT TABLE foo CREATE USING 'nodelocal:///some/file' MYSQLOUTFILE DATA ('path/to/some/file', $1)`},
{`IMPORT TABLE foo CREATE USING 'nodelocal:///some/file' DELIMITED DATA ('path/to/some/file', $1)`},
{`IMPORT TABLE foo (id INT8 PRIMARY KEY, email STRING, age INT8) CSV DATA ('path/to/some/file', $1) WITH temp = 'path/to/temp'`},
{`IMPORT TABLE foo (id INT8, email STRING, age INT8) CSV DATA ('path/to/some/file', $1) WITH comma = ',', "nullif" = 'n/a', temp = $2`},
{`IMPORT TABLE foo FROM PGDUMPCREATE 'nodelocal:///foo/bar' WITH temp = 'path/to/temp'`},
@@ -1860,7 +1860,7 @@ import_format:
//
// Formats:
// CSV
// MYSQLOUTFILE
// DELIMITED
// MYSQLDUMP
// PGCOPY
// PGDUMP
@@ -453,12 +453,12 @@ func NewTypedComparisonExprWithSubOp(
}

// NewTypedIndirectionExpr returns a new IndirectionExpr that is verified to be well-typed.
func NewTypedIndirectionExpr(expr, index TypedExpr) *IndirectionExpr {
func NewTypedIndirectionExpr(expr, index TypedExpr, typ *types.T) *IndirectionExpr {
node := &IndirectionExpr{
Expr: expr,
Indirection: ArraySubscripts{&ArraySubscript{Begin: index}},
}
node.typ = expr.(TypedExpr).ResolvedType().ArrayContents()
node.typ = typ
return node
}

0 comments on commit d8c99dc

Please sign in to comment.
You can’t perform that action at this time.