Skip to content

Commit

Permalink
opt: Propagate null counts through statsbuilder
Browse files Browse the repository at this point in the history
This change takes null counts that are already collected in table
statistics, and propagates them through the different operators in
statistics_builder. It also uses these null counts to generate
selectivities which are then applied to the row count.

The expectation is that this change will lead to much better cardinality
estimation, especially in workloads involving lots of null values that
get filtered out by an operator or constraint. Previously, we treated
null values like any other distinct value.

Fixes #30289

Release note: None
  • Loading branch information
itsbilal committed Oct 9, 2018
1 parent 73514f6 commit b90a309
Show file tree
Hide file tree
Showing 40 changed files with 1,854 additions and 1,448 deletions.
31 changes: 18 additions & 13 deletions pkg/sql/opt/exec/execbuilder/testdata/join
Original file line number Diff line number Diff line change
Expand Up @@ -587,19 +587,24 @@ SELECT *
WHERE (a IS NULL OR a > 2) AND n > 1 AND (a IS NULL OR a < sq)
]
----
filter · ·
│ filter ((a IS NULL) OR (a > 2)) AND ((a IS NULL) OR (a < sq))
└── join · ·
│ type right outer
│ equality (b) = (sq)
│ pred n < 6
├── scan · ·
│ table pairs@primary
│ spans ALL
│ filter a > 1
└── scan · ·
· table square@primary
· spans /2-
render · ·
│ render 0 a
│ render 1 b
│ render 2 n
│ render 3 sq
└── filter · ·
│ filter ((a IS NULL) OR (a > 2)) AND ((a IS NULL) OR (a < sq))
└── join · ·
│ type left outer
│ equality (sq) = (b)
│ pred n < 6
├── scan · ·
│ table square@primary
│ spans /2-
└── scan · ·
· table pairs@primary
· spans ALL
· filter a > 1

# The simpler plan for an inner join, to compare.
query TTT
Expand Down
54 changes: 54 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/stats
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,57 @@ scan · · (u, v) ·
· table b@b_u_idx · ·
· spans /1-/2 · ·
· filter v = 1 · ·

# Verify that injecting different statistics with null counts
# changes the plan.
statement ok
ALTER TABLE b INJECT STATISTICS '[
{
"columns": ["u"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100,
"distinct_count": 20,
"null_count": 0
},
{
"columns": ["v"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100,
"distinct_count": 10,
"null_count": 0
}
]'

query TTTTT
EXPLAIN (VERBOSE) SELECT * FROM b WHERE u = 1 AND v = 1
----
scan · · (u, v) ·
· table b@b_u_idx · ·
· spans /1-/2 · ·
· filter v = 1 · ·

statement ok
ALTER TABLE b INJECT STATISTICS '[
{
"columns": ["u"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100,
"distinct_count": 20,
"null_count": 0
},
{
"columns": ["v"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100,
"distinct_count": 10,
"null_count": 90
}
]'

query TTTTT
EXPLAIN (VERBOSE) SELECT * FROM b WHERE u = 1 AND v = 1
----
scan · · (u, v) ·
· table b@b_v_idx · ·
· spans /1-/2 · ·
· filter u = 1 · ·
14 changes: 14 additions & 0 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,14 @@ func (m *Memo) NormOp(group GroupID) opt.Operator {
return m.groups[group].expr(normExprOrdinal).op
}

// DeriveLogicalProps runs the memo's logical property builder on the given
// expression view. The value returned by buildProps is discarded; the call is
// made for its effect of (usually) populating child statistics, which then
// show up in the test output tree.
//
// Expressions whose operator is MergeJoinOp or SortOp are left untouched.
// NOTE(review): the reason these two operators are excluded is not visible
// here - presumably the props builder does not handle them; confirm.
func DeriveLogicalProps(evalCtx *tree.EvalContext, ev ExprView) {
	switch ev.op {
	case opt.MergeJoinOp, opt.SortOp:
		// Nothing to derive for these operators.
		return
	}
	ev.mem.logPropsBuilder.buildProps(evalCtx, ev)
}

// MemoizeNormExpr enters a normalized expression into the memo. This requires
// the creation of a new memo group with the normalized expression as its first
// expression. If the expression is already part of an existing memo group, then
Expand Down Expand Up @@ -496,6 +504,12 @@ func (m *Memo) MemoizeDenormExpr(evalCtx *tree.EvalContext, group GroupID, denor
tmpGroupID := GroupID(len(m.groups))
m.groups = append(m.groups, makeMemoGroup(tmpGroupID, denorm))
ev := MakeNormExprView(m, tmpGroupID)
// Building out logical props could lead to more lazy population of
// stats in child expressions, which could change the test output
// when run with and without -race.
//
// TODO: Figure out a way to either run this for all tests, or
// cleanly restore all column stats after this step.
logical := m.logPropsBuilder.buildProps(evalCtx, ev)
logical.VerifyAgainst(&m.group(group).logical)

Expand Down
Loading

0 comments on commit b90a309

Please sign in to comment.