Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: do not collect stats for virtual columns #68312

Merged
merged 1 commit into from
Aug 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,25 @@ func createStatsDefaultColumns(
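// addIndexColumnStatsIfNotExists appends column stats for the given column
// ID if they have not already been added. Histogram stats are collected for
// every indexed column, except virtual computed columns, which are skipped.
// It returns an error if the column cannot be found in the descriptor.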
addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) {
addIndexColumnStatsIfNotExists := func(colID descpb.ColumnID, isInverted bool) error {
col, err := desc.FindColumnWithID(colID)
if err != nil {
return err
}

// Do not collect stats for virtual computed columns. DistSQLPlanner
// cannot currently collect stats for these columns because it plans
// table readers on the table's primary index which does not include
// virtual computed columns.
if col.IsVirtual() {
return nil
}

colList := []descpb.ColumnID{colID}

// Check for existing stats and remember the requested stats.
if !trackStatsIfNotExists(colList) {
return
return nil
}

colStat := jobspb.CreateStatsDetails_ColStat{
Expand All @@ -360,12 +373,18 @@ func createStatsDefaultColumns(
colStat.HasHistogram = true
colStats = append(colStats, colStat)
}

return nil
}

// Add column stats for the primary key.
for i := 0; i < desc.GetPrimaryIndex().NumKeyColumns(); i++ {
primaryIdx := desc.GetPrimaryIndex()
for i := 0; i < primaryIdx.NumKeyColumns(); i++ {
// Generate stats for each column in the primary key.
addIndexColumnStatsIfNotExists(desc.GetPrimaryIndex().GetKeyColumnID(i), false /* isInverted */)
err := addIndexColumnStatsIfNotExists(primaryIdx.GetKeyColumnID(i), false /* isInverted */)
if err != nil {
return nil, err
}

// Only collect multi-column stats if enabled.
if i == 0 || !multiColEnabled {
Expand Down Expand Up @@ -394,7 +413,9 @@ func createStatsDefaultColumns(
isInverted := idx.GetType() == descpb.IndexDescriptor_INVERTED && colID == idx.InvertedColumnID()

// Generate stats for each indexed column.
addIndexColumnStatsIfNotExists(colID, isInverted)
if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil {
return nil, err
}

// Only collect multi-column stats if enabled.
if j == 0 || !multiColEnabled {
Expand Down Expand Up @@ -438,7 +459,9 @@ func createStatsDefaultColumns(
return nil, err
}
isInverted := colinfo.ColumnTypeIsInvertedIndexable(col.GetType())
addIndexColumnStatsIfNotExists(colID, isInverted)
if err := addIndexColumnStatsIfNotExists(colID, isInverted); err != nil {
return nil, err
}
}
}
}
Expand All @@ -447,6 +470,12 @@ func createStatsDefaultColumns(
nonIdxCols := 0
for i := 0; i < len(desc.PublicColumns()) && nonIdxCols < maxNonIndexCols; i++ {
col := desc.PublicColumns()[i]

// Do not collect stats for virtual computed columns.
if col.IsVirtual() {
continue
}

colList := []descpb.ColumnID{col.GetID()}

if !trackStatsIfNotExists(colList) {
Expand Down
46 changes: 37 additions & 9 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -826,8 +826,38 @@ upper_bound range_rows distinct_range_rows equal_rows
statement ok
SET experimental_enable_expression_indexes=true

# Test that inaccessible columns that represent expression indexes have
# histograms collected for them.
# Test that stats are not collected for virtual columns.
statement ok
CREATE TABLE virt (
a INT,
v INT AS (a + 10) VIRTUAL,
INDEX (v)
)

statement ok
INSERT INTO virt VALUES (1), (2), (3)

statement ok
CREATE STATISTICS s FROM virt

query TTIIB colnames,rowsort
SELECT
statistics_name,
column_names,
row_count,
null_count,
histogram_id IS NOT NULL AS has_histogram
FROM
[SHOW STATISTICS FOR TABLE virt]
ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {rowid} 3 0 true

# Test that stats are not collected for inaccessible virtual columns that
# represent expression indexes.
statement ok
CREATE TABLE expression (
a INT,
Expand All @@ -853,13 +883,11 @@ FROM
ORDER BY
column_names::STRING, created
----
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {b} 3 0 true
s {crdb_internal_idx_expr_1} 3 3 false
s {crdb_internal_idx_expr} 3 3 true
s {j} 3 0 false
s {rowid} 3 0 true
statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {b} 3 0 true
s {j} 3 0 false
s {rowid} 3 0 true

# Test that non-index columns have histograms collected for them, with
# up to 2 buckets.
Expand Down