Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: fix crash when planning stats collection on virtual col with UDT #123926

Merged
merged 1 commit into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ func StubTableStats(
// framework.
type createStatsNode struct {
tree.CreateStats

// p is the "outer planner" from planning the CREATE STATISTICS
// statement. When we startExec the createStatsNode, it creates a job which
// has a second planner (the JobExecContext). When the job resumes, it does
// its work using a retrying internal transaction for which we create a third
// "inner planner".
p *planner

// runAsJob is true by default, and causes the code below to be executed,
Expand Down Expand Up @@ -638,39 +644,53 @@ var _ jobs.Resumer = &createStatsResumer{}

// Resume is part of the jobs.Resumer interface.
func (r *createStatsResumer) Resume(ctx context.Context, execCtx interface{}) error {
p := execCtx.(JobExecContext)
// jobsPlanner is a second planner distinct from the "outer planner" in the
// createStatsNode. It comes from the jobs system and does not have an
// associated txn.
jobsPlanner := execCtx.(JobExecContext)
details := r.job.Details().(jobspb.CreateStatsDetails)
if details.Name == jobspb.AutoStatsName {
// We want to make sure that an automatic CREATE STATISTICS job only runs if
// there are no other CREATE STATISTICS jobs running, automatic or manual.
if err := checkRunningJobs(ctx, r.job, p); err != nil {
if err := checkRunningJobs(ctx, r.job, jobsPlanner); err != nil {
return err
}
}

r.tableID = details.Table.ID
evalCtx := p.ExtendedEvalContext()

dsp := p.DistSQLPlanner()
if err := p.ExecCfg().InternalDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
// Set the transaction on the EvalContext to this txn. This allows for
// use of the txn during processor setup during the execution of the flow.
evalCtx.Txn = txn.KV()

evalCtx := jobsPlanner.ExtendedEvalContext()

if err := jobsPlanner.ExecCfg().InternalDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
// We create a third "inner planner" associated with this txn in order to
// have (a) use of the txn during type checking of any virtual computed
// column expressions, and (b) use of the txn during processor setup during
// the execution of the flow.
innerPlanner, cleanup := NewInternalPlanner(
"create-stats-resume-job",
txn.KV(),
jobsPlanner.User(),
&MemoryMetrics{},
jobsPlanner.ExecCfg(),
jobsPlanner.SessionData(),
)
defer cleanup()
innerP := innerPlanner.(*planner)
innerEvalCtx := innerP.ExtendedEvalContext()
if details.AsOf != nil {
p.ExtendedEvalContext().AsOfSystemTime = &eval.AsOfSystemTime{Timestamp: *details.AsOf}
p.ExtendedEvalContext().SetTxnTimestamp(details.AsOf.GoTime())
innerP.ExtendedEvalContext().AsOfSystemTime = &eval.AsOfSystemTime{Timestamp: *details.AsOf}
innerP.ExtendedEvalContext().SetTxnTimestamp(details.AsOf.GoTime())
if err := txn.KV().SetFixedTimestamp(ctx, *details.AsOf); err != nil {
return err
}
}

planCtx := dsp.NewPlanningCtx(ctx, evalCtx, nil /* planner */, txn.KV(), FullDistribution)
dsp := innerP.DistSQLPlanner()
planCtx := dsp.NewPlanningCtx(ctx, innerEvalCtx, innerP, txn.KV(), FullDistribution)
// CREATE STATS flow doesn't produce any rows and only emits the
// metadata, so we can use a nil rowContainerHelper.
resultWriter := NewRowResultWriter(nil /* rowContainer */)
if err := dsp.planAndRunCreateStats(
ctx, evalCtx, planCtx, p.SemaCtx(), txn.KV(), r.job, resultWriter,
ctx, innerEvalCtx, planCtx, innerP.SemaCtx(), txn.KV(), r.job, resultWriter,
); err != nil {
// Check if this was a context canceled error and restart if it was.
if grpcutil.IsContextCanceled(err) {
Expand Down
69 changes: 69 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -2877,3 +2877,72 @@ vectorized: true
estimated row count: 10 (1.0% of the table; stats collected <hidden> ago)
table: mno@mno_o_idx
spans: [/11 - /11]

# Regression for not setting the TypeResolver on the SemaContext when dealing
# with stats on virtual computed columns (#122312).
statement ok
CREATE TABLE t122312 (s STRING, g greeting AS (s::greeting) VIRTUAL)

statement ok
ANALYZE t122312

# Regression for not setting the txn of the schemaResolver when type checking
# stats on virtual computed columns.
statement ok
CREATE TYPE order_status AS ENUM ('pending', 'paid', 'dispatched', 'delivered')

statement ok
CREATE TABLE orders (
"id" UUID PRIMARY KEY DEFAULT gen_random_uuid(),
"customer_id" UUID NOT NULL,
"total" DECIMAL NOT NULL,
"balance" DECIMAL NOT NULL,
"order_ts" TIMESTAMPTZ(0) NOT NULL DEFAULT now(),
"dispatch_ts" TIMESTAMPTZ(0),
"delivery_ts" TIMESTAMPTZ(0),
"status" order_status AS (
CASE
WHEN "delivery_ts" IS NOT NULL THEN 'delivered'
WHEN "dispatch_ts" IS NOT NULL THEN 'dispatched'
WHEN "balance" = 0 THEN 'paid'
ELSE 'pending'
END) VIRTUAL,
INDEX ("status")
)

statement ok
INSERT INTO orders ("customer_id", "total", "balance", "dispatch_ts", "delivery_ts") VALUES
('bdeb232e-12e9-4a33-9dd5-7bb9b694291a', 100, 100, NULL, NULL),
('0dc59725-d20b-4370-a05d-11db025a0064', 200, 0, NULL, NULL),
('d53d4021-9390-4b3a-9e5a-4bf1ff3e5a4c', 300, 0, now(), NULL),
('d53d4021-9390-4b3a-9e5a-4bf1ff3e5a4c', 400, 0, now(), now())

statement ok
ANALYZE orders

query TIIIIB colnames
SELECT column_names, row_count, null_count, distinct_count, avg_size, histogram_id IS NOT NULL AS has_histogram
FROM [SHOW STATISTICS FOR TABLE orders]
ORDER BY column_names::STRING
----
column_names row_count null_count distinct_count avg_size has_histogram
{balance} 4 0 2 32 true
{customer_id} 4 0 3 16 true
{delivery_ts} 4 3 2 6 true
{dispatch_ts} 4 2 2 12 true
{id} 4 0 4 16 true
{order_ts} 4 0 1 24 true
{status} 4 0 4 48 true
{total} 4 0 4 32 true

let $hist_status
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE orders] WHERE column_names = ARRAY['status']

query TIRI colnames,nosort
SHOW HISTOGRAM $hist_status
----
upper_bound range_rows distinct_range_rows equal_rows
'pending' 0 0 1
'paid' 0 0 1
'dispatched' 0 0 1
'delivered' 0 0 1
Loading