Skip to content

Commit

Permalink
sql: default sql.stats.statement_fingerprint.format_mask to use spe…
Browse files Browse the repository at this point in the history
…cial flags

By default `sql.stats.statement_fingerprint.format_mask` is now
set to `FmtCollapseLists|FmtConstantsAsUnderscores` to reduce
statement fingerprint cardinality due to long constant lists and
variations in constant formatting. Note that the default fmt flag
for statement fingerprint generation is `FmtHideConstants`. Any
flags set with `sql.stats.statement_fingerprint.format_mask` will be
OR'd with `FmtHideConstants`.

Closes: #120409

Release note (sql change): Users will see the following changes
in their generated statement fingerprints from sql stats:
- lists with only literals/placeholders and similar subexpressions are
shortened to their first item followed by "__more__" (see the example below)
- constants and placeholders are all replaced with the same character,
an underscore `_`
```
SELECT * FROM foo WHERE f IN (1, $1, 1+2) ->
SELECT * FROM foo WHERE f IN (_, __more__)
```
  • Loading branch information
xinhaoz committed Mar 20, 2024
1 parent 865da22 commit adcfb6c
Show file tree
Hide file tree
Showing 19 changed files with 87 additions and 76 deletions.
12 changes: 6 additions & 6 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# LogicTest: 3node-tenant
# LogicTest: 3node-tenant

query II
SELECT count(distinct(node_id)), count(*) FROM crdb_internal.node_runtime_info
Expand Down Expand Up @@ -543,11 +543,11 @@ SELECT key, max_retries, failure_count
WHERE application_name = 'test_max_retry'
ORDER BY key
----
CREATE SEQUENCE s 0 0
DROP SEQUENCE s 0 0
RESET application_name 0 0
SELECT IF(nextval('_') < _, crdb_internal.force_retry('_'::INTERVAL), _) 0 1
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 2 1
CREATE SEQUENCE s 0 0
DROP SEQUENCE s 0 0
RESET application_name 0 0
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 2 1
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_::INTERVAL), _) 0 1

query T
SELECT crdb_internal.cluster_name()
Expand Down
4 changes: 2 additions & 2 deletions pkg/ccl/serverccl/statusccl/tenant_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ func TestTenantCannotSeeNonTenantStats(t *testing.T) {
{stmt: `CREATE TABLE posts_t (id INT8 PRIMARY KEY, body STRING)`},
{
stmt: `INSERT INTO posts_t VALUES (1, 'foo')`,
fingerprint: `INSERT INTO posts_t VALUES (_, '_')`,
fingerprint: `INSERT INTO posts_t VALUES (_, __more__)`,
},
{stmt: `SELECT * FROM posts_t`},
}
Expand All @@ -446,7 +446,7 @@ func TestTenantCannotSeeNonTenantStats(t *testing.T) {
{stmt: `CREATE TABLE posts_nt (id INT8 PRIMARY KEY, body STRING)`},
{
stmt: `INSERT INTO posts_nt VALUES (1, 'foo')`,
fingerprint: `INSERT INTO posts_nt VALUES (_, '_')`,
fingerprint: `INSERT INTO posts_nt VALUES (_, __more__)`,
},
{stmt: `SELECT * FROM posts_nt`},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/interactive_tests/test_demo_workload.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ for {set i 0} {$i < 10} {incr i} {
set timeout 1
send "select key from crdb_internal.node_statement_statistics order by count desc limit 1;\r"
expect {
"SELECT city, id FROM vehicles WHERE city = \$1" {
"SELECT city, id FROM vehicles WHERE city = _" {
set workloadRunning 1
break
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/interactive_tests/test_explain_analyze_debug.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ eexpect root@
send "PREPARE p AS SELECT * FROM t WHERE k = \$1;\r"
eexpect root@

send "SELECT crdb_internal.request_statement_bundle('SELECT * FROM t WHERE k = \$1', 0::FLOAT, 0::INTERVAL, 0::INTERVAL);\r"
send "SELECT crdb_internal.request_statement_bundle('SELECT * FROM t WHERE k = _', 0::FLOAT, 0::INTERVAL, 0::INTERVAL);\r"
eexpect root@

send "EXECUTE p(1);\r"
Expand Down
12 changes: 6 additions & 6 deletions pkg/server/application_api/sql_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func TestStatusAPICombinedTransactions(t *testing.T) {
{query: `CREATE TABLE posts (id INT8 PRIMARY KEY, body STRING)`, count: 1, numRows: 0},
{
query: `INSERT INTO posts VALUES (1, 'foo')`,
fingerprinted: `INSERT INTO posts VALUES (_, '_')`,
fingerprinted: `INSERT INTO posts VALUES (_, __more__)`,
count: 1,
numRows: 1,
},
Expand Down Expand Up @@ -222,7 +222,7 @@ func TestStatusAPITransactions(t *testing.T) {
{query: `CREATE TABLE posts (id INT8 PRIMARY KEY, body STRING)`, count: 1, numRows: 0},
{
query: `INSERT INTO posts VALUES (1, 'foo')`,
fingerprinted: `INSERT INTO posts VALUES (_, _)`,
fingerprinted: `INSERT INTO posts VALUES (_, __more__)`,
count: 1,
numRows: 1,
},
Expand Down Expand Up @@ -425,7 +425,7 @@ func TestStatusAPIStatements(t *testing.T) {
{stmt: `CREATE TABLE posts (id INT8 PRIMARY KEY, body STRING)`},
{
stmt: `INSERT INTO posts VALUES (1, 'foo')`,
fingerprinted: `INSERT INTO posts VALUES (_, '_')`,
fingerprinted: `INSERT INTO posts VALUES (_, __more__)`,
},
{stmt: `SELECT * FROM posts`},
}
Expand Down Expand Up @@ -730,7 +730,7 @@ func TestStatusAPICombinedStatementsWithFullScans(t *testing.T) {
{stmt: `CREATE DATABASE football`, respQuery: `CREATE DATABASE football`, fullScan: false, distSQL: false, failed: false, count: 1},
{stmt: `SET database = football`, respQuery: `SET database = football`, fullScan: false, distSQL: false, failed: false, count: 1},
{stmt: `CREATE TABLE players (id INT PRIMARY KEY, name TEXT, position TEXT, age INT,goals INT)`, respQuery: `CREATE TABLE players (id INT8 PRIMARY KEY, name STRING, "position" STRING, age INT8, goals INT8)`, fullScan: false, distSQL: false, failed: false, count: 1},
{stmt: `INSERT INTO players (id, name, position, age, goals) VALUES (1, 'Lionel Messi', 'Forward', 34, 672), (2, 'Cristiano Ronaldo', 'Forward', 36, 674)`, respQuery: `INSERT INTO players(id, name, "position", age, goals) VALUES (_, '_', __more1_10__), (__more1_10__)`, fullScan: false, distSQL: false, failed: false, count: 1},
{stmt: `INSERT INTO players (id, name, position, age, goals) VALUES (1, 'Lionel Messi', 'Forward', 34, 672), (2, 'Cristiano Ronaldo', 'Forward', 36, 674)`, respQuery: `INSERT INTO players(id, name, "position", age, goals) VALUES (_, __more__), (__more__)`, fullScan: false, distSQL: false, failed: false, count: 1},
{stmt: `SELECT avg(goals) FROM players`, respQuery: `SELECT avg(goals) FROM players`, fullScan: true, distSQL: true, failed: false, count: 1},
{stmt: `SELECT name FROM players WHERE age >= 32`, respQuery: `SELECT name FROM players WHERE age >= _`, fullScan: true, distSQL: true, failed: false, count: 1},
}
Expand Down Expand Up @@ -882,7 +882,7 @@ func TestStatusAPICombinedStatements(t *testing.T) {
{stmt: `CREATE TABLE posts (id INT8 PRIMARY KEY, body STRING)`},
{
stmt: `INSERT INTO posts VALUES (1, 'foo')`,
fingerprinted: `INSERT INTO posts VALUES (_, '_')`,
fingerprinted: `INSERT INTO posts VALUES (_, __more__)`,
},
{stmt: `SELECT * FROM posts`},
}
Expand Down Expand Up @@ -1061,7 +1061,7 @@ func TestStatusAPIStatementDetails(t *testing.T) {
thirdServerSQL.Exec(t, stmt)
}

query := `INSERT INTO posts VALUES (_, '_')`
query := `INSERT INTO posts VALUES (_, __more__)`
fingerprintID := appstatspb.ConstructStatementFingerprintID(query, true, `roachblog`)
path := fmt.Sprintf(`stmtdetails/%v`, fingerprintID)

Expand Down
2 changes: 1 addition & 1 deletion pkg/server/application_api/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ CREATE TABLE t.test (x INT PRIMARY KEY);

foundStat := false
for _, stat := range stats {
if stat.Key.Query == "INSERT INTO _ VALUES ($1)" {
if stat.Key.Query == "INSERT INTO _ VALUES (_)" {
foundStat = true
if stat.Stats.Count != 2 {
t.Fatal("expected to find 2 invocations, found", stat.Stats.Count)
Expand Down
13 changes: 11 additions & 2 deletions pkg/sql/conn_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -3525,11 +3525,20 @@ var allowSnapshotIsolation = settings.RegisterBoolSetting(
var logIsolationLevelLimiter = log.Every(10 * time.Second)

// Bitmask for enabling various query fingerprint formatting styles.
// We don't publish this setting because most users should not need
// to tweak the fingerprint generation.
var queryFormattingForFingerprintsMask = settings.RegisterIntSetting(
settings.ApplicationLevel,
"sql.stats.statement_fingerprint.format_mask",
"enables setting additional fmt flags for statement fingerprint formatting",
0,
"enables setting additional fmt flags FmtHideConstants for statement fingerprint formatting. "+
"Flags set here will be applied in addition to FmtHideConstants",
int64(tree.FmtCollapseLists|tree.FmtConstantsAsUnderscores),
settings.WithValidateInt(func(i int64) error {
if i == 0 || int64(tree.FmtCollapseLists|tree.FmtConstantsAsUnderscores)&i == i {
return nil
}
return errors.Newf("invalid value %d", i)
}),
)

func (ex *connExecutor) txnIsolationLevelToKV(
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/conn_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ func TestPrepareStatisticsMetadata(t *testing.T) {
require.NoError(t, err)

// Verify that query and querySummary are equal in crdb_internal.statement_statistics.metadata.
rows, err := sqlDB.Query(`SELECT metadata->>'query', metadata->>'querySummary' FROM crdb_internal.statement_statistics WHERE metadata->>'query' LIKE 'SELECT $1::INT8'`)
rows, err := sqlDB.Query(`SELECT metadata->>'query', metadata->>'querySummary' FROM crdb_internal.statement_statistics WHERE metadata->>'query' LIKE 'SELECT _::INT8'`)
if err != nil {
t.Fatal(err)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/idxrecommendations/idx_recommendations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func TestIndexRecommendationsStats(t *testing.T) {
for i := 0; i < (minExecutions + 2); i++ {
testConn.Exec(t, `INSERT INTO t3 VALUES($1)`, i)
rows := testConn.QueryRow(t, "SELECT index_recommendations FROM CRDB_INTERNAL.STATEMENT_STATISTICS "+
" WHERE metadata ->> 'db' = 'idxrectest' AND metadata ->> 'query' = 'INSERT INTO t3 VALUES ($1)'")
" WHERE metadata ->> 'db' = 'idxrectest' AND metadata ->> 'query' = 'INSERT INTO t3 VALUES (_)'")
rows.Scan(&recommendations)

require.Equal(t, "{}", recommendations)
Expand Down
10 changes: 5 additions & 5 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -860,11 +860,11 @@ SELECT key, max_retries, failure_count
WHERE application_name = 'test_max_retry'
ORDER BY key
----
CREATE SEQUENCE s 0 0
DROP SEQUENCE s 0 0
RESET application_name 0 0
SELECT IF(nextval('_') < _, crdb_internal.force_retry('_'::INTERVAL), _) 0 1
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 2 1
CREATE SEQUENCE s 0 0
DROP SEQUENCE s 0 0
RESET application_name 0 0
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 2 1
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_::INTERVAL), _) 0 1

query T
SELECT database_name FROM crdb_internal.node_statement_statistics limit 1
Expand Down
23 changes: 12 additions & 11 deletions pkg/sql/logictest/testdata/logic_test/statement_statistics
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ SET application_name = ''
query TTB
SELECT txn_fingerprint_id, key, implicit_txn FROM crdb_internal.node_statement_statistics WHERE application_name = 'multi_stmts_test' ORDER BY txn_fingerprint_id
----
3783666325893023269 SET application_name = '_' true
6795574265959791651 SELECT '_' true
6795574265959791651 SELECT _, _ true
6795574265959791651 SELECT _, _, _ true
11423158778792053189 SELECT _ true
11423158778792053189 SELECT _, _ true
11423158778792053189 SELECT _, _, _ true
6692757627851618087 SET application_name = _ true

statement ok
CREATE TABLE test(x INT, y INT, z INT); INSERT INTO test(x, y, z) VALUES (0,0,0);
Expand Down Expand Up @@ -156,20 +156,20 @@ SELECT key,flags
FROM test.crdb_internal.node_statement_statistics
WHERE application_name = 'valuetest' ORDER BY key, flags
----
key flags
INSERT INTO test VALUES (_, _, __more1_10__), (__more1_10__) ·
SELECT (_, _, __more1_10__) FROM test WHERE _ ·
key flags
INSERT INTO test VALUES (_, __more__), (__more__) ·
SELECT (_, __more__) FROM test WHERE _ ·
SELECT key FROM test.crdb_internal.node_statement_statistics ·
SELECT sin(_) ·
SELECT sqrt(_) ·
SELECT x FROM (VALUES (_, _, __more1_10__), (__more1_10__)) AS t (x) ·
SELECT x FROM (VALUES (_, __more__), (__more__)) AS t (x) ·
SELECT x FROM test WHERE y = (_ / z) +
SELECT x FROM test WHERE y IN (_, _, _ + x, _, _) ·
SELECT x FROM test WHERE y IN (_, _, __more1_10__) +
SELECT x FROM test WHERE y NOT IN (_, _, __more1_10__) ·
SELECT x FROM test WHERE y IN (_, __more__) +
SELECT x FROM test WHERE y NOT IN (_, __more__) ·
SET CLUSTER SETTING "debug.panic_on_failed_assertions.enabled" = DEFAULT ·
SET CLUSTER SETTING "debug.panic_on_failed_assertions.enabled" = _ ·
SET application_name = '_' ·
SET application_name = _ ·
SET distsql = "on" ·
SHOW CLUSTER SETTING "debug.panic_on_failed_assertions.enabled" ·
SHOW application_name ·
Expand All @@ -196,6 +196,7 @@ SELECT key,
key svc_ok parse_ok plan_ok run_ok ovh_ok
SELECT _ true true true true true
SELECT _ true true true true true
SELECT _ true true true true true

# Check that statements made in implicit transactions are separated from those
# that are not.
Expand Down
16 changes: 8 additions & 8 deletions pkg/sql/pgwire/testdata/pgtest/tuple
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"0A000","Message":"error in argument for $1: could not parse \"(1,cat)\" as type tuple: cannot parse anonymous record type","Detail":"statement name \"s4\"\n--\nportal name \"p4\"\n--\nstatement summary \"SELECT $1 AS a\""}
{"Type":"ErrorResponse","Code":"0A000","Message":"error in argument for $1: could not parse \"(1,cat)\" as type tuple: cannot parse anonymous record type","Detail":"statement name \"s4\"\n--\nportal name \"p4\"\n--\nstatement summary \"SELECT _ AS a\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Expand Down Expand Up @@ -164,7 +164,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: tuple requires a 4 byte header for binary format","Detail":"bufferLength=0\n--\nstatement name \"s_empty_param\"\n--\nportal name \"p_empty_param\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: tuple requires a 4 byte header for binary format","Detail":"bufferLength=0\n--\nstatement name \"s_empty_param\"\n--\nportal name \"p_empty_param\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# negative length tuple
Expand All @@ -189,7 +189,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: tuple must have non-negative number of elements","Detail":"numberOfElements=-1\n--\nstatement name \"s_negative_tuple\"\n--\nportal name \"p_negative_tuple\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: tuple must have non-negative number of elements","Detail":"numberOfElements=-1\n--\nstatement name \"s_negative_tuple\"\n--\nportal name \"p_negative_tuple\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# not enough bytes for element OID
Expand All @@ -214,7 +214,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: insufficient bytes reading element OID for binary format","Detail":"elementIdx=0 bufferLength=4 bufferStartIdx=4 bufferEndIdx=8\n--\nstatement name \"s_element_oid_no_bytes\"\n--\nportal name \"p_element_oid_no_bytes\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: insufficient bytes reading element OID for binary format","Detail":"elementIdx=0 bufferLength=4 bufferStartIdx=4 bufferEndIdx=8\n--\nstatement name \"s_element_oid_no_bytes\"\n--\nportal name \"p_element_oid_no_bytes\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# element OID not found
Expand All @@ -240,7 +240,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: element type not found for OID 0","Detail":"elementIdx=0 bufferLength=8 bufferStartIdx=4 bufferEndIdx=8\n--\nstatement name \"s_element_oid_not_found\"\n--\nportal name \"p_element_oid_not_found\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: element type not found for OID 0","Detail":"elementIdx=0 bufferLength=8 bufferStartIdx=4 bufferEndIdx=8\n--\nstatement name \"s_element_oid_not_found\"\n--\nportal name \"p_element_oid_not_found\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# not enough bytes for element size
Expand All @@ -266,7 +266,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: insufficient bytes reading element size for binary format","Detail":"elementIdx=0 bufferLength=8 bufferStartIdx=8 bufferEndIdx=12\n--\nstatement name \"s_element_size_no_bytes\"\n--\nportal name \"p_element_size_no_bytes\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: insufficient bytes reading element size for binary format","Detail":"elementIdx=0 bufferLength=8 bufferStartIdx=8 bufferEndIdx=12\n--\nstatement name \"s_element_size_no_bytes\"\n--\nportal name \"p_element_size_no_bytes\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# null element
Expand Down Expand Up @@ -313,7 +313,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: unsupported binary bool: ","Detail":"statement name \"s_element_needs_bytes\"\n--\nportal name \"p_element_needs_bytes\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: unsupported binary bool: ","Detail":"statement name \"s_element_needs_bytes\"\n--\nportal name \"p_element_needs_bytes\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# not enough bytes for element
Expand All @@ -340,7 +340,7 @@ ErrorResponse
ReadyForQuery
----
{"Type":"ParseComplete"}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: insufficient bytes reading element for binary format","Detail":"elementIdx=0 bufferLength=12 bufferStartIdx=12 bufferEndIdx=13\n--\nstatement name \"s_element_no_bytes\"\n--\nportal name \"p_element_no_bytes\"\n--\nstatement summary \"SELECT $1::r\""}
{"Type":"ErrorResponse","Code":"42601","Message":"error in argument for $1: insufficient bytes reading element for binary format","Detail":"elementIdx=0 bufferLength=12 bufferStartIdx=12 bufferEndIdx=13\n--\nstatement name \"s_element_no_bytes\"\n--\nportal name \"p_element_no_bytes\"\n--\nstatement summary \"SELECT _::r\""}
{"Type":"ReadyForQuery","TxStatus":"I"}

# Test binary encoding of a generic tuple result.
Expand Down

0 comments on commit adcfb6c

Please sign in to comment.