Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkg/sql/sqlstats: move failures out of stmt key/meta, into stats #120236

Merged
merged 1 commit into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 15 additions & 16 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,10 @@ SELECT * FROM crdb_internal.leases WHERE node_id < 0
node_id table_id name parent_id expiration deleted


query ITTTTTIIITTRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTTRRRRR colnames
query ITTTTTIIITTRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTTRRRRRI colnames
SELECT * FROM crdb_internal.node_statement_statistics WHERE node_id < 0
----
node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error last_error_code rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var mvcc_step_avg mvcc_step_var mvcc_step_internal_avg mvcc_step_internal_var mvcc_seek_avg mvcc_seek_var mvcc_seek_internal_avg mvcc_seek_internal_var mvcc_block_bytes_avg mvcc_block_bytes_var mvcc_block_bytes_in_cache_avg mvcc_block_bytes_in_cache_var mvcc_key_bytes_avg mvcc_key_bytes_var mvcc_value_bytes_avg mvcc_value_bytes_var mvcc_point_count_avg mvcc_point_count_var mvcc_points_covered_by_range_tombstones_avg mvcc_points_covered_by_range_tombstones_var mvcc_range_key_count_avg mvcc_range_key_count_var mvcc_range_key_contained_points_avg mvcc_range_key_contained_points_var mvcc_range_key_skipped_points_avg mvcc_range_key_skipped_points_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations latency_seconds_min latency_seconds_max latency_seconds_p50 latency_seconds_p90 latency_seconds_p99
node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error last_error_code rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var mvcc_step_avg mvcc_step_var mvcc_step_internal_avg mvcc_step_internal_var mvcc_seek_avg mvcc_seek_var mvcc_seek_internal_avg mvcc_seek_internal_var mvcc_block_bytes_avg mvcc_block_bytes_var mvcc_block_bytes_in_cache_avg mvcc_block_bytes_in_cache_var mvcc_key_bytes_avg mvcc_key_bytes_var mvcc_value_bytes_avg mvcc_value_bytes_var mvcc_point_count_avg mvcc_point_count_var mvcc_points_covered_by_range_tombstones_avg mvcc_points_covered_by_range_tombstones_var mvcc_range_key_count_avg mvcc_range_key_count_var mvcc_range_key_contained_points_avg mvcc_range_key_contained_points_var mvcc_range_key_skipped_points_avg mvcc_range_key_skipped_points_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations latency_seconds_min latency_seconds_max latency_seconds_p50 latency_seconds_p90 latency_seconds_p99 failure_count

query ITTTIIRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR colnames
SELECT * FROM crdb_internal.node_transaction_statistics WHERE node_id < 0
Expand Down Expand Up @@ -536,18 +536,17 @@ RESET application_name
# (#22847).
#

query TIB
SELECT key, max_retries, flags LIKE '!%' AS f
query TII
SELECT key, max_retries, failure_count
FROM crdb_internal.node_statement_statistics
WHERE application_name = 'test_max_retry'
ORDER BY key, f
ORDER BY key
----
CREATE SEQUENCE s 0 false
DROP SEQUENCE s 0 false
RESET application_name 0 false
SELECT IF(nextval('_') < _, crdb_internal.force_retry('_'::INTERVAL), _) 0 true
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 2 false
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 1 true
CREATE SEQUENCE s 0 0
DROP SEQUENCE s 0 0
RESET application_name 0 0
SELECT IF(nextval('_') < _, crdb_internal.force_retry('_'::INTERVAL), _) 0 1
SELECT IF(nextval(_) < _, crdb_internal.force_retry(_), _) 2 1

query T
SELECT crdb_internal.cluster_name()
Expand Down Expand Up @@ -611,11 +610,11 @@ query ITTTI colnames,rowsort
SELECT node_id, application_name, key, statement_ids, count FROM crdb_internal.node_transaction_statistics where application_name = 'test_txn_statistics'
----
node_id application_name key statement_ids count
0 test_txn_statistics 10922505138341351577 {4104808689124681542} 1
0 test_txn_statistics 12762606372390135532 {8833422719858486605,8833422719858486605,8833422719858486605} 1
0 test_txn_statistics 15417266716795083410 {8833422719858486605} 1
0 test_txn_statistics 15417266716795083422 {8833422719858486593} 1
0 test_txn_statistics 17236010932163349339 {8833422719858486605,8833422719858486605} 2
0 test_txn_statistics 9852458619443781049 {6504861979500726990,6504861979500726990,6504861979500726990} 1
0 test_txn_statistics 17664899828239596817 {6504861979500726990} 1
0 test_txn_statistics 17664899828239596829 {6504861979500726978} 1
0 test_txn_statistics 17976145939037309168 {6204733445766643503} 1
0 test_txn_statistics 18364202634803759405 {6504861979500726990,6504861979500726990} 2

subtest node_tenant_capabilities_cache

Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/serverccl/statusccl/tenant_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ func TestTenantCannotSeeNonTenantStats(t *testing.T) {

var actualStatements []string
for _, respStatement := range actual.Statements {
if respStatement.Key.KeyData.Failed {
if respStatement.Stats.FailureCount > 0 {
// We ignore failed statements here as the INSERT statement can fail and
// be automatically retried, confusing the test success check.
continue
Expand Down
19 changes: 5 additions & 14 deletions pkg/server/application_api/sql_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ func TestStatusAPIStatements(t *testing.T) {
// See if the statements returned are what we executed.
var statementsInResponse []string
for _, respStatement := range resp.Statements {
if respStatement.Key.KeyData.Failed {
if respStatement.Stats.FailureCount > 0 {
// We ignore failed statements here as the INSERT statement can fail and
// be automatically retried, confusing the test success check.
continue
Expand Down Expand Up @@ -753,7 +753,6 @@ func TestStatusAPICombinedStatementsWithFullScans(t *testing.T) {
count int
fullScan bool
distSQL bool
failed bool
}

// expectedStatementStatsMap maps the query response format to the associated
Expand All @@ -769,7 +768,6 @@ func TestStatusAPICombinedStatementsWithFullScans(t *testing.T) {
expectedStatementStatsMap[stmt.respQuery] = ExpectedStatementData{
fullScan: stmt.fullScan,
distSQL: stmt.distSQL,
failed: stmt.failed,
count: stmt.count,
}
}
Expand Down Expand Up @@ -798,7 +796,7 @@ func TestStatusAPICombinedStatementsWithFullScans(t *testing.T) {
// statements that were that were successfully executed by the test app
// to avoid counting such failures. If a statement that we expect to be
// successful is not found in the response, the test will fail later.
if respStatement.Key.KeyData.App == testAppName && !respStatement.Key.KeyData.Failed {
if respStatement.Key.KeyData.App == testAppName && respStatement.Stats.FailureCount == 0 {
actualResponseStatsMap[respStatement.Key.KeyData.Query] = respStatement
}
}
Expand All @@ -810,13 +808,11 @@ func TestStatusAPICombinedStatementsWithFullScans(t *testing.T) {
actualCount := respStatement.Stats.FirstAttemptCount
actualFullScan := respStatement.Key.KeyData.FullScan
actualDistSQL := respStatement.Key.KeyData.DistSQL
actualFailed := respStatement.Key.KeyData.Failed

stmtJSONString := responseToJSON(respStatement)

require.Equal(t, expectedData.fullScan, actualFullScan, "failed for respStatement: %v", stmtJSONString)
require.Equal(t, expectedData.distSQL, actualDistSQL, "failed for respStatement: %v", stmtJSONString)
require.Equal(t, expectedData.failed, actualFailed, "failed for respStatement: %v", stmtJSONString)
require.Equal(t, expectedData.count, int(actualCount), "failed for respStatement: %v", stmtJSONString)
}
}
Expand Down Expand Up @@ -912,7 +908,7 @@ func TestStatusAPICombinedStatements(t *testing.T) {
var statementsInResponse []string
expectedTxnFingerprints := map[appstatspb.TransactionFingerprintID]struct{}{}
for _, respStatement := range resp.Statements {
if respStatement.Key.KeyData.Failed {
if respStatement.Stats.FailureCount > 0 {
// We ignore failed statements here as the INSERT statement can fail and
// be automatically retried, confusing the test success check.
continue
Expand Down Expand Up @@ -1066,8 +1062,7 @@ func TestStatusAPIStatementDetails(t *testing.T) {
}

query := `INSERT INTO posts VALUES (_, '_')`
fingerprintID := appstatspb.ConstructStatementFingerprintID(query,
false, true, `roachblog`)
fingerprintID := appstatspb.ConstructStatementFingerprintID(query, true, `roachblog`)
path := fmt.Sprintf(`stmtdetails/%v`, fingerprintID)

var resp serverpb.StatementDetailsResponse
Expand Down Expand Up @@ -1260,8 +1255,7 @@ func TestStatusAPIStatementDetails(t *testing.T) {
}

selectQuery := "SELECT _, _, _, _"
fingerprintID = appstatspb.ConstructStatementFingerprintID(selectQuery, false,
true, "defaultdb")
fingerprintID = appstatspb.ConstructStatementFingerprintID(selectQuery, true, "defaultdb")

testPath(
fmt.Sprintf(`stmtdetails/%v`, fingerprintID),
Expand Down Expand Up @@ -1583,7 +1577,6 @@ func generateStatement() appstatspb.CollectedStatementStatistics {
Database: "test_database",
DistSQL: true,
FullScan: true,
Failed: false,
ImplicitTxn: true,
PlanHash: uint64(200),
Query: "SELECT * FROM foo",
Expand Down Expand Up @@ -1729,7 +1722,6 @@ func insertStatementIntoSystemStmtStatsTable(
metadata := struct {
Database string `json:"db"`
DistSQL bool `json:"distsql"`
Failed bool `json:"failed"`
FullScan bool `json:"fullScan"`
ImplicitTxn bool `json:"implicitTxn"`
Query string `json:"query"`
Expand All @@ -1739,7 +1731,6 @@ func insertStatementIntoSystemStmtStatsTable(
}{
Database: statement.Key.Database,
DistSQL: statement.Key.DistSQL,
Failed: statement.Key.Failed,
FullScan: statement.Key.FullScan,
ImplicitTxn: statement.Key.ImplicitTxn,
Query: statement.Key.Query,
Expand Down
14 changes: 5 additions & 9 deletions pkg/server/combined_statement_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,15 +666,14 @@ func collectCombinedStatements(
activityTableHasAllData bool,
) ([]serverpb.StatementsResponse_CollectedStatementStatistics, error) {
aostClause := testingKnobs.GetAOSTClause()
const expectedNumDatums = 10
const expectedNumDatums = 9
const queryFormat = `
SELECT
fingerprint_id,
app_name,
aggregated_ts,
COALESCE(CAST(metadata -> 'distSQLCount' AS INT), 0) AS distSQLCount,
COALESCE(CAST(metadata -> 'fullScanCount' AS INT), 0) AS fullScanCount,
COALESCE(CAST(metadata -> 'failedCount' AS INT), 0) AS failedCount,
metadata ->> 'query' AS query,
metadata ->> 'querySummary' AS querySummary,
(SELECT string_agg(elem::text, ',')
Expand Down Expand Up @@ -709,7 +708,6 @@ SELECT
aggregated_ts,
COALESCE(CAST(metadata -> 'distSQLCount' AS INT), 0) AS distSQLCount,
COALESCE(CAST(metadata -> 'fullScanCount' AS INT), 0) AS fullScanCount,
COALESCE(CAST(metadata -> 'failedCount' AS INT), 0) AS failedCount,
metadata ->> 'query' AS query,
metadata ->> 'querySummary' AS querySummary,
(SELECT string_agg(elem::text, ',')
Expand Down Expand Up @@ -797,25 +795,23 @@ FROM (SELECT fingerprint_id,
aggregatedTs := tree.MustBeDTimestampTZ(row[2]).Time
distSQLCount := int64(*row[3].(*tree.DInt))
fullScanCount := int64(*row[4].(*tree.DInt))
failedCount := int64(*row[5].(*tree.DInt))
query := string(tree.MustBeDString(row[6]))
querySummary := string(tree.MustBeDString(row[7]))
databases := string(tree.MustBeDString(row[8]))
query := string(tree.MustBeDString(row[5]))
querySummary := string(tree.MustBeDString(row[6]))
databases := string(tree.MustBeDString(row[7]))

metadata := appstatspb.CollectedStatementStatistics{
Key: appstatspb.StatementStatisticsKey{
App: app,
DistSQL: distSQLCount > 0,
FullScan: fullScanCount > 0,
Failed: failedCount > 0,
Query: query,
QuerySummary: querySummary,
Database: databases,
},
}

var stats appstatspb.StatementStatistics
statsJSON := tree.MustBeDJSON(row[9]).JSON
statsJSON := tree.MustBeDJSON(row[8]).JSON
if err = sqlstatsutil.DecodeStmtStatsStatisticsJSON(statsJSON, &stats); err != nil {
return nil, srverrors.ServerError(ctx, err)
}
Expand Down
5 changes: 0 additions & 5 deletions pkg/server/statement_details.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,11 @@ func getStatementDetails(
// since the metadata is unique per plan hash.
// Update the statementTotal.Metadata.*Count with the counts from statementStatisticsPerPlanHash.keyData.
statementTotal.Metadata.DistSQLCount = 0
statementTotal.Metadata.FailedCount = 0
statementTotal.Metadata.FullScanCount = 0
statementTotal.Metadata.VecCount = 0
statementTotal.Metadata.TotalCount = 0
for _, planStats := range statementStatisticsPerPlanHash {
statementTotal.Metadata.DistSQLCount += planStats.Metadata.DistSQLCount
statementTotal.Metadata.FailedCount += planStats.Metadata.FailedCount
statementTotal.Metadata.FullScanCount += planStats.Metadata.FullScanCount
statementTotal.Metadata.VecCount += planStats.Metadata.VecCount
statementTotal.Metadata.TotalCount += planStats.Metadata.TotalCount
Expand Down Expand Up @@ -620,9 +618,6 @@ LIMIT $%d`, whereClause, len(args)), args...)
if aggregatedMetadata.DistSQLCount > 0 {
aggregatedMetadata.DistSQLCount = metadata.Stats.Count
}
if aggregatedMetadata.FailedCount > 0 {
aggregatedMetadata.FailedCount = metadata.Stats.Count
}
if aggregatedMetadata.FullScanCount > 0 {
aggregatedMetadata.FullScanCount = metadata.Stats.Count
}
Expand Down
13 changes: 3 additions & 10 deletions pkg/sql/appstatspb/app_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ import (
type StmtFingerprintID uint64

// ConstructStatementFingerprintID constructs an ID by hashing query with
// constants redacted, its database and failure status, and if it was part of an
// constants redacted, its database, and if it was part of an
// implicit txn. At the time of writing, these are the axis' we use to bucket
// queries for stats collection (see stmtKey).
func ConstructStatementFingerprintID(
stmtNoConstants string, failed bool, implicitTxn bool, database string,
stmtNoConstants string, implicitTxn bool, database string,
) StmtFingerprintID {
fnv := util.MakeFNV64()
for _, c := range stmtNoConstants {
Expand All @@ -33,11 +33,6 @@ func ConstructStatementFingerprintID(
for _, c := range database {
fnv.Add(uint64(c))
}
if failed {
fnv.Add('F')
} else {
fnv.Add('S')
}
if implicitTxn {
fnv.Add('I')
} else {
Expand Down Expand Up @@ -156,9 +151,6 @@ func (s *AggregatedStatementMetadata) Add(other *CollectedStatementStatistics) {
if other.Key.DistSQL {
s.DistSQLCount++
}
if other.Key.Failed {
s.FailedCount++
}
if other.Key.FullScan {
s.FullScanCount++
}
Expand Down Expand Up @@ -214,6 +206,7 @@ func (s *StatementStatistics) Add(other *StatementStatistics) {
}

s.Count += other.Count
s.FailureCount += other.FailureCount
}

// AlmostEqual compares two StatementStatistics and their contained NumericStats
Expand Down
9 changes: 6 additions & 3 deletions pkg/sql/appstatspb/app_stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ message StatementStatistics {
// last_error_code is the last error code for a failed statement, if it exists.
optional string last_error_code = 32 [(gogoproto.nullable) = false];

// failure_count is the count of failed executions for a given statement fingerprint.
optional int64 failure_count = 33 [(gogoproto.nullable) = false];

// Note: be sure to update `sql/app_stats.go` when adding/removing fields here!

reserved 13, 14, 17, 18, 19, 20;
Expand Down Expand Up @@ -209,15 +212,14 @@ message StatementStatisticsKey {
optional string query = 1 [(gogoproto.nullable) = false];
optional string app = 2 [(gogoproto.nullable) = false];
optional bool distSQL = 3 [(gogoproto.nullable) = false];
optional bool failed = 4 [(gogoproto.nullable) = false];
optional bool implicit_txn = 6 [(gogoproto.nullable) = false];
optional bool vec = 7 [(gogoproto.nullable) = false];
optional bool full_scan = 8 [(gogoproto.nullable) = false];
optional string database = 9 [(gogoproto.nullable) = false];
optional uint64 plan_hash = 10 [(gogoproto.nullable) = false];
optional string query_summary = 12 [(gogoproto.nullable) = false];

reserved 5;
reserved 4, 5;
optional uint64 transaction_fingerprint_id = 11
[(gogoproto.nullable) = false,
(gogoproto.customname) = "TransactionFingerprintID",
Expand All @@ -235,11 +237,12 @@ message AggregatedStatementMetadata {
repeated string databases = 6;
optional bool implicit_txn = 7 [(gogoproto.nullable) = false];
optional int64 dist_sql_count = 8 [(gogoproto.nullable) = false, (gogoproto.customname) = "DistSQLCount"];
optional int64 failed_count = 9 [(gogoproto.nullable) = false];
optional int64 full_scan_count = 10 [(gogoproto.nullable) = false];
optional int64 vec_count = 11 [(gogoproto.nullable) = false];
optional int64 total_count = 12 [(gogoproto.nullable) = false];
optional string fingerprint_id = 13 [(gogoproto.nullable) = false, (gogoproto.customname) = "FingerprintID"];

reserved 9;
}

// CollectedStatementStatistics wraps collected timings and metadata for some
Expand Down
2 changes: 0 additions & 2 deletions pkg/sql/conn_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -2872,7 +2872,6 @@ func (ex *connExecutor) execCopyOut(
f.FormatNode(cmd.Stmt)
stmtFingerprintID := appstatspb.ConstructStatementFingerprintID(
f.CloseAndGetString(),
copyErr != nil,
ex.implicitTxn(),
ex.planner.CurrentDatabase(),
)
Expand Down Expand Up @@ -3137,7 +3136,6 @@ func (ex *connExecutor) execCopyIn(
f.FormatNode(cmd.Stmt)
stmtFingerprintID := appstatspb.ConstructStatementFingerprintID(
f.CloseAndGetString(),
copyErr != nil,
ex.implicitTxn(),
ex.planner.CurrentDatabase(),
)
Expand Down
2 changes: 0 additions & 2 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,6 @@ func (ex *connExecutor) execStmtInOpenState(
f.FormatNode(ast)
stmtFingerprintID := appstatspb.ConstructStatementFingerprintID(
f.CloseAndGetString(),
execErr != nil,
ex.implicitTxn(),
p.CurrentDatabase(),
)
Expand Down Expand Up @@ -2557,7 +2556,6 @@ func (ex *connExecutor) execStmtInNoTxnState(
f.FormatNode(stmt.AST)
stmtFingerprintID := appstatspb.ConstructStatementFingerprintID(
f.CloseAndGetString(),
execErr != nil,
ex.implicitTxn(),
p.CurrentDatabase(),
)
Expand Down