Skip to content

Commit

Permalink
kvcoord: Add observability for actively running range feeds.
Browse files Browse the repository at this point in the history
Add a `crdb_internal.active_range_feeds` virtual table which lists
all currently running range feeds on a node.

The table lists the span for which the range feed
was started, along with all of the ranges making up that span.
Per range information, including the node where the range feed is running
against, as well as the timestamp when the last non-error event has
been seen from the range is recorded in the table.

```
root@:26257/defaultdb> select * from crdb_internal.active_range_feeds order by id, range_start;
       id      |              tags              | range_start |  range_end  |            startts             | diff  | node_id |     partial_range_start      |      partial_range_end       |            resolved            | last_event_utc
---------------+--------------------------------+-------------+-------------+--------------------------------+-------+---------+------------------------------+------------------------------+--------------------------------+-----------------
  824655839232 | n?,rangefeed=table-stats-cache | /Table/20   | /Table/21   | 1629329537656207000.0000000000 | false |       1 | /Table/20                    | /Table/21                    | 1629335390881550000.0000000000 |           NULL
  824657576368 | n1,rangefeed=lease             | /Table/3    | /Table/4    | 0.0000000000                   | false |       1 | /Table/3                     | /Table/4                     | 1629335390881550000.0000000000 |           NULL
  824680146768 | n1,job=685578580453523457      | /Table/53/2 | /Table/53/3 | 1629329458537238000.0000000000 | false |       1 | /Table/53/2                  | /Table/53/2/"\x80"           | 1629335390881550000.0000000000 |           NULL
  824680146768 | n1,job=685578580453523457      | /Table/53/2 | /Table/53/3 | 1629329458537238000.0000000000 | false |       1 | /Table/53/2/"\x80"/PrefixEnd | /Table/53/3                  | 1629335390881550000.0000000000 |           NULL
  824680146768 | n1,job=685578580453523457      | /Table/53/2 | /Table/53/3 | 1629329458537238000.0000000000 | false |       1 | /Table/53/2/"\x80"           | /Table/53/2/"\x80"/PrefixEnd | 1629335390881550000.0000000000 |           NULL
(11 rows)
```

Release Notes: Improve range feed observability by adding a
`crdb_internal.active_range_feeds` virtual table which lists
all currently executing range feeds on the node.
  • Loading branch information
Yevgeniy Miretskiy committed Aug 19, 2021
1 parent b16c86d commit 165fb88
Show file tree
Hide file tree
Showing 12 changed files with 244 additions and 17 deletions.
27 changes: 27 additions & 0 deletions pkg/ccl/changefeedccl/changefeed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4394,3 +4394,30 @@ func TestChangefeedOnErrorOption(t *testing.T) {
t.Run(`kafka`, kafkaTest(testFn))
t.Run(`webhook`, webhookTest(testFn))
}

func TestDistSenderRangeFeedPopulatesVirtualTable(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

s, db, _ := serverutils.StartServer(t, base.TestServerArgs{
Knobs: base.TestingKnobs{
JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(),
},
})
defer s.Stopper().Stop(context.Background())

sqlDB := sqlutils.MakeSQLRunner(db)
sqlDB.Exec(t, `
SET CLUSTER SETTING kv.rangefeed.enabled='true';
CREATE TABLE tbl (a INT, b STRING);
INSERT INTO tbl VALUES (1, 'one'), (2, 'two'), (3, 'three');
CREATE CHANGEFEED FOR tbl INTO 'null://';
`)

var tableID int
sqlDB.QueryRow(t, "SELECT table_id FROM crdb_internal.tables WHERE name='tbl'").Scan(&tableID)
numRangesQuery := fmt.Sprintf(
"SELECT count(*) FROM crdb_internal.active_range_feeds WHERE range_start LIKE '/Table/%d/%%'",
tableID)
sqlDB.CheckQueryResultsRetry(t, numRangesQuery, [][]string{{"1"}})
}
5 changes: 5 additions & 0 deletions pkg/cli/testdata/zip/partial1
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
[node 1] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/1/crdb_internal.node_transaction_statistics.txt... done
[node 1] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/1/crdb_internal.node_transactions.txt... done
[node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done
[node 1] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/1/crdb_internal.active_range_feeds.txt... done
[node 1] requesting data for debug/nodes/1/details... received response... converting to JSON... writing binary output: debug/nodes/1/details.json... done
[node 1] requesting data for debug/nodes/1/gossip... received response... converting to JSON... writing binary output: debug/nodes/1/gossip.json... done
[node 1] requesting data for debug/nodes/1/enginestats... received response... converting to JSON... writing binary output: debug/nodes/1/enginestats.json... done
Expand Down Expand Up @@ -169,6 +170,9 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
[node 2] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/2/crdb_internal.node_txn_stats.txt...
[node 2] retrieving SQL data for crdb_internal.node_txn_stats: last request failed: dial tcp ...
[node 2] retrieving SQL data for crdb_internal.node_txn_stats: creating error output: debug/nodes/2/crdb_internal.node_txn_stats.txt.err.txt... done
[node 2] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/2/crdb_internal.active_range_feeds.txt...
[node 2] retrieving SQL data for crdb_internal.active_range_feeds: last request failed: dial tcp ...
[node 2] retrieving SQL data for crdb_internal.active_range_feeds: creating error output: debug/nodes/2/crdb_internal.active_range_feeds.txt.err.txt... done
[node 2] requesting data for debug/nodes/2/details... received response...
[node 2] requesting data for debug/nodes/2/details: last request failed: rpc error: ...
[node 2] requesting data for debug/nodes/2/details: creating error output: debug/nodes/2/details.json.err.txt... done
Expand Down Expand Up @@ -216,6 +220,7 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
[node 3] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/3/crdb_internal.node_transaction_statistics.txt... done
[node 3] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/3/crdb_internal.node_transactions.txt... done
[node 3] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/3/crdb_internal.node_txn_stats.txt... done
[node 3] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/3/crdb_internal.active_range_feeds.txt... done
[node 3] requesting data for debug/nodes/3/details... received response... converting to JSON... writing binary output: debug/nodes/3/details.json... done
[node 3] requesting data for debug/nodes/3/gossip... received response... converting to JSON... writing binary output: debug/nodes/3/gossip.json... done
[node 3] requesting data for debug/nodes/3/enginestats... received response... converting to JSON... writing binary output: debug/nodes/3/enginestats.json... done
Expand Down
2 changes: 2 additions & 0 deletions pkg/cli/testdata/zip/partial1_excluded
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0
[node 1] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/1/crdb_internal.node_transaction_statistics.txt... done
[node 1] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/1/crdb_internal.node_transactions.txt... done
[node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done
[node 1] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/1/crdb_internal.active_range_feeds.txt... done
[node 1] requesting data for debug/nodes/1/details... received response... converting to JSON... writing binary output: debug/nodes/1/details.json... done
[node 1] requesting data for debug/nodes/1/gossip... received response... converting to JSON... writing binary output: debug/nodes/1/gossip.json... done
[node 1] requesting data for debug/nodes/1/enginestats... received response... converting to JSON... writing binary output: debug/nodes/1/enginestats.json... done
Expand Down Expand Up @@ -134,6 +135,7 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0
[node 3] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/3/crdb_internal.node_transaction_statistics.txt... done
[node 3] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/3/crdb_internal.node_transactions.txt... done
[node 3] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/3/crdb_internal.node_txn_stats.txt... done
[node 3] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/3/crdb_internal.active_range_feeds.txt... done
[node 3] requesting data for debug/nodes/3/details... received response... converting to JSON... writing binary output: debug/nodes/3/details.json... done
[node 3] requesting data for debug/nodes/3/gossip... received response... converting to JSON... writing binary output: debug/nodes/3/gossip.json... done
[node 3] requesting data for debug/nodes/3/enginestats... received response... converting to JSON... writing binary output: debug/nodes/3/enginestats.json... done
Expand Down
2 changes: 2 additions & 0 deletions pkg/cli/testdata/zip/partial2
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
[node 1] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/1/crdb_internal.node_transaction_statistics.txt... done
[node 1] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/1/crdb_internal.node_transactions.txt... done
[node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done
[node 1] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/1/crdb_internal.active_range_feeds.txt... done
[node 1] requesting data for debug/nodes/1/details... received response... converting to JSON... writing binary output: debug/nodes/1/details.json... done
[node 1] requesting data for debug/nodes/1/gossip... received response... converting to JSON... writing binary output: debug/nodes/1/gossip.json... done
[node 1] requesting data for debug/nodes/1/enginestats... received response... converting to JSON... writing binary output: debug/nodes/1/enginestats.json... done
Expand Down Expand Up @@ -133,6 +134,7 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
[node 3] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/3/crdb_internal.node_transaction_statistics.txt... done
[node 3] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/3/crdb_internal.node_transactions.txt... done
[node 3] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/3/crdb_internal.node_txn_stats.txt... done
[node 3] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/3/crdb_internal.active_range_feeds.txt... done
[node 3] requesting data for debug/nodes/3/details... received response... converting to JSON... writing binary output: debug/nodes/3/details.json... done
[node 3] requesting data for debug/nodes/3/gossip... received response... converting to JSON... writing binary output: debug/nodes/3/gossip.json... done
[node 3] requesting data for debug/nodes/3/enginestats... received response... converting to JSON... writing binary output: debug/nodes/3/enginestats.json... done
Expand Down
1 change: 1 addition & 0 deletions pkg/cli/testdata/zip/testzip
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
[node 1] retrieving SQL data for crdb_internal.node_transaction_statistics... writing output: debug/nodes/1/crdb_internal.node_transaction_statistics.txt... done
[node 1] retrieving SQL data for crdb_internal.node_transactions... writing output: debug/nodes/1/crdb_internal.node_transactions.txt... done
[node 1] retrieving SQL data for crdb_internal.node_txn_stats... writing output: debug/nodes/1/crdb_internal.node_txn_stats.txt... done
[node 1] retrieving SQL data for crdb_internal.active_range_feeds... writing output: debug/nodes/1/crdb_internal.active_range_feeds.txt... done
[node 1] requesting data for debug/nodes/1/details... received response... converting to JSON... writing binary output: debug/nodes/1/details.json... done
[node 1] requesting data for debug/nodes/1/gossip... received response... converting to JSON... writing binary output: debug/nodes/1/gossip.json... done
[node 1] requesting data for debug/nodes/1/enginestats... received response... converting to JSON... writing binary output: debug/nodes/1/enginestats.json... done
Expand Down
9 changes: 9 additions & 0 deletions pkg/cli/testdata/zip/testzip_concurrent
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ zip
[node 1] requesting stacks: done
[node 1] requesting stacks: received response...
[node 1] requesting stacks: writing binary output: debug/nodes/1/stacks.txt...
[node 1] retrieving SQL data for crdb_internal.active_range_feeds...
[node 1] retrieving SQL data for crdb_internal.active_range_feeds: done
[node 1] retrieving SQL data for crdb_internal.active_range_feeds: writing output: debug/nodes/1/crdb_internal.active_range_feeds.txt...
[node 1] retrieving SQL data for crdb_internal.feature_usage...
[node 1] retrieving SQL data for crdb_internal.feature_usage: done
[node 1] retrieving SQL data for crdb_internal.feature_usage: writing output: debug/nodes/1/crdb_internal.feature_usage.txt...
Expand Down Expand Up @@ -438,6 +441,9 @@ zip
[node 2] requesting stacks: done
[node 2] requesting stacks: received response...
[node 2] requesting stacks: writing binary output: debug/nodes/2/stacks.txt...
[node 2] retrieving SQL data for crdb_internal.active_range_feeds...
[node 2] retrieving SQL data for crdb_internal.active_range_feeds: done
[node 2] retrieving SQL data for crdb_internal.active_range_feeds: writing output: debug/nodes/2/crdb_internal.active_range_feeds.txt...
[node 2] retrieving SQL data for crdb_internal.feature_usage...
[node 2] retrieving SQL data for crdb_internal.feature_usage: done
[node 2] retrieving SQL data for crdb_internal.feature_usage: writing output: debug/nodes/2/crdb_internal.feature_usage.txt...
Expand Down Expand Up @@ -710,6 +716,9 @@ zip
[node 3] requesting stacks: done
[node 3] requesting stacks: received response...
[node 3] requesting stacks: writing binary output: debug/nodes/3/stacks.txt...
[node 3] retrieving SQL data for crdb_internal.active_range_feeds...
[node 3] retrieving SQL data for crdb_internal.active_range_feeds: done
[node 3] retrieving SQL data for crdb_internal.active_range_feeds: writing output: debug/nodes/3/crdb_internal.active_range_feeds.txt...
[node 3] retrieving SQL data for crdb_internal.feature_usage...
[node 3] retrieving SQL data for crdb_internal.feature_usage: done
[node 3] retrieving SQL data for crdb_internal.feature_usage: writing output: debug/nodes/3/crdb_internal.feature_usage.txt...
Expand Down
1 change: 1 addition & 0 deletions pkg/cli/zip_per_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ var debugZipTablesPerNode = []string{
"crdb_internal.node_transaction_statistics",
"crdb_internal.node_transactions",
"crdb_internal.node_txn_stats",
"crdb_internal.active_range_feeds",
}

// collectCPUProfiles collects CPU profiles in parallel over all nodes
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvclient/kvcoord/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ go_library(
"//pkg/util/errorutil/unimplemented",
"//pkg/util/grpcutil",
"//pkg/util/hlc",
"//pkg/util/iterutil",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/quotapool",
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"fmt"
"runtime"
"strings"
"sync"
"sync/atomic"
"time"
"unsafe"
Expand Down Expand Up @@ -289,6 +290,9 @@ type DistSender struct {
// the descriptor, instead of trying to reorder them by latency. The knob
// only applies to requests sent with the LEASEHOLDER routing policy.
dontReorderReplicas bool

// Currently executing range feeds.
activeRangeFeeds sync.Map // // map[*rangeFeedRegistry]nil
}

var _ kv.Sender = &DistSender{}
Expand Down
Loading

0 comments on commit 165fb88

Please sign in to comment.