diff --git a/.changelog/17582.txt b/.changelog/17582.txt new file mode 100644 index 000000000000..122b9df98116 --- /dev/null +++ b/.changelog/17582.txt @@ -0,0 +1,3 @@ +```release-note:feature +cli: `consul operator raft list-peers` command now shows how many commits each follower trails the leader by, to aid in troubleshooting. +``` diff --git a/agent/consul/operator_raft_endpoint.go b/agent/consul/operator_raft_endpoint.go index 328f8ff964e0..f5678fffdde5 100644 --- a/agent/consul/operator_raft_endpoint.go +++ b/agent/consul/operator_raft_endpoint.go @@ -45,6 +45,12 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply serverMap[raft.ServerAddress(addr)] = member } + serverIDLastIndexMap := make(map[raft.ServerID]uint64) + + for _, serverState := range op.srv.autopilot.GetState().Servers { + serverIDLastIndexMap[serverState.Server.ID] = serverState.Stats.LastIndex + } + // Fill out the reply. leader := op.srv.raft.Leader() reply.Index = future.Index() @@ -63,6 +69,7 @@ func (op *Operator) RaftGetConfiguration(args *structs.DCSpecificRequest, reply Leader: server.Address == leader, Voter: server.Suffrage == raft.Voter, ProtocolVersion: raftProtocolVersion, + LastIndex: serverIDLastIndexMap[server.ID], } reply.Servers = append(reply.Servers, entry) } diff --git a/agent/consul/operator_raft_endpoint_test.go b/agent/consul/operator_raft_endpoint_test.go index be60ec66a317..e4f322130a75 100644 --- a/agent/consul/operator_raft_endpoint_test.go +++ b/agent/consul/operator_raft_endpoint_test.go @@ -47,6 +47,13 @@ func TestOperator_RaftGetConfiguration(t *testing.T) { if len(future.Configuration().Servers) != 1 { t.Fatalf("bad: %v", future.Configuration().Servers) } + + serverIDLastIndexMap := make(map[raft.ServerID]uint64) + + for _, serverState := range s1.autopilot.GetState().Servers { + serverIDLastIndexMap[serverState.Server.ID] = serverState.Stats.LastIndex + } + me := future.Configuration().Servers[0] expected := 
structs.RaftConfigurationResponse{ Servers: []*structs.RaftServer{ @@ -57,6 +64,7 @@ func TestOperator_RaftGetConfiguration(t *testing.T) { Leader: true, Voter: true, ProtocolVersion: "3", + LastIndex: serverIDLastIndexMap[me.ID], }, }, Index: future.Index(), @@ -110,6 +118,10 @@ func TestOperator_RaftGetConfiguration_ACLDeny(t *testing.T) { if len(future.Configuration().Servers) != 1 { t.Fatalf("bad: %v", future.Configuration().Servers) } + serverIDLastIndexMap := make(map[raft.ServerID]uint64) + for _, serverState := range s1.autopilot.GetState().Servers { + serverIDLastIndexMap[serverState.Server.ID] = serverState.Stats.LastIndex + } me := future.Configuration().Servers[0] expected := structs.RaftConfigurationResponse{ Servers: []*structs.RaftServer{ @@ -120,6 +132,7 @@ func TestOperator_RaftGetConfiguration_ACLDeny(t *testing.T) { Leader: true, Voter: true, ProtocolVersion: "3", + LastIndex: serverIDLastIndexMap[me.ID], }, }, Index: future.Index(), diff --git a/agent/structs/operator.go b/agent/structs/operator.go index be4507c325a8..5cb1a92ca935 100644 --- a/agent/structs/operator.go +++ b/agent/structs/operator.go @@ -31,6 +31,9 @@ type RaftServer struct { // it's a non-voting server, which will be added in a future release of // Consul. Voter bool + + // LastIndex is the last log index this server has a record of in its Raft log. + LastIndex uint64 } // RaftConfigurationResponse is returned when querying for the current Raft diff --git a/api/operator_raft.go b/api/operator_raft.go index 1b48fdcd9b89..7e18f9304bb1 100644 --- a/api/operator_raft.go +++ b/api/operator_raft.go @@ -25,6 +25,9 @@ type RaftServer struct { // it's a non-voting server, which will be added in a future release of // Consul. Voter bool + + // LastIndex is the last log index this server has a record of in its Raft log. + LastIndex uint64 } // RaftConfiguration is returned when querying for the current Raft configuration. 
diff --git a/command/operator/raft/listpeers/operator_raft_list.go b/command/operator/raft/listpeers/operator_raft_list.go index 98934d8d0eb2..a54ccb9d3ea2 100644 --- a/command/operator/raft/listpeers/operator_raft_list.go +++ b/command/operator/raft/listpeers/operator_raft_list.go @@ -67,8 +67,24 @@ func raftListPeers(client *api.Client, stale bool) (string, error) { return "", fmt.Errorf("Failed to retrieve raft configuration: %v", err) } + leaderLastCommitIndex := uint64(0) + serverIdLastIndexMap := make(map[string]uint64) + + for _, raftServer := range reply.Servers { + serverIdLastIndexMap[raftServer.ID] = raftServer.LastIndex + } + + for _, s := range reply.Servers { + if s.Leader { + lastIndex, ok := serverIdLastIndexMap[s.ID] + if ok { + leaderLastCommitIndex = lastIndex + } + } + } + // Format it as a nice table. - result := []string{"Node\x1fID\x1fAddress\x1fState\x1fVoter\x1fRaftProtocol"} + result := []string{"Node\x1fID\x1fAddress\x1fState\x1fVoter\x1fRaftProtocol\x1fCommit Index\x1fTrails Leader By"} for _, s := range reply.Servers { raftProtocol := s.ProtocolVersion @@ -79,8 +95,20 @@ func raftListPeers(client *api.Client, stale bool) (string, error) { if s.Leader { state = "leader" } - result = append(result, fmt.Sprintf("%s\x1f%s\x1f%s\x1f%s\x1f%v\x1f%s", - s.Node, s.ID, s.Address, state, s.Voter, raftProtocol)) + + trailsLeaderByText := "-" + serverLastIndex, ok := serverIdLastIndexMap[s.ID] + if ok { + trailsLeaderBy := leaderLastCommitIndex - serverLastIndex + trailsLeaderByText = fmt.Sprintf("%d commits", trailsLeaderBy) + if s.Leader { + trailsLeaderByText = "-" + } else if trailsLeaderBy == 1 { + trailsLeaderByText = fmt.Sprintf("%d commit", trailsLeaderBy) + } + } + result = append(result, fmt.Sprintf("%s\x1f%s\x1f%s\x1f%s\x1f%v\x1f%s\x1f%v\x1f%s", + s.Node, s.ID, s.Address, state, s.Voter, raftProtocol, serverLastIndex, trailsLeaderByText)) } return columnize.Format(result, &columnize.Config{Delim: string([]byte{0x1f})}), nil diff --git 
a/command/operator/raft/listpeers/operator_raft_list_test.go b/command/operator/raft/listpeers/operator_raft_list_test.go index 8d53e4945398..c40ae8c48d7b 100644 --- a/command/operator/raft/listpeers/operator_raft_list_test.go +++ b/command/operator/raft/listpeers/operator_raft_list_test.go @@ -25,7 +25,7 @@ func TestOperatorRaftListPeersCommand(t *testing.T) { a := agent.NewTestAgent(t, ``) defer a.Shutdown() - expected := fmt.Sprintf("%s %s 127.0.0.1:%d leader true 3", + expected := fmt.Sprintf("%s %s 127.0.0.1:%d leader true 3 1 -", a.Config.NodeName, a.Config.NodeID, a.Config.ServerPort) // Test the list-peers subcommand directly diff --git a/website/content/commands/operator/raft.mdx b/website/content/commands/operator/raft.mdx index b6a10dab6e68..d10399876a51 100644 --- a/website/content/commands/operator/raft.mdx +++ b/website/content/commands/operator/raft.mdx @@ -46,10 +46,10 @@ Usage: `consul operator raft list-peers -stale=[true|false]` The output looks like this: ```text -Node ID Address State Voter RaftProtocol -alice 127.0.0.1:8300 127.0.0.1:8300 follower true 2 -bob 127.0.0.2:8300 127.0.0.2:8300 leader true 3 -carol 127.0.0.3:8300 127.0.0.3:8300 follower true 2 +Node ID Address State Voter RaftProtocol Commit Index Trails Leader By +alice 127.0.0.1:8300 127.0.0.1:8300 follower true 2 1167 0 commits +bob 127.0.0.2:8300 127.0.0.2:8300 leader true 3 1167 - +carol 127.0.0.3:8300 127.0.0.3:8300 follower true 2 1159 8 commits ``` `Node` is the node name of the server, as known to Consul, or "(unknown)" if @@ -70,7 +70,7 @@ configuration. - `-stale` - Enables non-leader servers to provide cluster state information. If the cluster is in an outage state without a leader, - we recommend setting this option to `true. + we recommend setting this option to `true`. Default is `false`. ## remove-peer