Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

usagemetrics: add cluster members to metrics API #10340

Merged
merged 9 commits into from
Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/10340.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
telemetry: The usage data in the `metrics` API now includes cluster member counts, reporting clients on a per segment basis.
```
12 changes: 10 additions & 2 deletions agent/consul/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,15 @@ func NewServer(config *Config, flat Deps) (*Server, error) {
WithStateProvider(s.fsm).
WithLogger(s.logger).
WithDatacenter(s.config.Datacenter).
WithReportingInterval(s.config.MetricsReportingInterval),
WithReportingInterval(s.config.MetricsReportingInterval).
WithGetMembersFunc(func() []serf.Member {
members, err := s.LANMembersAllSegments()
if err != nil {
return []serf.Member{}
}

return members
}),
)
if err != nil {
s.Shutdown()
Expand Down Expand Up @@ -1138,7 +1146,7 @@ func (s *Server) LANMembers() []serf.Member {
return s.serfLAN.Members()
}

// WANMembers is used to return the members of the LAN cluster
// WANMembers is used to return the members of the WAN cluster
func (s *Server) WANMembers() []serf.Member {
if s.serfWAN == nil {
return nil
Expand Down
86 changes: 86 additions & 0 deletions agent/consul/usagemetrics/usagemetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/serf/serf"
)

var Gauges = []prometheus.GaugeDefinition{
Expand All @@ -26,15 +27,26 @@ var Gauges = []prometheus.GaugeDefinition{
Name: []string{"consul", "state", "service_instances"},
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
},
{
Name: []string{"consul", "members", "clients"},
Help: "Measures the current number of client agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
},
{
Name: []string{"consul", "members", "servers"},
Help: "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
},
}

type getMembersFunc func() []serf.Member

// Config holds the settings for various parameters for the
// UsageMetricsReporter
type Config struct {
logger hclog.Logger
metricLabels []metrics.Label
stateProvider StateProvider
tickerInterval time.Duration
getMembersFunc getMembersFunc
}

// WithDatacenter adds the datacenter as a label to all metrics emitted by the
Expand Down Expand Up @@ -63,6 +75,12 @@ func (c *Config) WithStateProvider(sp StateProvider) *Config {
return c
}

// WithGetMembersFunc specifies the function used to identify cluster members
func (c *Config) WithGetMembersFunc(fn getMembersFunc) *Config {
c.getMembersFunc = fn
return c
}

// StateProvider defines an inteface for retrieving a state.Store handle. In
// non-test code, this is satisfied by the fsm.FSM struct.
type StateProvider interface {
Expand All @@ -77,13 +95,18 @@ type UsageMetricsReporter struct {
metricLabels []metrics.Label
stateProvider StateProvider
tickerInterval time.Duration
getMembersFunc getMembersFunc
}

func NewUsageMetricsReporter(cfg *Config) (*UsageMetricsReporter, error) {
if cfg.stateProvider == nil {
return nil, errors.New("must provide a StateProvider to usage reporter")
}

if cfg.getMembersFunc == nil {
return nil, errors.New("must provide a getMembersFunc to usage reporter")
}

if cfg.logger == nil {
cfg.logger = hclog.NewNullLogger()
}
Expand All @@ -98,6 +121,7 @@ func NewUsageMetricsReporter(cfg *Config) (*UsageMetricsReporter, error) {
stateProvider: cfg.stateProvider,
metricLabels: cfg.metricLabels,
tickerInterval: cfg.tickerInterval,
getMembersFunc: cfg.getMembersFunc,
}

return u, nil
Expand Down Expand Up @@ -137,4 +161,66 @@ func (u *UsageMetricsReporter) runOnce() {
}

u.emitServiceUsage(serviceUsage)

servers, clients := u.memberUsage()
u.emitMemberUsage(servers, clients)
}

func (u *UsageMetricsReporter) memberUsage() (int, map[string]int) {
if u.getMembersFunc == nil {
mkeeler marked this conversation as resolved.
Show resolved Hide resolved
return 0, nil
}

mems := u.getMembersFunc()
if len(mems) <= 0 {
u.logger.Warn("cluster reported zero members")
return 0, nil
}

servers := 0
clients := make(map[string]int)

for _, m := range mems {
if m.Status != serf.StatusAlive {
continue
}

switch m.Tags["role"] {
case "node":
clients[m.Tags["segment"]]++
mkeeler marked this conversation as resolved.
Show resolved Hide resolved
case "consul":
servers++
}
}

return servers, clients
}

func (u *UsageMetricsReporter) emitMemberUsage(servers int, clients map[string]int) {
totalClients := 0

for seg, c := range clients {
segmentLabel := metrics.Label{Name: "segment", Value: seg}
labels := append([]metrics.Label{segmentLabel}, u.metricLabels...)

metrics.SetGaugeWithLabels(
[]string{"consul", "members", "clients"},
float32(c),
labels,
)

totalClients += c
}

metrics.SetGaugeWithLabels(
[]string{"consul", "members", "clients"},
float32(totalClients),
u.metricLabels,
)

metrics.SetGaugeWithLabels(
[]string{"consul", "members", "servers"},
float32(servers),
u.metricLabels,
)
}
85 changes: 79 additions & 6 deletions agent/consul/usagemetrics/usagemetrics_oss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/serf/serf"
mkeeler marked this conversation as resolved.
Show resolved Hide resolved
)

func newStateStore() (*state.Store, error) {
Expand All @@ -21,6 +22,7 @@ func newStateStore() (*state.Store, error) {
func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
type testCase struct {
modfiyStateStore func(t *testing.T, s *state.Store)
getMembersFunc getMembersFunc
expectedGauges map[string]metrics.GaugeValue
}
cases := map[string]testCase{
Expand All @@ -45,24 +47,64 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.members.clients;datacenter=dc1": {
Name: "consul.usage.test.consul.members.clients",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.members.servers;datacenter=dc1": {
Name: "consul.usage.test.consul.members.servers",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
},
},
},
getMembersFunc: func() []serf.Member { return []serf.Member{} },
},
"nodes-and-services": {
modfiyStateStore: func(t *testing.T, s *state.Store) {
require.Nil(t, s.EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.1"}))
require.Nil(t, s.EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.2"}))
require.Nil(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"}))
require.Nil(t, s.EnsureNode(4, &structs.Node{Node: "qux", Address: "127.0.0.3"}))

// Typical services and some consul services spread across two nodes
require.Nil(t, s.EnsureService(4, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000}))
require.Nil(t, s.EnsureService(5, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000}))
require.Nil(t, s.EnsureService(6, "foo", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil}))
require.Nil(t, s.EnsureService(7, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil}))
require.Nil(t, s.EnsureService(5, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000}))
require.Nil(t, s.EnsureService(6, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000}))
require.Nil(t, s.EnsureService(7, "foo", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil}))
require.Nil(t, s.EnsureService(8, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil}))
},
getMembersFunc: func() []serf.Member {
return []serf.Member{
{
Name: "foo",
Tags: map[string]string{"role": "consul"},
Status: serf.StatusAlive,
},
{
Name: "bar",
Tags: map[string]string{"role": "consul"},
Status: serf.StatusAlive,
},
{
Name: "baz",
Tags: map[string]string{"role": "node", "segment": "a"},
Status: serf.StatusAlive,
},
{
Name: "qux",
Tags: map[string]string{"role": "node", "segment": "b"},
Status: serf.StatusAlive,
},
}
},
expectedGauges: map[string]metrics.GaugeValue{
"consul.usage.test.consul.state.nodes;datacenter=dc1": {
Name: "consul.usage.test.consul.state.nodes",
Value: 3,
Value: 4,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
"consul.usage.test.consul.state.services;datacenter=dc1": {
Expand All @@ -79,6 +121,36 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.members.clients;datacenter=dc1": {
Name: "consul.usage.test.consul.members.clients",
Value: 2,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.members.servers;datacenter=dc1": {
Name: "consul.usage.test.consul.members.servers",
Value: 2,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.members.clients;segment=a;datacenter=dc1": {
Name: "consul.usage.test.consul.members.clients",
Value: 1,
Labels: []metrics.Label{
{Name: "segment", Value: "a"},
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.members.clients;segment=b;datacenter=dc1": {
Name: "consul.usage.test.consul.members.clients",
Value: 1,
Labels: []metrics.Label{
{Name: "segment", Value: "b"},
{Name: "datacenter", Value: "dc1"},
},
},
},
},
}
Expand All @@ -102,7 +174,8 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
new(Config).
WithStateProvider(mockStateProvider).
WithLogger(testutil.Logger(t)).
WithDatacenter("dc1"),
WithDatacenter("dc1").
WithGetMembersFunc(tcase.getMembersFunc),
)
require.NoError(t, err)

Expand Down
35 changes: 34 additions & 1 deletion agent/consul/usagemetrics/usagemetrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/serf/serf"
)

type mockStateProvider struct {
Expand All @@ -25,6 +26,7 @@ func (m *mockStateProvider) State() *state.Store {
func TestUsageReporter_Run_Nodes(t *testing.T) {
type testCase struct {
modfiyStateStore func(t *testing.T, s *state.Store)
getMembersFunc getMembersFunc
expectedGauges map[string]metrics.GaugeValue
}
cases := map[string]testCase{
Expand All @@ -36,19 +38,49 @@ func TestUsageReporter_Run_Nodes(t *testing.T) {
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
},
getMembersFunc: func() []serf.Member { return []serf.Member{} },
},
"nodes": {
modfiyStateStore: func(t *testing.T, s *state.Store) {
require.Nil(t, s.EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.1"}))
require.Nil(t, s.EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.2"}))
require.Nil(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"}))
},
getMembersFunc: func() []serf.Member {
return []serf.Member{
{
Name: "foo",
Tags: map[string]string{"role": "consul"},
Status: serf.StatusAlive,
},
{
Name: "bar",
Tags: map[string]string{"role": "consul"},
Status: serf.StatusAlive,
},
{
Name: "baz",
Tags: map[string]string{"role": "node"},
Status: serf.StatusAlive,
},
}
},
expectedGauges: map[string]metrics.GaugeValue{
"consul.usage.test.consul.state.nodes;datacenter=dc1": {
Name: "consul.usage.test.consul.state.nodes",
Value: 3,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
"consul.usage.test.consul.members.clients;datacenter=dc1": {
Name: "consul.usage.test.consul.members.clients",
Value: 1,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
"consul.usage.test.consul.members.servers;datacenter=dc1": {
Name: "consul.usage.test.consul.members.servers",
Value: 2,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
},
},
}
Expand All @@ -73,7 +105,8 @@ func TestUsageReporter_Run_Nodes(t *testing.T) {
new(Config).
WithStateProvider(mockStateProvider).
WithLogger(testutil.Logger(t)).
WithDatacenter("dc1"),
WithDatacenter("dc1").
WithGetMembersFunc(tcase.getMembersFunc),
)
require.NoError(t, err)

Expand Down
Loading