Skip to content

Commit

Permalink
Add LastStatusModifyTime field to HealthChecks
Browse files Browse the repository at this point in the history
This field is updated when the check status changes, which makes it possible
to adjust behaviour based on, for example, how much time has passed since the
check became "passing".
  • Loading branch information
Thibault Gilles committed Nov 13, 2018
1 parent 2e00641 commit e90e8c9
Show file tree
Hide file tree
Showing 15 changed files with 157 additions and 51 deletions.
2 changes: 1 addition & 1 deletion agent/acl_test.go
Expand Up @@ -80,7 +80,7 @@ func NewTestACLAgent(name string, hcl string, resolveFn func(string) (acl.Author
agent.MemSink = metrics.NewInmemSink(1*time.Second, time.Minute)

a.Agent.delegate = a
a.Agent.State = local.NewState(LocalConfig(a.Config), a.Agent.logger, a.Agent.tokens)
a.Agent.State = local.NewState(LocalConfig(a.Config), a.Agent.logger, a.Agent.tokens, time.Now)
a.Agent.State.TriggerSyncChanges = func() {}
return a
}
Expand Down
9 changes: 7 additions & 2 deletions agent/agent.go
Expand Up @@ -249,6 +249,9 @@ type Agent struct {
// grpcServer is the server instance used currently to serve xDS API for
// Envoy.
grpcServer *grpc.Server

// clock is used to timestamp healthchecks
clock func() time.Time
}

func New(c *config.RuntimeConfig) (*Agent, error) {
Expand Down Expand Up @@ -277,6 +280,7 @@ func New(c *config.RuntimeConfig) (*Agent, error) {
shutdownCh: make(chan struct{}),
endpoints: make(map[string]string),
tokens: new(token.Store),
clock: time.Now,
}

if err := a.initializeACLs(); err != nil {
Expand Down Expand Up @@ -363,7 +367,7 @@ func (a *Agent) Start() error {
}

// create the local state
a.State = local.NewState(LocalConfig(c), a.logger, a.tokens)
a.State = local.NewState(LocalConfig(c), a.logger, a.tokens, a.clock)

// create the state synchronization manager which performs
// regular and on-demand state synchronizations (anti-entropy).
Expand All @@ -385,7 +389,7 @@ func (a *Agent) Start() error {

// Setup either the client or the server.
if c.ServerMode {
server, err := consul.NewServerLogger(consulCfg, a.logger, a.tokens)
server, err := consul.NewServerLogger(consulCfg, a.logger, a.tokens, a.clock)
if err != nil {
return fmt.Errorf("Failed to start Consul server: %v", err)
}
Expand Down Expand Up @@ -2799,6 +2803,7 @@ func (a *Agent) loadCheckState(check *structs.HealthCheck) error {
// Restore the fields from the state
check.Output = p.Output
check.Status = p.Status
check.LastStatusModifyTime = p.LastStatusModifyTime
return nil
}

Expand Down
15 changes: 10 additions & 5 deletions agent/agent_test.go
Expand Up @@ -451,6 +451,8 @@ func TestAgent_AddService(t *testing.T) {
for k, v := range tt.healthChks {
t.Run(k, func(t *testing.T) {
got, want := a.State.Checks()[types.CheckID(k)], v
want.LastStatusModifyTime = a.Now
fmt.Println(want.LastStatusModifyTime, got.LastStatusModifyTime)
verify.Values(t, k, got, want)
})
}
Expand Down Expand Up @@ -653,9 +655,11 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) {
}

// check that both checks are there
hchk1.LastStatusModifyTime = a.Now
if got, want := a.State.Checks()["chk1"], hchk1; !verify.Values(t, "", got, want) {
t.FailNow()
}
hchk2.LastStatusModifyTime = a.Now
if got, want := a.State.Checks()["chk2"], hchk2; !verify.Values(t, "", got, want) {
t.FailNow()
}
Expand Down Expand Up @@ -1978,11 +1982,12 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
t.Fatalf("missing check registration")
}
expected := &structs.HealthCheck{
Node: a2.Config.NodeName,
CheckID: "mem",
Name: "memory check",
Status: api.HealthCritical,
Notes: "my cool notes",
Node: a2.Config.NodeName,
CheckID: "mem",
Name: "memory check",
Status: api.HealthCritical,
Notes: "my cool notes",
LastStatusModifyTime: a2.Now,
}
if got, want := result, expected; !verify.Values(t, "", got, want) {
t.FailNow()
Expand Down
11 changes: 7 additions & 4 deletions agent/check.go
@@ -1,6 +1,8 @@
package agent

import (
"time"

"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/types"
)
Expand All @@ -18,8 +20,9 @@ type persistedCheck struct {
// expiration timestamp which is used to determine staleness on later
// agent restarts.
type persistedCheckState struct {
CheckID types.CheckID
Output string
Status string
Expires int64
CheckID types.CheckID
Output string
Status string
LastStatusModifyTime time.Time
Expires int64
}
6 changes: 3 additions & 3 deletions agent/consul/client_test.go
Expand Up @@ -365,7 +365,7 @@ func TestClient_RPC_TLS(t *testing.T) {
conf1.VerifyIncoming = true
conf1.VerifyOutgoing = true
configureTLS(conf1)
s1, err := NewServer(conf1)
s1, err := NewServer(conf1, time.Now)
if err != nil {
t.Fatalf("err: %v", err)
}
Expand Down Expand Up @@ -408,7 +408,7 @@ func TestClient_RPC_TLS(t *testing.T) {
func TestClient_RPC_RateLimit(t *testing.T) {
t.Parallel()
dir1, conf1 := testServerConfig(t)
s1, err := NewServer(conf1)
s1, err := NewServer(conf1, time.Now)
if err != nil {
t.Fatalf("err: %v", err)
}
Expand Down Expand Up @@ -518,7 +518,7 @@ func TestClient_SnapshotRPC_TLS(t *testing.T) {
conf1.VerifyIncoming = true
conf1.VerifyOutgoing = true
configureTLS(conf1)
s1, err := NewServer(conf1)
s1, err := NewServer(conf1, time.Now)
if err != nil {
t.Fatalf("err: %v", err)
}
Expand Down
22 changes: 12 additions & 10 deletions agent/consul/leader.go
Expand Up @@ -1311,11 +1311,12 @@ AFTER_CHECK:
Address: member.Addr.String(),
Service: service,
Check: &structs.HealthCheck{
Node: member.Name,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthPassing,
Output: structs.SerfCheckAliveOutput,
Node: member.Name,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthPassing,
Output: structs.SerfCheckAliveOutput,
LastStatusModifyTime: s.clock(),
},

// If there's existing information about the node, do not
Expand Down Expand Up @@ -1356,11 +1357,12 @@ func (s *Server) handleFailedMember(member serf.Member) error {
ID: types.NodeID(member.Tags["id"]),
Address: member.Addr.String(),
Check: &structs.HealthCheck{
Node: member.Name,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthCritical,
Output: structs.SerfCheckFailedOutput,
Node: member.Name,
CheckID: structs.SerfCheckID,
Name: structs.SerfCheckName,
Status: api.HealthCritical,
Output: structs.SerfCheckFailedOutput,
LastStatusModifyTime: s.clock(),
},

// If there's existing information about the node, do not
Expand Down
10 changes: 7 additions & 3 deletions agent/consul/server.go
Expand Up @@ -248,17 +248,20 @@ type Server struct {
shutdownCh chan struct{}
shutdownLock sync.Mutex

// clock is used to timestamp healthchecks
clock func() time.Time

// embedded struct to hold all the enterprise specific data
EnterpriseServer
}

func NewServer(config *Config) (*Server, error) {
return NewServerLogger(config, nil, new(token.Store))
func NewServer(config *Config, clock func() time.Time) (*Server, error) {
return NewServerLogger(config, nil, new(token.Store), clock)
}

// NewServerLogger is used to construct a new Consul server from the
// configuration, potentially returning an error
func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store) (*Server, error) {
func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, clock func() time.Time) (*Server, error) {
// Check the protocol version.
if err := config.CheckProtocolVersion(); err != nil {
return nil, err
Expand Down Expand Up @@ -346,6 +349,7 @@ func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store) (*
tombstoneGC: gc,
serverLookup: NewServerLookup(),
shutdownCh: shutdownCh,
clock: clock,
}

// Initialize enterprise specific server functionality
Expand Down
2 changes: 1 addition & 1 deletion agent/consul/server_test.go
Expand Up @@ -175,7 +175,7 @@ func newServer(c *Config) (*Server, error) {
w = os.Stderr
}
logger := log.New(w, c.NodeName+" - ", log.LstdFlags|log.Lmicroseconds)
srv, err := NewServerLogger(c, logger, new(token.Store))
srv, err := NewServerLogger(c, logger, new(token.Store), time.Now)
if err != nil {
return nil, err
}
Expand Down
19 changes: 11 additions & 8 deletions agent/consul/state/catalog_test.go
Expand Up @@ -6,6 +6,7 @@ import (
"sort"
"strings"
"testing"
"time"

"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
Expand Down Expand Up @@ -2130,16 +2131,18 @@ func TestStateStore_Service_Snapshot(t *testing.T) {
func TestStateStore_EnsureCheck(t *testing.T) {
s := testStateStore(t)

now := time.Now()
// Create a check associated with the node
check := &structs.HealthCheck{
Node: "node1",
CheckID: "check1",
Name: "redis check",
Status: api.HealthPassing,
Notes: "test check",
Output: "aaa",
ServiceID: "service1",
ServiceName: "redis",
Node: "node1",
CheckID: "check1",
Name: "redis check",
Status: api.HealthPassing,
Notes: "test check",
Output: "aaa",
ServiceID: "service1",
ServiceName: "redis",
LastStatusModifyTime: now,
}

// Creating a check without a node returns error
Expand Down
9 changes: 8 additions & 1 deletion agent/local/state.go
Expand Up @@ -214,10 +214,12 @@ type State struct {
// are sent a message each time a proxy changes via Add or RemoveProxy.
managedProxies map[string]*ManagedProxy
managedProxyHandlers map[chan<- struct{}]struct{}

clock func() time.Time
}

// NewState creates a new local state for the agent.
func NewState(c Config, lg *log.Logger, tokens *token.Store) *State {
func NewState(c Config, lg *log.Logger, tokens *token.Store, clock func() time.Time) *State {
l := &State{
config: c,
logger: lg,
Expand All @@ -229,6 +231,7 @@ func NewState(c Config, lg *log.Logger, tokens *token.Store) *State {
notifyHandlers: make(map[chan<- struct{}]struct{}),
managedProxies: make(map[string]*ManagedProxy),
managedProxyHandlers: make(map[chan<- struct{}]struct{}),
clock: clock,
}
l.SetDiscardCheckOutput(c.DiscardCheckOutput)
return l
Expand Down Expand Up @@ -434,6 +437,9 @@ func (l *State) AddCheck(check *structs.HealthCheck, token string) error {
// hard-set the node name
check.Node = l.config.NodeName

// init last status modify time
check.LastStatusModifyTime = l.clock().UTC()

l.SetCheckState(&CheckState{
Check: check,
Token: token,
Expand Down Expand Up @@ -572,6 +578,7 @@ func (l *State) UpdateCheck(id types.CheckID, status, output string) {
// Update status and mark out of sync
c.Check.Status = status
c.Check.Output = output
c.Check.LastStatusModifyTime = l.clock().UTC()
c.InSync = false
l.TriggerSyncChanges()
}
Expand Down

0 comments on commit e90e8c9

Please sign in to comment.