-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
health.go
118 lines (107 loc) · 2.8 KB
/
health.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
package node
import (
"context"
"net/http"
"sync/atomic"
"time"
logging "github.com/ipfs/go-log/v2"
"github.com/libp2p/go-libp2p/core/network"
lapi "github.com/filecoin-project/lotus/api"
)
// healthlog is the logger for the health-check handlers in this file,
// registered under the name "healthcheck". NewLiveHandler uses it to report
// the progress and failures of its ChainNotify subscription.
var healthlog = logging.Logger("healthcheck")
// HealthHandler is an http.Handler that exposes a single boolean health flag
// as an HTTP status code. The flag is kept in an int32 (1 means healthy, any
// other value means unhealthy) and is only read and written through
// sync/atomic. The zero value reports unhealthy.
type HealthHandler struct {
	healthy int32
}

// SetHealthy atomically records the given health state: 1 when healthy is
// true, 0 otherwise.
func (h *HealthHandler) SetHealthy(healthy bool) {
	if healthy {
		atomic.StoreInt32(&h.healthy, 1)
		return
	}
	atomic.StoreInt32(&h.healthy, 0)
}

// ServeHTTP writes only a status header: 200 OK when the flag is set to
// healthy, 503 Service Unavailable otherwise. The request is not inspected
// and no body is written.
func (h *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	status := http.StatusServiceUnavailable
	if atomic.LoadInt32(&h.healthy) == 1 {
		status = http.StatusOK
	}
	w.WriteHeader(status)
}
// NewLiveHandler returns a HealthHandler that reports whether the node still
// appears to be working, i.e. still processing the chain. If there have been
// no recent head changes, the node is considered dead.
//
// It starts a background goroutine that subscribes to api.ChainNotify and
// updates the handler as follows:
//   - every value received from the subscription marks the handler healthy
//     and resets a countdown to 5;
//   - every tick of a one-minute ticker decrements the countdown, and the
//     handler is marked unhealthy once the countdown is at or below zero;
//   - if the subscription cannot be created, or its channel is closed, the
//     handler is marked unhealthy and the subscription is retried. Each
//     attempt is preceded by a wait that starts at one second, doubles after
//     every failed attempt up to one minute, and resets after a success.
//
// The goroutine uses context.Background() and has no exit path, so it runs
// for the lifetime of the process. The returned handler starts out unhealthy.
func NewLiveHandler(api lapi.FullNode) *HealthHandler {
	ctx := context.Background()
	h := HealthHandler{}

	go func() {
		const (
			reset      int32         = 5
			maxbackoff time.Duration = time.Minute
			minbackoff time.Duration = time.Second
		)

		var (
			// countdown is only ever touched by this goroutine, so plain
			// (non-atomic) reads and writes are sufficient.
			countdown int32
			headCh    <-chan []*lapi.HeadChange
			backoff   = minbackoff
		)

		minutely := time.NewTicker(time.Minute)

		for {
			if headCh == nil {
				healthlog.Infof("waiting %v before starting ChainNotify channel", backoff)
				time.Sleep(backoff)

				ch, err := api.ChainNotify(ctx)
				if err != nil {
					healthlog.Warnf("failed to instantiate ChainNotify channel; cannot determine liveness. %s", err)
					h.SetHealthy(false)

					// Double the wait for the next attempt, capped at maxbackoff.
					backoff *= 2
					if backoff > maxbackoff {
						backoff = maxbackoff
					}
					// headCh is left nil so the next iteration retries,
					// regardless of what was returned alongside the error.
					continue
				}

				headCh = ch
				healthlog.Infof("started ChainNotify channel")
				backoff = minbackoff
			}

			select {
			case <-minutely.C:
				countdown--
				if countdown <= 0 {
					h.SetHealthy(false)
				}
			case _, ok := <-headCh:
				if !ok { // channel is closed, enter reconnect loop.
					h.SetHealthy(false)
					headCh = nil
					continue
				}
				countdown = reset
				h.SetHealthy(true)
			}
		}
	}()

	return &h
}
// NewReadyHandler returns a HealthHandler that reports whether the node is
// ready to handle traffic. A background goroutine re-evaluates readiness on
// every tick of a one-minute ticker, and the handler is marked healthy only
// when both of the following hold:
//  1. sync is reasonably up to date: api.NodeStatus succeeds and its
//     SyncStatus.Behind is below heightTolerance (5);
//  2. libp2p is serviceable: api.NetAutoNatStatus succeeds and reports a
//     Reachability other than network.ReachabilityUnknown.
//
// No check runs before the first tick, and the goroutine has no exit path.
func NewReadyHandler(api lapi.FullNode) *HealthHandler {
	handler := &HealthHandler{}

	go func() {
		const heightTolerance = uint64(5)

		ctx := context.Background()
		ticker := time.NewTicker(time.Minute)

		for range ticker.C {
			// Both probes are issued on every tick, network first.
			natInfo, natErr := api.NetAutoNatStatus(ctx)
			netOK := natErr == nil && natInfo.Reachability != network.ReachabilityUnknown

			status, statusErr := api.NodeStatus(ctx, false)
			syncOK := statusErr == nil && status.SyncStatus.Behind < heightTolerance

			handler.SetHealthy(netOK && syncOK)
		}
	}()

	return handler
}