-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
register.go
303 lines (261 loc) · 8.63 KB
/
register.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
package proxy
import (
"fmt"
"strings"
"sync"
"time"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-hclog"
)
const (
// RegisterReconcilePeriod is how often the monitor will attempt to
// reconcile the expected service state with the remote Consul server.
RegisterReconcilePeriod = 30 * time.Second
// RegisterTTLPeriod is the TTL setting for the health check of the
// service. The monitor will automatically pass the health check
// three times per this period to be more resilient to failures.
RegisterTTLPeriod = 30 * time.Second
)
// RegisterMonitor registers the proxy with the local Consul agent with a TTL
// health check that is kept alive.
//
// This struct should be initialized with NewRegisterMonitor instead of being
// allocated directly. Using this struct without calling NewRegisterMonitor
// will result in panics.
type RegisterMonitor struct {
// Logger is the logger for the monitor.
Logger hclog.Logger
// Client is the API client to a specific Consul agent. This agent is
// where the service will be registered.
Client *api.Client
// Service is the name of the service being proxied.
Service string
// LocalAddress and LocalPort are the address and port of the proxy
// itself, NOT the service being proxied.
LocalAddress string
LocalPort int
// IDSuffix is a unique ID that is appended to the end of the service
// name. This helps the service be unique. By default the service ID
// is just the proxied service name followed by "-proxy".
IDSuffix string
// The fields below are related to timing settings. See the default
// constants for more documentation on what they set.
ReconcilePeriod time.Duration
TTLPeriod time.Duration
// lock is held while reading/writing any internal state of the monitor.
// cond is a condition variable on lock that is broadcasted for runState
// changes.
lock *sync.Mutex
cond *sync.Cond
// runState is the current state of the monitor. To read this the
// lock must be held. The condition variable cond can be waited on
// for changes to this value.
runState registerRunState
}
// registerState is the state of the RegisterMonitor.
//
// This is a basic state machine with the following transitions:
//
// - idle => running, stopped
// - running => stopping, stopped
// - stopping => stopped
// - stopped => <>
type registerRunState uint8
const (
registerStateIdle registerRunState = iota
registerStateRunning
registerStateStopping
registerStateStopped
)
// NewRegisterMonitor initializes a RegisterMonitor. After initialization,
// the exported fields should be configured as desired. To start the monitor,
// execute Run in a goroutine.
func NewRegisterMonitor(logger hclog.Logger) *RegisterMonitor {
var lock sync.Mutex
if logger == nil {
logger = hclog.New(&hclog.LoggerOptions{})
}
return &RegisterMonitor{
Logger: logger, // default logger
ReconcilePeriod: RegisterReconcilePeriod,
TTLPeriod: RegisterTTLPeriod,
lock: &lock,
cond: sync.NewCond(&lock),
}
}
// Run should be started in a goroutine and will keep Consul updated
// in the background with the state of this proxy. If registration fails
// this will continue to retry.
func (r *RegisterMonitor) Run() {
// Grab the lock and set our state. If we're not idle, then we return
// immediately since the monitor is only allowed to run once.
r.lock.Lock()
if r.runState != registerStateIdle {
r.lock.Unlock()
return
}
r.runState = registerStateRunning
r.lock.Unlock()
// Start a goroutine that just waits for a stop request
stopCh := make(chan struct{})
go func() {
defer close(stopCh)
r.lock.Lock()
defer r.lock.Unlock()
// We wait for anything not running, just so we're more resilient
// in the face of state machine issues. Basically any state change
// will cause us to quit.
for r.runState == registerStateRunning {
r.cond.Wait()
}
}()
// When we exit, we set the state to stopped and broadcast to any
// waiting Close functions that they can return.
defer func() {
r.lock.Lock()
r.runState = registerStateStopped
r.cond.Broadcast()
r.lock.Unlock()
}()
// Run the first registration optimistically. If this fails then its
// okay since we'll just retry shortly.
r.register()
// Create the timers for trigger events. We don't use tickers because
// we don't want the events to pile on.
reconcileTimer := time.NewTimer(r.ReconcilePeriod)
heartbeatTimer := time.NewTimer(r.TTLPeriod / 3)
for {
select {
case <-reconcileTimer.C:
r.register()
reconcileTimer.Reset(r.ReconcilePeriod)
case <-heartbeatTimer.C:
r.heartbeat()
heartbeatTimer.Reset(r.TTLPeriod / 3)
case <-stopCh:
r.Logger.Info("stop request received, deregistering")
r.deregister()
return
}
}
}
// register queries the Consul agent to determine if we've already registered.
// If we haven't or the registered service differs from what we're trying to
// register, then we attempt to register our service.
func (r *RegisterMonitor) register() {
catalog := r.Client.Catalog()
serviceID := r.serviceID()
serviceName := r.serviceName()
// Determine the current state of this service in Consul
var currentService *api.CatalogService
services, _, err := catalog.Service(
serviceName, "",
&api.QueryOptions{AllowStale: true})
if err == nil {
for _, service := range services {
if serviceID == service.ServiceID {
currentService = service
break
}
}
}
// If we have a matching service, then we verify if we need to reregister
// by comparing if it matches what we expect.
if currentService != nil &&
currentService.ServiceAddress == r.LocalAddress &&
currentService.ServicePort == r.LocalPort {
r.Logger.Debug("service already registered, not re-registering")
return
}
// If we're here, then we're registering the service.
err = r.Client.Agent().ServiceRegister(&api.AgentServiceRegistration{
Kind: api.ServiceKindConnectProxy,
ID: serviceID,
Name: serviceName,
Address: r.LocalAddress,
Port: r.LocalPort,
Proxy: &api.AgentServiceConnectProxyConfig{
DestinationServiceName: r.Service,
},
Check: &api.AgentServiceCheck{
CheckID: r.checkID(),
Name: "proxy heartbeat",
TTL: "30s",
Notes: "Built-in proxy will heartbeat this check.",
Status: "passing",
},
})
if err != nil {
r.Logger.Warn("Failed to register Consul service", "error", err)
return
}
r.Logger.Info("registered Consul service", "service", serviceID)
}
// heartbeat just pings the TTL check for our service.
func (r *RegisterMonitor) heartbeat() {
// Trigger the health check passing. We don't need to retry this
// since we do a couple tries within the TTL period.
if err := r.Client.Agent().PassTTL(r.checkID(), ""); err != nil {
if !strings.Contains(err.Error(), "does not have associated") {
r.Logger.Warn("heartbeat failed", "error", err)
}
}
}
// deregister deregisters the service.
func (r *RegisterMonitor) deregister() {
// Basic retry loop, no backoff for now. But we want to retry a few
// times just in case there are basic ephemeral issues.
for i := 0; i < 3; i++ {
err := r.Client.Agent().ServiceDeregister(r.serviceID())
if err == nil {
return
}
r.Logger.Warn("service deregister failed", "error", err)
time.Sleep(500 * time.Millisecond)
}
}
// Close stops the register goroutines and deregisters the service. Once
// Close is called, the monitor can no longer be used again. It is safe to
// call Close multiple times and concurrently.
func (r *RegisterMonitor) Close() error {
r.lock.Lock()
defer r.lock.Unlock()
for {
switch r.runState {
case registerStateIdle:
// Idle so just set it to stopped and return. We notify
// the condition variable in case others are waiting.
r.runState = registerStateStopped
r.cond.Broadcast()
return nil
case registerStateRunning:
// Set the state to stopping and broadcast to all waiters,
// since Run is sitting on cond.Wait.
r.runState = registerStateStopping
r.cond.Broadcast()
r.cond.Wait() // Wait on the stopping event
case registerStateStopping:
// Still stopping, wait...
r.cond.Wait()
case registerStateStopped:
// Stopped, target state reached
return nil
}
}
}
// serviceID returns the unique ID for this proxy service.
func (r *RegisterMonitor) serviceID() string {
id := fmt.Sprintf("%s-proxy", r.Service)
if r.IDSuffix != "" {
id += "-" + r.IDSuffix
}
return id
}
// serviceName returns the non-unique name of this proxy service.
func (r *RegisterMonitor) serviceName() string {
return fmt.Sprintf("%s-proxy", r.Service)
}
// checkID is the unique ID for the registered health check.
func (r *RegisterMonitor) checkID() string {
return fmt.Sprintf("%s-ttl", r.serviceID())
}