forked from hashicorp/consul
/
resolver.go
342 lines (283 loc) · 9.93 KB
/
resolver.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package resolver
import (
"fmt"
"strings"
"sync"
"google.golang.org/grpc/resolver"
"github.com/hernad/consul/agent/metadata"
"github.com/hernad/consul/types"
)
// ServerResolverBuilder tracks the current server list and keeps any
// ServerResolvers updated when changes occur.
type ServerResolverBuilder struct {
cfg Config
// leaderResolver is used to track the address of the leader in the local DC.
leaderResolver leaderResolver
// servers is an index of Servers by area and Server.ID. The map contains server IDs
// for all datacenters.
servers map[types.AreaID]map[string]*metadata.Server
// resolvers is an index of connections to the serverResolver which manages
// addresses of servers for that connection.
//
// this is only applicable for non-leader conn types
resolvers map[resolver.ClientConn]*serverResolver
// lock for all stateful fields (excludes config which is immutable).
lock sync.RWMutex
}
type Config struct {
// Datacenter is the datacenter of this agent.
Datacenter string
// AgentType is either 'server' or 'client' and is required.
AgentType string
// Authority used to query the server. Defaults to "". Used to support
// parallel testing because gRPC registers resolvers globally.
Authority string
}
func NewServerResolverBuilder(cfg Config) *ServerResolverBuilder {
if cfg.Datacenter == "" {
panic("ServerResolverBuilder needs Config.Datacenter to be nonempty")
}
switch cfg.AgentType {
case "server", "client":
default:
panic("ServerResolverBuilder needs Config.AgentType to be either server or client")
}
return &ServerResolverBuilder{
cfg: cfg,
servers: make(map[types.AreaID]map[string]*metadata.Server),
resolvers: make(map[resolver.ClientConn]*serverResolver),
}
}
// ServerForGlobalAddr returns server metadata for a server with the specified globally unique address.
func (s *ServerResolverBuilder) ServerForGlobalAddr(globalAddr string) (*metadata.Server, error) {
s.lock.RLock()
defer s.lock.RUnlock()
for _, areaServers := range s.servers {
for _, server := range areaServers {
if DCPrefix(server.Datacenter, server.Addr.String()) == globalAddr {
return server, nil
}
}
}
return nil, fmt.Errorf("failed to find Consul server for global address %q", globalAddr)
}
// Build returns a new serverResolver for the given ClientConn. The resolver
// will keep the ClientConn's state updated based on updates from Serf.
func (s *ServerResolverBuilder) Build(target resolver.Target, cc resolver.ClientConn, _ resolver.BuildOptions) (resolver.Resolver, error) {
s.lock.Lock()
defer s.lock.Unlock()
// If there's already a resolver for this connection, return it.
// TODO(streaming): how would this happen since we already cache connections in ClientConnPool?
if cc == s.leaderResolver.clientConn {
return s.leaderResolver, nil
}
if resolver, ok := s.resolvers[cc]; ok {
return resolver, nil
}
//nolint:staticcheck
serverType, datacenter, err := parseEndpoint(target.Endpoint())
if err != nil {
return nil, err
}
if serverType == "leader" {
// TODO: is this safe? can we ever have multiple CC for the leader? Seems
// like we can only have one given the caching in ClientConnPool.Dial
s.leaderResolver.clientConn = cc
s.leaderResolver.updateClientConn()
return s.leaderResolver, nil
}
// Make a new resolver for the dc and add it to the list of active ones.
resolver := &serverResolver{
datacenter: datacenter,
clientConn: cc,
close: func() {
s.lock.Lock()
defer s.lock.Unlock()
delete(s.resolvers, cc)
},
}
resolver.updateAddrs(s.getDCAddrs(datacenter))
s.resolvers[cc] = resolver
return resolver, nil
}
// parseEndpoint parses a string, expecting a format of "serverType.datacenter"
func parseEndpoint(target string) (string, string, error) {
parts := strings.SplitN(target, ".", 2)
if len(parts) != 2 {
return "", "", fmt.Errorf("unexpected endpoint address: %v", target)
}
return parts[0], parts[1], nil
}
func (s *ServerResolverBuilder) Authority() string {
return s.cfg.Authority
}
// AddServer updates the resolvers' states to include the new server's address.
func (s *ServerResolverBuilder) AddServer(areaID types.AreaID, server *metadata.Server) {
if s.shouldIgnoreServer(areaID, server) {
return
}
s.lock.Lock()
defer s.lock.Unlock()
areaServers, ok := s.servers[areaID]
if !ok {
areaServers = make(map[string]*metadata.Server)
s.servers[areaID] = areaServers
}
areaServers[uniqueID(server)] = server
addrs := s.getDCAddrs(server.Datacenter)
for _, resolver := range s.resolvers {
if resolver.datacenter == server.Datacenter {
resolver.updateAddrs(addrs)
}
}
}
// uniqueID returns a unique identifier for the server which includes the
// Datacenter and the ID.
//
// In practice it is expected that the server.ID is already a globally unique
// UUID. This function is an extra safeguard in case that ever changes.
func uniqueID(server *metadata.Server) string {
return server.Datacenter + "-" + server.ID
}
// DCPrefix prefixes the given string with a datacenter for use in
// disambiguation.
func DCPrefix(datacenter, suffix string) string {
return datacenter + "-" + suffix
}
// RemoveServer updates the resolvers' states with the given server removed.
func (s *ServerResolverBuilder) RemoveServer(areaID types.AreaID, server *metadata.Server) {
if s.shouldIgnoreServer(areaID, server) {
return
}
s.lock.Lock()
defer s.lock.Unlock()
areaServers, ok := s.servers[areaID]
if !ok {
return // already gone
}
delete(areaServers, uniqueID(server))
if len(areaServers) == 0 {
delete(s.servers, areaID)
}
addrs := s.getDCAddrs(server.Datacenter)
for _, resolver := range s.resolvers {
if resolver.datacenter == server.Datacenter {
resolver.updateAddrs(addrs)
}
}
}
// shouldIgnoreServer is used to contextually decide if a particular kind of
// server should be accepted into a given area.
//
// On client agents it's pretty easy: clients only participate in the standard
// LAN, so we only accept servers from the LAN.
//
// On server agents it's a little less obvious. This resolver is ultimately
// used to have servers dial other servers. If a server is going to cross
// between datacenters (using traditional federation) then we want to use the
// WAN addresses for them, but if a server is going to dial a sibling server in
// the same datacenter we want it to use the LAN addresses always. To achieve
// that here we simply never allow WAN servers for our current datacenter to be
// added into the resolver, letting only the LAN instances through.
func (s *ServerResolverBuilder) shouldIgnoreServer(areaID types.AreaID, server *metadata.Server) bool {
if s.cfg.AgentType == "client" && areaID != types.AreaLAN {
return true
}
if s.cfg.AgentType == "server" &&
server.Datacenter == s.cfg.Datacenter &&
areaID != types.AreaLAN {
return true
}
return false
}
// getDCAddrs returns a list of the server addresses for the given datacenter.
// This method requires that lock is held for reads.
func (s *ServerResolverBuilder) getDCAddrs(dc string) []resolver.Address {
lanRequest := (s.cfg.Datacenter == dc)
var (
addrs []resolver.Address
keptServerIDs = make(map[string]struct{})
)
for areaID, areaServers := range s.servers {
if (areaID == types.AreaLAN) != lanRequest {
// LAN requests only look at LAN data. WAN requests only look at
// WAN data.
continue
}
for _, server := range areaServers {
if server.Datacenter != dc {
continue
}
// Servers may be part of multiple areas, so only include each one once.
if _, ok := keptServerIDs[server.ID]; ok {
continue
}
keptServerIDs[server.ID] = struct{}{}
addrs = append(addrs, resolver.Address{
// NOTE: the address persisted here is only dialable using our custom dialer
Addr: DCPrefix(server.Datacenter, server.Addr.String()),
ServerName: server.Name,
})
}
}
return addrs
}
// UpdateLeaderAddr updates the leader address in the local DC's resolver.
func (s *ServerResolverBuilder) UpdateLeaderAddr(datacenter, addr string) {
s.lock.Lock()
defer s.lock.Unlock()
s.leaderResolver.globalAddr = DCPrefix(datacenter, addr)
s.leaderResolver.updateClientConn()
}
// serverResolver is a grpc Resolver that will keep a grpc.ClientConn up to date
// on the list of server addresses to use.
type serverResolver struct {
// datacenter that can be reached by the clientConn. Used by ServerResolverBuilder
// to filter resolvers for those in a specific datacenter.
datacenter string
// clientConn that this resolver is providing addresses for.
clientConn resolver.ClientConn
// close is used by ServerResolverBuilder to remove this resolver from the
// index of resolvers. It is called by grpc when the connection is closed.
close func()
// addrs stores the list of addresses passed to updateAddrs, so that they
// can be rebalanced periodically by ServerResolverBuilder.
addrs []resolver.Address
addrLock sync.Mutex
}
var _ resolver.Resolver = (*serverResolver)(nil)
// updateAddrs updates this serverResolver's ClientConn to use the given set of
// addrs.
func (r *serverResolver) updateAddrs(addrs []resolver.Address) {
r.addrLock.Lock()
defer r.addrLock.Unlock()
r.clientConn.UpdateState(resolver.State{Addresses: addrs})
r.addrs = addrs
}
func (r *serverResolver) Close() {
r.close()
}
// ResolveNow is not used
func (*serverResolver) ResolveNow(options resolver.ResolveNowOptions) {}
type leaderResolver struct {
globalAddr string
clientConn resolver.ClientConn
}
func (l leaderResolver) ResolveNow(resolver.ResolveNowOptions) {}
func (l leaderResolver) Close() {}
func (l leaderResolver) updateClientConn() {
if l.globalAddr == "" || l.clientConn == nil {
return
}
addrs := []resolver.Address{
{
// NOTE: the address persisted here is only dialable using our custom dialer
Addr: l.globalAddr,
ServerName: "leader",
},
}
l.clientConn.UpdateState(resolver.State{Addresses: addrs})
}
var _ resolver.Resolver = (*leaderResolver)(nil)