-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
service.go
325 lines (291 loc) · 11.1 KB
/
service.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
package connect
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"net"
"net/http"
"time"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/api/watch"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/go-hclog"
"golang.org/x/net/http2"
)
// Service is a Consul service that participates in Connect. It may act purely
// as a server (accepting connections), purely as a client (dialing out), or as
// both at once.
//
// TODO(banks): Agent implicit health checks based on knowing which certs are
// available should prevent clients being routed until the agent knows the
// service has been delivered valid certificates. Once built, document that here
// too.
type Service struct {
	// service holds the Consul service name (not its ID). It is the name used
	// when requesting Connect metadata.
	service string

	// client is the Consul API client. Its Token must carry `service:write`
	// policy on the service above. When the token is insufficient the Service
	// gives up fetching certificates and logs a loud error; it deliberately
	// does not Close() or kill the process, because a revoked ACL token would
	// otherwise put every service into a crash loop. In that state every dial
	// attempt errors and every incoming connection fails verification. The
	// field may be nil when the Service is configured from local files for
	// development or testing.
	client *api.Client

	// tlsCfg is the dynamic TLS config.
	tlsCfg *dynamicTLSConfig

	// httpResolverFromAddr turns the string address used by HTTP clients into
	// a Resolver. It is privately pluggable to keep tests simple; the default
	// parses the host as a Consul DNS host.
	httpResolverFromAddr func(addr string) (Resolver, error)

	// rootsWatch and leafWatch are the "connect_roots" and "connect_leaf"
	// watch plans. NewServiceWithLogger creates and runs them, and Close stops
	// whichever of them is non-nil.
	rootsWatch, leafWatch *watch.Plan

	// logger is the logger the Service writes its messages to.
	logger hclog.Logger
}
// NewService builds a Service and starts it, logging through a freshly created
// hclog logger with default options. The returned Service must be closed by
// the caller so that resources are released and the program can exit normally;
// that is typically done from a signal handler.
//
// client must already be configured to talk to the local Consul agent and must
// carry an ACL token with `service:write` privileges for serviceName.
func NewService(serviceName string, client *api.Client) (*Service, error) {
	return NewServiceWithLogger(serviceName, client, hclog.New(&hclog.LoggerOptions{}))
}
// NewServiceWithLogger builds a Service that logs through the supplied
// hclog.Logger and starts it.
//
// The Service's own logger is derived from logger (named for Connect and
// tagged with the service name). Two watch plans are parsed, one for the
// Connect CA roots and one for this service's leaf certificate, and each is
// run in its own goroutine against client. An error is returned only when one
// of the watch plans cannot be parsed; in that case nothing has been started.
func NewServiceWithLogger(serviceName string, client *api.Client,
	logger hclog.Logger) (*Service, error) {
	svc := &Service{
		service:              serviceName,
		client:               client,
		logger:               logger.Named(logging.Connect).With("service", serviceName),
		tlsCfg:               newDynamicTLSConfig(defaultTLSConfig(), logger),
		httpResolverFromAddr: ConsulResolverFromAddrFunc(client),
	}

	// Watch for changes to the CA roots.
	rootsPlan, err := watch.Parse(map[string]interface{}{
		"type": "connect_roots",
	})
	if err != nil {
		return nil, err
	}
	rootsPlan.HybridHandler = svc.rootsWatchHandler
	svc.rootsWatch = rootsPlan

	// Watch for changes to this service's leaf certificate.
	leafPlan, err := watch.Parse(map[string]interface{}{
		"type":    "connect_leaf",
		"service": svc.service,
	})
	if err != nil {
		return nil, err
	}
	leafPlan.HybridHandler = svc.leafWatchHandler
	svc.leafWatch = leafPlan

	// Both plans are valid, so start them in the background. Close stops them.
	go svc.rootsWatch.RunWithClientAndHclog(client, svc.logger)
	go svc.leafWatch.RunWithClientAndHclog(client, svc.logger)

	return svc, nil
}
// NewDevServiceFromCertFiles builds a Service whose TLS material is read from
// the given CA, certificate and key files rather than fetched through a Consul
// client. serviceID is passed on as the service name of the resulting Service.
// Any error from loading the files is returned unchanged.
func NewDevServiceFromCertFiles(serviceID string, logger hclog.Logger,
	caFile, certFile, keyFile string) (*Service, error) {
	staticCfg, loadErr := devTLSConfigFromFiles(caFile, certFile, keyFile)
	if loadErr != nil {
		return nil, loadErr
	}
	return NewDevServiceWithTLSConfig(serviceID, logger, staticCfg)
}
// NewDevServiceWithTLSConfig builds a Service around a caller-supplied, static
// TLS config. No Consul client, resolver or watches are set up, which makes it
// mostly useful for testing. The returned error is always nil.
func NewDevServiceWithTLSConfig(serviceName string, logger hclog.Logger,
	tlsCfg *tls.Config) (*Service, error) {
	return &Service{
		service: serviceName,
		logger:  logger,
		tlsCfg:  newDynamicTLSConfig(tlsCfg, logger),
	}, nil
}
// Name reports which service this object represents. The value is the service
// _name_ used during discovery, not the ID that uniquely identifies one
// instance of the service with an agent.
func (s *Service) Name() string {
	name := s.service
	return name
}
// ServerTLSConfig returns a *tls.Config with which any TCP listener can accept
// and authorize incoming Connect clients. The config is a single static value
// carrying hooks that load certificates dynamically and perform Connect
// authorization during verification, so service implementations never need to
// fetch it again to pick up new certificates.
//
// The Service may at any moment lack usable certificates, either because it
// has not finished initial setup or because of a prolonged error during
// renewal. Once connectivity is restored the listener can accept connections
// again, provided the client's Token is valid.
//
// To keep traffic away from the app instance while its certificates are
// invalid or not yet populated, use Ready in a health check endpoint and/or
// ReadyWait during startup before starting the TLS listener. ReadyWait only
// guards the initial bootstrap (including permission problems where bad
// credentials mean certs can never be issued); it does not cover certificates
// expiring while an error prevents timely renewal.
func (s *Service) ServerTLSConfig() *tls.Config {
	verifier := newServerSideVerifier(s.logger, s.client, s.service)
	return s.tlsCfg.Get(verifier)
}
// Dial opens a connection to a remote Connect-enabled server. resolver is asked
// for a single candidate instance; that instance is dialed over TCP, a TLS
// handshake is performed, and the server's certificate is verified against the
// identity the resolver expects. Any failure is returned directly without
// retrying. Repeated calls may reach different instances, depending on the
// Resolver implementation.
//
// Timeouts are controlled through ctx: it is passed to the resolver and to the
// TCP dial, and its deadline (if it has one) bounds the TLS handshake. The
// deadline is cleared from the connection before it is returned, so callers
// can set their own later.
//
// Calls made before the Service has loaded certificates from the agent will
// fail. Use Ready or ReadyWait during app startup to avoid that.
func (s *Service) Dial(ctx context.Context, resolver Resolver) (net.Conn, error) {
	addr, certURI, err := resolver.Resolve(ctx)
	if err != nil {
		return nil, err
	}
	s.logger.Debug("resolved service instance",
		"address", addr,
		"identity", certURI.URI(),
	)

	dialer := &net.Dialer{}
	rawConn, err := dialer.DialContext(ctx, "tcp", addr)
	if err != nil {
		return nil, err
	}

	conn := tls.Client(rawConn, s.tlsCfg.Get(clientSideVerifier))

	// Bound the handshake by the context's deadline when one is set.
	if deadline, hasDeadline := ctx.Deadline(); hasDeadline {
		conn.SetDeadline(deadline)
	}
	if err := conn.Handshake(); err != nil {
		conn.Close()
		return nil, err
	}
	// The deadline only applied to establishing the connection; remove it so
	// the caller can choose their own deadlines afterwards.
	conn.SetDeadline(time.Time{})

	// The peer must present a certificate matching the resolved identity.
	peerCerts := conn.ConnectionState().PeerCertificates
	if err := verifyServerCertMatchesURI(peerCerts, certURI); err != nil {
		conn.Close()
		return nil, err
	}

	s.logger.Debug("successfully connected to service instance",
		"address", addr,
		"identity", certURI.URI(),
	)
	return conn, nil
}
// HTTPDialTLS has the signature required by http.Transport.DialTLS. The addr
// hostname is expected to use Consul DNS query syntax, e.g.
// "web.service.consul"; it is converted into the equivalent Resolver, which is
// then handed to s.Dial. The network argument is not used. This is a low-level
// hook; clients should normally use HTTPClient instead.
//
// An error is returned when the Service has no HTTP resolver configured, when
// addr cannot be turned into a resolver, or when the dial itself fails.
func (s *Service) HTTPDialTLS(network,
	addr string) (net.Conn, error) {
	resolverFor := s.httpResolverFromAddr
	if resolverFor == nil {
		return nil, errors.New("no http resolver configured")
	}

	target, err := resolverFor(addr)
	if err != nil {
		return nil, err
	}

	// TODO(banks): figure out how to do timeouts better.
	return s.Dial(context.Background(), target)
}
// HTTPClient builds an *http.Client set up to dial remote Consul Connect HTTP
// services. Requests to a non-HTTPS hostname make the client return an error.
// The request's domain is resolved using the same syntax as Consul DNS
// queries, although discovery goes directly through the API instead of relying
// on Consul DNS. Hostnames that are not valid Consul DNS queries fail.
func (s *Service) HTTPClient() *http.Client {
	// The DialContext hook is unusable here: it is expected to hand back a
	// plain TCP connection over which http.Client then starts its own TLS
	// handshake, whereas we must drive the handshake ourselves in order to run
	// our validation. That leaves the older DialTLS hook, which has no
	// context/timeout support.
	//
	// TODO(banks): figure out how users can configure a timeout when using
	// this and/or compatibility with http.Request.WithContext.
	transport := &http.Transport{DialTLS: s.HTTPDialTLS}

	// Setting a custom DialTLS means http2 support has to be re-enabled by hand.
	// See https://golang.org/src/net/http/transport.go?s=8692:9036#L228
	http2.ConfigureTransport(transport)

	httpClient := &http.Client{Transport: transport}
	return httpClient
}
// Close shuts the service down and releases its resources by stopping the
// roots watch and then the leaf watch, skipping any that was never created.
// The returned error is always nil.
func (s *Service) Close() error {
	for _, plan := range []*watch.Plan{s.rootsWatch, s.leafWatch} {
		if plan == nil {
			continue
		}
		plan.Stop()
	}
	return nil
}
// rootsWatchHandler is the handler installed on the "connect_roots" watch. It
// builds a fresh certificate pool from the delivered CA roots and installs it
// on the dynamic TLS config.
//
// blockParam is unused. raw is expected to be an *api.CARootList: a nil raw is
// ignored silently, while a value of any other type (or a nil list) is logged
// as an error and dropped. A root whose PEM yields no certificate is logged and
// skipped; the pool built from the remaining roots is still installed.
func (s *Service) rootsWatchHandler(blockParam watch.BlockingParamVal, raw interface{}) {
	if raw == nil {
		return
	}
	v, ok := raw.(*api.CARootList)
	if !ok || v == nil {
		s.logger.Error("got invalid response from root watch")
		return
	}

	// Got new root certificates, update the tls.Configs.
	roots := x509.NewCertPool()
	for i, root := range v.Roots {
		// AppendCertsFromPEM reports false when it could not parse any
		// certificate from the input. Surface that instead of silently
		// dropping a trust root from the pool.
		if !roots.AppendCertsFromPEM([]byte(root.RootCertPEM)) {
			s.logger.Error("failed to parse root certificate from root watch", "index", i)
		}
	}
	s.tlsCfg.SetRoots(roots)
}
// leafWatchHandler is the handler installed on the "connect_leaf" watch. It
// parses the delivered leaf certificate and private key and installs the
// resulting key pair on the dynamic TLS config.
//
// blockParam is unused. raw is expected to be an *api.LeafCert: a nil raw is
// ignored silently, while a value of any other type (or a nil cert) is logged
// as an error and dropped. If the PEM data cannot be parsed into a key pair the
// error is logged and the TLS config is left untouched.
func (s *Service) leafWatchHandler(blockParam watch.BlockingParamVal, raw interface{}) {
	if raw == nil {
		return // ignore
	}

	leaf, isLeaf := raw.(*api.LeafCert)
	if !isLeaf || leaf == nil {
		s.logger.Error("got invalid response from leaf watch")
		return
	}

	// Got new leaf, update the tls.Configs
	certPEM, keyPEM := []byte(leaf.CertPEM), []byte(leaf.PrivateKeyPEM)
	keyPair, err := tls.X509KeyPair(certPEM, keyPEM)
	if err != nil {
		s.logger.Error("failed to parse new leaf cert", "error", err)
		return
	}
	s.tlsCfg.SetLeaf(&keyPair)
}
// Ready reports whether both the roots and a leaf certificate are configured.
// When both are non-nil they are assumed to be valid and usable; no further
// validation is implied.
func (s *Service) Ready() bool {
	ready := s.tlsCfg.Ready()
	return ready
}
// ReadyWait returns a chan that is closed the first time the Service becomes
// ready for use, where ready means that root and leaf certificates are
// configured (they are assumed to be valid). As documented, if the Service is
// already ready when ReadyWait is called a nil chan is returned, and that keeps
// being the case even if the Service later stops being "ready" because its
// certificates expire or are revoked and an error prevents new ones from being
// loaded. The method is therefore only useful during initial startup; use
// Ready() for ongoing health.
//
// NOTE(review): the nil-chan behaviour lives in the TLS config's ReadyWait,
// which is not visible here. Confirm it, since a receive on a nil chan blocks
// forever.
func (s *Service) ReadyWait() <-chan struct{} {
	readyCh := s.tlsCfg.ReadyWait()
	return readyCh
}