/
agent.go
871 lines (763 loc) · 29.4 KB
/
agent.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package istioagent
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net/netip"
"os"
"path"
"path/filepath"
"strings"
"sync"
"time"
"google.golang.org/api/option"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/protobuf/proto"
mesh "istio.io/api/mesh/v1alpha1"
"istio.io/istio/pilot/cmd/pilot-agent/config"
"istio.io/istio/pilot/cmd/pilot-agent/status/ready"
"istio.io/istio/pkg/backoff"
"istio.io/istio/pkg/bootstrap"
"istio.io/istio/pkg/bootstrap/platform"
"istio.io/istio/pkg/config/constants"
dnsClient "istio.io/istio/pkg/dns/client"
dnsProto "istio.io/istio/pkg/dns/proto"
"istio.io/istio/pkg/envoy"
common_features "istio.io/istio/pkg/features"
"istio.io/istio/pkg/filewatcher"
"istio.io/istio/pkg/istio-agent/grpcxds"
"istio.io/istio/pkg/log"
"istio.io/istio/pkg/model"
"istio.io/istio/pkg/security"
"istio.io/istio/pkg/wasm"
"istio.io/istio/security/pkg/nodeagent/cache"
"istio.io/istio/security/pkg/nodeagent/caclient"
citadel "istio.io/istio/security/pkg/nodeagent/caclient/providers/citadel"
gca "istio.io/istio/security/pkg/nodeagent/caclient/providers/google"
cas "istio.io/istio/security/pkg/nodeagent/caclient/providers/google-cas"
)
const (
	// k8sCAPath is the location of the K8S CA root.
	k8sCAPath = "./var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
	// k8sCAIstioMountedPath is the location of the K8s CA root mounted by istio.
	// This is to avoid issues when service account automount is disabled.
	// FindRootCAForXDS and FindRootCAForCA prefer this path over k8sCAPath when the file exists.
	k8sCAIstioMountedPath = "./var/run/secrets/istio/kubernetes/ca.crt"
	// CitadelCACertPath is the directory for Citadel CA certificate.
	// This is mounted from config map 'istio-ca-root-cert'. Part of startup,
	// this may be replaced with ./etc/certs, if a root-cert.pem is found, to
	// handle secrets mounted from non-citadel CAs.
	CitadelCACertPath = "./var/run/secrets/istio"
)
// Keys looked up in ProxyConfig.ProxyMetadata to locate file-mounted client
// certificates (see GetKeyCertsForXDS and FindRootCAForXDS).
const (
	// MetadataClientCertKey is ISTIO_META env var used for client key.
	MetadataClientCertKey = "ISTIO_META_TLS_CLIENT_KEY"
	// MetadataClientCertChain is ISTIO_META env var used for client cert chain.
	MetadataClientCertChain = "ISTIO_META_TLS_CLIENT_CERT_CHAIN"
	// MetadataClientRootCert is ISTIO_META env var used for client root cert.
	MetadataClientRootCert = "ISTIO_META_TLS_CLIENT_ROOT_CERT"
)
// Compile-time assertion that *Agent satisfies ready.Prober.
var _ ready.Prober = &Agent{}
// LifecycleEvent is the string name of an agent lifecycle event.
type LifecycleEvent string

const (
	// DrainLifecycleEvent is the "drain" event name.
	DrainLifecycleEvent LifecycleEvent = "drain"
	// ExitLifecycleEvent is the "exit" event name.
	ExitLifecycleEvent LifecycleEvent = "exit"
)
// SDSService is the surface of an SDS server that the agent relies on.
type SDSService interface {
	// OnSecretUpdate is registered as the secret cache's handler (see initSdsServer)
	// and receives the name of the resource whose secret changed.
	OnSecretUpdate(resourceName string)
	// Stop shuts the service down; it is called from Agent.Close.
	Stop()
}
// SDSServiceFactory is an alias for the constructor signature that builds an SDSService
// from security options, a secret manager and a private key provider configuration.
type SDSServiceFactory = func(_ *security.Options, _ security.SecretManager, _ *mesh.PrivateKeyProvider) SDSService
// Proxy holds the properties shared with the Pilot Proxy struct.
type Proxy struct {
	// ID is the proxy identifier; it forms the third segment of ServiceNode().
	ID string
	// IPAddresses lists the proxy's addresses. The first entry is used by ServiceNode()
	// and the whole list is passed to model.DiscoverIPMode by DiscoverIPMode().
	IPAddresses []string
	// Type is the node type; it forms the first segment of ServiceNode().
	Type model.NodeType
	// ipMode caches the result of DiscoverIPMode(); it keeps its zero value until that is called.
	ipMode model.IPMode
	// DNSDomain forms the last segment of ServiceNode().
	DNSDomain string
}
// DiscoverIPMode derives the IP mode from the proxy's IPAddresses and stores it on the node,
// where IsIPv6 and SupportsIPv6 read it.
func (node *Proxy) DiscoverIPMode() {
	mode := model.DiscoverIPMode(node.IPAddresses)
	node.ipMode = mode
}
// IsIPv6 reports whether the discovered IP mode is IPv6-only.
// Dual-stack proxies return false here; see SupportsIPv6.
func (node *Proxy) IsIPv6() bool {
	onlyV6 := node.ipMode == model.IPv6
	return onlyV6
}
// SupportsIPv6 reports whether the discovered IP mode is IPv6-only or dual-stack.
func (node *Proxy) SupportsIPv6() bool {
	switch node.ipMode {
	case model.IPv6, model.Dual:
		return true
	default:
		return false
	}
}
const (
	// serviceNodeSeparator joins the segments of the string built by Proxy.ServiceNode.
	serviceNodeSeparator = "~"
)
// ServiceNode builds the node identifier from four segments joined by
// serviceNodeSeparator: type, first IP address, ID and DNS domain.
// The IP segment is empty when the proxy has no addresses.
func (node *Proxy) ServiceNode() string {
	var firstIP string
	if addrs := node.IPAddresses; len(addrs) != 0 {
		firstIP = addrs[0]
	}
	segments := []string{string(node.Type), firstIP, node.ID, node.DNSDomain}
	return strings.Join(segments, serviceNodeSeparator)
}
// Agent contains the configuration of the agent, based on the injected
// environment:
// - SDS hostPath if node-agent was used
// - /etc/certs/key if Citadel or other mounted Secrets are used
// - root cert to use for connecting to XDS server
// - CA address, with proper defaults and detection
type Agent struct {
	// proxyConfig is the mesh proxy configuration supplied to NewAgent.
	proxyConfig *mesh.ProxyConfig
	// cfg holds the additional agent options supplied to NewAgent.
	cfg *AgentOptions
	// secOpts are the security options supplied to NewAgent. initSdsServer rewrites the
	// cert/key/root paths on it when workload certificate files are detected.
	secOpts *security.Options
	// envoyOpts is supplied to NewAgent and back-filled from proxyConfig and cfg
	// by initializeEnvoyAgent.
	envoyOpts envoy.ProxyConfig
	// envoyAgent is created by initializeEnvoyAgent; Run only does that when Envoy is
	// not disabled, so it is nil otherwise.
	envoyAgent *envoy.Agent
	// sdsServer is built through cfg.SDSFactory in initSdsServer, only when
	// cfg.DisableEnvoy is false.
	sdsServer SDSService
	// secretCache is the workload secret manager created by initSdsServer.
	secretCache *cache.SecretManagerClient
	// Used when proxying envoy xds via istio-agent is enabled.
	xdsProxy *XdsProxy
	// fileWatcher is created in NewAgent and used by startFileWatcher.
	fileWatcher filewatcher.FileWatcher
	// local DNS Server that processes DNS requests locally and forwards to upstream DNS if needed.
	localDNSServer *dnsClient.LocalDNSServer
	// Signals true completion (e.g. with delayed graceful termination of Envoy)
	wg sync.WaitGroup
}
// AgentOptions contains additional config for the agent, not included in ProxyConfig.
// Most are from env variables ( still experimental ) or for testing only.
// Eventually most non-test settings should graduate to ProxyConfig
// Please don't add 100 parameters to the NewAgent function (or any other)!
type AgentOptions struct {
	// ProxyXDSDebugViaAgent if true will listen on 15004 and forward queries
	// to XDS istio.io/debug.
	ProxyXDSDebugViaAgent bool
	// Port value for the debugging endpoint.
	ProxyXDSDebugViaAgentPort int
	// DNSCapture indicates if the XDS proxy has dns capture enabled or not
	DNSCapture bool
	// DNSAddr is the DNS capture address
	DNSAddr string
	// DNSForwardParallel indicates whether the agent should send parallel DNS queries to all upstream nameservers.
	DNSForwardParallel bool
	// ProxyType is the type of proxy we are configured to handle
	ProxyType model.NodeType
	// ProxyNamespace to use for local dns resolution
	ProxyNamespace string
	// ProxyDomain is the DNS domain associated with the proxy (assumed
	// to include the namespace as well) (for local dns resolution)
	ProxyDomain string
	// Node identifier used by Envoy
	ServiceNode string
	// XDSRootCerts is the location of the root CA for the XDS connection. Used for setting platform certs or
	// using custom roots.
	XDSRootCerts string
	// CARootCerts of the location of the root CA for the CA connection. Used for setting platform certs or
	// using custom roots.
	CARootCerts string
	// Extra headers to add to the XDS connection.
	XDSHeaders map[string]string
	// Is the proxy an IPv6 proxy
	IsIPv6 bool
	// Path to local UDS to communicate with Envoy
	XdsUdsPath string
	// Ability to retrieve ProxyConfig dynamically through XDS
	EnableDynamicProxyConfig bool
	// All of the proxy's IP Addresses
	ProxyIPAddresses []string
	// Envoy status port (that circles back to the agent status port). Really belongs to the proxy config.
	// Cannot be eradicated because mistakes have been made.
	EnvoyStatusPort int
	// Envoy prometheus port that circles back to its admin port for prom endpoint. Really belongs to the
	// proxy config.
	EnvoyPrometheusPort int
	// MinimumDrainDuration is passed to envoy.NewAgent next to the termination drain duration.
	MinimumDrainDuration time.Duration
	// ExitOnZeroActiveConnections is forwarded to both the node metadata and envoy.NewAgent.
	ExitOnZeroActiveConnections bool
	// Cloud platform
	Platform platform.Environment
	// GRPCBootstrapPath if set will generate a file compatible with GRPC_XDS_BOOTSTRAP
	GRPCBootstrapPath string
	// Disables all envoy agent features
	DisableEnvoy bool
	// DownstreamGrpcOptions is not read in this part of the file.
	// NOTE(review): presumably server options for the downstream-facing gRPC server — confirm in the xds proxy.
	DownstreamGrpcOptions []grpc.ServerOption
	// IstiodSAN is not read in this part of the file.
	// NOTE(review): presumably an override for the expected Istiod SAN — confirm at its use site.
	IstiodSAN string
	// WASMOptions is not read in this part of the file.
	// NOTE(review): presumably options for the wasm module cache — confirm at its use site.
	WASMOptions wasm.Options
	// Is the proxy in Dual Stack environment
	DualStack bool
	// UseExternalWorkloadSDS, if true, makes Run fail when no workload SDS socket is found
	// instead of starting the Istio SDS server.
	UseExternalWorkloadSDS bool
	// Enable metadata discovery bootstrap extension
	MetadataDiscovery bool
	// SDSFactory builds the SDS server in initSdsServer when Envoy is not disabled.
	SDSFactory func(options *security.Options, workloadSecretCache security.SecretManager, pkpConf *mesh.PrivateKeyProvider) SDSService
}
// NewAgent builds an Agent that hosts the local SDS and XDS functionality: the local SDS
// server with its certificate-signing clients (when not using files) and the local XDS
// proxy (including health checking for VMs and DNS proxying).
// It only records the supplied configuration and creates a file watcher; nothing is
// started until Run is called.
func NewAgent(proxyConfig *mesh.ProxyConfig, agentOpts *AgentOptions, sopts *security.Options, eopts envoy.ProxyConfig) *Agent {
	agent := new(Agent)
	agent.proxyConfig = proxyConfig
	agent.cfg = agentOpts
	agent.secOpts = sopts
	agent.envoyOpts = eopts
	agent.fileWatcher = filewatcher.NewWatcher()
	return agent
}
// EnvoyDisabled reports whether Run will skip starting and waiting for Envoy.
// That is the case in test-only mode or when DisableEnvoy is set in the agent options.
func (a *Agent) EnvoyDisabled() bool {
	if a.envoyOpts.TestOnly {
		return true
	}
	return a.cfg.DisableEnvoy
}
// WaitForSigterm reports whether Run will block until SIGTERM or SIGINT is received.
// This holds when Envoy is disabled for a reason other than test-only mode.
func (a *Agent) WaitForSigterm() bool {
	if !a.EnvoyDisabled() {
		return false
	}
	return !a.envoyOpts.TestOnly
}
// generateNodeMetadata collects the inputs for bootstrap.GetNodeMetaData from the agent's
// configuration, the process environment and a probe of the credential SDS socket, and
// returns the resulting node.
// It fails if the credential socket check fails or if metadata generation fails.
func (a *Agent) generateNodeMetadata() (*model.Node, error) {
	// The Pilot SAN is only needed when the control plane connection uses mutual TLS.
	var sans []string
	if a.proxyConfig.ControlPlaneAuthPolicy == mesh.AuthenticationPolicy_MUTUAL_TLS {
		// Obtain Pilot SAN, using DNS.
		sans = []string{config.GetPilotSan(a.proxyConfig.DiscoveryAddress)}
	}

	hasCredentialSocket, err := checkSocket(context.TODO(), security.CredentialNameSocketPath)
	if err != nil {
		return nil, fmt.Errorf("failed to check credential SDS socket: %v", err)
	}
	if hasCredentialSocket {
		log.Info("Credential SDS socket found")
	}

	opts := bootstrap.MetadataOptions{
		ID:                          a.cfg.ServiceNode,
		Envs:                        os.Environ(),
		Platform:                    a.cfg.Platform,
		InstanceIPs:                 a.cfg.ProxyIPAddresses,
		StsPort:                     a.secOpts.STSPort,
		ProxyConfig:                 a.proxyConfig,
		PilotSubjectAltName:         sans,
		CredentialSocketExists:      hasCredentialSocket,
		OutlierLogPath:              a.envoyOpts.OutlierLogPath,
		EnvoyPrometheusPort:         a.cfg.EnvoyPrometheusPort,
		EnvoyStatusPort:             a.cfg.EnvoyStatusPort,
		ExitOnZeroActiveConnections: a.cfg.ExitOnZeroActiveConnections,
		XDSRootCert:                 a.cfg.XDSRootCerts,
		MetadataDiscovery:           a.cfg.MetadataDiscovery,
	}
	return bootstrap.GetNodeMetaData(opts)
}
// initializeEnvoyAgent prepares the Envoy configuration and creates a.envoyAgent.
// It generates node metadata, picks the custom config file or writes a generated bootstrap
// file, back-fills envoyOpts from the proxy config, and builds the Envoy proxy and agent.
// It does not start Envoy; Run does that.
func (a *Agent) initializeEnvoyAgent(_ context.Context) error {
	node, err := a.generateNodeMetadata()
	if err != nil {
		return fmt.Errorf("failed to generate bootstrap metadata: %v", err)
	}
	log.Infof("Pilot SAN: %v", node.Metadata.PilotSubjectAltName)

	// Note: the cert checking still works, the generated file is updated if certs are changed.
	// We just don't save the generated file, but use a custom one instead. Pilot will keep
	// monitoring the certs and restart if the content of the certs changes.
	if customCfg := a.proxyConfig.CustomConfigFile; customCfg != "" {
		// A custom configuration is supplied: use it as-is and never clean it up.
		a.envoyOpts.ConfigPath = customCfg
		a.envoyOpts.ConfigCleanup = false
	} else {
		bootstrapCfg := bootstrap.Config{
			Node:             node,
			CompliancePolicy: common_features.CompliancePolicy,
			LogAsJSON:        a.envoyOpts.LogAsJSON,
		}
		generated, genErr := bootstrap.New(bootstrapCfg).CreateFile()
		if genErr != nil {
			return fmt.Errorf("failed to generate bootstrap config: %v", genErr)
		}
		a.envoyOpts.ConfigPath = generated
		a.envoyOpts.ConfigCleanup = true
	}

	// Back-fill envoy options from proxy config options
	pc := a.proxyConfig
	a.envoyOpts.BinaryPath = pc.BinaryPath
	a.envoyOpts.AdminPort = pc.ProxyAdminPort
	a.envoyOpts.DrainDuration = pc.DrainDuration
	a.envoyOpts.Concurrency = pc.Concurrency.GetValue()

	// Checking only uid should be sufficient - but tests also run as root and
	// will break due to permission errors if we start envoy as 1337.
	// This is a mode used for permission-less docker, where iptables can't be
	// used.
	runsAsRoot := os.Getuid() == 0
	a.envoyOpts.AgentIsRoot = runsAsRoot && strings.HasSuffix(a.cfg.DNSAddr, ":53")
	a.envoyOpts.DualStack = a.cfg.DualStack

	proxy := envoy.NewProxy(a.envoyOpts)
	terminationDrain := a.proxyConfig.TerminationDrainDuration.AsDuration()

	loopback := localHostIPv4
	if a.cfg.IsIPv6 {
		loopback = localHostIPv6
	}

	a.envoyAgent = envoy.NewAgent(
		proxy,
		terminationDrain,
		a.cfg.MinimumDrainDuration,
		loopback,
		int(a.proxyConfig.ProxyAdminPort),
		a.cfg.EnvoyStatusPort,
		a.cfg.EnvoyPrometheusPort,
		a.cfg.ExitOnZeroActiveConnections,
	)
	return nil
}
// Run is a non-blocking call which returns either an error or a function to await for completion.
//
// In order, it starts the local DNS server (if enabled), decides whether to start the Istio
// SDS server based on whether a workload SDS socket exists, starts the XDS proxy and its
// optional debug interface, optionally writes the gRPC bootstrap file, starts a watcher on
// the XDS root CA when control plane auth is enabled, and finally launches Envoy or, when
// Envoy is disabled outside test mode, a goroutine that waits for ctx cancellation.
//
// The returned function blocks until the goroutines tracked by the agent's WaitGroup finish.
func (a *Agent) Run(ctx context.Context) (func(), error) {
	var err error
	if err = a.initLocalDNSServer(); err != nil {
		return nil, fmt.Errorf("failed to start local DNS server: %v", err)
	}

	socketExists, err := checkSocket(ctx, security.WorkloadIdentitySocketPath)
	if err != nil {
		return nil, fmt.Errorf("failed to check SDS socket: %v", err)
	}
	if socketExists {
		log.Info("Workload SDS socket found. Istio SDS Server won't be started")
	} else {
		if a.cfg.UseExternalWorkloadSDS {
			return nil, errors.New("workload SDS socket is required but not found")
		}
		log.Info("Workload SDS socket not found. Starting Istio SDS Server")
		err = a.initSdsServer()
		if err != nil {
			return nil, fmt.Errorf("failed to start SDS server: %v", err)
		}
	}

	a.xdsProxy, err = initXdsProxy(a)
	if err != nil {
		return nil, fmt.Errorf("failed to start xds proxy: %v", err)
	}
	if a.cfg.ProxyXDSDebugViaAgent {
		err = a.xdsProxy.initDebugInterface(a.cfg.ProxyXDSDebugViaAgentPort)
		if err != nil {
			return nil, fmt.Errorf("failed to start istio tap server: %v", err)
		}
	}

	if a.cfg.GRPCBootstrapPath != "" {
		if err := a.generateGRPCBootstrap(); err != nil {
			return nil, fmt.Errorf("failed generating gRPC XDS bootstrap: %v", err)
		}
	}

	if a.proxyConfig.ControlPlaneAuthPolicy != mesh.AuthenticationPolicy_NONE {
		rootCAForXDS, err := a.FindRootCAForXDS()
		if err != nil {
			return nil, fmt.Errorf("failed to find root XDS CA: %v", err)
		}
		// Rebuild the Istiod dial options whenever the root CA file changes.
		go a.startFileWatcher(ctx, rootCAForXDS, func() {
			if err := a.xdsProxy.initIstiodDialOptions(a); err != nil {
				// Include the cause; without it the warning is not actionable.
				log.Warnf("Failed to init xds proxy dial options: %v", err)
			}
		})
	}

	if !a.EnvoyDisabled() {
		err = a.initializeEnvoyAgent(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to initialize envoy agent: %v", err)
		}

		a.wg.Add(1)
		go func() {
			defer a.wg.Done()
			// This is a blocking call for graceful termination.
			a.envoyAgent.Run(ctx)
		}()
	} else if a.WaitForSigterm() {
		// wait for SIGTERM and perform graceful shutdown
		a.wg.Add(1)
		go func() {
			defer a.wg.Done()
			<-ctx.Done()
		}()
	}
	return a.wg.Wait, nil
}
// initSdsServer creates the workload secret manager and wires up secret delivery.
//
// If workload certificate files are present on disk, the security options are switched to
// file-mounted certs first. With Envoy enabled, an SDS server is built through
// cfg.SDSFactory and registered as the secret handler. With Envoy disabled, no SDS server
// is created; a background goroutine fetches the workload certs and re-fetches them
// whenever the secret cache signals an update.
func (a *Agent) initSdsServer() error {
	certsOnDisk := security.CheckWorkloadCertificate(
		security.WorkloadIdentityCertChainPath,
		security.WorkloadIdentityKeyPath,
		security.WorkloadIdentityRootCertPath,
	)
	if certsOnDisk {
		log.Info("workload certificate files detected, creating secret manager without caClient")
		a.secOpts.RootCertFilePath = security.WorkloadIdentityRootCertPath
		a.secOpts.CertChainFilePath = security.WorkloadIdentityCertChainPath
		a.secOpts.KeyFilePath = security.WorkloadIdentityKeyPath
		a.secOpts.FileMountedCerts = true
	}

	var err error
	if a.secretCache, err = a.newSecretManager(); err != nil {
		return fmt.Errorf("failed to start workload secret manager %v", err)
	}

	if !a.cfg.DisableEnvoy {
		keyProvider := a.proxyConfig.GetPrivateKeyProvider()
		a.sdsServer = a.cfg.SDSFactory(a.secOpts, a.secretCache, keyProvider)
		a.secretCache.RegisterSecretHandler(a.sdsServer.OnSecretUpdate)
		return nil
	}

	// For proxyless we don't need an SDS server, but still need the keys and
	// we need them refreshed periodically.
	//
	// This is based on the code from newSDSService, but customized to have explicit rotation.
	go func() {
		secrets := a.secretCache
		// The secret handler is called when a secret should be renewed, after invalidating the cache.
		// The handler does not call GenerateSecret - it is a side-effect of the SDS generate() method, which
		// is called by sdsServer.OnSecretUpdate, which triggers a push and eventually calls sdsservice.Generate
		// TODO: extract the logic to detect expiration time, and use a simpler code to rotate to files.
		renew := func(resourceName string) {
			_, _ = a.getWorkloadCerts(secrets)
		}
		secrets.RegisterSecretHandler(renew)
		_, _ = a.getWorkloadCerts(secrets)
	}()
	return nil
}
// getWorkloadCerts will attempt to get a cert, with infinite exponential backoff
// It will not return until both workload cert and root cert are generated.
//
// It runs two retry loops on the same backoff instance: the first requests the workload
// key/cert, the second requests the root cert. The returned SecretItem is the workload
// key/cert from the first loop; the root cert result is discarded.
//
// NOTE(review): both loops use context.TODO(), so whether a loop can end with an error
// depends on the backoff options — confirm in pkg/backoff. If the second loop does end
// with an error, the naked return yields the workload secret together with that error.
//
// TODO: evaluate replacing the STS server with a file data source, to simplify Envoy config
// TODO: Fix this method with unused return value
// nolint: unparam
func (a *Agent) getWorkloadCerts(st *cache.SecretManagerClient) (sk *security.SecretItem, err error) {
	b := backoff.NewExponentialBackOff(backoff.DefaultOption())
	// This will loop forever until success
	err = b.RetryWithContext(context.TODO(), func() error {
		// Assigns the named results sk and err of the enclosing function.
		sk, err = st.GenerateSecret(security.WorkloadKeyCertResourceName)
		if err == nil {
			return nil
		}
		log.Warnf("failed to get certificate: %v", err)
		return err
	})
	if err != nil {
		return nil, err
	}
	err = b.RetryWithContext(context.TODO(), func() error {
		// err is shadowed here on purpose: only the retry loop's own result
		// is stored in the named result.
		_, err := st.GenerateSecret(security.RootCertReqResourceName)
		if err == nil {
			return nil
		}
		log.Warnf("failed to get root certificate: %v", err)
		return err
	})
	return
}
// startFileWatcher registers filePath with the agent's file watcher and then blocks,
// calling handler for every event on that file and logging watcher errors, until ctx is
// done. If the file cannot be added to the watcher it logs a warning and returns at once.
// It is meant to be run in its own goroutine.
func (a *Agent) startFileWatcher(ctx context.Context, filePath string, handler func()) {
	if err := a.fileWatcher.Add(filePath); err != nil {
		// Include the cause; the path alone does not say why the watch failed.
		log.Warnf("Failed to add file watcher %s: %v", filePath, err)
		return
	}

	log.Debugf("Add file %s watcher", filePath)
	for {
		select {
		case gotEvent := <-a.fileWatcher.Events(filePath):
			log.Debugf("Receive file %s event %v", filePath, gotEvent)
			handler()
		case err := <-a.fileWatcher.Errors(filePath):
			log.Warnf("Watch file %s error: %v", filePath, err)
		case <-ctx.Done():
			return
		}
	}
}
// initLocalDNSServer creates and starts the local DNS server when DNS capture is enabled
// and the proxy is a sidecar. Gateways do not need a DNS server, so it is a no-op for them.
func (a *Agent) initLocalDNSServer() (err error) {
	if !a.cfg.DNSCapture || a.cfg.ProxyType != model.SidecarProxy {
		return nil
	}

	a.localDNSServer, err = dnsClient.NewLocalDNSServer(
		a.cfg.ProxyNamespace,
		a.cfg.ProxyDomain,
		a.cfg.DNSAddr,
		a.cfg.DNSForwardParallel,
	)
	if err != nil {
		return err
	}
	a.localDNSServer.StartDNS()
	return nil
}
// generateGRPCBootstrap writes a gRPC XDS bootstrap file to cfg.GRPCBootstrapPath.
// It generates node metadata with the generator set to "grpc", creates the parent
// directory (mode 0700) if needed, and delegates the file contents to grpcxds.
func (a *Agent) generateGRPCBootstrap() error {
	node, err := a.generateNodeMetadata()
	if err != nil {
		return fmt.Errorf("failed generating node metadata: %v", err)
	}

	// GRPC bootstrap requires this. Original implementation injected this via env variable, but
	// this interfere with envoy, we should be able to use both envoy for TCP/HTTP and proxyless.
	node.Metadata.Generator = "grpc"

	bootstrapPath := a.cfg.GRPCBootstrapPath
	if mkErr := os.MkdirAll(filepath.Dir(bootstrapPath), 0o700); mkErr != nil {
		return mkErr
	}

	opts := grpcxds.GenerateBootstrapOptions{
		Node:             node,
		XdsUdsPath:       a.cfg.XdsUdsPath,
		DiscoveryAddress: a.proxyConfig.DiscoveryAddress,
		CertDir:          a.secOpts.OutputKeyCertToDir,
	}
	_, err = grpcxds.GenerateBootstrapFile(opts, bootstrapPath)
	return err
}
// Check is used in the readiness check of the agent to ensure the DNS server is ready.
//
// It only applies when DNS capture is enabled on a sidecar proxy; gateways need no DNS
// server and always pass. A DNS server that has not been created yet is reported as not
// ready instead of causing a nil pointer dereference.
func (a *Agent) Check() (err error) {
	if !a.cfg.DNSCapture || a.cfg.ProxyType != model.SidecarProxy {
		return nil
	}
	// localDNSServer is only set by initLocalDNSServer, so it may still be nil here.
	if a.localDNSServer == nil || !a.localDNSServer.IsReady() {
		return errors.New("istio DNS capture is turned ON and DNS lookup table is not ready yet")
	}
	return nil
}
// GetDNSTable builds the DNS table used in the debugging interface.
//
// It returns nil when there is no local DNS server or no name table yet. Otherwise it
// clones the current name table and adds an entry for every alternate host not already
// present, listing its IPv4 addresses followed by its IPv6 addresses.
func (a *Agent) GetDNSTable() *dnsProto.NameTable {
	if a.localDNSServer == nil || a.localDNSServer.NameTable() == nil {
		return nil
	}

	// Work on a copy so the server's live table is never mutated.
	table := proto.Clone(a.localDNSServer.NameTable()).(*dnsProto.NameTable)
	a.localDNSServer.BuildAlternateHosts(table, func(althosts map[string]struct{}, ipv4 []netip.Addr, ipv6 []netip.Addr, _ []string) {
		for host := range althosts {
			if _, known := table.Table[host]; known {
				continue
			}
			ips := make([]string, 0, len(ipv4)+len(ipv6))
			for _, v4 := range ipv4 {
				ips = append(ips, v4.String())
			}
			for _, v6 := range ipv6 {
				ips = append(ips, v6.String())
			}
			table.Table[host] = &dnsProto.NameTable_NameInfo{
				Ips:      ips,
				Registry: "Kubernetes",
			}
		}
	})
	return table
}
// Close releases the agent's components that were created, in this order: XDS proxy,
// local DNS server, SDS server, secret cache, file watcher. Components that were never
// initialised are skipped. The file watcher's close error is ignored.
func (a *Agent) Close() {
	if proxy := a.xdsProxy; proxy != nil {
		proxy.close()
	}
	if dns := a.localDNSServer; dns != nil {
		dns.Close()
	}
	if sds := a.sdsServer; sds != nil {
		sds.Stop()
	}
	if secrets := a.secretCache; secrets != nil {
		secrets.Close()
	}
	if watcher := a.fileWatcher; watcher != nil {
		_ = watcher.Close()
	}
}
// FindRootCAForXDS determines the root CA to be configured in the bootstrap file.
// It may be different from the CA for the cert server - which is based on CA_ADDR.
// In addition it deals with the case the XDS server is on port 443, expected with a proper
// cert (/etc/ssl/certs/ca-certificates.crt).
//
// The first matching rule wins. An empty path with a nil error means the system root
// certificates are used. The mounted default root cert and the PROV_CERT path are returned
// without a further existence check; every other candidate must exist as a regular file.
func (a *Agent) FindRootCAForXDS() (string, error) {
	var candidate string
	switch {
	case a.cfg.XDSRootCerts == security.SystemRootCerts:
		// Special case input for root cert configuration to use system root certificates
		return "", nil
	case a.cfg.XDSRootCerts != "":
		// Using specific platform certs or custom roots
		candidate = a.cfg.XDSRootCerts
	case fileExists(security.DefaultRootCertFilePath):
		// Old style - mounted cert. This is used for XDS auth only,
		// not connecting to CA_ADDR because this mode uses external
		// agent (Secret refresh, etc)
		return security.DefaultRootCertFilePath, nil
	case a.secOpts.PilotCertProvider == constants.CertProviderKubernetes:
		// Using K8S - this is likely incorrect, may work by accident (https://github.com/istio/istio/issues/22161)
		candidate = k8sCAPath
		if fileExists(k8sCAIstioMountedPath) {
			candidate = k8sCAIstioMountedPath
		}
	case a.secOpts.ProvCert != "":
		// This was never completely correct - PROV_CERT are only intended for auth with CA_ADDR,
		// and should not be involved in determining the root CA.
		// For VMs, the root cert file used to auth may be populated afterwards.
		// Thus, return directly here and skip checking for existence.
		return a.secOpts.ProvCert + "/root-cert.pem", nil
	case a.secOpts.FileMountedCerts:
		// FileMountedCerts - Load it from Proxy Metadata.
		candidate = a.proxyConfig.ProxyMetadata[MetadataClientRootCert]
	case a.secOpts.PilotCertProvider == constants.CertProviderNone:
		return "", fmt.Errorf("root CA file for XDS required but configured provider as none")
	default:
		// PILOT_CERT_PROVIDER - default is istiod
		// This is the default - a mounted config map on K8S
		candidate = path.Join(CitadelCACertPath, constants.CACertNamespaceConfigMapDataName)
	}

	// Additional checks for root CA cert existence. Fail early, instead of obscure envoy errors
	if !fileExists(candidate) {
		return "", fmt.Errorf("root CA file for XDS does not exist %s", candidate)
	}
	return candidate, nil
}
// GetKeyCertsForXDS returns the key and cert chain file paths for connecting to XDS.
// A provisioning cert directory takes precedence; otherwise, with file-mounted certs, the
// paths come from the proxy metadata. Both results are empty when neither applies.
func (a *Agent) GetKeyCertsForXDS() (string, string) {
	switch {
	case a.secOpts.ProvCert != "":
		return getKeyCertInner(a.secOpts.ProvCert)
	case a.secOpts.FileMountedCerts:
		metadata := a.proxyConfig.ProxyMetadata
		return metadata[MetadataClientCertKey], metadata[MetadataClientCertChain]
	default:
		return "", ""
	}
}
// fileExists reports whether path can be stat'ed and refers to a regular file.
// Directories, sockets and other non-regular entries yield false, as does any stat error.
func fileExists(path string) bool {
	info, err := os.Stat(path)
	return err == nil && info.Mode().IsRegular()
}
// socketFileExists reports whether path can be stat'ed and is not a regular file.
// It does not check specifically for a socket: any non-regular entry, such as a
// directory, also yields true.
func socketFileExists(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return !info.Mode().IsRegular()
}
// checkSocket reports whether the socket at socketPath exists and is responsive.
//
//   - No socket file: returns (false, nil).
//   - Socket present and healthy: returns (true, nil).
//   - Socket present but unhealthy: tries to delete the file; returns (false, nil) if the
//     removal succeeds and (false, error) if it does not.
func checkSocket(ctx context.Context, socketPath string) (bool, error) {
	if !socketFileExists(socketPath) {
		return false, nil
	}

	healthErr := socketHealthCheck(ctx, socketPath)
	if healthErr == nil {
		return true, nil
	}

	// Remove the stale socket so a fresh server can bind to the path.
	log.Debugf("SDS socket detected but not healthy: %v", healthErr)
	if rmErr := os.Remove(socketPath); rmErr != nil {
		return false, fmt.Errorf("existing SDS socket could not be removed: %v", rmErr)
	}
	return false, nil
}
// socketHealthCheck tries a blocking, insecure gRPC dial to the unix socket at socketPath
// with a one-second deadline. It returns the dial error, or nil once a connection was
// established. A failure to close that connection is only logged.
func socketHealthCheck(ctx context.Context, socketPath string) error {
	dialCtx, cancel := context.WithTimeout(ctx, time.Second)
	defer cancel()

	target := "unix:" + socketPath
	conn, err := grpc.DialContext(dialCtx, target,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.FailOnNonTempDialError(true),
		grpc.WithReturnConnectionError(),
		grpc.WithBlock(),
	)
	if err != nil {
		return err
	}

	if closeErr := conn.Close(); closeErr != nil {
		log.Infof("connection is not closed: %v", closeErr)
	}
	return nil
}
// FindRootCAForCA finds the root CA to use when connecting to the CA (Istiod or external).
//
// The first matching rule wins. An empty path with a nil error means the system root
// certificates are used. The PROV_CERT path is returned without an existence check;
// every other candidate must exist as a regular file.
func (a *Agent) FindRootCAForCA() (string, error) {
	var candidate string
	switch {
	case a.cfg.CARootCerts == security.SystemRootCerts:
		return "", nil
	case a.cfg.CARootCerts != "":
		candidate = a.cfg.CARootCerts
	case a.secOpts.PilotCertProvider == constants.CertProviderKubernetes:
		// Using K8S - this is likely incorrect, may work by accident.
		// API is GA.
		candidate = k8sCAPath
		if fileExists(k8sCAIstioMountedPath) {
			candidate = k8sCAIstioMountedPath
		}
	case a.secOpts.PilotCertProvider == constants.CertProviderCustom:
		candidate = security.DefaultRootCertFilePath // ./etc/certs/root-cert.pem
	case a.secOpts.ProvCert != "":
		// This was never completely correct - PROV_CERT are only intended for auth with CA_ADDR,
		// and should not be involved in determining the root CA.
		// For VMs, the root cert file used to auth may be populated afterwards.
		// Thus, return directly here and skip checking for existence.
		return a.secOpts.ProvCert + "/root-cert.pem", nil
	case a.secOpts.PilotCertProvider == constants.CertProviderNone:
		return "", fmt.Errorf("root CA file for CA required but configured provider as none")
	default:
		// This is the default - a mounted config map on K8S
		// or: "./var/run/secrets/istio/root-cert.pem"
		candidate = path.Join(CitadelCACertPath, constants.CACertNamespaceConfigMapDataName)
	}

	// Additional checks for root CA cert existence.
	if !fileExists(candidate) {
		return "", fmt.Errorf("root CA file for CA does not exist %s", candidate)
	}
	return candidate, nil
}
// getKeyCertsForCA returns the key and cert chain file paths for connecting to the CA server.
// Both are empty unless a provisioning cert directory is configured.
func (a *Agent) getKeyCertsForCA() (string, string) {
	if a.secOpts.ProvCert == "" {
		return "", ""
	}
	return getKeyCertInner(a.secOpts.ProvCert)
}
// getKeyCertInner returns the key file path and the cert chain file path, in that order,
// located inside the certPath directory.
func getKeyCertInner(certPath string) (string, string) {
	return path.Join(certPath, constants.KeyFilename), path.Join(certPath, constants.CertChainFilename)
}
// newSecretManager creates the SecretManager for workload secrets.
//
// With file-mounted certs no CA client is created. Otherwise the CA client is chosen by
// the configured provider name: Google CA, Google CAS, or by default the Citadel client.
// The Citadel client uses TLS with the root found by FindRootCAForCA, except when the CA
// endpoint uses the reserved port 15010.
func (a *Agent) newSecretManager() (*cache.SecretManagerClient, error) {
	// If proxy is using file mounted certs, we do not have to connect to CA.
	if a.secOpts.FileMountedCerts {
		log.Info("Workload is using file mounted certificates. Skipping connecting to CA")
		return cache.NewSecretManagerClient(nil, a.secOpts)
	}

	log.Infof("CA Endpoint %s, provider %s", a.secOpts.CAEndpoint, a.secOpts.CAProviderName)

	// TODO: this should all be packaged in a plugin, possibly with optional compilation.
	provider := a.secOpts.CAProviderName
	switch {
	case provider == security.GoogleCAProvider:
		// Use a plugin to an external CA - this has direct support for the K8S JWT token
		// This is only used if the proper env variables are injected - otherwise the existing Citadel or Istiod will be
		// used.
		googleClient, err := gca.NewGoogleCAClient(a.secOpts.CAEndpoint, true, caclient.NewCATokenProvider(a.secOpts))
		if err != nil {
			return nil, err
		}
		return cache.NewSecretManagerClient(googleClient, a.secOpts)
	case provider == security.GoogleCASProvider:
		// Use a plugin
		tlsConfig := &tls.Config{MinVersion: tls.VersionTLS12}
		model.EnforceGoCompliance(tlsConfig)
		tokenOpt := option.WithGRPCDialOption(grpc.WithPerRPCCredentials(caclient.NewCATokenProvider(a.secOpts)))
		transportOpt := option.WithGRPCDialOption(grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)))
		casClient, err := cas.NewGoogleCASClient(a.secOpts.CAEndpoint, tokenOpt, transportOpt)
		if err != nil {
			return nil, err
		}
		return cache.NewSecretManagerClient(casClient, a.secOpts)
	}

	// Using citadel CA
	var tlsOpts *citadel.TLSOptions
	// Special case: if Istiod runs on a secure network, on the default port, don't use TLS
	// TODO: may add extra cases or explicit settings - but this is a rare use cases, mostly debugging
	if strings.HasSuffix(a.secOpts.CAEndpoint, ":15010") {
		log.Warn("Debug mode or IP-secure network")
	} else {
		rootCert, err := a.FindRootCAForCA()
		if err != nil {
			return nil, fmt.Errorf("failed to find root CA cert for CA: %v", err)
		}

		switch {
		case rootCert == "":
			log.Infof("Using CA %s cert with system certs", a.secOpts.CAEndpoint)
		case !fileExists(rootCert):
			log.Fatalf("invalid config - %s missing a root certificate %s", a.secOpts.CAEndpoint, rootCert)
		default:
			log.Infof("Using CA %s cert with certs: %s", a.secOpts.CAEndpoint, rootCert)
		}

		key, cert := a.getKeyCertsForCA()
		tlsOpts = &citadel.TLSOptions{RootCert: rootCert, Key: key, Cert: cert}
	}

	// Will use TLS unless the reserved 15010 port is used ( istiod on an ipsec/secure VPC)
	// rootCert may be nil - in which case the system roots are used, and the CA is expected to have public key
	// Otherwise assume the injection has mounted /etc/certs/root-cert.pem
	citadelClient, err := citadel.NewCitadelClient(a.secOpts, tlsOpts)
	if err != nil {
		return nil, err
	}
	return cache.NewSecretManagerClient(citadelClient, a.secOpts)
}
// GRPCBootstrapPath returns the configured gRPC bootstrap file path, or the empty string
// if none is configured.
func (a *Agent) GRPCBootstrapPath() string {
	bootstrapPath := a.cfg.GRPCBootstrapPath
	return bootstrapPath
}
// DrainNow asks the Envoy agent to start draining immediately.
//
// It is a no-op when no Envoy agent exists. Run only creates one when Envoy is not
// disabled, so without this guard the call would dereference a nil pointer.
func (a *Agent) DrainNow() {
	if a.envoyAgent == nil {
		return
	}
	a.envoyAgent.DrainNow()
}