From 78d75a00b8ed02ea3afc7453c9205ffa13da1058 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 12 Mar 2021 10:55:43 -0800 Subject: [PATCH 01/11] clustermesh: Relax cluster validation on Enable Allow enabling clustermesh with default cluster name and ID (zero) to allow external workloads to be used without explicitly setting cluster ID and/or name. Validate both local and remote cluster config fully when connecting to remote cluster instead. Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 59 +++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 1e38f74220..18b79ed20e 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -463,57 +463,31 @@ func (k *K8sClusterMesh) Log(format string, a ...interface{}) { fmt.Fprintf(k.params.Writer, format+"\n", a...) } -func (k *K8sClusterMesh) Validate(ctx context.Context) error { +func (k *K8sClusterMesh) GetClusterConfig(ctx context.Context) error { f, err := k.client.AutodetectFlavor(ctx) if err != nil { return err } k.flavor = f - var failures int - k.Log("✨ Validating cluster configuration...") - cm, err := k.client.GetConfigMap(ctx, k.params.Namespace, defaults.ConfigMapName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("unable to retrieve ConfigMap %q: %w", defaults.ConfigMapName, err) } - if cm.Data == nil { - return fmt.Errorf("ConfigMap %q does not contain any configuration", defaults.ConfigMapName) - } - - clusterID, ok := cm.Data[configNameClusterID] - if !ok { - k.Log("❌ Cluster ID (%q) is not set", configNameClusterID) - failures++ - } - + clusterID := cm.Data[configNameClusterID] if clusterID == "" || clusterID == "0" { - k.Log("❌ Cluster ID (%q) must be set to a value > 0", configNameClusterID) - failures++ + clusterID = "0" } k.clusterID = clusterID - clusterName, ok := cm.Data[configNameClusterName] - if !ok { - k.Log("❌ Cluster name (%q) is not set", 
configNameClusterName) - failures++ - } - + clusterName := cm.Data[configNameClusterName] if clusterName == "" || clusterName == "default" { - k.Log("❌ Cluster name (%q) must be set to a value other than \"default\"", configNameClusterName) - failures++ + clusterName = "default" } k.clusterName = clusterName - if failures > 0 { - return fmt.Errorf("%d validation errors", failures) - } - - k.Log("✅ Valid cluster identification found: name=%q id=%q", clusterName, clusterID) - return nil - } func (k *K8sClusterMesh) Disable(ctx context.Context) error { @@ -549,7 +523,7 @@ func (k *K8sClusterMesh) Enable(ctx context.Context) error { return err } - if err := k.Validate(ctx); err != nil { + if err := k.GetClusterConfig(ctx); err != nil { return err } @@ -590,6 +564,7 @@ func (k *K8sClusterMesh) Enable(ctx context.Context) error { type accessInformation struct { ServiceIPs []string ServicePort int + ClusterID string ClusterName string CA []byte ClientCert []byte @@ -616,6 +591,7 @@ func (k *K8sClusterMesh) extractAccessInformation(ctx context.Context, client k8 return nil, fmt.Errorf("%s is not set in ConfigMap %q", configNameClusterName, defaults.ConfigMapName) } + clusterID := cm.Data[configNameClusterID] clusterName := cm.Data[configNameClusterName] if verbose { @@ -655,7 +631,8 @@ func (k *K8sClusterMesh) extractAccessInformation(ctx context.Context, client k8 } ai := &accessInformation{ - ClusterName: cm.Data[configNameClusterName], + ClusterID: clusterID, + ClusterName: clusterName, CA: caCert, ClientKey: clientKey, ClientCert: clientCert, @@ -808,12 +785,28 @@ func (k *K8sClusterMesh) Connect(ctx context.Context) error { return err } + if aiRemote.ClusterName == "" || aiRemote.ClusterName == "default" || aiRemote.ClusterID == "" || aiRemote.ClusterID == "0" { + return fmt.Errorf("remote cluster has non-unique name (%s) and/or ID (%s)", aiRemote.ClusterName, aiRemote.ClusterID) + } + aiLocal, err := k.extractAccessInformation(ctx, k.client, k.params.SourceEndpoints, 
true) if err != nil { k.Log("❌ Unable to retrieve access information of local cluster %q: %s", k.client.ClusterName(), err) return err } + if aiLocal.ClusterName == "" || aiLocal.ClusterName == "default" || aiLocal.ClusterID == "" || aiLocal.ClusterID == "0" { + return fmt.Errorf("local cluster has non-unique name (%s) and/or ID (%s)", aiLocal.ClusterName, aiLocal.ClusterID) + } + + if aiRemote.ClusterName == aiLocal.ClusterName { + return fmt.Errorf("remote and local cluster have the same, non-unique name: %s", aiLocal.ClusterName) + } + + if aiRemote.ClusterID == aiLocal.ClusterID { + return fmt.Errorf("remote and local cluster have the same, non-unique ID: %s", aiLocal.ClusterID) + } + k.Log("✨ Connecting cluster %s -> %s...", k.client.ClusterName(), remoteCluster.ClusterName()) if err := k.patchConfig(ctx, k.client, aiRemote); err != nil { return err From 0fd940e29c9f20606d96e115816c3d51217323d0 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Wed, 10 Mar 2021 18:39:00 -0800 Subject: [PATCH 02/11] clustermesh: Add support for ClusterIP Add support for extracting service's ClusterIP and port rather than erroring out. 
Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 18b79ed20e..0bb02649ed 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -662,7 +662,18 @@ func (k *K8sClusterMesh) extractAccessInformation(ctx context.Context, client k8 } case svc.Spec.Type == corev1.ServiceTypeClusterIP: - return nil, fmt.Errorf("not able to derive service IPs for type ClusterIP, please specify IPs manually") + if len(svc.Spec.Ports) == 0 { + return nil, fmt.Errorf("port of service could not be derived, service has no ports") + } + if svc.Spec.Ports[0].Port == 0 { + return nil, fmt.Errorf("port is not set in service") + } + ai.ServicePort = int(svc.Spec.Ports[0].Port) + + if svc.Spec.ClusterIP == "" { + return nil, fmt.Errorf("IP of service could not be derived, service has no ClusterIP") + } + ai.ServiceIPs = append(ai.ServiceIPs, svc.Spec.ClusterIP) case svc.Spec.Type == corev1.ServiceTypeNodePort: if len(svc.Spec.Ports) == 0 { From 2254d0c0f840a7784af6ba02df466e21237dbf53 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Wed, 10 Mar 2021 12:30:39 -0800 Subject: [PATCH 03/11] clustermesh: Error out if service type is not specified or LoadBalancer type can not be used Error out if service type is not explicitly set or can not be auto-detected as LoadBalancer type. Warn if service type is set to HostPort. 
Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 0bb02649ed..987f8ad41d 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -92,7 +92,7 @@ var clusterRole = &rbacv1.ClusterRole{ }, } -func (k *K8sClusterMesh) generateService() *corev1.Service { +func (k *K8sClusterMesh) generateService() (*corev1.Service, error) { svc := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: defaults.ClusterMeshServiceName, @@ -109,6 +109,9 @@ func (k *K8sClusterMesh) generateService() *corev1.Service { } if k.params.ServiceType != "" { + if k.params.ServiceType == "NodePort" { + k.Log("⚠️ Using service type NodePort may fail when nodes are removed from the cluster!") + } svc.Spec.Type = corev1.ServiceType(k.params.ServiceType) } else { switch k.flavor.Kind { @@ -127,11 +130,11 @@ func (k *K8sClusterMesh) generateService() *corev1.Service { svc.Spec.Type = corev1.ServiceTypeLoadBalancer svc.ObjectMeta.Annotations["service.beta.kubernetes.io/aws-load-balancer-internal"] = "0.0.0.0/0" default: - svc.Spec.Type = corev1.ServiceTypeClusterIP + return nil, fmt.Errorf("cannot auto-detect service type, please specify using '--service-type' option") } } - return svc + return svc, nil } var initContainerArgs = []string{`rm -rf /var/run/etcd/*; @@ -527,7 +530,12 @@ func (k *K8sClusterMesh) Enable(ctx context.Context) error { return err } - _, err := k.client.GetDeployment(ctx, k.params.Namespace, "clustermesh-apiserver", metav1.GetOptions{}) + svc, err := k.generateService() + if err != nil { + return err + } + + _, err = k.client.GetDeployment(ctx, k.params.Namespace, "clustermesh-apiserver", metav1.GetOptions{}) if err == nil { k.Log("✅ ClusterMesh is already enabled") return nil @@ -554,7 +562,7 @@ func (k *K8sClusterMesh) Enable(ctx context.Context) error { return err } - if _, err := 
k.client.CreateService(ctx, k.params.Namespace, k.generateService(), metav1.CreateOptions{}); err != nil { + if _, err := k.client.CreateService(ctx, k.params.Namespace, svc, metav1.CreateOptions{}); err != nil { return err } @@ -714,6 +722,7 @@ func (k *K8sClusterMesh) extractAccessInformation(ctx context.Context, client k8 break } } + k.Log("⚠️ Service type NodePort detected! Service may fail when nodes are removed from the cluster!") case svc.Spec.Type == corev1.ServiceTypeLoadBalancer: if len(svc.Spec.Ports) == 0 { From efb399c4f6792ea855c3cd23d9fdcee52f620b12 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Wed, 10 Mar 2021 12:28:08 -0800 Subject: [PATCH 04/11] clustermesh: Set CNs to etcd user account names Client certificate's Common Name is used as etcd user account name once TLS based user auth (--client-cert-auth) is enabled. Use the user account names as CNs as follows: - Admin cert: root - Client cert: remote Signed-off-by: Jarno Rajahalme --- clustermesh/certs.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clustermesh/certs.go b/clustermesh/certs.go index b4a090bb62..126ee16af9 100644 --- a/clustermesh/certs.go +++ b/clustermesh/certs.go @@ -76,7 +76,7 @@ func (k *K8sClusterMesh) createClusterMeshAdminCertificate(ctx context.Context) "localhost", "127.0.0.1", }, - CN: "ClusterMesh Admin", + CN: "root", } signConf := &config.Signing{ @@ -112,7 +112,7 @@ func (k *K8sClusterMesh) createClusterMeshClientCertificate(ctx context.Context) Names: []csr.Name{{C: "US", ST: "San Francisco", L: "CA"}}, KeyRequest: csr.NewKeyRequest(), Hosts: []string{""}, - CN: "ClusterMesh Client", + CN: "remote", } signConf := &config.Signing{ From 843200bcfc9c49263d18c0e8b725d081d2853e8d Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 8 Mar 2021 20:52:37 -0800 Subject: [PATCH 05/11] clustermesh: Add support for external-workload cert Add a new cert to be used by External Workloads. 
Common Name is set to the etcd user account name that has write access to the registration key (externalworkload). Signed-off-by: Jarno Rajahalme --- clustermesh/certs.go | 40 +++++++++++++++++++++++++++++++++ clustermesh/clustermesh.go | 45 +++++++++++++++++++++++++++----------- defaults/defaults.go | 33 +++++++++++++++------------- 3 files changed, 90 insertions(+), 28 deletions(-) diff --git a/clustermesh/certs.go b/clustermesh/certs.go index 126ee16af9..189b773479 100644 --- a/clustermesh/certs.go +++ b/clustermesh/certs.go @@ -143,11 +143,48 @@ func (k *K8sClusterMesh) createClusterMeshClientCertificate(ctx context.Context) return nil } +func (k *K8sClusterMesh) createClusterMeshExternalWorkloadCertificate(ctx context.Context) error { + certReq := &csr.CertificateRequest{ + Names: []csr.Name{{C: "US", ST: "San Francisco", L: "CA"}}, + KeyRequest: csr.NewKeyRequest(), + Hosts: []string{""}, + CN: "externalworkload", + } + + signConf := &config.Signing{ + Default: &config.SigningProfile{Expiry: 5 * 365 * 24 * time.Hour}, + Profiles: map[string]*config.SigningProfile{ + defaults.ClusterMeshExternalWorkloadSecretName: { + Expiry: 5 * 365 * 24 * time.Hour, + Usage: []string{"signing", "key encipherment", "server auth", "client auth"}, + }, + }, + } + + cert, key, err := k.certManager.GenerateCertificate(defaults.ClusterMeshExternalWorkloadSecretName, certReq, signConf) + if err != nil { + return fmt.Errorf("unable to generate certificate %s: %w", defaults.ClusterMeshExternalWorkloadSecretName, err) + } + + data := map[string][]byte{ + defaults.ClusterMeshExternalWorkloadSecretCertName: cert, + defaults.ClusterMeshExternalWorkloadSecretKeyName: key, + } + + _, err = k.client.CreateSecret(ctx, k.params.Namespace, k8s.NewSecret(defaults.ClusterMeshExternalWorkloadSecretName, k.params.Namespace, data), metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create secret %s/%s: %w", k.params.Namespace, defaults.ClusterMeshExternalWorkloadSecretName, 
err) + } + + return nil +} + func (k *K8sClusterMesh) deleteCertificates(ctx context.Context) error { k.Log("🔥 Deleting ClusterMesh certificates...") k.client.DeleteSecret(ctx, k.params.Namespace, defaults.ClusterMeshServerSecretName, metav1.DeleteOptions{}) k.client.DeleteSecret(ctx, k.params.Namespace, defaults.ClusterMeshAdminSecretName, metav1.DeleteOptions{}) k.client.DeleteSecret(ctx, k.params.Namespace, defaults.ClusterMeshClientSecretName, metav1.DeleteOptions{}) + k.client.DeleteSecret(ctx, k.params.Namespace, defaults.ClusterMeshExternalWorkloadSecretName, metav1.DeleteOptions{}) return nil } @@ -181,6 +218,9 @@ func (k *K8sClusterMesh) installCertificates(ctx context.Context) error { if err := k.createClusterMeshClientCertificate(ctx); err != nil { return err } + if err := k.createClusterMeshExternalWorkloadCertificate(ctx); err != nil { + return err + } return nil } diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 987f8ad41d..81076102fd 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -570,13 +570,15 @@ func (k *K8sClusterMesh) Enable(ctx context.Context) error { } type accessInformation struct { - ServiceIPs []string - ServicePort int - ClusterID string - ClusterName string - CA []byte - ClientCert []byte - ClientKey []byte + ServiceIPs []string + ServicePort int + ClusterID string + ClusterName string + CA []byte + ClientCert []byte + ClientKey []byte + ExternalWorkloadCert []byte + ExternalWorkloadKey []byte } func (ai *accessInformation) etcdConfiguration() string { @@ -638,13 +640,30 @@ func (k *K8sClusterMesh) extractAccessInformation(ctx context.Context, client k8 return nil, fmt.Errorf("secret %q does not contain key %q", defaults.ClusterMeshClientSecretName, defaults.ClusterMeshClientSecretCertName) } + externalWorkloadSecret, err := client.GetSecret(ctx, k.params.Namespace, defaults.ClusterMeshExternalWorkloadSecretName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("unable 
to get secret %q to access clustermesh service: %s", defaults.ClusterMeshExternalWorkloadSecretName, err) + } + + externalWorkloadKey, ok := externalWorkloadSecret.Data[defaults.ClusterMeshExternalWorkloadSecretKeyName] + if !ok { + return nil, fmt.Errorf("secret %q does not contain key %q", defaults.ClusterMeshExternalWorkloadSecretName, defaults.ClusterMeshExternalWorkloadSecretKeyName) + } + + externalWorkloadCert, ok := externalWorkloadSecret.Data[defaults.ClusterMeshExternalWorkloadSecretCertName] + if !ok { + return nil, fmt.Errorf("secret %q does not contain key %q", defaults.ClusterMeshExternalWorkloadSecretName, defaults.ClusterMeshExternalWorkloadSecretCertName) + } + ai := &accessInformation{ - ClusterID: clusterID, - ClusterName: clusterName, - CA: caCert, - ClientKey: clientKey, - ClientCert: clientCert, - ServiceIPs: []string{}, + ClusterID: clusterID, + ClusterName: clusterName, + CA: caCert, + ClientKey: clientKey, + ClientCert: clientCert, + ExternalWorkloadKey: externalWorkloadKey, + ExternalWorkloadCert: externalWorkloadCert, + ServiceIPs: []string{}, } switch { diff --git a/defaults/defaults.go b/defaults/defaults.go index 552f7091ff..b9c31af245 100644 --- a/defaults/defaults.go +++ b/defaults/defaults.go @@ -57,21 +57,24 @@ const ( RelayClientSecretCertName = "tls.crt" RelayClientSecretKeyName = "tls.key" - ClusterMeshDeploymentName = "clustermesh-apiserver" - ClusterMeshServiceAccountName = "clustermesh-apiserver" - ClusterMeshClusterRoleName = "clustermesh-apiserver" - ClusterMeshApiserverImage = "quay.io/cilium/clustermesh-apiserver:" + Version - ClusterMeshServiceName = "clustermesh-apiserver" - ClusterMeshSecretName = "cilium-clustermesh" // Secret which contains the clustermesh configuration - ClusterMeshServerSecretName = "clustermesh-apiserver-server-certs" - ClusterMeshServerSecretCertName = "tls.crt" - ClusterMeshServerSecretKeyName = "tls.key" - ClusterMeshAdminSecretName = "clustermesh-apiserver-admin-certs" - 
ClusterMeshAdminSecretCertName = "tls.crt" - ClusterMeshAdminSecretKeyName = "tls.key" - ClusterMeshClientSecretName = "clustermesh-apiserver-client-certs" - ClusterMeshClientSecretCertName = "tls.crt" - ClusterMeshClientSecretKeyName = "tls.key" + ClusterMeshDeploymentName = "clustermesh-apiserver" + ClusterMeshServiceAccountName = "clustermesh-apiserver" + ClusterMeshClusterRoleName = "clustermesh-apiserver" + ClusterMeshApiserverImage = "quay.io/cilium/clustermesh-apiserver:" + Version + ClusterMeshServiceName = "clustermesh-apiserver" + ClusterMeshSecretName = "cilium-clustermesh" // Secret which contains the clustermesh configuration + ClusterMeshServerSecretName = "clustermesh-apiserver-server-certs" + ClusterMeshServerSecretCertName = "tls.crt" + ClusterMeshServerSecretKeyName = "tls.key" + ClusterMeshAdminSecretName = "clustermesh-apiserver-admin-certs" + ClusterMeshAdminSecretCertName = "tls.crt" + ClusterMeshAdminSecretKeyName = "tls.key" + ClusterMeshClientSecretName = "clustermesh-apiserver-client-certs" + ClusterMeshClientSecretCertName = "tls.crt" + ClusterMeshClientSecretKeyName = "tls.key" + ClusterMeshExternalWorkloadSecretName = "clustermesh-apiserver-external-workload-certs" + ClusterMeshExternalWorkloadSecretCertName = "tls.crt" + ClusterMeshExternalWorkloadSecretKeyName = "tls.key" ConnectivityCheckNamespace = "cilium-test" From 17ebab00ce700efb92cccd977bea3e7b5130c6c8 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 12 Mar 2021 08:20:56 -0800 Subject: [PATCH 06/11] install: Auto-detect datapath mode for Kind Auto-detect tunnel mode for Kind and disable kube-proxy replacement to be able to access NodePort services. 
Signed-off-by: Jarno Rajahalme --- install/autodetect.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/install/autodetect.go b/install/autodetect.go index 5f9eb52e1b..ba664d8bbf 100644 --- a/install/autodetect.go +++ b/install/autodetect.go @@ -130,6 +130,11 @@ func (k *K8sInstaller) autodetectAndValidate(ctx context.Context) error { if k.params.DatapathMode == "" { switch f.Kind { + case k8s.KindKind: + k.params.DatapathMode = DatapathTunnel + k.Log("ℹ️ kube-proxy-replacement disabled") + k.params.KubeProxyReplacement = "disabled" + case k8s.KindMinikube: k.params.DatapathMode = DatapathTunnel case k8s.KindEKS: From 077d821c709a6f5959b376c665553fa9833df47c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 15 Mar 2021 19:33:13 -0700 Subject: [PATCH 07/11] clustermesh: Fix status when clustermesh is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes 'cilium clustermesh status' succeed with a warning message instead of failing when Cluster ID and/or Cluster Name has not been set when Cilium was installed. In that case warn like this: ✅ Service "clustermesh-apiserver" of type "NodePort" found ⚠️ Cluster not configured for clustermesh, use '--cluster-id' and '--cluster-name' with 'cilium install'. External workloads may still be configured. 
Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 20 ++++++++++++++------ status/k8s.go | 5 ++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 81076102fd..62973059b1 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -1070,7 +1070,7 @@ retry: } func (k *K8sClusterMesh) determineStatusConnectivity(ctx context.Context) (*ConnectivityStatus, error) { - status := &ConnectivityStatus{ + stats := &ConnectivityStatus{ GlobalServices: StatisticalStatus{Min: -1}, Connected: StatisticalStatus{Min: -1}, Errors: status.ErrorCountMapMap{}, @@ -1085,16 +1085,21 @@ func (k *K8sClusterMesh) determineStatusConnectivity(ctx context.Context) (*Conn for _, pod := range pods.Items { s, err := k.statusCollector.ClusterMeshConnectivity(ctx, pod.Name) if err != nil { + if err == status.ErrClusterMeshStatusNotAvailable { + continue + } return nil, fmt.Errorf("unable to determine status of cilium pod %q: %w", pod.Name, err) } - status.parseAgentStatus(pod.Name, s) + stats.parseAgentStatus(pod.Name, s) } - status.GlobalServices.Avg /= float64(len(pods.Items)) - status.Connected.Avg /= float64(len(pods.Items)) + if len(pods.Items) > 0 { + stats.GlobalServices.Avg /= float64(len(pods.Items)) + stats.Connected.Avg /= float64(len(pods.Items)) + } - return status, nil + return stats, nil } func (k *K8sClusterMesh) Status(ctx context.Context, log bool) (*Status, error) { @@ -1148,7 +1153,10 @@ func (k *K8sClusterMesh) Status(ctx context.Context, log bool) (*Status, error) s.Connectivity, err = k.statusConnectivity(ctx, log) if log && s.Connectivity != nil { - if s.Connectivity.NotReady > 0 { + if len(s.Connectivity.Clusters) == 0 { + k.Log("⚠️ Cluster not configured for clustermesh, use '--cluster-id' and '--cluster-name' with 'cilium install'. 
External workloads may still be configured.") + return s, nil + } else if s.Connectivity.NotReady > 0 { k.Log("⚠️ %d/%d nodes are not connected to all clusters [min:%d / avg:%.1f / max:%d]", s.Connectivity.NotReady, s.Connectivity.Total, diff --git a/status/k8s.go b/status/k8s.go index c94fa4ec43..26a85c421b 100644 --- a/status/k8s.go +++ b/status/k8s.go @@ -16,6 +16,7 @@ package status import ( "context" + "errors" "fmt" "time" @@ -103,6 +104,8 @@ retry: return s, err } +var ErrClusterMeshStatusNotAvailable = errors.New("ClusterMesh status is not available") + func (k *K8sStatusCollector) clusterMeshConnectivity(ctx context.Context, ciliumPod string) (*ClusterMeshAgentConnectivityStatus, error) { c := &ClusterMeshAgentConnectivityStatus{ Clusters: map[string]*models.RemoteCluster{}, @@ -114,7 +117,7 @@ func (k *K8sStatusCollector) clusterMeshConnectivity(ctx context.Context, cilium } if status.ClusterMesh == nil { - return nil, fmt.Errorf("ClusterMesh status is not available") + return nil, ErrClusterMeshStatusNotAvailable } c.GlobalServices = status.ClusterMesh.NumGlobalServices From e5d594ec195003e4345f6e5b83c48f7cbfc85c44 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 12 Mar 2021 08:21:07 -0800 Subject: [PATCH 08/11] clustermesh: Add subcommand 'external-workload' (alias 'vm') Add 'external-workload' (alias 'vm') subcommands to 'clustermesh': 'cilium clustermesh external-workload status' - Show the status of external workloads 'cilium clustermesh external-workload create ' - Create new Cilium External Workload resource to allow a VM to join A new CEW resource with name is created with a "default" namespace label. 
Options: '--namespace string' (alias '-n') Specify other than "default" as the namespace label '--labels' Pass a comma separated list of other labels for the identity of the external workload '--ipv4-alloc-cidr string' IPv4 allocation CIDR to be used instead of the default picked by the VM (e.g., 10.15.0.0/30) '--ipv6-alloc-cidr string' IPv6 allocation CIDR to be used instead of the default picked by the VM (e.g., f00d::a0f:0:0:0/126) 'cilium clustermesh external-workload delete ' - Delete Cilium External Workload resources The named CEW resources will be deleted. External Workloads that have already registered may continue to communicate with the cluster, but may not register again. Options: '--all' Delete all CEW resources if none are named on the command line. 'cilium clustermesh external-workload install ' - Create an installation script to be used in external workloads to install or uninstall Cilium Write an installation script to the named file. Note that the script inlines the TLS credentials for external workload registration as well as the access details to your k8s cluster. The file needs to be copied to the external workload (such as a VM) and executed there to install Cilium as a Docker container and connect to your k8s cluster. 'uninstall' parameter to the script will cause the script to uninstall Cilium from the external workload. All these commands require clustermesh to be enabled (via 'cilium clustermesh enable'). 
Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 357 ++++++++++++++++++++++++++++++++ internal/cli/cmd/clustermesh.go | 163 +++++++++++++++ internal/k8s/client.go | 16 ++ 3 files changed, 536 insertions(+) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 62973059b1..270395e6cf 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -15,13 +15,16 @@ package clustermesh import ( + "bytes" "context" "encoding/base64" + "errors" "fmt" "io" "net" "strconv" "strings" + "text/tabwriter" "time" "github.com/cilium/cilium-cli/defaults" @@ -31,6 +34,7 @@ import ( "github.com/cilium/cilium-cli/status" "github.com/cilium/cilium/api/v1/models" + ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -420,6 +424,10 @@ type k8sClusterMeshImplementation interface { AutodetectFlavor(ctx context.Context) (k8s.Flavor, error) CiliumStatus(ctx context.Context, namespace, pod string) (*models.StatusResponse, error) ClusterName() string + ListCiliumExternalWorkloads(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumExternalWorkloadList, error) + GetCiliumExternalWorkload(ctx context.Context, name string, opts metav1.GetOptions) (*ciliumv2.CiliumExternalWorkload, error) + CreateCiliumExternalWorkload(ctx context.Context, cew *ciliumv2.CiliumExternalWorkload, opts metav1.CreateOptions) (*ciliumv2.CiliumExternalWorkload, error) + DeleteCiliumExternalWorkload(ctx context.Context, name string, opts metav1.DeleteOptions) error } type K8sClusterMesh struct { @@ -444,6 +452,10 @@ type Parameters struct { ApiserverImage string CreateCA bool Writer io.Writer + Labels map[string]string + IPv4AllocCIDR string + IPv6AllocCIDR string + All bool } func (p Parameters) waitTimeout() time.Duration { @@ -1202,3 +1214,348 @@ func (k *K8sClusterMesh) Status(ctx context.Context, log bool) (*Status, error) return s, nil } + +func (k 
*K8sClusterMesh) CreateExternalWorkload(ctx context.Context, names []string) error { + count := 0 + for _, name := range names { + cew := &ciliumv2.CiliumExternalWorkload{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: k.params.Labels, + Annotations: map[string]string{}, + }, + Spec: ciliumv2.CiliumExternalWorkloadSpec{ + IPv4AllocCIDR: k.params.IPv4AllocCIDR, + IPv6AllocCIDR: k.params.IPv6AllocCIDR, + }, + } + + _, err := k.client.CreateCiliumExternalWorkload(ctx, cew, metav1.CreateOptions{}) + if err != nil { + return err + } + count++ + } + k.Log("✅ Added %d external workload resources.", count) + return nil +} + +func (k *K8sClusterMesh) DeleteExternalWorkload(ctx context.Context, names []string) error { + var errs []string + count := 0 + + if len(names) == 0 && k.params.All { + cewList, err := k.client.ListCiliumExternalWorkloads(ctx, metav1.ListOptions{}) + if err != nil { + return err + } + for _, cew := range cewList.Items { + names = append(names, cew.Name) + } + } + for _, name := range names { + err := k.client.DeleteCiliumExternalWorkload(ctx, name, metav1.DeleteOptions{}) + if err != nil { + errs = append(errs, err.Error()) + } else { + count++ + } + } + if count > 0 { + k.Log("✅ Removed %d external workload resources.", count) + } + if len(errs) > 0 { + return errors.New(strings.Join(errs, ", ")) + } + return nil +} + +var installScriptFmt = `#!/bin/bash +CILIUM_IMAGE=${1:-%[1]s} +CLUSTER_ADDR=${2:-%[2]s} + +set -e +shopt -s extglob + +if [ "$1" = "uninstall" ] ; then + if [ -n "$(sudo docker ps -a -q -f name=cilium)" ]; then + echo "Shutting down running Cilium agent" + sudo docker rm -f cilium || true + fi + if [ -f /usr/bin/cilium ] ; then + echo "Removing /usr/bin/cilium" + sudo rm /usr/bin/cilium + fi + pushd /etc + if [ -f resolv.conf.orig ] ; then + echo "Restoring /etc/resolv.conf" + sudo mv -f resolv.conf.orig resolv.conf + elif [ -f resolv.conf.link ] && [ -f $(cat resolv.conf.link) ] ; then + echo "Restoring systemd resolved 
config..." + if [ -f /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf ] ; then + sudo rm /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf + fi + sudo systemctl daemon-reload + sudo systemctl reenable systemd-resolved.service + sudo service systemd-resolved restart + sudo ln -fs $(cat resolv.conf.link) resolv.conf + sudo rm resolv.conf.link + fi + popd + exit 0 +fi + +if [ -z "$CLUSTER_ADDR" ] ; then + echo "CLUSTER_ADDR must be defined to the IP:PORT at which the clustermesh-apiserver is reachable." + exit 1 +fi + +port='@(6553[0-5]|655[0-2][0-9]|65[0-4][0-9][0-9]|6[0-4][0-9][0-9][0-9]|[1-5][0-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9]|[1-9])' +byte='@(25[0-5]|2[0-4][0-9]|[1][0-9][0-9]|[1-9][0-9]|[0-9])' +ipv4="$byte\.$byte\.$byte\.$byte" + +# Default port is for a HostPort service +case "$CLUSTER_ADDR" in + \[+([0-9a-fA-F:])\]:$port) + CLUSTER_PORT=${CLUSTER_ADDR##\[*\]:} + CLUSTER_IP=${CLUSTER_ADDR#\[} + CLUSTER_IP=${CLUSTER_IP%%\]:*} + ;; + [^[]$ipv4:$port) + CLUSTER_PORT=${CLUSTER_ADDR##*:} + CLUSTER_IP=${CLUSTER_ADDR%%:*} + ;; + *:*) + echo "Malformed CLUSTER_ADDR: $CLUSTER_ADDR" + exit 1 + ;; + *) + CLUSTER_PORT=2379 + CLUSTER_IP=$CLUSTER_ADDR + ;; +esac + +sudo mkdir -p /var/lib/cilium/etcd +sudo tee /var/lib/cilium/etcd/ca.crt </dev/null +%[3]sEOF +sudo tee /var/lib/cilium/etcd/tls.crt </dev/null +%[4]sEOF +sudo tee /var/lib/cilium/etcd/tls.key </dev/null +%[5]sEOF +sudo tee /var/lib/cilium/etcd/config.yaml </dev/null +--- +trusted-ca-file: /var/lib/cilium/etcd/ca.crt +cert-file: /var/lib/cilium/etcd/tls.crt +key-file: /var/lib/cilium/etcd/tls.key +endpoints: +- https://clustermesh-apiserver.cilium.io:$CLUSTER_PORT +EOF + +CILIUM_OPTS=" --join-cluster --enable-host-reachable-services --enable-endpoint-health-checking=false" +CILIUM_OPTS+=" --kvstore etcd --kvstore-opt etcd.config=/var/lib/cilium/etcd/config.yaml" +if [ -n "$HOST_IP" ] ; then + CILIUM_OPTS+=" --ipv4-node $HOST_IP" +fi +if [ -n "$DEBUG" ] ; then + 
CILIUM_OPTS+=" --debug --restore=false" +fi + +DOCKER_OPTS=" -d --log-driver syslog --restart always" +DOCKER_OPTS+=" --privileged --network host --cap-add NET_ADMIN --cap-add SYS_MODULE" +DOCKER_OPTS+=" --volume /var/lib/cilium/etcd:/var/lib/cilium/etcd" +DOCKER_OPTS+=" --volume /var/run/cilium:/var/run/cilium" +DOCKER_OPTS+=" --volume /boot:/boot" +DOCKER_OPTS+=" --volume /lib/modules:/lib/modules" +DOCKER_OPTS+=" --volume /sys/fs/bpf:/sys/fs/bpf" +DOCKER_OPTS+=" --volume /run/xtables.lock:/run/xtables.lock" +DOCKER_OPTS+=" --add-host clustermesh-apiserver.cilium.io:$CLUSTER_IP" + +if [ -n "$(sudo docker ps -a -q -f name=cilium)" ]; then + echo "Shutting down running Cilium agent" + sudo docker rm -f cilium || true +fi + +echo "Launching Cilium agent $CILIUM_IMAGE..." +sudo docker run --name cilium $DOCKER_OPTS $CILIUM_IMAGE cilium-agent $CILIUM_OPTS + +# Copy Cilium CLI +sudo docker cp cilium:/usr/bin/cilium /usr/bin/cilium + +# Wait for cilium agent to become available +cilium_started=false +for ((i = 0 ; i < 24; i++)); do + if cilium status --brief > /dev/null 2>&1; then + cilium_started=true + break + fi + sleep 5s + echo "Waiting for Cilium daemon to come up..." +done + +if [ "$cilium_started" = true ] ; then + echo 'Cilium successfully started!' +else + >&2 echo 'Timeout waiting for Cilium to start.' + exit 1 +fi + +# Wait for kube-dns service to become available +kubedns="" +for ((i = 0 ; i < 24; i++)); do + kubedns=$(cilium service list get -o jsonpath='{[?(@.spec.frontend-address.port==53)].spec.frontend-address.ip}') + if [ -n "$kubedns" ] ; then + break + fi + sleep 5s + echo "Waiting for kube-dns service to come available..." 
+done + +if [ -n "$kubedns" ] ; then + if grep "nameserver $kubedns" /etc/resolv.conf ; then + echo "kube-dns IP $kubedns already in /etc/resolv.conf" + else + linkval=$(readlink /etc/resolv.conf) && echo "$linkval" | sudo tee /etc/resolv.conf.link || true + if [[ "$linkval" == *"/systemd/"* ]] ; then + echo "updating systemd resolved with kube-dns IP $kubedns" + sudo mkdir -p /usr/lib/systemd/resolved.conf.d + sudo tee /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf </dev/null +# This file is installed by Cilium to use kube dns server from a non-k8s node. +[Resolve] +DNS=$kubedns +EOF + sudo systemctl daemon-reload + sudo systemctl reenable systemd-resolved.service + sudo service systemd-resolved restart + sudo ln -fs /run/systemd/resolve/resolv.conf /etc/resolv.conf + else + echo "Adding kube-dns IP $kubedns to /etc/resolv.conf" + sudo cp /etc/resolv.conf /etc/resolv.conf.orig + resolvconf="nameserver $kubedns\n$(cat /etc/resolv.conf)\n" + printf "$resolvconf" | sudo tee /etc/resolv.conf + fi + fi +else + >&2 echo "kube-dns not found." 
+ exit 1 +fi +` + +func (k *K8sClusterMesh) WriteExternalWorkloadInstallScript(ctx context.Context, writer io.Writer) error { + daemonSet, err := k.client.GetDaemonSet(ctx, k.params.Namespace, defaults.AgentDaemonSetName, metav1.GetOptions{}) + if err != nil { + return err + } + if daemonSet == nil { + return fmt.Errorf("DaemomSet %s is not available", defaults.AgentDaemonSetName) + } + k.Log("✅ Using image from Cilium DaemonSet: %s", daemonSet.Spec.Template.Spec.Containers[0].Image) + + ai, err := k.statusAccessInformation(ctx, false) + if err != nil { + return err + } + clusterAddr := fmt.Sprintf("%s:%d", ai.ServiceIPs[0], ai.ServicePort) + k.Log("✅ Using clustermesh-apiserver service address: %s", clusterAddr) + + fmt.Fprintf(writer, installScriptFmt, + daemonSet.Spec.Template.Spec.Containers[0].Image, clusterAddr, + string(ai.CA), string(ai.ExternalWorkloadCert), string(ai.ExternalWorkloadKey)) + return nil +} + +func formatCEW(cew ciliumv2.CiliumExternalWorkload) string { + var items []string + ip := cew.Status.IP + if ip == "" { + ip = "N/A" + } + items = append(items, fmt.Sprintf("IP: %s", ip)) + var labels []string + for key, value := range cew.Labels { + labels = append(labels, fmt.Sprintf("%s=%s", key, value)) + } + items = append(items, fmt.Sprintf("Labels: %s", strings.Join(labels, ","))) + return strings.Join(items, ", ") +} + +func (k *K8sClusterMesh) ExternalWorkloadStatus(ctx context.Context, names []string) error { + log := true + + collector, err := status.NewK8sStatusCollector(ctx, k.client, status.K8sStatusParameters{ + Namespace: k.params.Namespace, + }) + if err != nil { + return fmt.Errorf("unable to create client to collect status: %w", err) + } + + k.statusCollector = collector + + ctx, cancel := context.WithTimeout(ctx, k.params.waitTimeout()) + defer cancel() + + ai, err := k.statusAccessInformation(ctx, log) + if err != nil { + return err + } + + if log { + k.Log("✅ Cluster access information is available:") + for _, ip := range 
ai.ServiceIPs { + k.Log(" - %s:%d", ip, ai.ServicePort) + } + } + + svc, err := k.statusService(ctx, log) + if err != nil { + return err + } + + if log { + k.Log("✅ Service %q of type %q found", defaults.ClusterMeshServiceName, svc.Spec.Type) + } + + if svc.Spec.Type == corev1.ServiceTypeLoadBalancer { + if len(ai.ServiceIPs) == 0 { + if log { + k.Log("❌ Service is of type LoadBalancer but has no IPs assigned") + } + return fmt.Errorf("no IP available to reach cluster") + } + } + var cews []ciliumv2.CiliumExternalWorkload + + if len(names) == 0 { + cewList, err := k.client.ListCiliumExternalWorkloads(ctx, metav1.ListOptions{}) + if err != nil { + return err + } + cews = cewList.Items + if log { + if len(cews) == 0 { + k.Log("⚠️ No external workloads found.") + return nil + } + } + } else { + for _, name := range names { + cew, err := k.client.GetCiliumExternalWorkload(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } + cews = append(cews, *cew) + } + } + + var buf bytes.Buffer + w := tabwriter.NewWriter(&buf, 0, 0, 4, ' ', 0) + + header := "External Workloads" + for _, cew := range cews { + fmt.Fprintf(w, "%s\t%s\t%s\n", header, cew.Name, formatCEW(cew)) + header = "" + } + + w.Flush() + fmt.Println(buf.String()) + return err +} diff --git a/internal/cli/cmd/clustermesh.go b/internal/cli/cmd/clustermesh.go index 2006445e48..838eb3550c 100644 --- a/internal/cli/cmd/clustermesh.go +++ b/internal/cli/cmd/clustermesh.go @@ -16,9 +16,13 @@ package cmd import ( "context" + "io" "os" + "strings" "time" + k8sConst "github.com/cilium/cilium/pkg/k8s/apis/cilium.io" + "github.com/cilium/cilium-cli/clustermesh" "github.com/spf13/cobra" @@ -37,6 +41,7 @@ func newCmdClusterMesh() *cobra.Command { newCmdClusterMeshConnect(), newCmdClusterMeshDisconnect(), newCmdClusterMeshStatus(), + newCmdClusterMeshExternalWorkload(), ) return cmd @@ -171,3 +176,161 @@ func newCmdClusterMeshStatus() *cobra.Command { return cmd } + +func newCmdClusterMeshExternalWorkload() 
*cobra.Command { + cmd := &cobra.Command{ + Use: "external-workload", + Aliases: []string{"vm"}, + Short: "External Workload Management", + Long: ``, + } + + cmd.AddCommand( + newCmdExternalWorkloadCreate(), + newCmdExternalWorkloadDelete(), + newCmdExternalWorkloadInstall(), + newCmdExternalWorkloadStatus(), + ) + + return cmd +} + +func parseLabels(labels string) map[string]string { + res := make(map[string]string) + for _, str := range strings.Split(labels, ",") { + str = strings.TrimSpace(str) + i := strings.IndexByte(str, '=') + if i < 0 { + res[str] = "" + } else { + res[str[:i]] = str[i+1:] + } + } + return res +} + +func newCmdExternalWorkloadCreate() *cobra.Command { + var params = clustermesh.Parameters{ + Writer: os.Stderr, + } + var labels string + var namespace string + + cmd := &cobra.Command{ + Use: "create ", + Short: "Create new external workloads", + Long: ``, + RunE: func(cmd *cobra.Command, args []string) error { + if labels != "" { + params.Labels = parseLabels(labels) + } + if namespace != "" { + if params.Labels == nil { + params.Labels = make(map[string]string) + } + params.Labels[k8sConst.PodNamespaceLabel] = namespace + } + cm := clustermesh.NewK8sClusterMesh(k8sClient, params) + if err := cm.CreateExternalWorkload(context.Background(), args); err != nil { + fatalf("Unable to add external workloads: %s", err) + } + return nil + }, + } + + cmd.Flags().StringVar(&contextName, "context", "", "Kubernetes configuration context") + cmd.Flags().StringVarP(&namespace, "namespace", "n", "default", "Namespace for external workload labels") + cmd.Flags().StringVar(&labels, "labels", "", "Comma separated list of labels for the external workload identity") + cmd.Flags().StringVar(¶ms.IPv4AllocCIDR, "ipv4-alloc-cidr", "", "Unique IPv4 CIDR allocated for the external workload") + cmd.Flags().StringVar(¶ms.IPv6AllocCIDR, "ipv6-alloc-cidr", "", "Unique IPv6 CIDR allocated for the external workload") + + return cmd +} + +func newCmdExternalWorkloadDelete() 
*cobra.Command { + var params = clustermesh.Parameters{ + Writer: os.Stderr, + } + + cmd := &cobra.Command{ + Use: "delete ", + Short: "Delete named external workloads", + Long: ``, + RunE: func(cmd *cobra.Command, args []string) error { + cm := clustermesh.NewK8sClusterMesh(k8sClient, params) + if err := cm.DeleteExternalWorkload(context.Background(), args); err != nil { + fatalf("Unable to remove external workloads: %s", err) + } + return nil + }, + } + + cmd.Flags().StringVar(&contextName, "context", "", "Kubernetes configuration context") + cmd.Flags().BoolVar(¶ms.All, "all", false, "Delete all resources if none are named") + + return cmd +} + +func newCmdExternalWorkloadInstall() *cobra.Command { + var params = clustermesh.Parameters{ + Writer: os.Stderr, + } + + cmd := &cobra.Command{ + Use: "install [output-file]", + Short: "Creates a shell script to install external workloads", + Long: ``, + RunE: func(cmd *cobra.Command, args []string) error { + cm := clustermesh.NewK8sClusterMesh(k8sClient, params) + var writer io.Writer + if len(args) > 0 { + file, err := os.Create(args[0]) + if err != nil { + fatalf("Unable to open file %s: %s", args[0], err) + } + defer func() { + file.Chmod(0775) + file.Close() + }() + writer = file + } else { + writer = os.Stdout + } + if err := cm.WriteExternalWorkloadInstallScript(context.Background(), writer); err != nil { + fatalf("Unable to create external workload install script: %s", err) + } + return nil + }, + } + + cmd.Flags().StringVarP(¶ms.Namespace, "namespace", "n", "kube-system", "Namespace Cilium is running in") + cmd.Flags().StringVar(&contextName, "context", "", "Kubernetes configuration context") + cmd.Flags().BoolVar(¶ms.Wait, "wait", false, "Wait until status is successful") + cmd.Flags().DurationVar(¶ms.WaitDuration, "wait-duration", 15*time.Minute, "Maximum time to wait") + + return cmd +} + +func newCmdExternalWorkloadStatus() *cobra.Command { + var params = clustermesh.Parameters{ + Writer: os.Stdout, + } + 
cmd := &cobra.Command{ + Use: "status [name...]", + Short: "Show status of external workloads", + Long: ``, + RunE: func(cmd *cobra.Command, args []string) error { + cm := clustermesh.NewK8sClusterMesh(k8sClient, params) + if err := cm.ExternalWorkloadStatus(context.Background(), args); err != nil { + fatalf("Unable to determine status: %s", err) + } + return nil + }, + } + + cmd.Flags().StringVarP(¶ms.Namespace, "namespace", "n", "kube-system", "Namespace Cilium is running in") + cmd.Flags().StringVar(&contextName, "context", "", "Kubernetes configuration context") + + return cmd +} diff --git a/internal/k8s/client.go b/internal/k8s/client.go index 43f3ef85f3..783273d1e6 100644 --- a/internal/k8s/client.go +++ b/internal/k8s/client.go @@ -403,3 +403,19 @@ func (c *Client) ListCiliumEndpoints(ctx context.Context, namespace string, opti func (c *Client) ListNodes(ctx context.Context, options metav1.ListOptions) (*corev1.NodeList, error) { return c.Clientset.CoreV1().Nodes().List(ctx, options) } + +func (c *Client) ListCiliumExternalWorkloads(ctx context.Context, opts metav1.ListOptions) (*ciliumv2.CiliumExternalWorkloadList, error) { + return c.CiliumClientset.CiliumV2().CiliumExternalWorkloads().List(ctx, opts) +} + +func (c *Client) GetCiliumExternalWorkload(ctx context.Context, name string, opts metav1.GetOptions) (*ciliumv2.CiliumExternalWorkload, error) { + return c.CiliumClientset.CiliumV2().CiliumExternalWorkloads().Get(ctx, name, opts) +} + +func (c *Client) CreateCiliumExternalWorkload(ctx context.Context, cew *ciliumv2.CiliumExternalWorkload, opts metav1.CreateOptions) (*ciliumv2.CiliumExternalWorkload, error) { + return c.CiliumClientset.CiliumV2().CiliumExternalWorkloads().Create(ctx, cew, opts) +} + +func (c *Client) DeleteCiliumExternalWorkload(ctx context.Context, name string, opts metav1.DeleteOptions) error { + return c.CiliumClientset.CiliumV2().CiliumExternalWorkloads().Delete(ctx, name, opts) +} From b3f7f03a3d73d606c9b3bd30a5d4020abd0dbd20 Mon 
Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 19 Mar 2021 10:42:48 -0700 Subject: [PATCH 09/11] clustermesh: Fail external workload install generation if Cilium has tunneling disabled As of now external workload installs rely on vxlan tunneling. Fail the install script generation if Cilium has tunneling disabled or not set to vxlan. In future consider testing with geneve and non-tunneled datapaths. Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 270395e6cf..7c8d4c3cc7 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -46,6 +46,7 @@ import ( const ( configNameClusterID = "cluster-id" configNameClusterName = "cluster-name" + configNameTunnel = "tunnel" caSuffix = ".etcd-client-ca.crt" keySuffix = ".etcd-client.key" @@ -591,6 +592,7 @@ type accessInformation struct { ClientKey []byte ExternalWorkloadCert []byte ExternalWorkloadKey []byte + Tunnel string } func (ai *accessInformation) etcdConfiguration() string { @@ -676,6 +678,7 @@ func (k *K8sClusterMesh) extractAccessInformation(ctx context.Context, client k8 ExternalWorkloadKey: externalWorkloadKey, ExternalWorkloadCert: externalWorkloadCert, ServiceIPs: []string{}, + Tunnel: cm.Data[configNameTunnel], } switch { @@ -1454,6 +1457,10 @@ func (k *K8sClusterMesh) WriteExternalWorkloadInstallScript(ctx context.Context, if err != nil { return err } + if ai.Tunnel != "" && ai.Tunnel != "vxlan" { + return fmt.Errorf("Cilium datapath not using vxlan, please install Cilium with '--config tunnel=vxlan'") + } + clusterAddr := fmt.Sprintf("%s:%d", ai.ServiceIPs[0], ai.ServicePort) k.Log("✅ Using clustermesh-apiserver service address: %s", clusterAddr) From baaa86eb6b88b4030a8936d603b434343cb45700 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 19 Mar 2021 14:39:05 -0700 Subject: [PATCH 10/11] clustermesh: Allow VM install script to run as root, do not 
assume syslog Define $SUDO as an empty string if running as root. Use 'local' docker log driver to not depend on syslog. Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 64 +++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 7c8d4c3cc7..633fa571bc 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -1280,29 +1280,35 @@ CLUSTER_ADDR=${2:-%[2]s} set -e shopt -s extglob +# Run without sudo if not available (e.g., running as root) +SUDO= +if [ ! "$(whoami)" = "root" ] ; then + SUDO=sudo +fi + if [ "$1" = "uninstall" ] ; then - if [ -n "$(sudo docker ps -a -q -f name=cilium)" ]; then + if [ -n "$(${SUDO} docker ps -a -q -f name=cilium)" ]; then echo "Shutting down running Cilium agent" - sudo docker rm -f cilium || true + ${SUDO} docker rm -f cilium || true fi if [ -f /usr/bin/cilium ] ; then echo "Removing /usr/bin/cilium" - sudo rm /usr/bin/cilium + ${SUDO} rm /usr/bin/cilium fi pushd /etc if [ -f resolv.conf.orig ] ; then echo "Restoring /etc/resolv.conf" - sudo mv -f resolv.conf.orig resolv.conf + ${SUDO} mv -f resolv.conf.orig resolv.conf elif [ -f resolv.conf.link ] && [ -f $(cat resolv.conf.link) ] ; then echo "Restoring systemd resolved config..." 
if [ -f /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf ] ; then - sudo rm /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf + ${SUDO} rm /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf fi - sudo systemctl daemon-reload - sudo systemctl reenable systemd-resolved.service - sudo service systemd-resolved restart - sudo ln -fs $(cat resolv.conf.link) resolv.conf - sudo rm resolv.conf.link + ${SUDO} systemctl daemon-reload + ${SUDO} systemctl reenable systemd-resolved.service + ${SUDO} service systemd-resolved restart + ${SUDO} ln -fs $(cat resolv.conf.link) resolv.conf + ${SUDO} rm resolv.conf.link fi popd exit 0 @@ -1338,14 +1344,14 @@ case "$CLUSTER_ADDR" in ;; esac -sudo mkdir -p /var/lib/cilium/etcd -sudo tee /var/lib/cilium/etcd/ca.crt </dev/null +${SUDO} mkdir -p /var/lib/cilium/etcd +${SUDO} tee /var/lib/cilium/etcd/ca.crt </dev/null %[3]sEOF -sudo tee /var/lib/cilium/etcd/tls.crt </dev/null +${SUDO} tee /var/lib/cilium/etcd/tls.crt </dev/null %[4]sEOF -sudo tee /var/lib/cilium/etcd/tls.key </dev/null +${SUDO} tee /var/lib/cilium/etcd/tls.key </dev/null %[5]sEOF -sudo tee /var/lib/cilium/etcd/config.yaml </dev/null +${SUDO} tee /var/lib/cilium/etcd/config.yaml </dev/null --- trusted-ca-file: /var/lib/cilium/etcd/ca.crt cert-file: /var/lib/cilium/etcd/tls.crt @@ -1363,7 +1369,7 @@ if [ -n "$DEBUG" ] ; then CILIUM_OPTS+=" --debug --restore=false" fi -DOCKER_OPTS=" -d --log-driver syslog --restart always" +DOCKER_OPTS=" -d --log-driver local --restart always" DOCKER_OPTS+=" --privileged --network host --cap-add NET_ADMIN --cap-add SYS_MODULE" DOCKER_OPTS+=" --volume /var/lib/cilium/etcd:/var/lib/cilium/etcd" DOCKER_OPTS+=" --volume /var/run/cilium:/var/run/cilium" @@ -1373,16 +1379,16 @@ DOCKER_OPTS+=" --volume /sys/fs/bpf:/sys/fs/bpf" DOCKER_OPTS+=" --volume /run/xtables.lock:/run/xtables.lock" DOCKER_OPTS+=" --add-host clustermesh-apiserver.cilium.io:$CLUSTER_IP" -if [ -n "$(sudo docker ps -a -q -f name=cilium)" ]; then +if [ -n "$(${SUDO} docker 
ps -a -q -f name=cilium)" ]; then echo "Shutting down running Cilium agent" - sudo docker rm -f cilium || true + ${SUDO} docker rm -f cilium || true fi echo "Launching Cilium agent $CILIUM_IMAGE..." -sudo docker run --name cilium $DOCKER_OPTS $CILIUM_IMAGE cilium-agent $CILIUM_OPTS +${SUDO} docker run --name cilium $DOCKER_OPTS $CILIUM_IMAGE cilium-agent $CILIUM_OPTS # Copy Cilium CLI -sudo docker cp cilium:/usr/bin/cilium /usr/bin/cilium +${SUDO} docker cp cilium:/usr/bin/cilium /usr/bin/cilium # Wait for cilium agent to become available cilium_started=false @@ -1417,24 +1423,24 @@ if [ -n "$kubedns" ] ; then if grep "nameserver $kubedns" /etc/resolv.conf ; then echo "kube-dns IP $kubedns already in /etc/resolv.conf" else - linkval=$(readlink /etc/resolv.conf) && echo "$linkval" | sudo tee /etc/resolv.conf.link || true + linkval=$(readlink /etc/resolv.conf) && echo "$linkval" | ${SUDO} tee /etc/resolv.conf.link || true if [[ "$linkval" == *"/systemd/"* ]] ; then echo "updating systemd resolved with kube-dns IP $kubedns" - sudo mkdir -p /usr/lib/systemd/resolved.conf.d - sudo tee /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf </dev/null + ${SUDO} mkdir -p /usr/lib/systemd/resolved.conf.d + ${SUDO} tee /usr/lib/systemd/resolved.conf.d/cilium-kube-dns.conf </dev/null # This file is installed by Cilium to use kube dns server from a non-k8s node. 
[Resolve] DNS=$kubedns EOF - sudo systemctl daemon-reload - sudo systemctl reenable systemd-resolved.service - sudo service systemd-resolved restart - sudo ln -fs /run/systemd/resolve/resolv.conf /etc/resolv.conf + ${SUDO} systemctl daemon-reload + ${SUDO} systemctl reenable systemd-resolved.service + ${SUDO} service systemd-resolved restart + ${SUDO} ln -fs /run/systemd/resolve/resolv.conf /etc/resolv.conf else echo "Adding kube-dns IP $kubedns to /etc/resolv.conf" - sudo cp /etc/resolv.conf /etc/resolv.conf.orig + ${SUDO} cp /etc/resolv.conf /etc/resolv.conf.orig resolvconf="nameserver $kubedns\n$(cat /etc/resolv.conf)\n" - printf "$resolvconf" | sudo tee /etc/resolv.conf + printf "$resolvconf" | ${SUDO} tee /etc/resolv.conf fi fi else From 98283cd7a00fc61cc9290fa8019c5a5ab183b200 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 22 Mar 2021 08:23:48 -0700 Subject: [PATCH 11/11] clustermesh: Fix error capitalization and typos 'make staticcheck' does not allow error messages starting with a capital letter, so do not use 'Cilium' to start an error message. Correctly spell 'DaemonSet' in error messages. 
Signed-off-by: Jarno Rajahalme --- clustermesh/clustermesh.go | 4 ++-- status/k8s.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clustermesh/clustermesh.go b/clustermesh/clustermesh.go index 633fa571bc..dd98ebfb57 100644 --- a/clustermesh/clustermesh.go +++ b/clustermesh/clustermesh.go @@ -1455,7 +1455,7 @@ func (k *K8sClusterMesh) WriteExternalWorkloadInstallScript(ctx context.Context, return err } if daemonSet == nil { - return fmt.Errorf("DaemomSet %s is not available", defaults.AgentDaemonSetName) + return fmt.Errorf("DaemonSet %s is not available", defaults.AgentDaemonSetName) } k.Log("✅ Using image from Cilium DaemonSet: %s", daemonSet.Spec.Template.Spec.Containers[0].Image) @@ -1464,7 +1464,7 @@ func (k *K8sClusterMesh) WriteExternalWorkloadInstallScript(ctx context.Context, return err } if ai.Tunnel != "" && ai.Tunnel != "vxlan" { - return fmt.Errorf("Cilium datapath not using vxlan, please install Cilium with '--config tunnel=vxlan'") + return fmt.Errorf("datapath not using vxlan, please install Cilium with '--config tunnel=vxlan'") } clusterAddr := fmt.Sprintf("%s:%d", ai.ServiceIPs[0], ai.ServicePort) diff --git a/status/k8s.go b/status/k8s.go index 26a85c421b..4847b37211 100644 --- a/status/k8s.go +++ b/status/k8s.go @@ -164,7 +164,7 @@ func (k *K8sStatusCollector) daemonSetStatus(ctx context.Context, status *Status } if daemonSet == nil { - return fmt.Errorf("DaemomSet %s is not available", name) + return fmt.Errorf("DaemonSet %s is not available", name) } stateCount := PodStateCount{Type: "DaemonSet"}