Improve Cilium integration with managed Kubernetes providers #16631

Merged · 5 commits · Jul 1, 2021
37 changes: 35 additions & 2 deletions .github/workflows/conformance-aks.yaml
@@ -143,21 +143,43 @@ jobs:
az account show

- name: Create AKS cluster
id: cluster-creation
run: |
# Create group
az group create \
--name ${{ env.name }} \
--location ${{ env.location }} \
--tags usage=${{ github.repository_owner }}-${{ github.event.repository.name }} owner=${{ steps.vars.outputs.owner }}

# Create cluster with a node count of 1 (we will remove this node pool
# afterwards)
# Details: Basic load balancers are not supported with multiple node
# pools. Create a cluster with standard load balancer selected to use
# multiple node pools, learn more at https://aka.ms/aks/nodepools.
az aks create \
--resource-group ${{ env.name }} \
--name ${{ env.name }} \
--location ${{ env.location }} \
--network-plugin azure \
--node-count 1 \
--load-balancer-sku standard \
--generate-ssh-keys

# Get the name of the node pool that we will delete afterwards
echo ::set-output name=nodepool_to_delete::$(az aks nodepool list --cluster-name ${{ env.name }} -g ${{ env.name }} -o json | jq -r '.[0].name')

# Create a node pool with the taint 'node.cilium.io/agent-not-ready=true:NoSchedule'
# and with 'mode=system' as it is the same mode used for the nodepool
# created with the cluster.
az aks nodepool add \
--name nodepool2 \
--cluster-name ${{ env.name }} \
--resource-group ${{ env.name }} \
--node-count 2 \
--node-vm-size Standard_B2s \
--node-osdisk-size 30 \
--mode system \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule

- name: Get cluster credentials
run: |
@@ -176,6 +198,17 @@ jobs:
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }}

- name: Delete the first node pool
run: |
# We can only delete the first node pool after Cilium is installed
# because some pods have Pod Disruption Budgets set. If we try to
# delete the first node pool before the second node pool is ready,
# the deletion fails because evicting those pods would violate their
# Pod Disruption Budgets.
az aks nodepool delete --name ${{ steps.cluster-creation.outputs.nodepool_to_delete }} \
--cluster-name ${{ env.name }} \
--resource-group ${{ env.name }}

- name: Enable Relay
run: |
cilium hubble enable ${{ steps.vars.outputs.hubble_enable_defaults }}
44 changes: 28 additions & 16 deletions .github/workflows/conformance-aws-cni.yaml
@@ -138,10 +138,34 @@ jobs:

- name: Create EKS cluster without nodegroup
run: |
eksctl create cluster \
--name ${{ env.clusterName }} \
--tags "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--without-nodegroup
cat <<EOF > eks-config.yaml
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

metadata:
  name: ${{ env.clusterName }}
  region: ${{ env.region }}
  tags:
    usage: "${{ github.repository_owner }}-${{ github.event.repository.name }}"
    owner: "${{ steps.vars.outputs.owner }}"

managedNodeGroups:
- name: ng-1
  instanceTypes:
    - t3.medium
    - t3a.medium
  desiredCapacity: 2
  spot: true
  privateNetworking: true
  volumeType: "gp3"
  volumeSize: 10
  taints:
    - key: "node.cilium.io/agent-not-ready"
      value: "true"
      effect: "NoSchedule"
EOF

eksctl create cluster -f ./eks-config.yaml

- name: Update AWS VPC CNI plugin
run: |
@@ -173,18 +197,6 @@ jobs:
--set bpf.monitorAggregation=none \
--set bandwidthManager=false

- name: Add managed spot nodegroup
run: |
eksctl create nodegroup \
--cluster ${{ env.clusterName }} \
--nodes 2 \
--instance-types "t3.medium,t3a.medium" \
--node-volume-type gp3 \
--node-volume-size 10 \
--managed \
--spot \
--node-private-networking

- name: Enable Relay
run: |
cd cilium-master/install/kubernetes
44 changes: 28 additions & 16 deletions .github/workflows/conformance-eks.yaml
@@ -141,10 +141,34 @@ jobs:

- name: Create EKS cluster without nodegroup
run: |
eksctl create cluster \
--name ${{ env.clusterName }} \
--tags "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--without-nodegroup
cat <<EOF > eks-config.yaml
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

metadata:
  name: ${{ env.clusterName }}
  region: ${{ env.region }}
  tags:
    usage: "${{ github.repository_owner }}-${{ github.event.repository.name }}"
    owner: "${{ steps.vars.outputs.owner }}"

managedNodeGroups:
- name: ng-1
  instanceTypes:
    - t3.medium
    - t3a.medium
  desiredCapacity: 2
  spot: true
  privateNetworking: true
  volumeType: "gp3"
  volumeSize: 10
  taints:
    - key: "node.cilium.io/agent-not-ready"
      value: "true"
      effect: "NoSchedule"
EOF

eksctl create cluster -f ./eks-config.yaml

- name: Wait for images to be available
timeout-minutes: 10
@@ -157,18 +181,6 @@ jobs:
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }}

- name: Add managed spot nodegroup
run: |
eksctl create nodegroup \
--cluster ${{ env.clusterName }} \
--nodes 2 \
--instance-types "t3.medium,t3a.medium" \
--node-volume-type gp3 \
--node-volume-size 10 \
--managed \
--spot \
--node-private-networking

- name: Enable Relay
run: |
cilium hubble enable ${{ steps.vars.outputs.hubble_enable_defaults }}
1 change: 1 addition & 0 deletions .github/workflows/conformance-gke.yaml
@@ -147,6 +147,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible

- name: Get cluster credentials
2 changes: 2 additions & 0 deletions .github/workflows/conformance-multicluster.yaml
@@ -147,6 +147,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible

- name: Create GKE cluster 2
@@ -159,6 +160,7 @@ jobs:
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--preemptible

- name: Get cluster credentials and setup contexts
78 changes: 68 additions & 10 deletions Documentation/gettingstarted/k8s-install-default.rst
@@ -39,10 +39,14 @@ to create a Kubernetes cluster locally or using a managed Kubernetes service:
for instructions on how to install ``gcloud`` and prepare your
account.

.. code-block:: shell-session
.. code-block:: bash

export NAME="$(whoami)-$RANDOM"
gcloud container clusters create "${NAME}" --zone us-west2-a
# Create the node pool with the following taint to guarantee that
# Pods are only scheduled on the nodes once Cilium is ready.
gcloud container clusters create "${NAME}" \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule \
--zone us-west2-a
gcloud container clusters get-credentials "${NAME}" --zone us-west2-a
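
A quick way to confirm the taint is in place before installing Cilium
(a sketch, assuming ``kubectl`` already points at the new cluster):

.. code-block:: shell-session

    kubectl get nodes -o custom-columns='NAME:.metadata.name,TAINTS:.spec.taints[*].key'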

.. group-tab:: AKS
@@ -53,14 +57,52 @@ to create a Kubernetes cluster locally or using a managed Kubernetes service:
<https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest>`_
for instructions on how to install ``az`` and prepare your account.

.. code-block:: shell-session
.. code-block:: bash

export NAME="$(whoami)-$RANDOM"
export AZURE_RESOURCE_GROUP="aks-cilium-group"
az group create --name "${AZURE_RESOURCE_GROUP}" -l westus2
az aks create --resource-group "${AZURE_RESOURCE_GROUP}" --name "${NAME}" --network-plugin azure

# Details: Basic load balancers are not supported with multiple node
# pools. Create a cluster with standard load balancer selected to use
# multiple node pools, learn more at aka.ms/aks/nodepools.
az aks create \
--resource-group "${AZURE_RESOURCE_GROUP}" \
--name "${NAME}" \
--network-plugin azure \
--load-balancer-sku standard

# Get the name of the node pool that was just created since it will
# be deleted after Cilium is installed.
nodepool_to_delete=$(az aks nodepool list --cluster-name "${NAME}" -g "${AZURE_RESOURCE_GROUP}" -o json | jq -r '.[0].name')

# Create a node pool with 'mode=system', the same mode used for the
# default node pool created with the cluster. This new node pool also
# carries the taint 'node.cilium.io/agent-not-ready=true:NoSchedule',
# which guarantees that pods are only scheduled on its nodes once
# Cilium is ready.
az aks nodepool add \
--name "nodepool2" \
--cluster-name "${NAME}" \
--resource-group "${AZURE_RESOURCE_GROUP}" \
--node-count 2 \
--mode system \
--node-taints node.cilium.io/agent-not-ready=true:NoSchedule

# Get the credentials to access the cluster with kubectl
az aks get-credentials --name "${NAME}" --resource-group "${AZURE_RESOURCE_GROUP}"

# We can only delete the first node pool after Cilium is installed
# because some pods have Pod Disruption Budgets set. If we try to
# delete the first node pool before the second node pool is ready,
# the deletion fails because evicting those pods would violate their
# Pod Disruption Budgets.
#
# NOTE: Only delete the nodepool after deploying Cilium
az aks nodepool delete --name ${nodepool_to_delete} \
--cluster-name "${NAME}" \
--resource-group "${AZURE_RESOURCE_GROUP}"
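
If the node pool deletion is rejected, a quick way to see which Pod
Disruption Budgets are involved (a sketch, assuming ``kubectl`` already
points at this cluster):

.. code-block:: shell-session

    kubectl get poddisruptionbudgets --all-namespaces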

.. attention::

Do NOT specify the ``--network-policy`` flag when creating the
@@ -78,7 +120,26 @@ to create a Kubernetes cluster locally or using a managed Kubernetes service:
.. code-block:: shell-session

export NAME="$(whoami)-$RANDOM"
eksctl create cluster --name "${NAME}" --region eu-west-1 --without-nodegroup
cat <<EOF >eks-config.yaml
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

metadata:
  name: ${NAME}
  region: eu-west-1

managedNodeGroups:
- name: ng-1
  desiredCapacity: 2
  privateNetworking: true
  # taint nodes so that application pods are
  # not scheduled until Cilium is deployed.
  taints:
    - key: "node.cilium.io/agent-not-ready"
      value: "true"
      effect: "NoSchedule"
EOF
eksctl create cluster -f ./eks-config.yaml
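
Until Cilium is installed, the taint keeps application pods that do not
tolerate it in the ``Pending`` state. One way to confirm this (a sketch,
assuming ``kubectl`` already points at the new cluster):

.. code-block:: shell-session

    kubectl get pods --all-namespaces --field-selector=status.phase=Pending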

.. group-tab:: kind

@@ -165,14 +226,11 @@ You can install Cilium on any Kubernetes cluster. Pick one of the options below:

**Install Cilium:**

Install Cilium into the EKS cluster. Set ``--wait=false`` as no nodes
exist yet. Then scale up the number of nodes and wait for Cilium to
bootstrap successfully.
Install Cilium into the EKS cluster.

.. code-block:: shell-session

cilium install --wait=false
eksctl create nodegroup --cluster "${NAME}" --region eu-west-1 --nodes 2
cilium install
cilium status --wait

.. group-tab:: OpenShift
4 changes: 0 additions & 4 deletions Documentation/gettingstarted/k8s-install-helm.rst
@@ -190,10 +190,6 @@ Install Cilium

Cilium is now deployed and you are ready to scale-up the cluster:

.. code-block:: shell-session

eksctl create nodegroup --cluster test-cluster --nodes 2

.. group-tab:: OpenShift

.. include:: requirements-openshift.rst
13 changes: 7 additions & 6 deletions Documentation/gettingstarted/k8s-install-restart-pods.rst
@@ -1,12 +1,13 @@
Restart unmanaged Pods
======================

If you did not use the ``nodeinit.restartPods=true`` in the Helm options when
deploying Cilium, then unmanaged pods need to be restarted manually. Restart
all already running pods which are not running in host-networking mode to
ensure that Cilium starts managing them. This is required to ensure that all
pods which have been running before Cilium was deployed have network
connectivity provided by Cilium and NetworkPolicy applies to them:
If you did not create a cluster with the nodes tainted with
``node.cilium.io/agent-not-ready``, then unmanaged pods need to be restarted
manually. Restart all already running pods which are not running in
host-networking mode to ensure that Cilium starts managing them. This is
required to ensure that all pods which have been running before Cilium was
deployed have network connectivity provided by Cilium and NetworkPolicy applies
to them:

.. code-block:: shell-session

6 changes: 6 additions & 0 deletions Documentation/gettingstarted/kubeproxy-free.rst
@@ -636,6 +636,12 @@ As an instance example, ``m5n.xlarge`` is used in the config ``nodegroup-config.
  desiredCapacity: 2
  ssh:
    allow: true
  # taint nodes so that application pods are
  # not scheduled until Cilium is deployed.
  taints:
    - key: "node.cilium.io/agent-not-ready"
      value: "true"
      effect: "NoSchedule"

The nodegroup is created with:

3 changes: 3 additions & 0 deletions Documentation/gettingstarted/requirements-aks.rst
@@ -20,6 +20,9 @@ Direct Routing Azure IPAM Kubernetes CRD
compatibility with Cilium. The Azure network plugin will be replaced with
Cilium by the installer.

* Node pools must also be created with the taint ``node.cilium.io/agent-not-ready=true:NoSchedule``
  using the ``--node-taints`` option.
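
  For example (a sketch only; the pool, cluster, and resource group names
  below are placeholders):

  .. code-block:: shell-session

      az aks nodepool add \
        --name nodepool2 \
        --cluster-name "${NAME}" \
        --resource-group "${AZURE_RESOURCE_GROUP}" \
        --node-count 2 \
        --mode system \
        --node-taints node.cilium.io/agent-not-ready=true:NoSchedule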

**Limitations:**

* All VMs and VM scale sets used in a cluster must belong to the same resource