Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pod MTU #2791

Merged
merged 1 commit into from
Feb 14, 2024
Merged

Pod MTU #2791

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,15 @@ Default: empty
Specify a comma-separated list of IPv4 CIDRs to exclude from SNAT. For every item in the list an `iptables` rule and off\-VPC
IP rule will be applied. If an item is not a valid IPv4 range, it will be skipped. This should be used when `AWS_VPC_K8S_CNI_EXTERNALSNAT=false`.

#### `POD_MTU` (v1.x.x+)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Placeholder version


Type: Integer as a String

*Note*: If `POD_MTU` is unset, it defaults to the value of `AWS_VPC_ENI_MTU`, which itself defaults to `9001` if unset.
Default: 9001
jchen6585 marked this conversation as resolved.
Show resolved Hide resolved

Used to configure the MTU size for pod virtual interfaces. The valid range is from `576` to `9001` for IPv4; for IPv6 the minimum MTU is `1280`.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

576 is only valid for IPv4, otherwise the minimum MTU has to be 1280 (IPv6)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call @archoversight. @jchen6585 we should update the README for AWS_VPC_ENI_MTU and POD_MTU at the very least, and we can consider adding runtime logging to error if IPv6 is configured and the MTU is < 1280


#### `WARM_ENI_TARGET`

Type: Integer as a String
Expand Down
8 changes: 6 additions & 2 deletions cmd/aws-vpc-cni/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ const (
envHostCniConfDirPath = "HOST_CNI_CONFDIR_PATH"
envVethPrefix = "AWS_VPC_K8S_CNI_VETHPREFIX"
envEniMTU = "AWS_VPC_ENI_MTU"
envPodMTU = "POD_MTU"
envEnablePodEni = "ENABLE_POD_ENI"
envPodSGEnforcingMode = "POD_SECURITY_GROUP_ENFORCING_MODE"
envPluginLogFile = "AWS_VPC_K8S_PLUGIN_LOG_FILE"
Expand Down Expand Up @@ -278,15 +279,18 @@ func generateJSON(jsonFile string, outFile string, getPrimaryIP func(ipv4 bool)
}
}
vethPrefix := utils.GetEnv(envVethPrefix, defaultVethPrefix)
mtu := utils.GetEnv(envEniMTU, defaultMTU)
// Derive pod MTU from ENI MTU by default
eniMTU := utils.GetEnv(envEniMTU, defaultMTU)
// If pod MTU environment variable is set, overwrite ENI MTU.
podMTU := utils.GetEnv(envPodMTU, eniMTU)
podSGEnforcingMode := utils.GetEnv(envPodSGEnforcingMode, defaultPodSGEnforcingMode)
pluginLogFile := utils.GetEnv(envPluginLogFile, defaultPluginLogFile)
pluginLogLevel := utils.GetEnv(envPluginLogLevel, defaultPluginLogLevel)
randomizeSNAT := utils.GetEnv(envRandomizeSNAT, defaultRandomizeSNAT)

netconf := string(byteValue)
netconf = strings.Replace(netconf, "__VETHPREFIX__", vethPrefix, -1)
netconf = strings.Replace(netconf, "__MTU__", mtu, -1)
netconf = strings.Replace(netconf, "__MTU__", podMTU, -1)
netconf = strings.Replace(netconf, "__PODSGENFORCINGMODE__", podSGEnforcingMode, -1)
netconf = strings.Replace(netconf, "__PLUGINLOGFILE__", pluginLogFile, -1)
netconf = strings.Replace(netconf, "__PLUGINLOGLEVEL__", pluginLogLevel, -1)
Expand Down
121 changes: 72 additions & 49 deletions test/integration/cni/host_networking_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,11 @@ import (
"strconv"
"time"

v1 "k8s.io/api/core/v1"

"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest"
k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
"github.com/aws/amazon-vpc-cni-k8s/test/integration/common"
v1 "k8s.io/api/core/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand All @@ -31,13 +30,15 @@ import (
// TODO: Instead of passing the list of pods to the test helper, have the test helper get the pod on node
// Values applied to the aws-node daemonset by the MTU / veth-prefix test
// cases, plus the label used to tag the test deployment's pods.
const (
// NEW_MTU_VAL is the non-default AWS_VPC_ENI_MTU value set by the MTU tests.
NEW_MTU_VAL = 1300
// NEW_POD_MTU is the POD_MTU value used by the pod MTU test case.
NEW_POD_MTU = 1280
// NEW_VETH_PREFIX is the non-default AWS_VPC_K8S_CNI_VETHPREFIX value set by the MTU tests.
NEW_VETH_PREFIX = "veth"
// podLabelKey/podLabelVal are the label attached to test pods and used to look them up again.
podLabelKey = "app"
podLabelVal = "host-networking-test"
)

var err error

var _ = Describe("test host networking", func() {
var err error
var podLabelKey = "app"
var podLabelVal = "host-networking-test"

// For host networking tests, increase WARM_IP_TARGET to prevent long IPAMD warmup.
BeforeEach(func() {
Expand All @@ -57,6 +58,10 @@ var _ = Describe("test host networking", func() {
"AWS_VPC_ENI_MTU": DEFAULT_MTU_VAL,
"AWS_VPC_K8S_CNI_VETHPREFIX": DEFAULT_VETH_PREFIX,
})
k8sUtils.RemoveVarFromDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName,
utils.AwsNodeNamespace, utils.AwsNodeName, map[string]struct{}{
"POD_MTU": {},
})
// After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU.
// Otherwise, the stale value can cause failures in future test cases.
time.Sleep(utils.PollIntervalMedium)
Expand Down Expand Up @@ -104,51 +109,13 @@ var _ = Describe("test host networking", func() {
common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, input, primaryNode.Name, f)
})

It("Validate Host Networking setup after changing MTU and Veth Prefix", func() {
deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
Replicas(maxIPPerInterface*2).
PodLabel(podLabelKey, podLabelVal).
NodeName(primaryNode.Name).
Build()

By("Configuring Veth Prefix and MTU value on aws-node daemonset")
k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{
"AWS_VPC_ENI_MTU": strconv.Itoa(NEW_MTU_VAL),
"AWS_VPC_K8S_CNI_VETHPREFIX": NEW_VETH_PREFIX,
Context("Validate Host Networking setup after changing Veth Prefix and", func() {
It("ENI MTU", func() {
mtuValidationTest(false, NEW_MTU_VAL)
})
It("POD MTU", func() {
mtuValidationTest(true, NEW_POD_MTU)
})
// After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU.
time.Sleep(utils.PollIntervalMedium)

By("creating a deployment to launch pods")
deployment, err = f.K8sResourceManagers.DeploymentManager().
CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout)
Expect(err).ToNot(HaveOccurred())

By("getting the list of pods using IP from primary and secondary ENI")
interfaceTypeToPodList :=
common.GetPodsOnPrimaryAndSecondaryInterface(primaryNode, podLabelKey, podLabelVal, f)

By("generating the pod networking validation input to be passed to tester")
podNetworkingValidationInput := common.GetPodNetworkingValidationInput(interfaceTypeToPodList, vpcCIDRs)
podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX
podNetworkingValidationInput.ValidateMTU = true
podNetworkingValidationInput.MTU = NEW_MTU_VAL
input, err := podNetworkingValidationInput.Serialize()
Expect(err).NotTo(HaveOccurred())

By("validating host networking setup is setup correctly with MTU check as well")
common.ValidateHostNetworking(common.NetworkingSetupSucceeds, input, primaryNode.Name, f)

By("deleting the deployment to test teardown")
err = f.K8sResourceManagers.DeploymentManager().
DeleteAndWaitTillDeploymentIsDeleted(deployment)
Expect(err).ToNot(HaveOccurred())

By("waiting to allow CNI to tear down networking for terminated pods")
time.Sleep(time.Second * 60)

By("validating host networking is teared down correctly")
common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, input, primaryNode.Name, f)
})
})

Expand Down Expand Up @@ -205,3 +172,59 @@ var _ = Describe("test host networking", func() {
})
})
})

// mtuValidationTest reconfigures the aws-node daemonset with a new veth prefix
// and MTU, launches a busybox deployment on the primary node, and validates
// that host networking is set up (including the MTU) and torn down correctly.
//
// When usePodMTU is true, AWS_VPC_ENI_MTU is set to NEW_MTU_VAL and POD_MTU is
// set to mtuVal. Otherwise only AWS_VPC_ENI_MTU is set, to mtuVal. In both
// cases mtuVal is the MTU that the validation input expects.
func mtuValidationTest(usePodMTU bool, mtuVal int) {
	deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
		Replicas(maxIPPerInterface*2).
		PodLabel(podLabelKey, podLabelVal).
		NodeName(primaryNode.Name).
		Build()

	// Derive the configured MTU from mtuVal so the value applied to the
	// daemonset can never drift from the value validated below.
	envVars := map[string]string{
		"AWS_VPC_K8S_CNI_VETHPREFIX": NEW_VETH_PREFIX,
	}
	if usePodMTU {
		By("Configuring Veth Prefix and Pod MTU value on aws-node daemonset")
		envVars["AWS_VPC_ENI_MTU"] = strconv.Itoa(NEW_MTU_VAL)
		envVars["POD_MTU"] = strconv.Itoa(mtuVal)
	} else {
		By("Configuring Veth Prefix and ENI MTU value on aws-node daemonset")
		envVars["AWS_VPC_ENI_MTU"] = strconv.Itoa(mtuVal)
	}
	k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, envVars)

	// After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU.
	time.Sleep(utils.PollIntervalMedium)

	By("creating a deployment to launch pods")
	// err is declared locally so the helper does not depend on package-level state.
	deployment, err := f.K8sResourceManagers.DeploymentManager().
		CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout)
	Expect(err).ToNot(HaveOccurred())

	By("getting the list of pods using IP from primary and secondary ENI")
	interfaceTypeToPodList :=
		common.GetPodsOnPrimaryAndSecondaryInterface(primaryNode, podLabelKey, podLabelVal, f)

	By("generating the pod networking validation input to be passed to tester")
	podNetworkingValidationInput := common.GetPodNetworkingValidationInput(interfaceTypeToPodList, vpcCIDRs)
	podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX
	podNetworkingValidationInput.ValidateMTU = true
	podNetworkingValidationInput.MTU = mtuVal
	serializedInput, err := podNetworkingValidationInput.Serialize()
	Expect(err).NotTo(HaveOccurred())

	By("validating host networking setup is setup correctly with MTU check as well")
	common.ValidateHostNetworking(common.NetworkingSetupSucceeds, serializedInput, primaryNode.Name, f)

	By("deleting the deployment to test teardown")
	err = f.K8sResourceManagers.DeploymentManager().
		DeleteAndWaitTillDeploymentIsDeleted(deployment)
	Expect(err).ToNot(HaveOccurred())

	By("waiting to allow CNI to tear down networking for terminated pods")
	time.Sleep(time.Second * 60)

	By("validating host networking is teared down correctly")
	common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, serializedInput, primaryNode.Name, f)
}
119 changes: 72 additions & 47 deletions test/integration/ipv6/ipv6_host_networking_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,30 @@ const (
// Environment variable names and values used by the IPv6 host networking
// tests when reconfiguring the aws-node daemonset, plus the label used to tag
// the test deployment's pods.
const (
// Names of the aws-node environment variables the tests modify.
AWS_VPC_ENI_MTU = "AWS_VPC_ENI_MTU"
AWS_VPC_K8S_CNI_VETHPREFIX = "AWS_VPC_K8S_CNI_VETHPREFIX"
POD_MTU = "POD_MTU"
// NEW_MTU_VAL is the non-default AWS_VPC_ENI_MTU value set by the MTU tests.
NEW_MTU_VAL = 1300
// NEW_POD_MTU is the POD_MTU value used by the pod MTU test case.
NEW_POD_MTU = 1280
// NEW_VETH_PREFIX is the non-default veth prefix set by the MTU tests.
NEW_VETH_PREFIX = "veth"
// DEFAULT_MTU_VAL and DEFAULT_VETH_PREFIX are the values restored on the daemonset after each test.
DEFAULT_MTU_VAL = "9001"
DEFAULT_VETH_PREFIX = "eni"
// podLabelKey/podLabelVal are the label attached to test pods and used to look them up again.
podLabelKey = "app"
podLabelVal = "host-networking-test"
)

var err error

var _ = Describe("[CANARY] test ipv6 host netns setup", func() {
var err error
var podLabelKey = "app"
var podLabelVal = "host-networking-test"

Context("when pods using IP from primary ENI are created", func() {
AfterEach(func() {
k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{
AWS_VPC_ENI_MTU: DEFAULT_MTU_VAL,
AWS_VPC_K8S_CNI_VETHPREFIX: DEFAULT_VETH_PREFIX,
})
k8sUtils.RemoveVarFromDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName,
utils.AwsNodeNamespace, utils.AwsNodeName, map[string]struct{}{
"POD_MTU": {},
})
// After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU.
// Otherwise, the stale value can cause failures in future test cases.
time.Sleep(utils.PollIntervalMedium)
Expand Down Expand Up @@ -98,51 +105,13 @@ var _ = Describe("[CANARY] test ipv6 host netns setup", func() {
ValidateHostNetworking(NetworkingTearDownSucceeds, input)
})

It("Validate host netns setup after changing MTU and Veth Prefix", func() {
deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
Replicas(2).
PodLabel(podLabelKey, podLabelVal).
NodeName(primaryNode.Name).
Build()

By("Configuring Veth Prefix and MTU value on aws-node daemonset")
k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{
AWS_VPC_ENI_MTU: strconv.Itoa(NEW_MTU_VAL),
AWS_VPC_K8S_CNI_VETHPREFIX: NEW_VETH_PREFIX,
Context("Validate Host Networking setup after changing Veth Prefix and", func() {
It("ENI MTU", func() {
mtuValidationTest(false, NEW_MTU_VAL)
})
It("POD MTU", func() {
mtuValidationTest(true, NEW_POD_MTU)
})
// After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU.
time.Sleep(utils.PollIntervalMedium)

By("creating a deployment to launch pods")
deployment, err = f.K8sResourceManagers.DeploymentManager().
CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout)
Expect(err).ToNot(HaveOccurred())

By("getting the list of pods using IP from primary and secondary ENI")
interfaceTypeToPodList :=
GetIPv6Pods(podLabelKey, podLabelVal)

By("generating the pod networking validation input to be passed to tester")
podNetworkingValidationInput := GetIPv6PodNetworkingValidationInput(interfaceTypeToPodList)
podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX
podNetworkingValidationInput.ValidateMTU = true
podNetworkingValidationInput.MTU = NEW_MTU_VAL
input, err := podNetworkingValidationInput.Serialize()
Expect(err).NotTo(HaveOccurred())

By("validating host networking setup is setup correctly with MTU check as well")
ValidateHostNetworking(NetworkingSetupSucceeds, input)

By("deleting the deployment to test teardown")
err = f.K8sResourceManagers.DeploymentManager().
DeleteAndWaitTillDeploymentIsDeleted(deployment)
Expect(err).ToNot(HaveOccurred())

By("waiting to allow CNI to tear down networking for terminated pods")
time.Sleep(time.Second * 60)

By("validating host networking is teared down correctly")
ValidateHostNetworking(NetworkingTearDownSucceeds, input)
})
})

Expand Down Expand Up @@ -277,3 +246,59 @@ func GetIPv6PodNetworkingValidationInput(podList v1.PodList) input.PodNetworking
}
return ip
}

// mtuValidationTest reconfigures the aws-node daemonset with a new veth prefix
// and MTU, launches a two-replica busybox deployment on the primary node, and
// validates that host networking is set up (including the MTU) and torn down
// correctly.
//
// When usePodMTU is true, AWS_VPC_ENI_MTU is set to NEW_MTU_VAL and POD_MTU is
// set to mtuVal. Otherwise only AWS_VPC_ENI_MTU is set, to mtuVal. In both
// cases mtuVal is the MTU that the validation input expects.
func mtuValidationTest(usePodMTU bool, mtuVal int) {
	deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
		Replicas(2).
		PodLabel(podLabelKey, podLabelVal).
		NodeName(primaryNode.Name).
		Build()

	// Derive the configured MTU from mtuVal so the value applied to the
	// daemonset can never drift from the value validated below.
	envVars := map[string]string{
		AWS_VPC_K8S_CNI_VETHPREFIX: NEW_VETH_PREFIX,
	}
	if usePodMTU {
		By("Configuring Veth Prefix and Pod MTU value on aws-node daemonset")
		envVars[AWS_VPC_ENI_MTU] = strconv.Itoa(NEW_MTU_VAL)
		envVars[POD_MTU] = strconv.Itoa(mtuVal)
	} else {
		By("Configuring Veth Prefix and ENI MTU value on aws-node daemonset")
		envVars[AWS_VPC_ENI_MTU] = strconv.Itoa(mtuVal)
	}
	k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, envVars)

	// After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU.
	time.Sleep(utils.PollIntervalMedium)

	By("creating a deployment to launch pods")
	// err is declared locally so the helper does not depend on package-level state.
	deployment, err := f.K8sResourceManagers.DeploymentManager().
		CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout)
	Expect(err).ToNot(HaveOccurred())

	By("getting the list of pods using IP from primary and secondary ENI")
	podList := GetIPv6Pods(podLabelKey, podLabelVal)

	By("generating the pod networking validation input to be passed to tester")
	podNetworkingValidationInput := GetIPv6PodNetworkingValidationInput(podList)
	podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX
	podNetworkingValidationInput.ValidateMTU = true
	podNetworkingValidationInput.MTU = mtuVal
	// Named serializedInput rather than input so the imported input package is not shadowed.
	serializedInput, err := podNetworkingValidationInput.Serialize()
	Expect(err).NotTo(HaveOccurred())

	By("validating host networking setup is setup correctly with MTU check as well")
	ValidateHostNetworking(NetworkingSetupSucceeds, serializedInput)

	By("deleting the deployment to test teardown")
	err = f.K8sResourceManagers.DeploymentManager().
		DeleteAndWaitTillDeploymentIsDeleted(deployment)
	Expect(err).ToNot(HaveOccurred())

	By("waiting to allow CNI to tear down networking for terminated pods")
	time.Sleep(time.Second * 60)

	By("validating host networking is teared down correctly")
	ValidateHostNetworking(NetworkingTearDownSucceeds, serializedInput)
}