From 61fd805a6aeab334a633d603d630f62d3201353d Mon Sep 17 00:00:00 2001 From: Zhecheng Date: Fri, 25 Sep 2020 15:46:53 +0800 Subject: [PATCH] [IPv6] adjust MTU for IPv6 overhead If the Antrea MTU is too large in an IPv6 environment, a large packet whose size plus overhead exceeds the Node MTU cannot be transmitted successfully across Nodes. The value of IPv6ExtraOverhead, 20, comes from observing IPv4 and IPv6 packets under the same conditions. --- pkg/agent/agent.go | 44 +++++++++++++++++---------------- pkg/agent/config/node_config.go | 3 ++- pkg/agent/openflow/client.go | 2 ++ 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index f1e19699f4b..30d4d089832 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -577,18 +577,11 @@ func (i *Initializer) initNodeLocalConfig() error { return fmt.Errorf("failed to get local IPNet: %v", err) } - mtu, err := i.getNodeMTU(localIntf) - if err != nil { - return err - } - klog.Infof("Setting Node MTU=%d", mtu) - i.nodeConfig = &config.NodeConfig{ Name: nodeName, OVSBridge: i.ovsBridge, DefaultTunName: defaultTunInterfaceName, NodeIPAddr: localAddr, - NodeMTU: mtu, UplinkNetConfig: new(config.AdapterNetConfig)} if i.networkConfig.TrafficEncapMode.IsNetworkPolicyOnly() { @@ -619,25 +612,31 @@ func (i *Initializer) initNodeLocalConfig() error { klog.V(2).Infof("Configure IPv6 Subnet CIDR %s on this Node", localSubnet.String()) } } - return nil + } else { + // Spec.PodCIDR can be empty due to misconfiguration. + if node.Spec.PodCIDR == "" { + klog.Errorf("Spec.PodCIDR is empty for Node %s. 
Please make sure --allocate-node-cidrs is enabled "+ + "for kube-controller-manager and --cluster-cidr specifies a sufficient CIDR range", nodeName) + return fmt.Errorf("CIDR string is empty for node %s", nodeName) + } + _, localSubnet, err := net.ParseCIDR(node.Spec.PodCIDR) + if err != nil { + klog.Errorf("Failed to parse subnet from CIDR string %s: %v", node.Spec.PodCIDR, err) + return err + } + if localSubnet.IP.To4() != nil { + i.nodeConfig.PodIPv4CIDR = localSubnet + } else { + i.nodeConfig.PodIPv6CIDR = localSubnet + } } - // Spec.PodCIDR can be empty due to misconfiguration. - if node.Spec.PodCIDR == "" { - klog.Errorf("Spec.PodCIDR is empty for Node %s. Please make sure --allocate-node-cidrs is enabled "+ - "for kube-controller-manager and --cluster-cidr specifies a sufficient CIDR range", nodeName) - return fmt.Errorf("CIDR string is empty for node %s", nodeName) - } - _, localSubnet, err := net.ParseCIDR(node.Spec.PodCIDR) + mtu, err := i.getNodeMTU(localIntf) if err != nil { - klog.Errorf("Failed to parse subnet from CIDR string %s: %v", node.Spec.PodCIDR, err) return err } - if localSubnet.IP.To4() != nil { - i.nodeConfig.PodIPv4CIDR = localSubnet - } else { - i.nodeConfig.PodIPv6CIDR = localSubnet - } + i.nodeConfig.NodeMTU = mtu + klog.Infof("Setting Node MTU=%d", mtu) return nil } @@ -730,6 +729,9 @@ func (i *Initializer) getNodeMTU(localIntf *net.Interface) (int, error) { if i.networkConfig.EnableIPSecTunnel { mtu -= config.IpsecESPOverhead } + if i.nodeConfig.PodIPv6CIDR != nil { + mtu -= config.IPv6ExtraOverhead + } return mtu, nil } diff --git a/pkg/agent/config/node_config.go b/pkg/agent/config/node_config.go index 73eb1a199b4..32a00644d56 100644 --- a/pkg/agent/config/node_config.go +++ b/pkg/agent/config/node_config.go @@ -39,7 +39,8 @@ const ( GREOverhead = 38 // IPsec ESP can add a maximum of 38 bytes to the packet including the ESP // header and trailer. 
- IpsecESPOverhead = 38 + IpsecESPOverhead = 38 + IPv6ExtraOverhead = 20 ) type GatewayConfig struct { diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 3b8daba9be3..3efe8e9c23c 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -301,6 +301,8 @@ func (c *client) InstallNodeFlows(hostname string, flows = append(flows, c.arpResponderFlow(peerGatewayIP, cookie.Node)) } if c.encapMode.NeedsEncapToPeer(tunnelPeerIP, c.nodeConfig.NodeIPAddr) { + // tunnelPeerIP is the Node Internal Address. In a dual-stack setup, whether this address is an IPv4 address or an + // IPv6 one is decided by the address family of Node Internal Address. flows = append(flows, c.l3FwdFlowToRemote(localGatewayMAC, *peerPodCIDR, tunnelPeerIP, tunOFPort, cookie.Node)) } else { flows = append(flows, c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, cookie.Node))