diff --git a/pkg/agent/controller/networkpolicy/networkpolicy_controller.go b/pkg/agent/controller/networkpolicy/networkpolicy_controller.go index 3396f10738e..bf1836864fc 100644 --- a/pkg/agent/controller/networkpolicy/networkpolicy_controller.go +++ b/pkg/agent/controller/networkpolicy/networkpolicy_controller.go @@ -143,7 +143,7 @@ func NewNetworkPolicyController(antreaClientGetter agent.AntreaClientProvider, c.ofClient.RegisterPacketInHandler(uint8(openflow.PacketInReasonNP), "dnsresponse", c.fqdnController) } } - c.reconciler = newReconciler(ofClient, ifaceStore, idAllocator, c.fqdnController, groupCounters, v4Enabled, v6Enabled) + c.reconciler = newReconciler(ofClient, ifaceStore, idAllocator, c.fqdnController, groupCounters, v4Enabled, v6Enabled, antreaPolicyEnabled) c.ruleCache = newRuleCache(c.enqueueRule, podUpdateSubscriber, groupIDUpdates) if statusManagerEnabled { c.statusManager = newStatusController(antreaClientGetter, nodeName, c.ruleCache) diff --git a/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go b/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go index b6b9e203430..b7f54207c57 100644 --- a/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go +++ b/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go @@ -200,7 +200,20 @@ func newNetworkPolicyWithMultipleRules(name string, uid types.UID, from, to, app } } +func prepareMockTables() { + openflow.InitMockTables( + map[*openflow.Table]uint8{ + openflow.AntreaPolicyEgressRuleTable: uint8(5), + openflow.EgressRuleTable: uint8(6), + openflow.EgressDefaultTable: uint8(7), + openflow.AntreaPolicyIngressRuleTable: uint8(12), + openflow.IngressRuleTable: uint8(13), + openflow.IngressDefaultTable: uint8(14), + }) +} + func TestAddSingleGroupRule(t *testing.T) { + prepareMockTables() controller, clientset, reconciler := newTestController() addressGroupWatcher := watch.NewFake() appliedToGroupWatcher := watch.NewFake() @@ -280,6 +293,7 @@ func TestAddSingleGroupRule(t *testing.T) { } func TestAddMultipleGroupsRule(t *testing.T) { + prepareMockTables() controller, clientset, reconciler := newTestController() addressGroupWatcher := watch.NewFake() appliedToGroupWatcher := watch.NewFake() @@ -359,6 +373,7 @@ func TestAddMultipleGroupsRule(t *testing.T) { } func TestDeleteRule(t *testing.T) { + prepareMockTables() controller, clientset, reconciler := newTestController() addressGroupWatcher := watch.NewFake() appliedToGroupWatcher := watch.NewFake() @@ -406,6 +421,7 @@ func TestDeleteRule(t *testing.T) { } func TestAddNetworkPolicyWithMultipleRules(t *testing.T) { + prepareMockTables() controller, clientset, reconciler := newTestController() addressGroupWatcher := watch.NewFake() appliedToGroupWatcher := watch.NewFake() @@ -488,6 +504,7 @@ func TestAddNetworkPolicyWithMultipleRules(t *testing.T) { } func TestNetworkPolicyMetrics(t *testing.T) { + prepareMockTables() // Initialize NetworkPolicy metrics (prometheus) metrics.InitializeNetworkPolicyMetrics() controller, clientset, reconciler := newTestController() diff --git a/pkg/agent/controller/networkpolicy/reconciler.go b/pkg/agent/controller/networkpolicy/reconciler.go index 3d696392870..16cbf78484b 100644 --- a/pkg/agent/controller/networkpolicy/reconciler.go +++ b/pkg/agent/controller/networkpolicy/reconciler.go @@ -219,16 +219,19 @@ func newReconciler(ofClient openflow.Client, groupCounters []proxytypes.GroupCounter, v4Enabled bool, v6Enabled bool, + antreaPolicyEnabled bool, ) *reconciler { priorityAssigners := 
map[uint8]*tablePriorityAssigner{} - for _, table := range openflow.GetAntreaPolicyBaselineTierTables() { - priorityAssigners[table.GetID()] = &tablePriorityAssigner{ - assigner: newPriorityAssigner(true), + if antreaPolicyEnabled { + for _, table := range openflow.GetAntreaPolicyBaselineTierTables() { + priorityAssigners[table.GetID()] = &tablePriorityAssigner{ + assigner: newPriorityAssigner(true), + } } - } - for _, table := range openflow.GetAntreaPolicyMultiTierTables() { - priorityAssigners[table.GetID()] = &tablePriorityAssigner{ - assigner: newPriorityAssigner(false), + for _, table := range openflow.GetAntreaPolicyMultiTierTables() { + priorityAssigners[table.GetID()] = &tablePriorityAssigner{ + assigner: newPriorityAssigner(false), + } } } reconciler := &reconciler{ @@ -297,7 +300,7 @@ func (r *reconciler) getOFRuleTable(rule *CompletedRule) uint8 { } return openflow.EgressRuleTable.GetID() } - var ruleTables []binding.Table + var ruleTables []*openflow.Table if rule.Direction == v1beta2.DirectionIn { ruleTables = openflow.GetAntreaPolicyIngressTables() } else { diff --git a/pkg/agent/controller/networkpolicy/reconciler_test.go b/pkg/agent/controller/networkpolicy/reconciler_test.go index f89e22c289e..dc9a4448826 100644 --- a/pkg/agent/controller/networkpolicy/reconciler_test.go +++ b/pkg/agent/controller/networkpolicy/reconciler_test.go @@ -103,11 +103,12 @@ func newTestReconciler(t *testing.T, controller *gomock.Controller, ifaceStore i ch := make(chan string, 100) groupIDAllocator := openflow.NewGroupAllocator(v6Enabled) groupCounters := []proxytypes.GroupCounter{proxytypes.NewGroupCounter(groupIDAllocator, ch)} - r := newReconciler(ofClient, ifaceStore, newIDAllocator(testAsyncDeleteInterval), f, groupCounters, v4Enabled, v6Enabled) + r := newReconciler(ofClient, ifaceStore, newIDAllocator(testAsyncDeleteInterval), f, groupCounters, v4Enabled, v6Enabled, true) return r } func TestReconcilerForget(t *testing.T) { + prepareMockTables() tests := []struct { name string lastRealizeds map[string]*lastRealized diff --git a/pkg/agent/controller/traceflow/packetin.go b/pkg/agent/controller/traceflow/packetin.go index 660ce9eb7d5..f903499468e 100644 --- a/pkg/agent/controller/traceflow/packetin.go +++ b/pkg/agent/controller/traceflow/packetin.go @@ -160,16 +160,17 @@ func (c *Controller) parsePacketIn(pktIn *ofctrl.PacketIn) (*crdv1alpha1.Tracefl obs = append(obs, *ob) } - // Collect Service DNAT and SNAT. + // Collect Service connections. + // - If the packet is DNATed only, the final state is that ipDst != ctNwDst (in the DNAT CT zone). + // - If the packet is both DNATed and SNATed, the first state is also ipDst != ctNwDst (in the DNAT CT zone), but the final + // state is that ipSrc != ctNwSrc (in the SNAT CT zone). The state in the DNAT CT zone cannot be recognized in the SNAT CT zone. if !tfState.receiverOnly { - if isValidCtNw(ctNwDst) && ipDst != ctNwDst { + if isValidCtNw(ctNwDst) && ipDst != ctNwDst || isValidCtNw(ctNwSrc) && ipSrc != ctNwSrc { ob := &crdv1alpha1.Observation{ Component: crdv1alpha1.ComponentLB, Action: crdv1alpha1.ActionForwarded, TranslatedDstIP: ipDst, } - // Service SNAT can only happen alongside DNAT - // and only for hairpinned packets at the moment.
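Note: the widened condition above folds the two cases from the new comment into one check, since a DNAT-only connection and a DNAT+SNAT connection each leave at least one mismatch between the packet header and the conntrack tuple. A minimal sketch of the predicate, reusing isValidCtNw and assuming the string-typed IP variables from parsePacketIn (the wrapper function itself is illustrative):

	// serviceConnectionObserved reports whether the packet shows evidence of
	// Service NAT: DNAT leaves ipDst != ctNwDst (seen in the DNAT CT zone),
	// and SNAT leaves ipSrc != ctNwSrc (seen in the SNAT CT zone).
	func serviceConnectionObserved(ipSrc, ipDst, ctNwSrc, ctNwDst string) bool {
		dnated := isValidCtNw(ctNwDst) && ipDst != ctNwDst
		snated := isValidCtNw(ctNwSrc) && ipSrc != ctNwSrc
		return dnated || snated
	}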
if isValidCtNw(ctNwSrc) && ipSrc != ctNwSrc { ob.TranslatedSrcIP = ipSrc } diff --git a/pkg/agent/controller/traceflow/packetin_test.go b/pkg/agent/controller/traceflow/packetin_test.go index 2cc437bb340..8f01e33524d 100644 --- a/pkg/agent/controller/traceflow/packetin_test.go +++ b/pkg/agent/controller/traceflow/packetin_test.go @@ -28,7 +28,24 @@ import ( crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" ) +func prepareMockTables() { + openflow.InitMockTables( + map[*openflow.Table]uint8{ + openflow.AntreaPolicyEgressRuleTable: uint8(5), + openflow.EgressRuleTable: uint8(6), + openflow.EgressDefaultTable: uint8(7), + openflow.EgressMetricTable: uint8(8), + openflow.AntreaPolicyIngressRuleTable: uint8(12), + openflow.IngressRuleTable: uint8(13), + openflow.IngressDefaultTable: uint8(14), + openflow.IngressMetricTable: uint8(15), + openflow.L2ForwardingOutTable: uint8(17), + }) +} + func Test_getNetworkPolicyObservation(t *testing.T) { + prepareMockTables() + type args struct { tableID uint8 ingress bool diff --git a/pkg/agent/flowexporter/connections/conntrack_connections_test.go b/pkg/agent/flowexporter/connections/conntrack_connections_test.go index 8943c25a1ff..f152fd7aec6 100644 --- a/pkg/agent/flowexporter/connections/conntrack_connections_test.go +++ b/pkg/agent/flowexporter/connections/conntrack_connections_test.go @@ -36,7 +36,6 @@ import ( interfacestoretest "antrea.io/antrea/pkg/agent/interfacestore/testing" "antrea.io/antrea/pkg/agent/metrics" "antrea.io/antrea/pkg/agent/openflow" - ofclient "antrea.io/antrea/pkg/agent/openflow" proxytest "antrea.io/antrea/pkg/agent/proxy/testing" agenttypes "antrea.io/antrea/pkg/agent/types" cpv1beta "antrea.io/antrea/pkg/apis/controlplane/v1beta2" @@ -83,7 +82,7 @@ var ( Priority: nil, Name: "", FlowID: uint32(0), - TableID: ofclient.IngressRuleTable.GetID(), + TableID: uint8(10), PolicyRef: &np1, EnableLogging: false, } diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 33b11e48b12..935383bb95d 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -436,40 +436,40 @@ func (c *client) InstallNodeFlows(hostname string, // only work for IPv4 addresses. // arpResponderFlow() adds a flow to resolve peer gateway IPs to GlobalVirtualMAC. // This flow replies to ARP requests sent from the local gateway asking for the MAC address of a remote peer gateway. It ensures that the local Node can reach any remote Pod. - flows = append(flows, c.arpResponderFlow(peerGatewayIP, cookie.Node)) + flows = append(flows, c.featurePodConnectivity.arpResponderFlow(peerGatewayIP, GlobalVirtualMAC)) } // tunnelPeerIP is the Node Internal Address. In a dual-stack setup, one Node has 2 Node Internal // Addresses (IPv4 and IPv6) . if (!isIPv6 && c.networkConfig.NeedsTunnelToPeer(tunnelPeerIPs.IPv4, c.nodeConfig.NodeTransportIPv4Addr)) || (isIPv6 && c.networkConfig.NeedsTunnelToPeer(tunnelPeerIPs.IPv6, c.nodeConfig.NodeTransportIPv6Addr)) { - flows = append(flows, c.l3FwdFlowToRemote(localGatewayMAC, *peerPodCIDR, tunnelPeerIP, cookie.Node)) + flows = append(flows, c.featurePodConnectivity.l3FwdFlowToRemoteViaTun(localGatewayMAC, *peerPodCIDR, tunnelPeerIP)) } else { - flows = append(flows, c.l3FwdFlowToRemoteViaRouting(localGatewayMAC, remoteGatewayMAC, cookie.Node, tunnelPeerIP, peerPodCIDR)...) + flows = append(flows, c.featurePodConnectivity.l3FwdFlowToRemoteViaRouting(localGatewayMAC, remoteGatewayMAC, tunnelPeerIP, peerPodCIDR)...) 
} if c.enableEgress { - flows = append(flows, c.snatSkipNodeFlow(tunnelPeerIP, cookie.Node)) + flows = append(flows, c.featureEgress.snatSkipNodeFlow(tunnelPeerIP)) } if c.connectUplinkToBridge { // flow to catch traffic from AntreaFlexibleIPAM Pod to remote Per-Node IPAM Pod - flows = append(flows, c.l3FwdFlowToRemoteViaGW(remoteGatewayMAC, *peerPodCIDR, cookie.Node, true)) + flows = append(flows, c.featurePodConnectivity.l3FwdFlowToRemoteViaUplink(remoteGatewayMAC, *peerPodCIDR, true)) } } if ipsecTunOFPort != 0 { // When IPsec tunnel is enabled, packets received from the remote Node are // input from the Node's IPsec tunnel port, not the default tunnel port. So, // add a separate tunnelClassifierFlow for the IPsec tunnel port. - flows = append(flows, c.tunnelClassifierFlow(ipsecTunOFPort, cookie.Node)) + flows = append(flows, c.featurePodConnectivity.tunnelClassifierFlow(ipsecTunOFPort)) } - // For Windows Noencap Mode, the OVS flows for Node need be be exactly same as the provided 'flows' slice because + // For Windows Noencap Mode, the OVS flows for Node need to be exactly the same as the provided 'flows' slice because // the Node flows may be processed more than once if the MAC annotation is updated. - return c.modifyFlows(c.nodeFlowCache, hostname, flows) + return c.modifyFlows(c.featurePodConnectivity.nodeCachedFlows, hostname, flows) } func (c *client) UninstallNodeFlows(hostname string) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.deleteFlows(c.nodeFlowCache, hostname) + return c.deleteFlows(c.featurePodConnectivity.nodeCachedFlows, hostname) } func (c *client) InstallPodFlows(interfaceName string, podInterfaceIPs []net.IP, podInterfaceMAC net.HardwareAddr, ofPort uint32) error { @@ -482,38 +482,38 @@ func (c *client) InstallPodFlows(interfaceName string, podInterfaceIPs []net.IP, localGatewayMAC := c.nodeConfig.GatewayConfig.MAC flows := []binding.Flow{ - c.podClassifierFlow(ofPort, cookie.Pod, isAntreaFlexibleIPAM), - c.l2ForwardCalcFlow(podInterfaceMAC, ofPort, false, cookie.Pod), + c.featurePodConnectivity.podClassifierFlow(ofPort, isAntreaFlexibleIPAM), + c.featurePodConnectivity.l2ForwardCalcFlow(podInterfaceMAC, ofPort), } // Add support for IPv4 ARP responder. if podInterfaceIPv4 != nil { - flows = append(flows, c.arpSpoofGuardFlow(podInterfaceIPv4, podInterfaceMAC, ofPort, cookie.Pod)) + flows = append(flows, c.featurePodConnectivity.arpSpoofGuardFlow(podInterfaceIPv4, podInterfaceMAC, ofPort)) } // Add IP SpoofGuard flows for all valid IPs. - flows = append(flows, c.podIPSpoofGuardFlow(podInterfaceIPs, podInterfaceMAC, ofPort, cookie.Pod)...) + flows = append(flows, c.featurePodConnectivity.podIPSpoofGuardFlow(podInterfaceIPs, podInterfaceMAC, ofPort)...) // Add L3 Routing flows to rewrite Pod's dst MAC for all valid IPs. - flows = append(flows, c.l3FwdFlowToPod(localGatewayMAC, podInterfaceIPs, podInterfaceMAC, cookie.Pod)...) + flows = append(flows, c.featurePodConnectivity.l3FwdFlowToPod(localGatewayMAC, podInterfaceIPs, podInterfaceMAC, isAntreaFlexibleIPAM)...) if c.networkConfig.TrafficEncapMode.IsNetworkPolicyOnly() { // In policy-only mode, traffic to local Pod is routed based on destination IP. flows = append(flows, - c.l3FwdFlowRouteToPod(podInterfaceIPs, podInterfaceMAC, cookie.Pod)..., + c.featurePodConnectivity.l3FwdFlowRouteToPod(podInterfaceIPs, podInterfaceMAC)..., ) } if isAntreaFlexibleIPAM { // Add Pod uplink classifier flows for AntreaFlexibleIPAM Pods.
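Note: the Node and Pod flow caches move from client fields (nodeFlowCache, podFlowCache) to featurePodConnectivity (nodeCachedFlows, podCachedFlows), but the shape the addFlows/modifyFlows/deleteFlows helpers operate on is unchanged. A sketch of that shape, assuming the pre-existing definitions in this package:

	// flowCache holds the flows of one owner (a peer Node or a Pod interface),
	// keyed by each flow's match string, which keeps re-installation idempotent.
	type flowCache map[string]binding.Flow

	// flowCategoryCache maps an owner key (hostname or interface name) to its
	// flowCache; embedding sync.Map makes concurrent install/uninstall safe.
	type flowCategoryCache struct {
		sync.Map
	}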
- flows = append(flows, c.podUplinkClassifierFlows(podInterfaceMAC, cookie.Pod)...) + flows = append(flows, c.featurePodConnectivity.podUplinkClassifierFlows(podInterfaceMAC)...) } - return c.addFlows(c.podFlowCache, interfaceName, flows) + return c.addFlows(c.featurePodConnectivity.podCachedFlows, interfaceName, flows) } func (c *client) UninstallPodFlows(interfaceName string) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.deleteFlows(c.podFlowCache, interfaceName) + return c.deleteFlows(c.featurePodConnectivity.podCachedFlows, interfaceName) } func (c *client) getFlowKeysFromCache(cache *flowCategoryCache, cacheKey string) []string { @@ -536,18 +536,18 @@ func (c *client) getFlowKeysFromCache(cache *flowCategoryCache, cacheKey string) } func (c *client) GetPodFlowKeys(interfaceName string) []string { - return c.getFlowKeysFromCache(c.podFlowCache, interfaceName) + return c.getFlowKeysFromCache(c.featurePodConnectivity.podCachedFlows, interfaceName) } func (c *client) InstallServiceGroup(groupID binding.GroupIDType, withSessionAffinity bool, endpoints []proxy.Endpoint) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() - group := c.serviceEndpointGroup(groupID, withSessionAffinity, endpoints...) + group := c.featureService.serviceEndpointGroup(groupID, withSessionAffinity, endpoints...) if err := group.Add(); err != nil { return fmt.Errorf("error when installing Service Endpoints Group: %w", err) } - c.groupCache.Store(groupID, group) + c.featureService.groupCache.Store(groupID, group) return nil } @@ -557,7 +557,7 @@ func (c *client) UninstallServiceGroup(groupID binding.GroupIDType) error { if !c.bridge.DeleteGroup(groupID) { return fmt.Errorf("group %d delete failed", groupID) } - c.groupCache.Delete(groupID) + c.featureService.groupCache.Delete(groupID) return nil } @@ -581,14 +581,14 @@ func (c *client) InstallEndpointFlows(protocol binding.Protocol, endpoints []pro endpointIP := net.ParseIP(endpoint.IP()) portVal := portToUint16(endpointPort) cacheKey := generateEndpointFlowCacheKey(endpoint.IP(), endpointPort, protocol) - flows = append(flows, c.endpointDNATFlow(endpointIP, portVal, protocol)) + flows = append(flows, c.featureService.endpointDNATFlow(endpointIP, portVal, protocol)) if endpoint.GetIsLocal() { - flows = append(flows, c.hairpinSNATFlow(endpointIP)) + flows = append(flows, c.featureService.podHairpinSNATFlow(endpointIP)) } keyToFlows[cacheKey] = flows } - return c.addFlowsWithMultipleKeys(c.serviceFlowCache, keyToFlows) + return c.addFlowsWithMultipleKeys(c.featureService.cachedFlows, keyToFlows) } func (c *client) UninstallEndpointFlows(protocol binding.Protocol, endpoint proxy.Endpoint) error { @@ -600,125 +600,110 @@ func (c *client) UninstallEndpointFlows(protocol binding.Protocol, endpoint prox return fmt.Errorf("error when getting port: %w", err) } cacheKey := generateEndpointFlowCacheKey(endpoint.IP(), port, protocol) - return c.deleteFlows(c.serviceFlowCache, cacheKey) + return c.deleteFlows(c.featureService.cachedFlows, cacheKey) } func (c *client) InstallServiceFlows(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16, nodeLocalExternal bool, svcType v1.ServiceType) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() var flows []binding.Flow - flows = append(flows, c.serviceLBFlow(groupID, svcIP, svcPort, protocol, affinityTimeout != 0, nodeLocalExternal, svcType)) + flows = append(flows, c.featureService.serviceLBFlow(groupID, svcIP, svcPort, protocol, 
affinityTimeout != 0, nodeLocalExternal, svcType)) if affinityTimeout != 0 { - flows = append(flows, c.serviceLearnFlow(groupID, svcIP, svcPort, protocol, affinityTimeout, nodeLocalExternal, svcType)) + flows = append(flows, c.featureService.serviceLearnFlow(groupID, svcIP, svcPort, protocol, affinityTimeout, nodeLocalExternal, svcType)) } cacheKey := generateServicePortFlowCacheKey(svcIP, svcPort, protocol) - return c.addFlows(c.serviceFlowCache, cacheKey, flows) + return c.addFlows(c.featureService.cachedFlows, cacheKey, flows) } func (c *client) UninstallServiceFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() cacheKey := generateServicePortFlowCacheKey(svcIP, svcPort, protocol) - return c.deleteFlows(c.serviceFlowCache, cacheKey) + return c.deleteFlows(c.featureService.cachedFlows, cacheKey) } func (c *client) GetServiceFlowKeys(svcIP net.IP, svcPort uint16, protocol binding.Protocol, endpoints []proxy.Endpoint) []string { cacheKey := generateServicePortFlowCacheKey(svcIP, svcPort, protocol) - flowKeys := c.getFlowKeysFromCache(c.serviceFlowCache, cacheKey) + flowKeys := c.getFlowKeysFromCache(c.featureService.cachedFlows, cacheKey) for _, ep := range endpoints { epPort, _ := ep.Port() cacheKey = generateEndpointFlowCacheKey(ep.IP(), epPort, protocol) - flowKeys = append(flowKeys, c.getFlowKeysFromCache(c.serviceFlowCache, cacheKey)...) + flowKeys = append(flowKeys, c.getFlowKeysFromCache(c.featureService.cachedFlows, cacheKey)...) } return flowKeys } func (c *client) InstallDefaultServiceFlows(nodePortAddressesIPv4, nodePortAddressesIPv6 []net.IP) error { flows := []binding.Flow{ - c.serviceNeedLBFlow(), - c.sessionAffinityReselectFlow(), - c.l2ForwardOutputServiceHairpinFlow(), + c.featureService.serviceNeedLBFlow(), + c.featureService.sessionAffinityReselectFlow(), + c.featureService.l2ForwardOutputHairpinServiceFlow(), } - if c.networkConfig.IPv4Enabled { - flows = append(flows, c.serviceHairpinResponseDNATFlow(binding.ProtocolIP)) - flows = append(flows, c.serviceLBBypassFlows(binding.ProtocolIP)...) - flows = append(flows, c.l3FwdServiceDefaultFlowsViaGW(binding.ProtocolIP, cookie.Service)...) - if c.proxyAll { - // The output interface of a packet is the same as where it is from, and the action of the packet should be - // IN_PORT, rather than output. When a packet of Service is from Antrea gateway and its Endpoint is on host - // network, it needs hairpin mark (by setting a register, it will be matched at table L2ForwardingOutTable). - flows = append(flows, c.serviceHairpinRegSetFlows(binding.ProtocolIP)) + + if c.proxyAll { + for _, ipProtocol := range c.ipProtocols { // These flows are used to match the first packet of NodePort. The flows will set a bit of a register to mark // the Service type of the packet as NodePort. The mark will be consumed in table serviceLBTable to match NodePort - flows = append(flows, c.serviceClassifierFlows(nodePortAddressesIPv4, binding.ProtocolIP)...) - } - } - if c.networkConfig.IPv6Enabled { - flows = append(flows, c.serviceHairpinResponseDNATFlow(binding.ProtocolIPv6)) - flows = append(flows, c.serviceLBBypassFlows(binding.ProtocolIPv6)...) - flows = append(flows, c.l3FwdServiceDefaultFlowsViaGW(binding.ProtocolIPv6, cookie.Service)...) - if c.proxyAll { - // As IPv4 above. - flows = append(flows, c.serviceHairpinRegSetFlows(binding.ProtocolIPv6)) - // As IPv4 above. - flows = append(flows, c.serviceClassifierFlows(nodePortAddressesIPv6, binding.ProtocolIPv6)...) 
+ nodePortAddresses := nodePortAddressesIPv4 + if ipProtocol == binding.ProtocolIPv6 { + nodePortAddresses = nodePortAddressesIPv6 + } + flows = append(flows, c.featureService.nodePortMarkFlows(nodePortAddresses, ipProtocol)...) } } if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } - c.defaultServiceFlows = flows + c.featureService.fixedFlows = flows return nil } func (c *client) InstallClusterServiceCIDRFlows(serviceNets []*net.IPNet) error { - flows := c.serviceCIDRDNATFlows(serviceNets) + flows := c.featureService.serviceCIDRDNATFlows(serviceNets) if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } - c.defaultServiceFlows = flows + c.featureService.fixedFlows = flows return nil } func (c *client) InstallGatewayFlows() error { gatewayConfig := c.nodeConfig.GatewayConfig - gatewayIPs := []net.IP{} flows := []binding.Flow{ - c.gatewayClassifierFlow(cookie.Default), - c.l2ForwardCalcFlow(gatewayConfig.MAC, config.HostGatewayOFPort, true, cookie.Default), + c.featurePodConnectivity.gatewayClassifierFlow(), + c.featurePodConnectivity.l2ForwardCalcFlow(gatewayConfig.MAC, config.HostGatewayOFPort), } - flows = append(flows, c.gatewayIPSpoofGuardFlows(cookie.Default)...) + flows = append(flows, c.featurePodConnectivity.gatewayIPSpoofGuardFlows()...) // Add ARP SpoofGuard flow for local gateway interface. if gatewayConfig.IPv4 != nil { - gatewayIPs = append(gatewayIPs, gatewayConfig.IPv4) - flows = append(flows, c.gatewayARPSpoofGuardFlows(gatewayConfig.IPv4, gatewayConfig.MAC, cookie.Default)...) - } - if gatewayConfig.IPv6 != nil { - gatewayIPs = append(gatewayIPs, gatewayConfig.IPv6) + flows = append(flows, c.featurePodConnectivity.arpSpoofGuardFlow(gatewayConfig.IPv4, gatewayConfig.MAC, config.HostGatewayOFPort)) + if c.connectUplinkToBridge { + flows = append(flows, c.featurePodConnectivity.arpSpoofGuardFlow(c.nodeConfig.NodeIPv4Addr.IP, gatewayConfig.MAC, config.HostGatewayOFPort)) + } } // Add flow to ensure the liveness check packet could be forwarded correctly. - flows = append(flows, c.localProbeFlow(gatewayIPs, cookie.Default)...) - flows = append(flows, c.l3FwdFlowToGateway(gatewayIPs, gatewayConfig.MAC, cookie.Default)...) + flows = append(flows, c.featurePodConnectivity.localProbeFlow(c.ovsDatapathType)...) + flows = append(flows, c.featurePodConnectivity.l3FwdFlowToGateway()...) if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } - c.gatewayFlows = flows + c.featurePodConnectivity.fixedFlows = append(c.featurePodConnectivity.fixedFlows, flows...) return nil } func (c *client) InstallDefaultTunnelFlows() error { flows := []binding.Flow{ - c.tunnelClassifierFlow(config.DefaultTunOFPort, cookie.Default), - c.l2ForwardCalcFlow(GlobalVirtualMAC, config.DefaultTunOFPort, true, cookie.Default), + c.featurePodConnectivity.tunnelClassifierFlow(config.DefaultTunOFPort), + c.featurePodConnectivity.l2ForwardCalcFlow(GlobalVirtualMAC, config.DefaultTunOFPort), } if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } - c.defaultTunnelFlows = flows + c.featurePodConnectivity.fixedFlows = append(c.featurePodConnectivity.fixedFlows, flows...) 
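Note: gateway and default tunnel flows now accumulate in featurePodConnectivity.fixedFlows instead of the dedicated gatewayFlows/defaultTunnelFlows slices, so a single per-feature slice carries everything that must be re-installed after an OVS reconnection. A sketch of how a feature replays them, mirroring featureEgress.replayFlows shown later in this diff (featurePodConnectivity's actual method may differ; getCachedFlows is the helper egress.go also calls, assumed to reset and collect the flows of a category cache):

	func (f *featurePodConnectivity) replayFlows() []binding.Flow {
		var flows []binding.Flow
		for _, flow := range f.fixedFlows {
			flow.Reset() // a flow must be reset before it can be re-installed
			flows = append(flows, flow)
		}
		// Per-key Node and Pod flows are replayed from the category caches.
		flows = append(flows, getCachedFlows(f.nodeCachedFlows)...)
		return append(flows, getCachedFlows(f.podCachedFlows)...)
	}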
return nil } @@ -726,38 +711,13 @@ func (c *client) initialize() error { if err := c.ofEntryOperations.AddAll(c.defaultFlows()); err != nil { return fmt.Errorf("failed to install default flows: %v", err) } - if err := c.ofEntryOperations.AddAll(c.arpResponderLocalFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install arp responder local flows: %v", err) - } - if err := c.ofEntryOperations.Add(c.arpNormalFlow(cookie.Default)); err != nil { - return fmt.Errorf("failed to install arp normal flow: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.ipv6Flows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install ipv6 flows: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.decTTLFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install dec TTL flow on source Node: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.l2ForwardOutputFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install L2 forward output flows: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.connectionTrackFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install connection track flows: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.establishedConnectionFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install flows to skip established connections: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.relatedConnectionFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install flows to skip related connections: %v", err) - } - if err := c.ofEntryOperations.AddAll(c.rejectBypassNetworkpolicyFlows(cookie.Default)); err != nil { - return fmt.Errorf("failed to install flows to skip generated reject responses: %v", err) - } - if c.networkConfig.TrafficEncapMode.IsNetworkPolicyOnly() { - if err := c.setupPolicyOnlyFlows(); err != nil { - return fmt.Errorf("failed to setup policy only flows: %w", err) + + for _, activeFeature := range c.activatedFeatures { + if err := c.ofEntryOperations.AddAll(activeFeature.initFlows()); err != nil { + return fmt.Errorf("failed to install feature %v initial flows: %v", activeFeature.getFeatureName(), err) } } + if c.ovsMetersAreSupported { if err := c.genPacketInMeter(PacketInMeterIDNP, PacketInMeterRateNP).Add(); err != nil { return fmt.Errorf("failed to install OpenFlow meter entry (meterID:%d, rate:%d) for NetworkPolicy packet-in rate limiting: %v", PacketInMeterIDNP, PacketInMeterRateNP, err) @@ -779,6 +739,10 @@ func (c *client) Initialize(roundInfo types.RoundInfo, nodeConfig *config.NodeCo if networkConfig.IPv6Enabled { c.ipProtocols = append(c.ipProtocols, binding.ProtocolIPv6) } + c.roundInfo = roundInfo + c.cookieAllocator = cookie.NewAllocator(roundInfo.RoundNum) + c.generatePipelines() + c.realizePipelines() // Initiate connections to target OFswitch, and create tables on the switch. connCh := make(chan struct{}) @@ -789,9 +753,6 @@ func (c *client) Initialize(roundInfo types.RoundInfo, nodeConfig *config.NodeCo // Ignore first notification, it is not a "reconnection". <-connCh - c.roundInfo = roundInfo - c.cookieAllocator = cookie.NewAllocator(roundInfo.RoundNum) - // In the normal case, there should be no existing flows with the current round number. 
This // is needed in case the agent was restarted before we had a chance to increment the round // number (incrementing the round number happens once we are satisfied that stale flows from @@ -814,60 +775,134 @@ func (c *client) Initialize(roundInfo types.RoundInfo, nodeConfig *config.NodeCo return connCh, c.initialize() } -func (c *client) InstallExternalFlows(exceptCIDRs []net.IPNet) error { - localGatewayMAC := c.nodeConfig.GatewayConfig.MAC +// generatePipelines generates the table list for every pipeline from all activated features. Note that tables are not realized +// in the OVS bridge in this function. +func (c *client) generatePipelines() { + c.featurePodConnectivity = newFeaturePodConnectivity(c.cookieAllocator, + c.ipProtocols, + c.nodeConfig, + c.networkConfig, + c.connectUplinkToBridge, + c.enableMulticast) + c.activatedFeatures = append(c.activatedFeatures, c.featurePodConnectivity) + c.traceableFeatures = append(c.traceableFeatures, c.featurePodConnectivity) + + c.featureNetworkPolicy = newFeatureNetworkPolicy(c.cookieAllocator, + c.ipProtocols, + c.bridge, + c.ovsMetersAreSupported, + c.enableDenyTracking, + c.enableAntreaPolicy) + c.activatedFeatures = append(c.activatedFeatures, c.featureNetworkPolicy) + c.traceableFeatures = append(c.traceableFeatures, c.featureNetworkPolicy) + + c.featureService = newFeatureService(c.cookieAllocator, + c.ipProtocols, + c.nodeConfig, + c.bridge, + c.enableProxy, + c.proxyAll, + c.connectUplinkToBridge) + c.activatedFeatures = append(c.activatedFeatures, c.featureService) + c.traceableFeatures = append(c.traceableFeatures, c.featureService) + + if c.enableEgress { + c.featureEgress = newFeatureEgress(c.cookieAllocator, c.ipProtocols, c.nodeConfig) + c.activatedFeatures = append(c.activatedFeatures, c.featureEgress) + } - var flows []binding.Flow - var ipv4CIDRs []net.IPNet - var ipv6CIDRs []net.IPNet - for _, cidr := range exceptCIDRs { - if cidr.IP.To4() == nil { - ipv6CIDRs = append(ipv6CIDRs, cidr) - } else { - ipv4CIDRs = append(ipv4CIDRs, cidr) + if c.enableMulticast { + // TODO: add support for IPv6 protocol + c.featureMulticast = newFeatureMulticast(c.cookieAllocator, []binding.Protocol{binding.ProtocolIP}) + c.activatedFeatures = append(c.activatedFeatures, c.featureMulticast) + } + c.featureTraceflow = newFeatureTraceflow() + c.activatedFeatures = append(c.activatedFeatures, c.featureTraceflow) + + // Pipelines to generate. + pipelineIDs := []binding.PipelineID{pipelineRoot, pipelineIP} + if c.networkConfig.IPv4Enabled { + pipelineIDs = append(pipelineIDs, pipelineARP) + if c.enableMulticast { + pipelineIDs = append(pipelineIDs, pipelineMulticast) } } - if c.nodeConfig.NodeIPv4Addr != nil && c.nodeConfig.PodIPv4CIDR != nil { - flows = c.externalFlows(c.nodeConfig.NodeIPv4Addr.IP, *c.nodeConfig.PodIPv4CIDR, localGatewayMAC, ipv4CIDRs) + + // For every pipeline, get required tables from every active feature and store the required tables in a map to avoid + // duplication. + pipelineRequiredTablesMap := make(map[binding.PipelineID]map[*Table]struct{}) + for _, pipelineID := range pipelineIDs { + pipelineRequiredTablesMap[pipelineID] = make(map[*Table]struct{}) } - if c.nodeConfig.NodeIPv6Addr != nil && c.nodeConfig.PodIPv6CIDR != nil { - flows = append(flows, c.externalFlows(c.nodeConfig.NodeIPv6Addr.IP, *c.nodeConfig.PodIPv6CIDR, localGatewayMAC, ipv6CIDRs)...)
+ pipelineRequiredTablesMap[pipelineRoot][PipelineRootClassifierTable] = struct{}{} + + for _, f := range c.activatedFeatures { + for _, t := range f.getRequiredTables() { + if _, ok := pipelineRequiredTablesMap[t.pipeline]; ok { + pipelineRequiredTablesMap[t.pipeline][t] = struct{}{} + } + } } - if err := c.ofEntryOperations.AddAll(flows); err != nil { - return fmt.Errorf("failed to install flows for external communication: %v", err) + + for pipelineID := firstPipeline; pipelineID <= lastPipeline; pipelineID++ { + if _, ok := pipelineRequiredTablesMap[pipelineID]; !ok { + continue + } + var requiredTables []*Table + // Iterate the table order cache to generate a sorted table list with required tables. + for _, table := range tableOrderCache[pipelineID] { + if _, ok := pipelineRequiredTablesMap[pipelineID][table]; ok { + requiredTables = append(requiredTables, table) + } + } + if len(requiredTables) == 0 { + klog.InfoS("There is no required table for the pipeline ID, skip generating pipeline", "pipeline", pipelineID) + continue + } + // generate a pipeline from the required table list. + c.pipelines[pipelineID] = generatePipeline(pipelineID, requiredTables) + } +} + +func (c *client) InstallExternalFlows(exceptCIDRs []net.IPNet) error { + if c.enableEgress { + flows := c.featureEgress.externalFlows(exceptCIDRs) + if err := c.ofEntryOperations.AddAll(flows); err != nil { + return fmt.Errorf("failed to install flows for external communication: %v", err) + } + c.featureEgress.fixedFlows = append(c.featureEgress.fixedFlows, flows...) } - c.hostNetworkingFlows = append(c.hostNetworkingFlows, flows...) return nil } func (c *client) InstallSNATMarkFlows(snatIP net.IP, mark uint32) error { - flow := c.snatIPFromTunnelFlow(snatIP, mark) + flow := c.featureEgress.snatIPFromTunnelFlow(snatIP, mark) cacheKey := fmt.Sprintf("s%x", mark) c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.addFlows(c.snatFlowCache, cacheKey, []binding.Flow{flow}) + return c.addFlows(c.featureEgress.cachedFlows, cacheKey, []binding.Flow{flow}) } func (c *client) UninstallSNATMarkFlows(mark uint32) error { cacheKey := fmt.Sprintf("s%x", mark) c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.deleteFlows(c.snatFlowCache, cacheKey) + return c.deleteFlows(c.featureEgress.cachedFlows, cacheKey) } func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error { - flows := []binding.Flow{c.snatRuleFlow(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC)} + flows := []binding.Flow{c.featureEgress.snatRuleFlow(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC)} cacheKey := fmt.Sprintf("p%x", ofPort) c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.addFlows(c.snatFlowCache, cacheKey, flows) + return c.addFlows(c.featureEgress.cachedFlows, cacheKey, flows) } func (c *client) UninstallPodSNATFlows(ofPort uint32) error { cacheKey := fmt.Sprintf("p%x", ofPort) c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.deleteFlows(c.snatFlowCache, cacheKey) + return c.deleteFlows(c.featureEgress.cachedFlows, cacheKey) } func (c *client) ReplayFlows() { @@ -878,55 +913,12 @@ func (c *client) ReplayFlows() { klog.Errorf("Error during flow replay: %v", err) } - addFixedFlows := func(flows []binding.Flow) { - for _, flow := range flows { - flow.Reset() - } - if err := c.ofEntryOperations.AddAll(flows); err != nil { - klog.Errorf("Error when replaying fixed flows: %v", err) - } + c.featureService.replayGroups() - } - - addFixedFlows(c.gatewayFlows) - 
addFixedFlows(c.defaultServiceFlows) - addFixedFlows(c.defaultTunnelFlows) - // hostNetworkingFlows is used only on Windows. Replay the flows only when there are flows in this cache. - if len(c.hostNetworkingFlows) > 0 { - addFixedFlows(c.hostNetworkingFlows) - } - - installCachedFlows := func(key, value interface{}) bool { - fCache := value.(flowCache) - cachedFlows := make([]binding.Flow, 0) - - for _, flow := range fCache { - flow.Reset() - cachedFlows = append(cachedFlows, flow) - } - - if err := c.ofEntryOperations.AddAll(cachedFlows); err != nil { - klog.Errorf("Error when replaying cached flows: %v", err) - } - return true - } - - c.groupCache.Range(func(id, value interface{}) bool { - group := value.(binding.Group) - group.Reset() - if err := group.Add(); err != nil { - klog.Errorf("Error when replaying cached group %d: %v", id, err) + for _, activeFeature := range c.activatedFeatures { + if err := c.ofEntryOperations.AddAll(activeFeature.replayFlows()); err != nil { + klog.ErrorS(err, "Error when replaying feature flows", "feature", activeFeature.getFeatureName()) } - return true - }) - c.nodeFlowCache.Range(installCachedFlows) - c.podFlowCache.Range(installCachedFlows) - c.serviceFlowCache.Range(installCachedFlows) - - c.replayPolicyFlows() - - if c.enableMulticast { - c.mcastFlowCache.Range(installCachedFlows) } } @@ -943,20 +935,6 @@ func (c *client) DeleteStaleFlows() error { return c.deleteFlowsByRoundNum(*c.roundInfo.PrevRoundNum) } -func (c *client) setupPolicyOnlyFlows() error { - // Rewrites MAC to gw port if the packet received is unmatched by local Pod flows. - flows := c.l3FwdFlowRouteToGW(c.nodeConfig.GatewayConfig.MAC, cookie.Default) - // If IPv6 is enabled, this flow will never get hit. - flows = append(flows, - // Replies any ARP request with the same global virtual MAC. - c.arpResponderStaticFlow(cookie.Default), - ) - if err := c.ofEntryOperations.AddAll(flows); err != nil { - return fmt.Errorf("failed to setup policy-only flows: %w", err) - } - return nil -} - func (c *client) SubscribePacketIn(reason uint8, pktInQueue *binding.PacketInQueue) error { return c.bridge.SubscribePacketIn(reason, pktInQueue) } @@ -1026,16 +1004,23 @@ func (c *client) SendTraceflowPacket(dataplaneTag uint8, packet *binding.Packet, func (c *client) InstallTraceflowFlows(dataplaneTag uint8, liveTraffic, droppedOnly, receiverOnly bool, packet *binding.Packet, ofPort uint32, timeoutSeconds uint16) error { cacheKey := fmt.Sprintf("%x", dataplaneTag) - flows := []binding.Flow{} - flows = append(flows, c.traceflowConnectionTrackFlows(dataplaneTag, receiverOnly, packet, ofPort, timeoutSeconds, cookie.Default)...) - flows = append(flows, c.traceflowL2ForwardOutputFlows(dataplaneTag, liveTraffic, droppedOnly, timeoutSeconds, cookie.Default)...) - flows = append(flows, c.traceflowNetworkPolicyFlows(dataplaneTag, timeoutSeconds, cookie.Default)...) - return c.addFlows(c.tfFlowCache, cacheKey, flows) + var flows []binding.Flow + for _, f := range c.traceableFeatures { + flows = append(flows, f.flowsToTrace(dataplaneTag, + c.ovsMetersAreSupported, + liveTraffic, + droppedOnly, + receiverOnly, + packet, + ofPort, + timeoutSeconds)...) 
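Note: Traceflow flows are no longer assembled by three client-level helpers; every traceable feature contributes its own. A sketch of the contract the loop above relies on — the method signature is taken from the call site, while the interface name is an assumption:

	type traceableFeature interface {
		// flowsToTrace returns the feature's flows for one Traceflow session,
		// identified by dataplaneTag.
		flowsToTrace(dataplaneTag uint8,
			ovsMetersAreSupported, liveTraffic, droppedOnly, receiverOnly bool,
			packet *binding.Packet,
			ofPort uint32,
			timeoutSeconds uint16) []binding.Flow
	}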
+ } + return c.addFlows(c.featureTraceflow.cachedFlows, cacheKey, flows) } func (c *client) UninstallTraceflowFlows(dataplaneTag uint8) error { cacheKey := fmt.Sprintf("%x", dataplaneTag) - return c.deleteFlows(c.tfFlowCache, cacheKey) + return c.deleteFlows(c.featureTraceflow.cachedFlows, cacheKey) } // Add TLV map optClass 0x0104, optType 0x80 optLength 4 tunMetadataIndex 0 to store data plane tag @@ -1196,27 +1181,27 @@ func (c *client) SendUDPPacketOut( } func (c *client) InstallMulticastInitialFlows(pktInReason uint8) error { - flows := c.igmpPktInFlows(pktInReason) - flows = append(flows, c.externalMulticastReceiverFlow()) + flows := c.featureMulticast.igmpPktInFlows(pktInReason) + flows = append(flows, c.featureMulticast.externalMulticastReceiverFlow()) cacheKey := fmt.Sprintf("multicast") c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.addFlows(c.mcastFlowCache, cacheKey, flows) + return c.addFlows(c.featureMulticast.mcastFlowCache, cacheKey, flows) } func (c *client) InstallMulticastFlow(multicastIP net.IP) error { - flows := c.localMulticastForwardFlow(multicastIP) + flows := c.featureMulticast.localMulticastForwardFlow(multicastIP) cacheKey := fmt.Sprintf("multicast_%s", multicastIP.String()) c.replayMutex.RLock() defer c.replayMutex.RUnlock() - return c.addFlows(c.mcastFlowCache, cacheKey, flows) + return c.addFlows(c.featureMulticast.mcastFlowCache, cacheKey, flows) } func (c *client) UninstallMulticastFlow(multicastIP net.IP) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() cacheKey := fmt.Sprintf("multicast_%s", multicastIP.String()) - return c.deleteFlows(c.mcastFlowCache, cacheKey) + return c.deleteFlows(c.featureMulticast.mcastFlowCache, cacheKey) } func (c *client) SendIGMPQueryPacketOut( @@ -1246,11 +1231,11 @@ func (c *client) InstallBridgeUplinkFlows() error { podCIDRMap[binding.ProtocolIP] = *c.nodeConfig.PodIPv4CIDR } //TODO: support IPv6 - flows := c.hostBridgeUplinkFlows(podCIDRMap, cookie.Default) + flows := c.featurePodConnectivity.hostBridgeUplinkFlows(podCIDRMap) if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } - c.hostNetworkingFlows = flows + c.featurePodConnectivity.fixedFlows = append(c.featurePodConnectivity.fixedFlows, flows...) 
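Note: each Install*/Uninstall* pair round-trips through the same cache key, so the key format is the contract between the two; the multicast keys above ("multicast", "multicast_<ip>") follow the same scheme as the SNAT keys ("s%x", "p%x"). A sketch with an illustrative helper (the diff builds the key inline with fmt.Sprintf):

	// multicastFlowKey builds the cache key shared by InstallMulticastFlow and
	// UninstallMulticastFlow; install and uninstall must agree on it exactly.
	func multicastFlowKey(multicastIP net.IP) string {
		return fmt.Sprintf("multicast_%s", multicastIP.String())
	}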
} return nil } diff --git a/pkg/agent/openflow/client_test.go b/pkg/agent/openflow/client_test.go index 18207b1db79..f20ced1c9d9 100644 --- a/pkg/agent/openflow/client_test.go +++ b/pkg/agent/openflow/client_test.go @@ -23,7 +23,6 @@ import ( "testing" "time" - "antrea.io/ofnet/ofctrl" "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -43,6 +42,7 @@ var ( bridgeMgmtAddr = binding.GetMgmtAddress(ovsconfig.DefaultOVSRunDir, bridgeName) gwMAC, _ = net.ParseMAC("AA:BB:CC:DD:EE:EE") gwIP, ipNet, _ = net.ParseCIDR("10.0.1.1/24") + _, nodeIP, _ = net.ParseCIDR("192.168.77.100/24") gwIPv6, _, _ = net.ParseCIDR("f00d::b00:0:0:0/80") gatewayConfig = &config.GatewayConfig{ IPv4: gwIP, @@ -52,8 +52,10 @@ var ( nodeConfig = &config.NodeConfig{ GatewayConfig: gatewayConfig, WireGuardConfig: &config.WireGuardConfig{}, + PodIPv4CIDR: ipNet, + NodeIPv4Addr: nodeIP, } - networkConfig = &config.NetworkConfig{} + networkConfig = &config.NetworkConfig{IPv4Enabled: true} ) func installNodeFlows(ofClient Client, cacheKey string) (int, error) { @@ -64,7 +66,7 @@ func installNodeFlows(ofClient Client, cacheKey string) (int, error) { } err := ofClient.InstallNodeFlows(hostName, peerConfigs, &utilip.DualStackIPs{IPv4: peerNodeIP}, 0, nil) client := ofClient.(*client) - fCacheI, ok := client.nodeFlowCache.Load(hostName) + fCacheI, ok := client.featurePodConnectivity.nodeCachedFlows.Load(hostName) if ok { return len(fCacheI.(flowCache)), err } @@ -78,7 +80,7 @@ func installPodFlows(ofClient Client, cacheKey string) (int, error) { ofPort := uint32(10) err := ofClient.InstallPodFlows(containerID, []net.IP{podIP}, podMAC, ofPort) client := ofClient.(*client) - fCacheI, ok := client.podFlowCache.Load(containerID) + fCacheI, ok := client.featurePodConnectivity.podCachedFlows.Load(containerID) if ok { return len(fCacheI.(flowCache)), err } @@ -109,6 +111,8 @@ func TestIdempotentFlowInstallation(t *testing.T) { client.ofEntryOperations = m client.nodeConfig = nodeConfig client.networkConfig = networkConfig + client.ipProtocols = []binding.Protocol{binding.ProtocolIP} + client.generatePipelines() m.EXPECT().AddAll(gomock.Any()).Return(nil).Times(1) // Installing the flows should succeed, and all the flows should be added into the cache. @@ -138,6 +142,8 @@ func TestIdempotentFlowInstallation(t *testing.T) { client.ofEntryOperations = m client.nodeConfig = nodeConfig client.networkConfig = networkConfig + client.ipProtocols = []binding.Protocol{binding.ProtocolIP} + client.generatePipelines() errorCall := m.EXPECT().AddAll(gomock.Any()).Return(errors.New("Bundle error")).Times(1) m.EXPECT().AddAll(gomock.Any()).Return(nil).After(errorCall) @@ -180,6 +186,8 @@ func TestFlowInstallationFailed(t *testing.T) { client.ofEntryOperations = m client.nodeConfig = nodeConfig client.networkConfig = networkConfig + client.ipProtocols = []binding.Protocol{binding.ProtocolIP} + client.generatePipelines() // We generate an error for AddAll call. 
m.EXPECT().AddAll(gomock.Any()).Return(errors.New("Bundle error")) @@ -215,6 +223,8 @@ func TestConcurrentFlowInstallation(t *testing.T) { client.ofEntryOperations = m client.nodeConfig = nodeConfig client.networkConfig = networkConfig + client.ipProtocols = []binding.Protocol{binding.ProtocolIP} + client.generatePipelines() var concurrentCalls atomic.Value // set to true if we observe concurrent calls timeoutCh := make(chan struct{}) @@ -258,9 +268,6 @@ func TestConcurrentFlowInstallation(t *testing.T) { } func Test_client_InstallTraceflowFlows(t *testing.T) { - type ofSwitch struct { - ofctrl.OFSwitch - } type fields struct { } type args struct { @@ -404,16 +411,22 @@ func prepareTraceflowFlow(ctrl *gomock.Controller) *client { c := ofClient.(*client) c.cookieAllocator = cookie.NewAllocator(0) c.nodeConfig = nodeConfig - m := ovsoftest.NewMockBridge(ctrl) - m.EXPECT().AddFlowsInBundle(gomock.Any(), nil, nil).Return(nil).Times(1) - c.bridge = m + m := oftest.NewMockOFEntryOperations(ctrl) + c.ofEntryOperations = m + c.nodeConfig = nodeConfig + c.networkConfig = networkConfig + c.ipProtocols = []binding.Protocol{binding.ProtocolIP} + c.generatePipelines() + + m.EXPECT().AddAll(gomock.Any()).Return(nil).Times(1) + c.bridge = ovsoftest.NewMockBridge(ctrl) mFlow := ovsoftest.NewMockFlow(ctrl) ctx := &conjMatchFlowContext{dropFlow: mFlow} mFlow.EXPECT().FlowProtocol().Return(binding.Protocol("ip")) - mFlow.EXPECT().CopyToBuilder(priorityNormal+2, false).Return(EgressDefaultTable.BuildFlow(priorityNormal + 2)).Times(1) - c.globalConjMatchFlowCache["mockContext"] = ctx - c.policyCache.Add(&policyRuleConjunction{metricFlows: []binding.Flow{c.denyRuleMetricFlow(123, false)}}) + mFlow.EXPECT().CopyToBuilder(priorityNormal+2, false).Return(EgressDefaultTable.ofTable.BuildFlow(priorityNormal + 2)).Times(1) + c.featureNetworkPolicy.globalConjMatchFlowCache["mockContext"] = ctx + c.featureNetworkPolicy.policyCache.Add(&policyRuleConjunction{metricFlows: []binding.Flow{c.featureNetworkPolicy.denyRuleMetricFlow(123, false)}}) return c } diff --git a/pkg/agent/openflow/cookie/allocator.go b/pkg/agent/openflow/cookie/allocator.go index 5888da12232..72df44b35d8 100644 --- a/pkg/agent/openflow/cookie/allocator.go +++ b/pkg/agent/openflow/cookie/allocator.go @@ -31,30 +31,30 @@ type Category uint64 const ( Default Category = iota - Gateway - Node - Pod + PodConnectivity + NetworkPolicy Service - Policy - SNAT + Egress + Multicast + Traceflow ) func (c Category) String() string { switch c { case Default: return "Default" - case Gateway: - return "Gateway" - case Node: - return "Node" - case Pod: - return "Pod" + case PodConnectivity: + return "PodConnectivity" + case NetworkPolicy: + return "NetworkPolicy" case Service: return "Service" - case Policy: - return "Policy" - case SNAT: - return "SNAT" + case Egress: + return "Egress" + case Multicast: + return "Multicast" + case Traceflow: + return "Traceflow" default: return "Invalid" } diff --git a/pkg/agent/openflow/cookie/allocator_test.go b/pkg/agent/openflow/cookie/allocator_test.go index 1b2543989d6..739839c3216 100644 --- a/pkg/agent/openflow/cookie/allocator_test.go +++ b/pkg/agent/openflow/cookie/allocator_test.go @@ -46,7 +46,7 @@ func TestConcurrentAllocate(t *testing.T) { var seq []Category for i := 0; i < eachTotal; i++ { - seq = append(seq, Pod, Node, Default) + seq = append(seq, NetworkPolicy, Service, PodConnectivity, Default) } rand.Shuffle(len(seq), func(a, b int) { seq[a], seq[b] = seq[b], seq[a] }) diff --git a/pkg/agent/openflow/egress.go 
b/pkg/agent/openflow/egress.go new file mode 100644 index 00000000000..5dacb866bae --- /dev/null +++ b/pkg/agent/openflow/egress.go @@ -0,0 +1,79 @@ +// Copyright 2022 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "net" + + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featureEgress struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + + cachedFlows *flowCategoryCache + fixedFlows []binding.Flow + + nodeIPs map[binding.Protocol]net.IP + gatewayMAC net.HardwareAddr + + category cookie.Category +} + +func (f *featureEgress) getFeatureName() string { + return "Egress" +} + +func newFeatureEgress(cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + nodeConfig *config.NodeConfig) *featureEgress { + nodeIPs := make(map[binding.Protocol]net.IP) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP { + nodeIPs[ipProtocol] = nodeConfig.NodeIPv4Addr.IP + } else if ipProtocol == binding.ProtocolIPv6 { + nodeIPs[ipProtocol] = nodeConfig.NodeIPv6Addr.IP + } + } + return &featureEgress{ + cachedFlows: newFlowCategoryCache(), + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + nodeIPs: nodeIPs, + gatewayMAC: nodeConfig.GatewayConfig.MAC, + category: cookie.Egress, + } +} + +func (f *featureEgress) initFlows() []binding.Flow { + return []binding.Flow{} +} + +func (f *featureEgress) replayFlows() []binding.Flow { + var flows []binding.Flow + + // Get fixed flows. + for _, flow := range f.fixedFlows { + flow.Reset() + flows = append(flows, flow) + } + // Get cached flows. + flows = append(flows, getCachedFlows(f.cachedFlows)...) + + return flows +} diff --git a/pkg/agent/openflow/fields.go b/pkg/agent/openflow/fields.go index e608cd1e3f5..2b0da990806 100644 --- a/pkg/agent/openflow/fields.go +++ b/pkg/agent/openflow/fields.go @@ -21,53 +21,62 @@ import ( // Fields using reg. var ( - fromTunnelVal = uint32(0) - fromGatewayVal = uint32(1) - fromLocalVal = uint32(2) - fromUplinkVal = uint32(4) - fromBridgeVal = uint32(5) + tunnelVal = uint32(1) + gatewayVal = uint32(2) + localVal = uint32(3) + uplinkVal = uint32(4) + bridgeVal = uint32(5) // reg0 (NXM_NX_REG0) - // reg0[0..3]: Field to mark the packet source. Marks in this field include, - // - 0: from the tunnel port - // - 1: from antrea-gw0 - // - 2: from the local Pods - // - 4: from the Bridge interface - // - 5: from the uplink interface + // reg0[0..3]: Field to store the packet source. Marks in this field include: + // - 1: from tunnel port. + // - 2: from Antrea gateway port. + // - 3: from local Pods. + // - 4: from uplink port. + // - 5: from bridge local port. 
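Note: featureEgress above is the smallest example of the new per-feature contract: every activated feature names itself, declares the tables it needs (which drives generatePipelines), and owns its initial and replayed flows. A sketch of the interface implied by the call sites in client.go (the interface name and exact method set are assumptions):

	type feature interface {
		getFeatureName() string
		getRequiredTables() []*Table // consumed by generatePipelines
		initFlows() []binding.Flow   // installed once in initialize()
		replayFlows() []binding.Flow // re-installed after reconnection
	}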
PktSourceField = binding.NewRegField(0, 0, 3, "PacketSource") - FromTunnelRegMark = binding.NewRegMark(PktSourceField, fromTunnelVal) - FromGatewayRegMark = binding.NewRegMark(PktSourceField, fromGatewayVal) - FromLocalRegMark = binding.NewRegMark(PktSourceField, fromLocalVal) - FromUplinkRegMark = binding.NewRegMark(PktSourceField, fromUplinkVal) - FromBridgeRegMark = binding.NewRegMark(PktSourceField, fromBridgeVal) - // reg0[16]: Mark to indicate the ofPort number of an interface is found. - OFPortFoundRegMark = binding.NewOneBitRegMark(0, 16, "OFPortFound") - // reg0[18]: Mark to indicate the packet needs DNAT to virtual IP. - // If a packet uses HairpinRegMark, it will be output to the port where it enters OVS pipeline in L2ForwardingOutTable. - HairpinRegMark = binding.NewOneBitRegMark(0, 18, "Hairpin") - // reg0[19]: Mark to indicate the packet's MAC address needs to be rewritten. - RewriteMACRegMark = binding.NewOneBitRegMark(0, 19, "RewriteMAC") - // reg0[20]: Mark to indicate the packet is denied(Drop/Reject). - CnpDenyRegMark = binding.NewOneBitRegMark(0, 20, "CNPDeny") - // reg0[21..22]: Field to indicate disposition of Antrea Policy. It could have more bits to support more disposition - // that Antrea policy support in the future. - // Marks in this field include, + FromTunnelRegMark = binding.NewRegMark(PktSourceField, tunnelVal) + FromGatewayRegMark = binding.NewRegMark(PktSourceField, gatewayVal) + FromLocalRegMark = binding.NewRegMark(PktSourceField, localVal) + FromUplinkRegMark = binding.NewRegMark(PktSourceField, uplinkVal) + FromBridgeRegMark = binding.NewRegMark(PktSourceField, bridgeVal) + // reg0[4..7]: Field to store the packet destination. Marks in this field include: + // - 1: to tunnel port. + // - 2: to Antrea gateway port. + // - 3: to local Pods. + // - 4: to uplink port. + // - 5: to bridge local port. + PktDestinationField = binding.NewRegField(0, 4, 7, "PacketDestination") + ToTunnelRegMark = binding.NewRegMark(PktDestinationField, tunnelVal) + ToGatewayRegMark = binding.NewRegMark(PktDestinationField, gatewayVal) + // reg0[0..7]: Union field of the packet source and destination. It is used to mark hairpin packets. Marks in this + // field include: + // - 0x22: the packet sourced from Antrea gateway interface, and destined for local Node via Antrea gateway interface. + PktUnionField = binding.NewRegField(0, 0, 7, "PacketUnion") + GatewayHairpinRegMark = binding.NewRegMark(PktUnionField, (ToGatewayRegMark.GetValue()<<4)|FromGatewayRegMark.GetValue())
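Note: the union mark is just the destination nibble stacked above the source nibble, so the hairpin value can be checked by hand; with gatewayVal = 2 on both sides, the gateway-to-gateway hairpin mark is 0x22. A worked check (illustrative, not code from the change):

	func ExampleGatewayHairpinValue() {
		src := uint32(2)          // gatewayVal, as FromGatewayRegMark
		dst := uint32(2)          // gatewayVal, as ToGatewayRegMark
		union := (dst << 4) | src // PktDestinationField occupies reg0[4..7]
		fmt.Printf("%#02x\n", union)
		// Output: 0x22
	}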
diff --git a/pkg/agent/openflow/network_policy.go b/pkg/agent/openflow/network_policy.go --- a/pkg/agent/openflow/network_policy.go +++ b/pkg/agent/openflow/network_policy.go @@ -953,16 +964,16 @@ func (c *client) calculateActionFlowChangesForRule(rule *types.PolicyRule) *poli var actionFlows []binding.Flow var metricFlows []binding.Flow if rule.IsAntreaNetworkPolicyRule() && *rule.Action == crdv1alpha1.RuleActionDrop { - metricFlows = append(metricFlows, c.denyRuleMetricFlow(ruleOfID, isIngress)) - actionFlows = append(actionFlows, c.conjunctionActionDenyFlow(ruleOfID, ruleTable, rule.Priority, DispositionDrop, rule.EnableLogging)) + metricFlows = append(metricFlows, f.denyRuleMetricFlow(ruleOfID, isIngress)) + actionFlows = append(actionFlows, f.conjunctionActionDenyFlow(ruleOfID, ruleTable, rule.Priority, DispositionDrop, rule.EnableLogging)) } else if rule.IsAntreaNetworkPolicyRule() && *rule.Action == crdv1alpha1.RuleActionReject { - metricFlows = append(metricFlows, c.denyRuleMetricFlow(ruleOfID, isIngress)) - actionFlows = append(actionFlows, c.conjunctionActionDenyFlow(ruleOfID, ruleTable, rule.Priority, DispositionRej, rule.EnableLogging)) + metricFlows = append(metricFlows, f.denyRuleMetricFlow(ruleOfID, isIngress)) + actionFlows = append(actionFlows, f.conjunctionActionDenyFlow(ruleOfID, ruleTable, rule.Priority, DispositionRej, rule.EnableLogging)) } else if rule.IsAntreaNetworkPolicyRule() && *rule.Action == crdv1alpha1.RuleActionPass { - actionFlows = append(actionFlows, c.conjunctionActionPassFlow(ruleOfID, ruleTable, rule.Priority, rule.EnableLogging)) + actionFlows = append(actionFlows, f.conjunctionActionPassFlow(ruleOfID, ruleTable, rule.Priority, rule.EnableLogging)) } else { - metricFlows = append(metricFlows, c.allowRulesMetricFlows(ruleOfID, isIngress)...) - actionFlows = append(actionFlows, c.conjunctionActionFlow(ruleOfID, ruleTable, dropTable.GetNext(), rule.Priority, rule.EnableLogging)...) + metricFlows = append(metricFlows, f.allowRulesMetricFlows(ruleOfID, isIngress)...) + actionFlows = append(actionFlows, f.conjunctionActionFlow(ruleOfID, ruleTable, dropTable.GetNext(), rule.Priority, rule.EnableLogging)...) } conj.actionFlows = actionFlows conj.metricFlows = metricFlows @@ -971,33 +982,33 @@ } // calculateMatchFlowChangesForRule calculates the contextChanges for the policyRule, and updates the context status in case of batch install. -func (c *client) calculateMatchFlowChangesForRule(conj *policyRuleConjunction, rule *types.PolicyRule) []*conjMatchFlowContextChange { +func (f *featureNetworkPolicy) calculateMatchFlowChangesForRule(conj *policyRuleConjunction, rule *types.PolicyRule) []*conjMatchFlowContextChange { // Calculate the conjMatchFlowContext changes. The changed Openflow entries are included in the conjMatchFlowContext change. - ctxChanges := conj.calculateChangesForRuleCreation(c, rule) + ctxChanges := conj.calculateChangesForRuleCreation(f, rule) return ctxChanges } // addRuleToConjunctiveMatch adds a rule's clauses to corresponding conjunctive match contexts. // Unlike calculateMatchFlowChangesForRule, it updates the context status directly and doesn't calculate flow changes. // It's used in initial batch install where we first add all rules then calculate flow changes based on the final state.
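Note: batch install stays two-phase on purpose — phase one merges every rule's clauses into the shared conjunctive-match state, and phase two derives flows once from the final state, so rules sharing a match never produce duplicate match flows. A condensed sketch of the shape of BatchInstallPolicyRuleFlows below (illustrative; actionsOf is a hypothetical helper standing in for the inline action collection):

	// Phase 1: per-rule action/metric flows, and match registration.
	for _, rule := range ofPolicyRules {
		conj := f.calculateActionFlowChangesForRule(rule)
		f.addRuleToConjunctiveMatch(conj, rule)
	}
	// Phase 2: one conjunctive match flow per shared context, regardless of
	// how many rules (conjunction actions) are attached to it.
	for _, ctx := range f.globalConjMatchFlowCache {
		ctx.flow = f.conjunctiveMatchFlow(ctx.tableID, ctx.matchKey, ctx.matchValue, ctx.priority, actionsOf(ctx))
	}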
-func (c *client) addRuleToConjunctiveMatch(conj *policyRuleConjunction, rule *types.PolicyRule) { +func (f *featureNetworkPolicy) addRuleToConjunctiveMatch(conj *policyRuleConjunction, rule *types.PolicyRule) { if conj.fromClause != nil { for _, addr := range rule.From { match := generateAddressConjMatch(conj.fromClause.ruleTable.GetID(), addr, types.SrcAddress, rule.Priority) - c.addActionToConjunctiveMatch(conj.fromClause, match) + f.addActionToConjunctiveMatch(conj.fromClause, match) } } if conj.toClause != nil { for _, addr := range rule.To { match := generateAddressConjMatch(conj.toClause.ruleTable.GetID(), addr, types.DstAddress, rule.Priority) - c.addActionToConjunctiveMatch(conj.toClause, match) + f.addActionToConjunctiveMatch(conj.toClause, match) } } if conj.serviceClause != nil { for _, port := range rule.Service { - matches := generateServicePortConjMatches(conj.serviceClause.ruleTable.GetID(), port, rule.Priority, c.networkConfig.IPv4Enabled, c.networkConfig.IPv6Enabled, false) + matches := generateServicePortConjMatches(conj.serviceClause.ruleTable.GetID(), port, rule.Priority, f.ipProtocols, false) for _, match := range matches { - c.addActionToConjunctiveMatch(conj.serviceClause, match) + f.addActionToConjunctiveMatch(conj.serviceClause, match) } } } @@ -1006,7 +1017,7 @@ func (c *client) addRuleToConjunctiveMatch(conj *policyRuleConjunction, rule *ty // addActionToConjunctiveMatch adds a clause to corresponding conjunctive match context. // It updates the context status directly and doesn't calculate the match flow, which is supposed to be calculated after // all actions are added. It's used in initial batch install only. -func (c *client) addActionToConjunctiveMatch(clause *clause, match *conjunctiveMatch) { +func (f *featureNetworkPolicy) addActionToConjunctiveMatch(clause *clause, match *conjunctiveMatch) { matcherKey := match.generateGlobalMapKey() _, found := clause.matches[matcherKey] if found { @@ -1016,18 +1027,18 @@ func (c *client) addActionToConjunctiveMatch(clause *clause, match *conjunctiveM var context *conjMatchFlowContext // Get conjMatchFlowContext from globalConjMatchFlowCache. If it doesn't exist, create a new one and add into the cache. - context, found = c.globalConjMatchFlowCache[matcherKey] + context, found = f.globalConjMatchFlowCache[matcherKey] if !found { context = &conjMatchFlowContext{ - conjunctiveMatch: match, - actions: make(map[uint32]*conjunctiveAction), - client: c, + conjunctiveMatch: match, + actions: make(map[uint32]*conjunctiveAction), + featureNetworkPolicy: f, } // Generate the default drop flow if dropTable is not nil. if clause.dropTable != nil { - context.dropFlow = context.client.defaultDropFlow(clause.dropTable, match.matchKey, match.matchValue) + context.dropFlow = context.featureNetworkPolicy.defaultDropFlow(clause.dropTable, match.matchKey, match.matchValue) } - c.globalConjMatchFlowCache[matcherKey] = context + f.globalConjMatchFlowCache[matcherKey] = context } clause.matches[matcherKey] = context @@ -1052,14 +1063,14 @@ func (c *client) BatchInstallPolicyRuleFlows(ofPolicyRules []*types.PolicyRule) var conjunctions []*policyRuleConjunction for _, rule := range ofPolicyRules { - conj := c.calculateActionFlowChangesForRule(rule) - c.addRuleToConjunctiveMatch(conj, rule) + conj := c.featureNetworkPolicy.calculateActionFlowChangesForRule(rule) + c.featureNetworkPolicy.addRuleToConjunctiveMatch(conj, rule) allFlows = append(allFlows, conj.actionFlows...) allFlows = append(allFlows, conj.metricFlows...) 
conjunctions = append(conjunctions, conj) } - for _, ctx := range c.globalConjMatchFlowCache { + for _, ctx := range c.featureNetworkPolicy.globalConjMatchFlowCache { // In theory there must be at least one action but InstallPolicyRuleFlows currently handles the 1 clause case // and we do the same in addRuleToConjunctiveMatch. The check is added only for consistency. Later we should // return error if clients install a rule with only 1 clause, and should remove the extra code for processing it. @@ -1068,7 +1079,7 @@ func (c *client) BatchInstallPolicyRuleFlows(ofPolicyRules []*types.PolicyRule) for _, action := range ctx.actions { actions = append(actions, action) } - ctx.flow = c.conjunctiveMatchFlow(ctx.tableID, ctx.matchKey, ctx.matchValue, ctx.priority, actions) + ctx.flow = c.featureNetworkPolicy.conjunctiveMatchFlow(ctx.tableID, ctx.matchKey, ctx.matchValue, ctx.priority, actions) allFlows = append(allFlows, ctx.flow) } if ctx.dropFlow != nil { @@ -1080,21 +1091,21 @@ func (c *client) BatchInstallPolicyRuleFlows(ofPolicyRules []*types.PolicyRule) if err := c.ofEntryOperations.AddAll(allFlows); err != nil { // Reset the global conjunctive match flow cache since the OpenFlow bundle, which contains // all the match flows to be installed, was not applied successfully. - c.globalConjMatchFlowCache = map[string]*conjMatchFlowContext{} + c.featureNetworkPolicy.globalConjMatchFlowCache = map[string]*conjMatchFlowContext{} return err } // Update conjMatchFlowContexts as the expected status. for _, conj := range conjunctions { // Add the policyRuleConjunction into policyCache - c.policyCache.Add(conj) + c.featureNetworkPolicy.policyCache.Add(conj) } return nil } // applyConjunctiveMatchFlows installs OpenFlow entries on the OVS bridge, and then updates the conjMatchFlowContext. -func (c *client) applyConjunctiveMatchFlows(flowChanges []*conjMatchFlowContextChange) error { +func (f *featureNetworkPolicy) applyConjunctiveMatchFlows(flowChanges []*conjMatchFlowContextChange) error { // Send the OpenFlow entries to the OVS bridge. - if err := c.sendConjunctiveFlows(flowChanges, []binding.Flow{}); err != nil { + if err := f.sendConjunctiveFlows(flowChanges, []binding.Flow{}); err != nil { return err } // Update conjunctiveMatchContext. @@ -1105,7 +1116,7 @@ func (c *client) applyConjunctiveMatchFlows(flowChanges []*conjMatchFlowContextC } // sendConjunctiveFlows sends all the changed OpenFlow entries to the OVS bridge in a single Bundle. -func (c *client) sendConjunctiveFlows(changes []*conjMatchFlowContextChange, flows []binding.Flow) error { +func (f *featureNetworkPolicy) sendConjunctiveFlows(changes []*conjMatchFlowContextChange, flows []binding.Flow) error { var addFlows, modifyFlows, deleteFlows []binding.Flow var flowChanges []*flowChange addFlows = flows @@ -1128,7 +1139,7 @@ func (c *client) sendConjunctiveFlows(changes []*conjMatchFlowContextChange, flo deleteFlows = append(deleteFlows, fc.flow) } } - return c.bridge.AddFlowsInBundle(addFlows, modifyFlows, deleteFlows) + return f.bridge.AddFlowsInBundle(addFlows, modifyFlows, deleteFlows) } // ActionFlowPriorities returns the OF priorities of the actionFlows in the policyRuleConjunction @@ -1156,15 +1167,15 @@ func (c *policyRuleConjunction) newClause(clauseID uint8, nClause uint8, ruleTab // calculateClauses configures the policyRuleConjunction's clauses according to the PolicyRule. The Openflow entries are // not installed on the OVS bridge when calculating the clauses. 
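Before the clause calculation in the next hunk, it may help to recall the OVS conjunction model these clauses map onto. Here is a small, self-contained Go sketch (illustrative only, not Antrea code): a rule with n clauses matches a packet only if every clause has at least one matching member.

package main

import "fmt"

// conjunctionMatches returns true only if each clause (e.g. from-addresses,
// destination ports) contains at least one match for the packet, mirroring
// how OVS evaluates conjunction(id, k/n) flows.
func conjunctionMatches(clauses [][]string, packet map[string]bool) bool {
	for _, clause := range clauses {
		clauseMatched := false
		for _, member := range clause {
			if packet[member] {
				clauseMatched = true
				break
			}
		}
		if !clauseMatched {
			return false
		}
	}
	return true
}

func main() {
	clauses := [][]string{
		{"src=192.168.1.40", "src=192.168.1.50"}, // fromClause members
		{"tcp/8080"},                             // serviceClause members
	}
	packet := map[string]bool{"src=192.168.1.50": true, "tcp/8080": true}
	fmt.Println(conjunctionMatches(clauses, packet)) // true
}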
-func (c *policyRuleConjunction) calculateClauses(rule *types.PolicyRule, clnt *client) (uint8, binding.Table, binding.Table) { +func (c *policyRuleConjunction) calculateClauses(rule *types.PolicyRule) (uint8, binding.Table, binding.Table) { var dropTable binding.Table var isEgressRule = false switch rule.Direction { case v1beta2.DirectionOut: - dropTable = EgressDefaultTable + dropTable = EgressDefaultTable.ofTable isEgressRule = true default: - dropTable = IngressDefaultTable + dropTable = IngressDefaultTable.ofTable } ruleTable := getTableByID(rule.TableID) @@ -1209,16 +1220,16 @@ func (c *policyRuleConjunction) calculateClauses(rule *types.PolicyRule, clnt *c // calculateChangesForRuleCreation returns the conjMatchFlowContextChanges of the new policyRuleConjunction. It // will calculate the expected conjMatchFlowContext status, and the changed Openflow entries. -func (c *policyRuleConjunction) calculateChangesForRuleCreation(clnt *client, rule *types.PolicyRule) []*conjMatchFlowContextChange { +func (c *policyRuleConjunction) calculateChangesForRuleCreation(featureNetworkPolicy *featureNetworkPolicy, rule *types.PolicyRule) []*conjMatchFlowContextChange { var ctxChanges []*conjMatchFlowContextChange if c.fromClause != nil { - ctxChanges = append(ctxChanges, c.fromClause.addAddrFlows(clnt, types.SrcAddress, rule.From, rule.Priority)...) + ctxChanges = append(ctxChanges, c.fromClause.addAddrFlows(featureNetworkPolicy, types.SrcAddress, rule.From, rule.Priority)...) } if c.toClause != nil { - ctxChanges = append(ctxChanges, c.toClause.addAddrFlows(clnt, types.DstAddress, rule.To, rule.Priority)...) + ctxChanges = append(ctxChanges, c.toClause.addAddrFlows(featureNetworkPolicy, types.DstAddress, rule.To, rule.Priority)...) } if c.serviceClause != nil { - ctxChanges = append(ctxChanges, c.serviceClause.addServiceFlows(clnt, rule.Service, rule.Priority, false)...) + ctxChanges = append(ctxChanges, c.serviceClause.addServiceFlows(featureNetworkPolicy, rule.Service, rule.Priority, false)...) } return ctxChanges } @@ -1269,8 +1280,8 @@ func (c *policyRuleConjunction) getAllFlowKeys() []string { return append(flowKeys, dropFlowKeys...) } -func (c *client) getPolicyRuleConjunction(ruleID uint32) *policyRuleConjunction { - conj, found, _ := c.policyCache.GetByKey(fmt.Sprint(ruleID)) +func (f *featureNetworkPolicy) getPolicyRuleConjunction(ruleID uint32) *policyRuleConjunction { + conj, found, _ := f.policyCache.GetByKey(fmt.Sprint(ruleID)) if !found { return nil } @@ -1278,7 +1289,7 @@ func (c *client) getPolicyRuleConjunction(ruleID uint32) *policyRuleConjunction } func (c *client) GetPolicyInfoFromConjunction(ruleID uint32) (string, string) { - conjunction := c.getPolicyRuleConjunction(ruleID) + conjunction := c.featureNetworkPolicy.getPolicyRuleConjunction(ruleID) if conjunction == nil { return "", "" } @@ -1296,12 +1307,12 @@ func (c *client) UninstallPolicyRuleFlows(ruleID uint32) ([]string, error) { c.replayMutex.RLock() defer c.replayMutex.RUnlock() - conj := c.getPolicyRuleConjunction(ruleID) + conj := c.featureNetworkPolicy.getPolicyRuleConjunction(ruleID) if conj == nil { klog.V(2).Infof("policyRuleConjunction with ID %d not found", ruleID) return nil, nil } - staleOFPriorities := c.getStalePriorities(conj) + staleOFPriorities := c.featureNetworkPolicy.getStalePriorities(conj) // Delete action flows from the OVS bridge. 
if err := c.ofEntryOperations.DeleteAll(conj.actionFlows); err != nil { return nil, err @@ -1310,30 +1321,30 @@ func (c *client) UninstallPolicyRuleFlows(ruleID uint32) ([]string, error) { return nil, err } - c.conjMatchFlowLock.Lock() - defer c.conjMatchFlowLock.Unlock() + c.featureNetworkPolicy.conjMatchFlowLock.Lock() + defer c.featureNetworkPolicy.conjMatchFlowLock.Unlock() // Get the conjMatchFlowContext changes. ctxChanges := conj.calculateChangesForRuleDeletion() // Send the changed OpenFlow entries to the OVS bridge and update the conjMatchFlowContext. - if err := c.applyConjunctiveMatchFlows(ctxChanges); err != nil { + if err := c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges); err != nil { return nil, err } - c.policyCache.Delete(conj) + c.featureNetworkPolicy.policyCache.Delete(conj) return staleOFPriorities, nil } // getStalePriorities returns the ofPriorities that will be stale on the rule table where the // policyRuleConjunction is installed, after the deletion of that policyRuleConjunction. -func (c *client) getStalePriorities(conj *policyRuleConjunction) (staleOFPriorities []string) { +func (f *featureNetworkPolicy) getStalePriorities(conj *policyRuleConjunction) (staleOFPriorities []string) { var ofPrioritiesPotentiallyStale []string - if conj.ruleTableID != IngressRuleTable.GetID() && conj.ruleTableID != EgressRuleTable.GetID() { + if conj.ruleTableID != IngressRuleTable.ofTable.GetID() && conj.ruleTableID != EgressRuleTable.ofTable.GetID() { ofPrioritiesPotentiallyStale = conj.ActionFlowPriorities() } klog.V(4).Infof("Potential stale ofPriorities %v found", ofPrioritiesPotentiallyStale) for _, p := range ofPrioritiesPotentiallyStale { // Filter out all the policyRuleConjunctions created at the ofPriority across all CNP tables. - conjs, _ := c.policyCache.ByIndex(priorityIndex, p) + conjs, _ := f.policyCache.ByIndex(priorityIndex, p) priorityStale := true for i := 0; i < len(conjs); i++ { conjFiltered := conjs[i].(*policyRuleConjunction) @@ -1352,7 +1363,7 @@ func (c *client) getStalePriorities(conj *policyRuleConjunction) (staleOFPriorit return staleOFPriorities } -func (c *client) replayPolicyFlows() { +func (f *featureNetworkPolicy) replayFlows() []binding.Flow { var flows []binding.Flow addActionFlows := func(conj *policyRuleConjunction) { for _, flow := range conj.actionFlows { @@ -1367,7 +1378,7 @@ func (c *client) replayPolicyFlows() { } } - for _, conj := range c.policyCache.List() { + for _, conj := range f.policyCache.List() { addActionFlows(conj.(*policyRuleConjunction)) addMetricFlows(conj.(*policyRuleConjunction)) } @@ -1383,12 +1394,10 @@ func (c *client) replayPolicyFlows() { } - for _, ctx := range c.globalConjMatchFlowCache { + for _, ctx := range f.globalConjMatchFlowCache { addMatchFlows(ctx) } - if err := c.ofEntryOperations.AddAll(flows); err != nil { - klog.Errorf("Error when replaying flows: %v", err) - } + return flows } // AddPolicyRuleAddress adds one or multiple addresses to the specified NetworkPolicy rule. If addrType is srcAddress, the @@ -1397,7 +1406,7 @@ func (c *client) AddPolicyRuleAddress(ruleID uint32, addrType types.AddressType, c.replayMutex.RLock() defer c.replayMutex.RUnlock() - conj := c.getPolicyRuleConjunction(ruleID) + conj := c.featureNetworkPolicy.getPolicyRuleConjunction(ruleID) // If the policyRuleConjunction doesn't exist in the client's policyCache, return a not-found error. It should not happen, since // NetworkPolicyController will guarantee the policyRuleConjunction is created before this method is called.
The check // here is for safety. @@ -1410,10 +1419,10 @@ func (c *client) AddPolicyRuleAddress(ruleID uint32, addrType types.AddressType, return fmt.Errorf("no clause is using addrType %d", addrType) } - c.conjMatchFlowLock.Lock() - defer c.conjMatchFlowLock.Unlock() - flowChanges := clause.addAddrFlows(c, addrType, addresses, priority) - return c.applyConjunctiveMatchFlows(flowChanges) + c.featureNetworkPolicy.conjMatchFlowLock.Lock() + defer c.featureNetworkPolicy.conjMatchFlowLock.Unlock() + flowChanges := clause.addAddrFlows(c.featureNetworkPolicy, addrType, addresses, priority) + return c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChanges) } // DeletePolicyRuleAddress removes addresses from the specified NetworkPolicy rule. If addrType is srcAddress, the addresses @@ -1422,7 +1431,7 @@ func (c *client) DeletePolicyRuleAddress(ruleID uint32, addrType types.AddressTy c.replayMutex.RLock() defer c.replayMutex.RUnlock() - conj := c.getPolicyRuleConjunction(ruleID) + conj := c.featureNetworkPolicy.getPolicyRuleConjunction(ruleID) // If the policyRuleConjunction doesn't exist in the client's policyCache, return a not-found error. It should not happen, since // NetworkPolicyController will guarantee the policyRuleConjunction is created before this method is called. The check // here is for safety. @@ -1436,25 +1445,25 @@ func (c *client) DeletePolicyRuleAddress(ruleID uint32, addrType types.AddressTy return fmt.Errorf("no clause is using addrType %d", addrType) } - c.conjMatchFlowLock.Lock() - defer c.conjMatchFlowLock.Unlock() + c.featureNetworkPolicy.conjMatchFlowLock.Lock() + defer c.featureNetworkPolicy.conjMatchFlowLock.Unlock() // Remove the policyRuleConjunction from the actions of the conjunctive matches that use the given addresses. changes := clause.deleteAddrFlows(addrType, addresses, priority) // Update the OpenFlow entries on the OVS bridge, and update the local cache. - return c.applyConjunctiveMatchFlows(changes) + return c.featureNetworkPolicy.applyConjunctiveMatchFlows(changes) } func (c *client) GetNetworkPolicyFlowKeys(npName, npNamespace string) []string { flowKeys := []string{} // Hold replayMutex write lock to protect flows from being modified by - // NetworkPolicy updates and replayPolicyFlows. This is more for logic + // NetworkPolicy updates and replayFlows. This is more for logic // cleanliness, as: for now flow updates do not impact the matching string // generation; NetworkPolicy updates do not change policyRuleConjunction.actionFlows; // and last for protection of clause flows, conjMatchFlowLock is good enough. c.replayMutex.Lock() defer c.replayMutex.Unlock() - for _, conjObj := range c.policyCache.List() { + for _, conjObj := range c.featureNetworkPolicy.policyCache.List() { conj := conjObj.(*policyRuleConjunction) // If the NetworkPolicyReference in the policyRuleConjunction is nil then that entry in client's // policyCache should be ignored because here we need to dump flows of NetworkPolicy. @@ -1502,7 +1511,7 @@ func getMatchFlowUpdates(conj *policyRuleConjunction, newPriority uint16) (add, // be re-assigned priority 98. This operation will issue a delFlow, which // would essentially void the add flow for conj=1. // In this case, we remove the conflicting delFlow and set addFlow as a modifyFlow.
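A toy, self-contained illustration of that resolution (assumed semantics, not the real implementation, simplified to key conflicts on priority alone; toyFlow is a hypothetical stand-in): when one flow is being deleted at a priority another flow is being added at, the pair collapses into a single modify so the delete cannot void the add.

package main

import "fmt"

type toyFlow struct {
	conjID   int
	priority uint16
}

func resolve(addFlows, delFlows []toyFlow) (add, modify, del []toyFlow) {
	for _, a := range addFlows {
		conflict := false
		for i, d := range delFlows {
			if d.priority == a.priority {
				// Drop the conflicting delete and downgrade the add to a modify.
				modify = append(modify, a)
				delFlows = append(delFlows[:i], delFlows[i+1:]...)
				conflict = true
				break
			}
		}
		if !conflict {
			add = append(add, a)
		}
	}
	return add, modify, delFlows
}

func main() {
	// conj=1: 100 -> 99, conj=2: 99 -> 98, as in the comment above.
	adds := []toyFlow{{conjID: 1, priority: 99}, {conjID: 2, priority: 98}}
	dels := []toyFlow{{conjID: 1, priority: 100}, {conjID: 2, priority: 99}}
	add, modify, del := resolve(adds, dels)
	fmt.Println("add:", add, "modify:", modify, "del:", del)
	// add: [{2 98}] modify: [{1 99}] del: [{1 100}]
}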
-func (c *client) processFlowUpdates(addFlows, delFlows []binding.Flow) (add, update, del []binding.Flow) { +func (f *featureNetworkPolicy) processFlowUpdates(addFlows, delFlows []binding.Flow) (add, update, del []binding.Flow) { for _, a := range addFlows { matched := false for i := 0; i < len(delFlows); i++ { @@ -1526,7 +1535,7 @@ func (c *client) processFlowUpdates(addFlows, delFlows []binding.Flow) (add, upd // updateConjunctionActionFlows constructs a new policyRuleConjunction with actionFlows updated to be // stored in the policyCache. -func (c *client) updateConjunctionActionFlows(conj *policyRuleConjunction, updates flowUpdates) *policyRuleConjunction { +func (f *featureNetworkPolicy) updateConjunctionActionFlows(conj *policyRuleConjunction, updates flowUpdates) *policyRuleConjunction { newActionFlows := make([]binding.Flow, len(conj.actionFlows)) copy(newActionFlows, updates.newActionFlows) newConj := &policyRuleConjunction{ @@ -1542,14 +1551,14 @@ func (c *client) updateConjunctionActionFlows(conj *policyRuleConjunction, updat } // updateConjunctionMatchFlows updates the conjunctiveMatchFlows in a policyRuleConjunction. -func (c *client) updateConjunctionMatchFlows(conj *policyRuleConjunction, newPriority uint16) { +func (f *featureNetworkPolicy) updateConjunctionMatchFlows(conj *policyRuleConjunction, newPriority uint16) { allClause := []*clause{conj.fromClause, conj.toClause, conj.serviceClause} for _, cl := range allClause { if cl == nil { continue } for i, ctx := range cl.matches { - delete(c.globalConjMatchFlowCache, ctx.generateGlobalMapKey()) + delete(f.globalConjMatchFlowCache, ctx.generateGlobalMapKey()) f := ctx.flow updatedFlow := f.CopyToBuilder(newPriority, true).Done() cl.matches[i].flow = updatedFlow @@ -1557,18 +1566,18 @@ func (c *client) updateConjunctionMatchFlows(conj *policyRuleConjunction, newPri } // update the globalConjMatchFlowCache so that the keys are updated for _, ctx := range cl.matches { - c.globalConjMatchFlowCache[ctx.generateGlobalMapKey()] = ctx + f.globalConjMatchFlowCache[ctx.generateGlobalMapKey()] = ctx } } } // calculateFlowUpdates calculates the flow updates required for the priority re-assignments specified in the input map. -func (c *client) calculateFlowUpdates(updates map[uint16]uint16, table uint8) (addFlows, delFlows []binding.Flow, +func (f *featureNetworkPolicy) calculateFlowUpdates(updates map[uint16]uint16, table uint8) (addFlows, delFlows []binding.Flow, conjFlowUpdates map[uint32]flowUpdates) { conjFlowUpdates = map[uint32]flowUpdates{} for original, newPriority := range updates { originalPriorityStr := strconv.Itoa(int(original)) - conjs, _ := c.policyCache.ByIndex(priorityIndex, originalPriorityStr) + conjs, _ := f.policyCache.ByIndex(priorityIndex, originalPriorityStr) for _, conjObj := range conjs { conj := conjObj.(*policyRuleConjunction) // Only re-assign flow priorities for flows in the table specified. @@ -1603,19 +1612,19 @@ func (c *client) calculateFlowUpdates(updates map[uint16]uint16, table uint8) (a // ReassignFlowPriorities takes a list of priority updates, and updates the actionFlows to replace // the old priority with the desired one, for each priority update.
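Complementing the conflict-resolution toy above, a self-contained sketch (assumed semantics, not Antrea code) of what a priority re-assignment does end to end: every cached flow whose priority appears as a key in the updates map is re-installed at the mapped value, as the real function that follows does via the policyCache and CopyToBuilder.

package main

import "fmt"

type cachedFlow struct {
	conjID   uint32
	priority uint16
}

func reassignPriorities(flows []cachedFlow, updates map[uint16]uint16) []cachedFlow {
	out := make([]cachedFlow, 0, len(flows))
	for _, f := range flows {
		if newPriority, ok := updates[f.priority]; ok {
			f.priority = newPriority // the real code rebuilds the flow via CopyToBuilder
		}
		out = append(out, f)
	}
	return out
}

func main() {
	flows := []cachedFlow{{conjID: 10, priority: 100}, {conjID: 11, priority: 200}}
	fmt.Println(reassignPriorities(flows, map[uint16]uint16{100: 101, 200: 202}))
	// [{10 101} {11 202}]
}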
func (c *client) ReassignFlowPriorities(updates map[uint16]uint16, table uint8) error { - addFlows, delFlows, conjFlowUpdates := c.calculateFlowUpdates(updates, table) - add, update, del := c.processFlowUpdates(addFlows, delFlows) + addFlows, delFlows, conjFlowUpdates := c.featureNetworkPolicy.calculateFlowUpdates(updates, table) + add, update, del := c.featureNetworkPolicy.processFlowUpdates(addFlows, delFlows) // Commit the calculated flow updates. err := c.bridge.AddFlowsInBundle(add, update, del) if err != nil { return err } for conjID, actionUpdates := range conjFlowUpdates { - originalConj, _, _ := c.policyCache.GetByKey(fmt.Sprint(conjID)) + originalConj, _, _ := c.featureNetworkPolicy.policyCache.GetByKey(fmt.Sprint(conjID)) conj := originalConj.(*policyRuleConjunction) - updatedConj := c.updateConjunctionActionFlows(conj, actionUpdates) - c.updateConjunctionMatchFlows(updatedConj, actionUpdates.newPriority) - c.policyCache.Update(updatedConj) + updatedConj := c.featureNetworkPolicy.updateConjunctionActionFlows(conj, actionUpdates) + c.featureNetworkPolicy.updateConjunctionMatchFlows(updatedConj, actionUpdates.newPriority) + c.featureNetworkPolicy.policyCache.Update(updatedConj) } return nil } @@ -1679,8 +1688,8 @@ func parseMetricFlow(flow string) (uint32, types.RuleMetric) { func (c *client) NetworkPolicyMetrics() map[uint32]*types.RuleMetric { result := map[uint32]*types.RuleMetric{} - egressFlows, _ := c.ovsctlClient.DumpTableFlows(EgressMetricTable.GetID()) - ingressFlows, _ := c.ovsctlClient.DumpTableFlows(IngressMetricTable.GetID()) + egressFlows, _ := c.ovsctlClient.DumpTableFlows(EgressMetricTable.ofTable.GetID()) + ingressFlows, _ := c.ovsctlClient.DumpTableFlows(IngressMetricTable.ofTable.GetID()) collectMetricsFromFlows := func(flows []string) { for _, flow := range flows { @@ -1706,3 +1715,66 @@ func (c *client) NetworkPolicyMetrics() map[uint32]*types.RuleMetric { collectMetricsFromFlows(ingressFlows) return result } + +type featureNetworkPolicy struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + bridge binding.Bridge + + // globalConjMatchFlowCache is a global map for conjMatchFlowContext. The key is a string generated from the + // conjMatchFlowContext. + globalConjMatchFlowCache map[string]*conjMatchFlowContext + conjMatchFlowLock sync.Mutex // Lock for accessing globalConjMatchFlowCache + // policyCache is a storage that supports listing policyRuleConjunction with different indexers. + // It's guaranteed that one policyRuleConjunction is processed by at most one goroutine at any given time. + policyCache cache.Indexer + // egressTables map records all IDs of tables related to egress rules. + egressTables map[uint8]struct{} + + ovsMetersAreSupported bool + enableDenyTracking bool + enableAntreaPolicy bool + // deterministic represents whether to generate flows deterministically. + // For example, if a flow has multiple actions, setting it to true yields a consistent flow. + // Enabling it may carry a performance impact. It's disabled by default and should only be used in testing.
+ deterministic bool + + category cookie.Category +} + +func (f *featureNetworkPolicy) getFeatureName() string { + return "NetworkPolicy" +} + +func newFeatureNetworkPolicy( + cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + bridge binding.Bridge, + ovsMetersAreSupported, + enableDenyTracking, + enableAntreaPolicy bool) *featureNetworkPolicy { + return &featureNetworkPolicy{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + bridge: bridge, + globalConjMatchFlowCache: make(map[string]*conjMatchFlowContext), + policyCache: cache.NewIndexer(policyConjKeyFunc, cache.Indexers{priorityIndex: priorityIndexFunc}), + ovsMetersAreSupported: ovsMetersAreSupported, + enableDenyTracking: enableDenyTracking, + enableAntreaPolicy: enableAntreaPolicy, + category: cookie.NetworkPolicy, + } +} + +func (f *featureNetworkPolicy) initFlows() []binding.Flow { + f.egressTables = map[uint8]struct{}{EgressRuleTable.GetID(): {}, EgressDefaultTable.GetID(): {}} + if f.enableAntreaPolicy { + f.egressTables[AntreaPolicyEgressRuleTable.GetID()] = struct{}{} + } + var flows []binding.Flow + flows = append(flows, f.establishedConnectionFlows()...) + flows = append(flows, f.relatedConnectionFlows()...) + flows = append(flows, f.rejectBypassNetworkpolicyFlows()...) + flows = append(flows, f.ingressClassifierFlows()...) + return flows +} diff --git a/pkg/agent/openflow/network_policy_test.go b/pkg/agent/openflow/network_policy_test.go index 1e9a5774bb9..3ac8a806da4 100644 --- a/pkg/agent/openflow/network_policy_test.go +++ b/pkg/agent/openflow/network_policy_test.go @@ -43,12 +43,12 @@ import ( ) var ( - c *client - cnpOutTable *mocks.MockTable - outTable *mocks.MockTable - outDropTable *mocks.MockTable - outAllowTable *mocks.MockTable - metricTable *mocks.MockTable + c *client + mockAntreaPolicyEgressRuleTable *mocks.MockTable + mockEgressRuleTable *mocks.MockTable + mockEgressDefaultTable *mocks.MockTable + mockL3ForwardingTable *mocks.MockTable + mockEgressMetricTable *mocks.MockTable ruleFlowBuilder *mocks.MockFlowBuilder ruleFlow *mocks.MockFlow @@ -69,6 +69,11 @@ var ( protocolTCP = v1beta2.ProtocolTCP priority100 = uint16(100) priority200 = uint16(200) + + mockFeaturePodConnectivity = featurePodConnectivity{} + mockFeatureNetworkPolicy = featureNetworkPolicy{enableAntreaPolicy: true} + activeFeatures = []feature{&mockFeaturePodConnectivity, &mockFeatureNetworkPolicy} + pipelineMap = map[binding.PipelineID]binding.Pipeline{} ) type expectConjunctionTimes struct { @@ -81,54 +86,55 @@ type expectConjunctionTimes struct { func TestPolicyRuleConjunction(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + preparePipelines() + c = prepareClient(ctrl, false) - c = prepareClient(ctrl) ruleID1 := uint32(1001) conj1 := &policyRuleConjunction{ id: ruleID1, } clauseID := uint8(1) nClause := uint8(3) - clause1 := conj1.newClause(clauseID, nClause, outTable, outDropTable) + clause1 := conj1.newClause(clauseID, nClause, mockEgressRuleTable, mockEgressDefaultTable) - outDropTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() - outTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() - metricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() + mockEgressDefaultTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() + mockEgressRuleTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() + 
mockEgressMetricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() var addedAddrs = parseAddresses([]string{"192.168.1.3", "192.168.1.30", "192.168.2.0/24", "103", "104"}) expectConjunctionsCount([]*expectConjunctionTimes{{5, ruleID1, clauseID, nClause}}) - flowChanges1 := clause1.addAddrFlows(c, types.SrcAddress, addedAddrs, nil) - err := c.applyConjunctiveMatchFlows(flowChanges1) + flowChanges1 := clause1.addAddrFlows(c.featureNetworkPolicy, types.SrcAddress, addedAddrs, nil) + err := c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChanges1) require.Nil(t, err, "Failed to invoke addAddrFlows") checkFlowCount(t, len(addedAddrs)) for _, addr := range addedAddrs { checkConjMatchFlowActions(t, c, clause1, addr, types.SrcAddress, 1, 0) } - var currentFlowCount = len(c.globalConjMatchFlowCache) + var currentFlowCount = len(c.featureNetworkPolicy.globalConjMatchFlowCache) var deletedAddrs = parseAddresses([]string{"192.168.1.3", "103"}) flowChanges2 := clause1.deleteAddrFlows(types.SrcAddress, deletedAddrs, nil) - err = c.applyConjunctiveMatchFlows(flowChanges2) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChanges2) require.Nil(t, err, "Failed to invoke deleteAddrFlows") checkFlowCount(t, currentFlowCount-len(deletedAddrs)) - currentFlowCount = len(c.globalConjMatchFlowCache) + currentFlowCount = len(c.featureNetworkPolicy.globalConjMatchFlowCache) ruleID2 := uint32(1002) conj2 := &policyRuleConjunction{ id: ruleID2, } clauseID2 := uint8(2) - clause2 := conj2.newClause(clauseID2, nClause, outTable, outDropTable) + clause2 := conj2.newClause(clauseID2, nClause, mockEgressRuleTable, mockEgressDefaultTable) var addedAddrs2 = parseAddresses([]string{"192.168.1.30", "192.168.1.50"}) expectConjunctionsCount([]*expectConjunctionTimes{{2, ruleID2, clauseID2, nClause}}) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID1, clauseID, nClause}}) - flowChanges3 := clause2.addAddrFlows(c, types.SrcAddress, addedAddrs2, nil) - err = c.applyConjunctiveMatchFlows(flowChanges3) + flowChanges3 := clause2.addAddrFlows(c.featureNetworkPolicy, types.SrcAddress, addedAddrs2, nil) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChanges3) require.Nil(t, err, "Failed to invoke addAddrFlows") testAddr := NewIPAddress(net.ParseIP("192.168.1.30")) checkConjMatchFlowActions(t, c, clause2, testAddr, types.SrcAddress, 2, 0) checkFlowCount(t, currentFlowCount+1) - currentFlowCount = len(c.globalConjMatchFlowCache) + currentFlowCount = len(c.featureNetworkPolicy.globalConjMatchFlowCache) ruleID3 := uint32(1003) conj3 := &policyRuleConjunction{ @@ -136,15 +142,15 @@ func TestPolicyRuleConjunction(t *testing.T) { } clauseID3 := uint8(1) nClause3 := uint8(1) - clause3 := conj3.newClause(clauseID3, nClause3, outTable, outDropTable) + clause3 := conj3.newClause(clauseID3, nClause3, mockEgressRuleTable, mockEgressDefaultTable) var addedAddrs3 = parseAddresses([]string{"192.168.1.30"}) - flowChanges4 := clause3.addAddrFlows(c, types.SrcAddress, addedAddrs3, nil) - err = c.applyConjunctiveMatchFlows(flowChanges4) + flowChanges4 := clause3.addAddrFlows(c.featureNetworkPolicy, types.SrcAddress, addedAddrs3, nil) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChanges4) require.Nil(t, err, "Failed to invoke addAddrFlows") checkConjMatchFlowActions(t, c, clause3, testAddr, types.SrcAddress, 2, 1) checkFlowCount(t, currentFlowCount) flowChanges5 := clause3.deleteAddrFlows(types.SrcAddress, addedAddrs3, nil) - err = 
c.applyConjunctiveMatchFlows(flowChanges5) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChanges5) require.Nil(t, err, "Failed to invoke deleteAddrFlows") checkConjMatchFlowActions(t, c, clause3, testAddr, types.SrcAddress, 2, 0) checkFlowCount(t, currentFlowCount) @@ -154,10 +160,11 @@ func TestInstallPolicyRuleFlows(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() - c = prepareClient(ctrl) + preparePipelines() + c = prepareClient(ctrl, false) c.nodeConfig = &config.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: nil} - c.networkConfig = &config.NetworkConfig{IPv4Enabled: true} - c.ipProtocols = []binding.Protocol{binding.ProtocolIP} + c.networkConfig = &config.NetworkConfig{} + c.pipelines = pipelineMap defaultAction := crdv1alpha1.RuleActionAllow // Create a policyRuleConjunction for the dns response interception flows // to ensure nil NetworkPolicyReference is handled correctly by GetNetworkPolicyFlowKeys. @@ -171,7 +178,7 @@ func TestInstallPolicyRuleFlows(t *testing.T) { Action: &defaultAction, Priority: nil, FlowID: ruleID1, - TableID: EgressRuleTable.GetID(), + TableID: EgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.K8sNetworkPolicy, Namespace: "ns1", @@ -180,21 +187,21 @@ func TestInstallPolicyRuleFlows(t *testing.T) { }, } - outDropTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() - outTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() - metricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() + mockEgressDefaultTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() + mockEgressRuleTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() + mockEgressMetricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() conj := &policyRuleConjunction{id: ruleID1} - conj.calculateClauses(rule1, c) + conj.calculateClauses(rule1) require.Nil(t, conj.toClause) require.Nil(t, conj.serviceClause) - ctxChanges := conj.calculateChangesForRuleCreation(c, rule1) + ctxChanges := conj.calculateChangesForRuleCreation(c.featureNetworkPolicy, rule1) assert.Equal(t, len(rule1.From), len(ctxChanges)) matchFlows, dropFlows := getChangedFlows(ctxChanges) assert.Equal(t, len(rule1.From), getChangedFlowCount(dropFlows)) assert.Equal(t, 0, getChangedFlowCount(matchFlows)) assert.Equal(t, 2, getDenyAllRuleOPCount(matchFlows, insertion)) - err := c.applyConjunctiveMatchFlows(ctxChanges) + err := c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges) require.Nil(t, err) ruleID2 := uint32(102) @@ -204,7 +211,7 @@ func TestInstallPolicyRuleFlows(t *testing.T) { Action: &defaultAction, To: parseAddresses([]string{"0.0.0.0/0"}), FlowID: ruleID2, - TableID: EgressRuleTable.GetID(), + TableID: EgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.K8sNetworkPolicy, Namespace: "ns1", @@ -213,19 +220,19 @@ func TestInstallPolicyRuleFlows(t *testing.T) { }, } conj2 := &policyRuleConjunction{id: ruleID2} - conj2.calculateClauses(rule2, c) + conj2.calculateClauses(rule2) require.NotNil(t, conj2.toClause) require.Nil(t, conj2.serviceClause) ruleFlowBuilder.EXPECT().MatchConjID(ruleID2).MaxTimes(1) ruleFlowBuilder.EXPECT().MatchPriority(priorityLow).MaxTimes(1) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID2, 2, 2}}) expectConjunctionsCount([]*expectConjunctionTimes{{2, ruleID2, 1, 2}}) - 
ctxChanges2 := conj2.calculateChangesForRuleCreation(c, rule2) + ctxChanges2 := conj2.calculateChangesForRuleCreation(c.featureNetworkPolicy, rule2) matchFlows2, dropFlows2 := getChangedFlows(ctxChanges2) assert.Equal(t, 1, getChangedFlowCount(dropFlows2)) assert.Equal(t, 3, getChangedFlowCount(matchFlows2)) assert.Equal(t, 3, getChangedFlowOPCount(matchFlows2, insertion)) - err = c.applyConjunctiveMatchFlows(ctxChanges2) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges2) require.Nil(t, err) assert.Equal(t, 0, len(c.GetNetworkPolicyFlowKeys("np1", "ns1"))) @@ -248,7 +255,7 @@ func TestInstallPolicyRuleFlows(t *testing.T) { Action: &defaultAction, Service: []v1beta2.Service{npPort1, npPort2}, FlowID: ruleID3, - TableID: EgressRuleTable.GetID(), + TableID: EgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.K8sNetworkPolicy, Namespace: "ns1", @@ -257,20 +264,20 @@ func TestInstallPolicyRuleFlows(t *testing.T) { }, } conj3 := &policyRuleConjunction{id: ruleID3} - conj3.calculateClauses(rule3, c) + conj3.calculateClauses(rule3) ruleFlowBuilder.EXPECT().MatchConjID(ruleID3).MaxTimes(3) ruleFlowBuilder.EXPECT().MatchPriority(priorityLow).MaxTimes(3) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID2, 1, 2}}) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID3, 2, 3}}) expectConjunctionsCount([]*expectConjunctionTimes{{2, ruleID3, 1, 3}}) expectConjunctionsCount([]*expectConjunctionTimes{{2, ruleID3, 3, 3}}) - ctxChanges3 := conj3.calculateChangesForRuleCreation(c, rule3) + ctxChanges3 := conj3.calculateChangesForRuleCreation(c.featureNetworkPolicy, rule3) matchFlows3, dropFlows3 := getChangedFlows(ctxChanges3) assert.Equal(t, 1, getChangedFlowOPCount(dropFlows3, insertion)) assert.Equal(t, 5, getChangedFlowCount(matchFlows3)) assert.Equal(t, 4, getChangedFlowOPCount(matchFlows3, insertion)) assert.Equal(t, 1, getChangedFlowOPCount(matchFlows3, modification)) - err = c.applyConjunctiveMatchFlows(ctxChanges3) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges3) require.Nil(t, err) err = c.InstallPolicyRuleFlows(rule3) @@ -282,7 +289,7 @@ func TestInstallPolicyRuleFlows(t *testing.T) { matchFlows4, dropFlows4 := getChangedFlows(ctxChanges4) assert.Equal(t, 1, getChangedFlowOPCount(dropFlows4, deletion)) assert.Equal(t, 2, getDenyAllRuleOPCount(matchFlows4, deletion)) - err = c.applyConjunctiveMatchFlows(ctxChanges4) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges4) require.Nil(t, err) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID3, 1, 3}}) @@ -292,12 +299,13 @@ func TestInstallPolicyRuleFlows(t *testing.T) { assert.Equal(t, 3, getChangedFlowCount(matchFlows5)) assert.Equal(t, 2, getChangedFlowOPCount(matchFlows5, deletion)) assert.Equal(t, 1, getChangedFlowOPCount(matchFlows5, modification)) - err = c.applyConjunctiveMatchFlows(ctxChanges5) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges5) assert.Equal(t, 11, len(c.GetNetworkPolicyFlowKeys("np1", "ns1"))) require.Nil(t, err) } func TestBatchInstallPolicyRuleFlows(t *testing.T) { + preparePipelines() tests := []struct { name string rules []*types.PolicyRule @@ -337,55 +345,54 @@ func TestBatchInstallPolicyRuleFlows(t *testing.T) { }, }, expectedFlowsFn: func(c *client) []binding.Flow { - cookiePolicy := c.cookieAllocator.Request(cookie.Policy).Raw() - cookieDefault := c.cookieAllocator.Request(cookie.Default).Raw() + cookiePolicy := c.cookieAllocator.Request(cookie.NetworkPolicy).Raw() return 
[]binding.Flow{ - EgressRuleTable.BuildFlow(priorityLow).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityLow).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchConjID(10). Action().LoadToRegField(TFEgressConjIDField, 10). Action().CT(true, EgressMetricTable.GetID(), CtZone).LoadToLabelField(10, EgressRuleCTLabel).CTDone().Done(), - EgressRuleTable.BuildFlow(priorityLow).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityLow).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchConjID(11). Action().LoadToRegField(TFEgressConjIDField, 11). Action().CT(true, EgressMetricTable.GetID(), CtZone).LoadToLabelField(11, EgressRuleCTLabel).CTDone().Done(), - EgressRuleTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.40")). Action().Conjunction(10, 1, 2). Action().Conjunction(11, 1, 3).Done(), - EgressRuleTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.50")). Action().Conjunction(10, 1, 2).Done(), - EgressRuleTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.51")). Action().Conjunction(11, 1, 3).Done(), - EgressRuleTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchDstIPNet(*ip.MustParseCIDR("0.0.0.0/0")). Action().Conjunction(10, 2, 2). Action().Conjunction(11, 2, 3).Done(), - EgressRuleTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + EgressRuleTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolTCP).MatchDstPort(8080, nil). Action().Conjunction(11, 3, 3).Done(), - EgressDefaultTable.BuildFlow(priorityNormal).Cookie(cookieDefault). + EgressDefaultTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.40")). Action().Drop().Done(), - EgressDefaultTable.BuildFlow(priorityNormal).Cookie(cookieDefault). + EgressDefaultTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.50")). Action().Drop().Done(), - EgressDefaultTable.BuildFlow(priorityNormal).Cookie(cookieDefault). + EgressDefaultTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.51")). Action().Drop().Done(), - EgressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + EgressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchCTStateNew(true).MatchCTLabelField(0, uint64(10)<<32, EgressRuleCTLabel). - Action().GotoTable(L3ForwardingTable.GetID()).Done(), - EgressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + Action().NextTable().Done(), + EgressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchCTStateNew(false).MatchCTLabelField(0, uint64(10)<<32, EgressRuleCTLabel). - Action().GotoTable(L3ForwardingTable.GetID()).Done(), - EgressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). 
+ Action().NextTable().Done(), + EgressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchCTStateNew(true).MatchCTLabelField(0, uint64(11)<<32, EgressRuleCTLabel). - Action().GotoTable(L3ForwardingTable.GetID()).Done(), - EgressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + Action().NextTable().Done(), + EgressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchCTStateNew(false).MatchCTLabelField(0, uint64(11)<<32, EgressRuleCTLabel). - Action().GotoTable(L3ForwardingTable.GetID()).Done(), + Action().NextTable().Done(), } }, }, @@ -443,64 +450,64 @@ func TestBatchInstallPolicyRuleFlows(t *testing.T) { }, }, expectedFlowsFn: func(c *client) []binding.Flow { - cookiePolicy := c.cookieAllocator.Request(cookie.Policy).Raw() + cookiePolicy := c.cookieAllocator.Request(cookie.NetworkPolicy).Raw() return []binding.Flow{ - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchConjID(10). Action().LoadToRegField(TFIngressConjIDField, 10). Action().CT(true, IngressMetricTable.GetID(), CtZone).LoadToLabelField(10, IngressRuleCTLabel).CTDone().Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchConjID(11). Action().LoadToRegField(CNPDenyConjIDField, 11). Action().LoadRegMark(CnpDenyRegMark). Action().GotoTable(IngressMetricTable.GetID()).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority200).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority200).Cookie(cookiePolicy). MatchConjID(12). Action().LoadToRegField(CNPDenyConjIDField, 12). Action().LoadRegMark(CnpDenyRegMark). Action().GotoTable(IngressMetricTable.GetID()).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.40")). Action().Conjunction(10, 1, 2). Action().Conjunction(11, 1, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority200).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority200).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.40")). Action().Conjunction(12, 1, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.50")). Action().Conjunction(10, 1, 2).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchSrcIP(net.ParseIP("192.168.1.51")). Action().Conjunction(11, 1, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchRegFieldWithValue(TargetOFPortField, uint32(1)). Action().Conjunction(10, 2, 2). Action().Conjunction(11, 2, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority200).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority200).Cookie(cookiePolicy). 
MatchRegFieldWithValue(TargetOFPortField, uint32(1)). Action().Conjunction(12, 2, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchRegFieldWithValue(TargetOFPortField, uint32(2)). Action().Conjunction(10, 2, 2).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchRegFieldWithValue(TargetOFPortField, uint32(3)). Action().Conjunction(11, 2, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority100).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority100).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolTCP).MatchDstPort(8080, nil). Action().Conjunction(11, 3, 3).Done(), - AntreaPolicyIngressRuleTable.BuildFlow(priority200).Cookie(cookiePolicy). + AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priority200).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolTCP).MatchDstPort(8080, nil). Action().Conjunction(12, 3, 3).Done(), - IngressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + IngressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchCTStateNew(true).MatchCTLabelField(0, 10, IngressRuleCTLabel). - Action().GotoTable(ConntrackCommitTable.GetID()).Done(), - IngressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + Action().NextTable().Done(), + IngressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchProtocol(binding.ProtocolIP).MatchCTStateNew(false).MatchCTLabelField(0, 10, IngressRuleCTLabel). - Action().GotoTable(ConntrackCommitTable.GetID()).Done(), - IngressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + Action().NextTable().Done(), + IngressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchRegMark(CnpDenyRegMark).MatchRegFieldWithValue(CNPDenyConjIDField, 11). Action().Drop().Done(), - IngressMetricTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). + IngressMetricTable.ofTable.BuildFlow(priorityNormal).Cookie(cookiePolicy). MatchRegMark(CnpDenyRegMark).MatchRegFieldWithValue(CNPDenyConjIDField, 12). Action().Drop().Done(), } @@ -519,7 +526,22 @@ func TestBatchInstallPolicyRuleFlows(t *testing.T) { c.nodeConfig = &config.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: nil} c.networkConfig = &config.NetworkConfig{IPv4Enabled: true} c.ipProtocols = []binding.Protocol{binding.ProtocolIP} - c.deterministic = true + mockFeaturePodConnectivity.cookieAllocator = c.cookieAllocator + mockFeaturePodConnectivity.ipProtocols = c.ipProtocols + mockFeatureNetworkPolicy.cookieAllocator = c.cookieAllocator + mockFeatureNetworkPolicy.ipProtocols = c.ipProtocols + mockFeatureNetworkPolicy.bridge = c.bridge + c.featurePodConnectivity = &mockFeaturePodConnectivity + c.featureNetworkPolicy = &mockFeatureNetworkPolicy + c.featureNetworkPolicy.deterministic = true + c.featureNetworkPolicy.policyCache = cache.NewIndexer(policyConjKeyFunc, cache.Indexers{priorityIndex: priorityIndexFunc}) + c.featureNetworkPolicy.globalConjMatchFlowCache = map[string]*conjMatchFlowContext{} + + bridge := mocks.NewMockBridge(ctrl) + bridge.EXPECT().CreateTable(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes() + c.bridge = bridge + c.pipelines = pipelineMap + c.realizePipelines() expectedFlows := tt.expectedFlowsFn(c) // For better readability when debugging failure. 
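Taken together, the test wiring above (activeFeatures, getRequiredTables, realizePipelines) suggests the shape of the feature abstraction this refactor builds toward. A hedged reconstruction from only the methods visible in this diff; the exact interface definition is not shown here, so treat this as an assumption:

// feature is a sketch of the per-feature contract implied by this diff: each
// feature names itself, declares the tables it needs in the pipeline, and
// contributes its initial and replayed flows for the client to aggregate.
type feature interface {
	getFeatureName() string
	getRequiredTables() []*Table
	initFlows() []binding.Flow
	replayFlows() []binding.Flow
}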
@@ -551,6 +573,16 @@ func dumpFlows(flows []binding.Flow) string { } func BenchmarkBatchInstallPolicyRuleFlows(b *testing.B) { + ctrl := gomock.NewController(b) + defer ctrl.Finish() + + preparePipelines() + c = prepareClient(ctrl, false) + // Make it return error so no change gets committed to cache. + mockOperations := oftest.NewMockOFEntryOperations(ctrl) + mockOperations.EXPECT().AddAll(gomock.Any()).Return(errors.New("fake error")).AnyTimes() + c.ofEntryOperations = mockOperations + var commonIPs []types.Address for i := 0; i < 250; i++ { commonIPs = append(commonIPs, NewIPAddress(net.ParseIP(fmt.Sprintf("192.168.0.%d", i)))) } @@ -577,17 +609,6 @@ func BenchmarkBatchInstallPolicyRuleFlows(b *testing.B) { }, }) } - ctrl := gomock.NewController(b) - defer ctrl.Finish() - mockOperations := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, false, true, false, false, false, false, false) - c = ofClient.(*client) - c.cookieAllocator = cookie.NewAllocator(0) - c.ofEntryOperations = mockOperations - c.nodeConfig = &config.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: nil} - c.ipProtocols = []binding.Protocol{binding.ProtocolIP} - // Make it return error so no change gets committed to cache. - mockOperations.EXPECT().AddAll(gomock.Any()).Return(errors.New("fake error")).AnyTimes() b.ReportAllocs() b.ResetTimer() @@ -600,9 +621,10 @@ func TestConjMatchFlowContextKeyConflict(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() - c = prepareClient(ctrl) - outDropTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() - outTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() + preparePipelines() + c = prepareClient(ctrl, false) + mockEgressDefaultTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() + mockEgressRuleTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() ruleAction.EXPECT().Conjunction(gomock.Any(), gomock.Any(), gomock.Any()).Return(ruleFlowBuilder).MaxTimes(3) ip, ipNet, _ := net.ParseCIDR("192.168.2.30/32") @@ -611,22 +633,22 @@ func TestConjMatchFlowContextKeyConflict(t *testing.T) { conj1 := &policyRuleConjunction{ id: ruleID1, } - clause1 := conj1.newClause(1, 3, outTable, outDropTable) - flowChange1 := clause1.addAddrFlows(c, types.DstAddress, parseAddresses([]string{ip.String()}), nil) - err := c.applyConjunctiveMatchFlows(flowChange1) + clause1 := conj1.newClause(1, 3, mockEgressRuleTable, mockEgressDefaultTable) + flowChange1 := clause1.addAddrFlows(c.featureNetworkPolicy, types.DstAddress, parseAddresses([]string{ip.String()}), nil) + err := c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChange1) require.Nil(t, err, "no error expected in applyConjunctiveMatchFlows") ruleID2 := uint32(12) conj2 := &policyRuleConjunction{ id: ruleID2, } - clause2 := conj2.newClause(1, 3, outTable, outDropTable) - flowChange2 := clause2.addAddrFlows(c, types.DstAddress, parseAddresses([]string{ipNet.String()}), nil) - err = c.applyConjunctiveMatchFlows(flowChange2) + clause2 := conj2.newClause(1, 3, mockEgressRuleTable, mockEgressDefaultTable) + flowChange2 := clause2.addAddrFlows(c.featureNetworkPolicy, types.DstAddress, parseAddresses([]string{ipNet.String()}), nil) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(flowChange2) require.Nil(t, err, "no error expected in applyConjunctiveMatchFlows") - expectedMatchKey :=
fmt.Sprintf("table:%d,priority:%s,type:%v,value:%s", EgressRuleTable.GetID(), strconv.Itoa(int(priorityNormal)), MatchDstIPNet, ipNet.String()) - ctx, found := c.globalConjMatchFlowCache[expectedMatchKey] + expectedMatchKey := fmt.Sprintf("table:%d,priority:%s,type:%v,value:%s", EgressRuleTable.ofTable.GetID(), strconv.Itoa(int(priorityNormal)), MatchDstIPNet, ipNet.String()) + ctx, found := c.featureNetworkPolicy.globalConjMatchFlowCache[expectedMatchKey] assert.True(t, found) assert.Equal(t, 2, len(ctx.actions)) act1, found := ctx.actions[ruleID1] @@ -641,7 +663,8 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() - c = prepareClient(ctrl) + preparePipelines() + c = prepareClient(ctrl, true) c.nodeConfig = &config.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: podIPv6CIDR} c.networkConfig = &config.NetworkConfig{IPv4Enabled: true, IPv6Enabled: true} c.ipProtocols = []binding.Protocol{binding.ProtocolIP, binding.ProtocolIPv6} @@ -653,7 +676,7 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { Action: &defaultAction, Priority: nil, FlowID: ruleID1, - TableID: EgressRuleTable.GetID(), + TableID: EgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.K8sNetworkPolicy, Namespace: "ns1", @@ -662,21 +685,21 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { }, } - outDropTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() - outTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() - metricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() + mockEgressDefaultTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() + mockEgressRuleTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() + mockEgressMetricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() conj := &policyRuleConjunction{id: ruleID1} - conj.calculateClauses(rule1, c) + conj.calculateClauses(rule1) require.Nil(t, conj.toClause) require.Nil(t, conj.serviceClause) - ctxChanges := conj.calculateChangesForRuleCreation(c, rule1) + ctxChanges := conj.calculateChangesForRuleCreation(c.featureNetworkPolicy, rule1) assert.Equal(t, len(rule1.From), len(ctxChanges)) matchFlows, dropFlows := getChangedFlows(ctxChanges) assert.Equal(t, len(rule1.From), getChangedFlowCount(dropFlows)) assert.Equal(t, 0, getChangedFlowCount(matchFlows)) assert.Equal(t, len(rule1.From), getDenyAllRuleOPCount(matchFlows, insertion)) - err := c.applyConjunctiveMatchFlows(ctxChanges) + err := c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges) require.Nil(t, err) ruleID2 := uint32(102) @@ -686,7 +709,7 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { Action: &defaultAction, To: parseAddresses([]string{"0.0.0.0/0"}), FlowID: ruleID2, - TableID: EgressRuleTable.GetID(), + TableID: EgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.K8sNetworkPolicy, Namespace: "ns1", @@ -695,19 +718,19 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { }, } conj2 := &policyRuleConjunction{id: ruleID2} - conj2.calculateClauses(rule2, c) + conj2.calculateClauses(rule2) require.NotNil(t, conj2.toClause) require.Nil(t, conj2.serviceClause) ruleFlowBuilder.EXPECT().MatchConjID(ruleID2).MaxTimes(1) ruleFlowBuilder.EXPECT().MatchPriority(priorityLow).MaxTimes(1) 
expectConjunctionsCount([]*expectConjunctionTimes{{len(rule2.To), ruleID2, 2, 2}}) expectConjunctionsCount([]*expectConjunctionTimes{{len(rule2.From), ruleID2, 1, 2}}) - ctxChanges2 := conj2.calculateChangesForRuleCreation(c, rule2) + ctxChanges2 := conj2.calculateChangesForRuleCreation(c.featureNetworkPolicy, rule2) matchFlows2, dropFlows2 := getChangedFlows(ctxChanges2) assert.Equal(t, 2, getChangedFlowCount(dropFlows2)) assert.Equal(t, 4, getChangedFlowCount(matchFlows2)) assert.Equal(t, 4, getChangedFlowOPCount(matchFlows2, insertion)) - err = c.applyConjunctiveMatchFlows(ctxChanges2) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges2) require.Nil(t, err) assert.Equal(t, 0, len(c.GetNetworkPolicyFlowKeys("np1", "ns1"))) @@ -729,7 +752,7 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { Action: &defaultAction, Service: []v1beta2.Service{npPort1, npPort2}, FlowID: ruleID3, - TableID: EgressRuleTable.GetID(), + TableID: EgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.K8sNetworkPolicy, Namespace: "ns1", @@ -738,20 +761,20 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { }, } conj3 := &policyRuleConjunction{id: ruleID3} - conj3.calculateClauses(rule3, c) + conj3.calculateClauses(rule3) ruleFlowBuilder.EXPECT().MatchConjID(ruleID3).MaxTimes(3) ruleFlowBuilder.EXPECT().MatchPriority(priorityLow).MaxTimes(3) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID2, 1, 2}}) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID3, 2, 3}}) expectConjunctionsCount([]*expectConjunctionTimes{{2, ruleID3, 1, 3}}) expectConjunctionsCount([]*expectConjunctionTimes{{4, ruleID3, 3, 3}}) - ctxChanges3 := conj3.calculateChangesForRuleCreation(c, rule3) + ctxChanges3 := conj3.calculateChangesForRuleCreation(c.featureNetworkPolicy, rule3) matchFlows3, dropFlows3 := getChangedFlows(ctxChanges3) assert.Equal(t, 1, getChangedFlowOPCount(dropFlows3, insertion)) assert.Equal(t, 7, getChangedFlowCount(matchFlows3)) assert.Equal(t, 6, getChangedFlowOPCount(matchFlows3, insertion)) assert.Equal(t, 1, getChangedFlowOPCount(matchFlows3, modification)) - err = c.applyConjunctiveMatchFlows(ctxChanges3) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges3) require.Nil(t, err) err = c.InstallPolicyRuleFlows(rule3) @@ -763,7 +786,7 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { matchFlows4, dropFlows4 := getChangedFlows(ctxChanges4) assert.Equal(t, 2, getChangedFlowOPCount(dropFlows4, deletion)) assert.Equal(t, 3, getDenyAllRuleOPCount(matchFlows4, deletion)) - err = c.applyConjunctiveMatchFlows(ctxChanges4) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges4) require.Nil(t, err) expectConjunctionsCount([]*expectConjunctionTimes{{1, ruleID3, 1, 3}}) @@ -773,7 +796,7 @@ func TestInstallPolicyRuleFlowsInDualStackCluster(t *testing.T) { assert.Equal(t, 4, getChangedFlowCount(matchFlows5)) assert.Equal(t, 3, getChangedFlowOPCount(matchFlows5, deletion)) assert.Equal(t, 1, getChangedFlowOPCount(matchFlows5, modification)) - err = c.applyConjunctiveMatchFlows(ctxChanges5) + err = c.featureNetworkPolicy.applyConjunctiveMatchFlows(ctxChanges5) assert.Equal(t, 15, len(c.GetNetworkPolicyFlowKeys("np1", "ns1"))) require.Nil(t, err) } @@ -822,7 +845,7 @@ func getDenyAllRuleOPCount(flows []*flowChange, operType changeType) int { } func checkConjunctionConfig(t *testing.T, ruleID uint32, actionFlowCount, fromMatchCount, toMatchCount, serviceMatchCount int) { - conj := 
c.getPolicyRuleConjunction(ruleID) + conj := c.featureNetworkPolicy.getPolicyRuleConjunction(ruleID) require.NotNil(t, conj, "Failed to add policyRuleConjunction into client cache") assert.Equal(t, actionFlowCount, len(conj.actionFlows), fmt.Sprintf("Incorrect number of conjunction action flows, expect: %d, actual: %d", actionFlowCount, len(conj.actionFlows))) if fromMatchCount > 0 { @@ -837,13 +860,13 @@ func checkConjunctionConfig(t *testing.T, ruleID uint32, actionFlowCount, fromMa } func checkFlowCount(t *testing.T, expectCount int) { - actualCount := len(c.globalConjMatchFlowCache) + actualCount := len(c.featureNetworkPolicy.globalConjMatchFlowCache) assert.Equal(t, expectCount, actualCount, fmt.Sprintf("Incorrect count of conjunctive match flow context into global cache, expect: %d, actual: %d", expectCount, actualCount)) } func checkConjMatchFlowActions(t *testing.T, client *client, c *clause, address types.Address, addressType types.AddressType, actionCount int, anyDropRuleCount int) { addrMatch := generateAddressConjMatch(c.ruleTable.GetID(), address, addressType, nil) - context, found := client.globalConjMatchFlowCache[addrMatch.generateGlobalMapKey()] + context, found := client.featureNetworkPolicy.globalConjMatchFlowCache[addrMatch.generateGlobalMapKey()] require.True(t, found, "Failed to add conjunctive match flow to global cache") assert.Equal(t, actionCount, len(context.actions), fmt.Sprintf("Incorrect policyRuleConjunction action number, expect: %d, actual: %d", actionCount, len(context.actions))) assert.Equal(t, anyDropRuleCount, len(context.denyAllRules), fmt.Sprintf("Incorrect policyRuleConjunction anyDropRule number, expect: %d, actual: %d", anyDropRuleCount, len(context.denyAllRules))) @@ -911,7 +934,7 @@ func newMockMetricFlowBuilder(ctrl *gomock.Controller) *mocks.MockFlowBuilder { metricFlowBuilder.EXPECT().MatchCTStateNew(gomock.Any()).Return(metricFlowBuilder).AnyTimes() metricFlowBuilder.EXPECT().MatchCTLabelField(gomock.Any(), gomock.Any(), gomock.Any()).Return(metricFlowBuilder).AnyTimes() metricAction = mocks.NewMockAction(ctrl) - metricAction.EXPECT().GotoTable(gomock.Any()).Return(metricFlowBuilder).AnyTimes() + metricAction.EXPECT().NextTable().Return(metricFlowBuilder).AnyTimes() metricAction.EXPECT().LoadToRegField(gomock.Any(), gomock.Any()).Return(metricFlowBuilder).AnyTimes() metricAction.EXPECT().Drop().Return(metricFlowBuilder).AnyTimes() metricFlowBuilder.EXPECT().Action().Return(metricAction).AnyTimes() @@ -941,39 +964,69 @@ func parseAddresses(addrs []string) []types.Address { return addresses } -func createMockTable(ctrl *gomock.Controller, tableID uint8, nextTable uint8, missAction binding.MissActionType) *mocks.MockTable { - table := mocks.NewMockTable(ctrl) - table.EXPECT().GetID().Return(tableID).AnyTimes() - table.EXPECT().GetNext().Return(nextTable).AnyTimes() - table.EXPECT().GetMissAction().Return(missAction).AnyTimes() - table.EXPECT().GetName().Return("table").AnyTimes() - ofTableCache.Update(table) - return table +func preparePipelines() { + pipelineID := pipelineIP + requiredTablesMap := make(map[*Table]struct{}) + for _, f := range activeFeatures { + for _, t := range f.getRequiredTables() { + requiredTablesMap[t] = struct{}{} + } + } + + var requiredTables []*Table + for _, table := range tableOrderCache[pipelineID] { + if _, ok := requiredTablesMap[table]; ok { + requiredTables = append(requiredTables, table) + } + } + pipelineMap[pipelineID] = generatePipeline(pipelineID, requiredTables) + + mockFeatureNetworkPolicy.egressTables = 
map[uint8]struct{}{EgressRuleTable.GetID(): {}, EgressDefaultTable.GetID(): {}} + if mockFeatureNetworkPolicy.enableAntreaPolicy { + mockFeatureNetworkPolicy.egressTables[AntreaPolicyEgressRuleTable.GetID()] = struct{}{} + } + mockFeatureNetworkPolicy.category = cookie.NetworkPolicy + mockFeaturePodConnectivity.category = cookie.PodConnectivity } -func prepareClient(ctrl *gomock.Controller) *client { - policyCache := cache.NewIndexer( - policyConjKeyFunc, - cache.Indexers{priorityIndex: priorityIndexFunc}, - ) +func prepareClient(ctrl *gomock.Controller, dualStack bool) *client { bridge := mocks.NewMockBridge(ctrl) bridge.EXPECT().AddFlowsInBundle(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() - cnpOutTable = createMockTable(ctrl, AntreaPolicyEgressRuleTable.GetID(), EgressRuleTable.GetID(), binding.TableMissActionNext) - outTable = createMockTable(ctrl, EgressRuleTable.GetID(), EgressDefaultTable.GetID(), binding.TableMissActionNext) - outDropTable = createMockTable(ctrl, EgressDefaultTable.GetID(), EgressMetricTable.GetID(), binding.TableMissActionNext) - metricTable = createMockTable(ctrl, EgressMetricTable.GetID(), L3ForwardingTable.GetID(), binding.TableMissActionNext) - outAllowTable = createMockTable(ctrl, L3ForwardingTable.GetID(), L2ForwardingCalcTable.GetID(), binding.TableMissActionNext) + ipProtocols := []binding.Protocol{binding.ProtocolIP} + if dualStack { + ipProtocols = append(ipProtocols, binding.ProtocolIPv6) + } c = &client{ - policyCache: policyCache, - globalConjMatchFlowCache: map[string]*conjMatchFlowContext{}, - bridge: bridge, - ovsDatapathType: ovsconfig.OVSDatapathNetdev, + bridge: bridge, + ovsDatapathType: ovsconfig.OVSDatapathNetdev, + ipProtocols: ipProtocols, } c.cookieAllocator = cookie.NewAllocator(0) m := oftest.NewMockOFEntryOperations(ctrl) m.EXPECT().AddAll(gomock.Any()).Return(nil).AnyTimes() m.EXPECT().DeleteAll(gomock.Any()).Return(nil).AnyTimes() c.ofEntryOperations = m + mockFeaturePodConnectivity.cookieAllocator = c.cookieAllocator + mockFeaturePodConnectivity.ipProtocols = c.ipProtocols + mockFeatureNetworkPolicy.cookieAllocator = c.cookieAllocator + mockFeatureNetworkPolicy.ipProtocols = c.ipProtocols + mockFeatureNetworkPolicy.bridge = c.bridge + c.featurePodConnectivity = &mockFeaturePodConnectivity + c.featureNetworkPolicy = &mockFeatureNetworkPolicy + c.featureNetworkPolicy.deterministic = true + c.featureNetworkPolicy.policyCache = cache.NewIndexer(policyConjKeyFunc, cache.Indexers{priorityIndex: priorityIndexFunc}) + c.featureNetworkPolicy.globalConjMatchFlowCache = map[string]*conjMatchFlowContext{} + c.pipelines = pipelineMap + + setMockOFTables(ctrl, + map[*Table]**mocks.MockTable{ + AntreaPolicyEgressRuleTable: &mockAntreaPolicyEgressRuleTable, + EgressRuleTable: &mockEgressRuleTable, + EgressDefaultTable: &mockEgressDefaultTable, + EgressMetricTable: &mockEgressMetricTable, + L3ForwardingTable: &mockL3ForwardingTable, + }, + ) return c } @@ -1100,12 +1153,13 @@ func TestNetworkPolicyMetrics(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() - c = prepareClient(ctrl) + preparePipelines() + c = prepareClient(ctrl, false) mockOVSClient := ovsctltest.NewMockOVSCtlClient(ctrl) c.ovsctlClient = mockOVSClient gomock.InOrder( - mockOVSClient.EXPECT().DumpTableFlows(EgressMetricTable.GetID()).Return(tt.egressFlows, nil), - mockOVSClient.EXPECT().DumpTableFlows(IngressMetricTable.GetID()).Return(tt.ingressFlows, nil), + 
mockOVSClient.EXPECT().DumpTableFlows(EgressMetricTable.ofTable.GetID()).Return(tt.egressFlows, nil), + mockOVSClient.EXPECT().DumpTableFlows(IngressMetricTable.ofTable.GetID()).Return(tt.ingressFlows, nil), ) got := c.NetworkPolicyMetrics() assert.Equal(t, tt.want, got) @@ -1116,17 +1170,18 @@ func TestNetworkPolicyMetrics(t *testing.T) { func TestGetMatchFlowUpdates(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() - - c = prepareClient(ctrl) + preparePipelines() + c = prepareClient(ctrl, false) c.nodeConfig = &config.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: nil} c.networkConfig = &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap, IPv4Enabled: true} c.ipProtocols = []binding.Protocol{binding.ProtocolIP} - outDropTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() - cnpOutTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() + mockEgressDefaultTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockDropFlowBuilder(ctrl)).AnyTimes() + mockAntreaPolicyEgressRuleTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockRuleFlowBuilder(ctrl)).AnyTimes() ruleFlowBuilder.EXPECT().MatchRegFieldWithValue(gomock.Any(), gomock.Any()).Return(ruleFlowBuilder).AnyTimes() ruleAction.EXPECT().LoadRegMark(gomock.Any()).Return(ruleFlowBuilder).AnyTimes() ruleAction.EXPECT().Conjunction(gomock.Any(), gomock.Any(), gomock.Any()).Return(ruleFlowBuilder).AnyTimes() - metricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() + ruleAction.EXPECT().ResubmitToTables(gomock.Any()).Return(ruleFlowBuilder).AnyTimes() + mockEgressMetricTable.EXPECT().BuildFlow(gomock.Any()).Return(newMockMetricFlowBuilder(ctrl)).AnyTimes() metricFlowBuilder.EXPECT().MatchRegMark(gomock.Any()).Return(metricFlowBuilder).AnyTimes() metricFlowBuilder.EXPECT().MatchRegFieldWithValue(gomock.Any(), gomock.Any()).Return(metricFlowBuilder).AnyTimes() rules := []*types.PolicyRule{ @@ -1137,7 +1192,7 @@ func TestGetMatchFlowUpdates(t *testing.T) { Priority: &priority100, To: []types.Address{NewOFPortAddress(1), NewOFPortAddress(2)}, FlowID: uint32(10), - TableID: AntreaPolicyEgressRuleTable.GetID(), + TableID: AntreaPolicyEgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.AntreaNetworkPolicy, Namespace: "ns1", @@ -1154,7 +1209,7 @@ func TestGetMatchFlowUpdates(t *testing.T) { To: []types.Address{NewOFPortAddress(1), NewOFPortAddress(3)}, Service: []v1beta2.Service{{Protocol: &protocolTCP, Port: &port8080}}, FlowID: uint32(11), - TableID: AntreaPolicyEgressRuleTable.GetID(), + TableID: AntreaPolicyEgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.AntreaNetworkPolicy, Namespace: "ns1", @@ -1171,7 +1226,7 @@ func TestGetMatchFlowUpdates(t *testing.T) { To: []types.Address{NewOFPortAddress(1)}, Service: []v1beta2.Service{{Protocol: &protocolTCP, Port: &port8080}}, FlowID: uint32(12), - TableID: AntreaPolicyEgressRuleTable.GetID(), + TableID: AntreaPolicyEgressRuleTable.ofTable.GetID(), PolicyRef: &v1beta2.NetworkPolicyReference{ Type: v1beta2.AntreaNetworkPolicy, Namespace: "ns1", @@ -1186,6 +1241,19 @@ func TestGetMatchFlowUpdates(t *testing.T) { priority100: 101, priority200: 202, } - err = c.ReassignFlowPriorities(updatedPriorities, AntreaPolicyEgressRuleTable.GetID()) + err = c.ReassignFlowPriorities(updatedPriorities, AntreaPolicyEgressRuleTable.ofTable.GetID()) assert.Nil(t, err) } + +// setMockOFTables is used to generate 
mock OF tables. +func setMockOFTables(ctrl *gomock.Controller, tableMap map[*Table]**mocks.MockTable) { + for table, mockTable := range tableMap { + t := mocks.NewMockTable(ctrl) + t.EXPECT().GetID().Return(table.GetID()).AnyTimes() + t.EXPECT().GetNext().Return(table.GetNext()).AnyTimes() + t.EXPECT().GetMissAction().Return(table.GetMissAction()).AnyTimes() + t.EXPECT().GetName().Return("table").AnyTimes() + tableCache.Update(table) + *mockTable = t // Update the value with generated mock table. + } +} diff --git a/pkg/agent/openflow/openflow_test_utils.go b/pkg/agent/openflow/openflow_test_utils.go new file mode 100644 index 00000000000..59457e26b64 --- /dev/null +++ b/pkg/agent/openflow/openflow_test_utils.go @@ -0,0 +1,48 @@ +// Copyright 2022 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +// InitMockTables is used to init mock tables. +func InitMockTables(tableMap map[*Table]uint8) { + for ft, id := range tableMap { + ft.ofTable = binding.NewOFTable(id, ft.name, 0, 0, 0) + } +} + +// ResetOFTable is used for integration tests. +func ResetOFTable() { + binding.ResetTableID() +} + +// CleanOFTableCache is used to reset ofTableCache and only used in integration tests. When all integration tests about +// openflow run in batch, unexpected flows could be installed on OVS due to stale ofTableCache, which may cause some tests +// to fail. For example, for TestFuncA, EgressMarkTable is needed; for TestFuncB, EgressMarkTable is not needed. If TestFuncB is run +// after TestFuncA, since ofTableCache (EgressMarkTable is added by TestFuncA) is not reset, default flow of EgressMarkTable will also +// be realized on OVS when running TestFuncB (see "func (c *client) defaultFlows() (flows []binding.Flow)"). Note that, +// the unexpected flows are not included in the map tableCache of OFBridge defined in pkg/ovs/openflow/ofctrl_bridge.go, +// because the bridge will be destroyed after every test. For some tests, function checkOVSFlowMetrics (defined in +// test/integration/agent/openflow_test.go) is used to check the flow number of every installed table. The expected table +// list is read from the map tableCache of OFBridge, but the actual table list is dumped from OVS bridge (including the +// unexpected flow). They are different, and as a result, TestFuncB will fail. 
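+//
+// A typical integration-test teardown could therefore reset both the table cache and the global table ID
+// allocator before the next test initializes its own pipeline. This is only an illustrative sketch written from
+// the caller's (integration test's) perspective; the helper name is hypothetical:
+//
+//	func resetOpenFlowTables() {
+//		openflow.CleanOFTableCache()
+//		openflow.ResetOFTable()
+//	}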
+func CleanOFTableCache() { + objs := tableCache.List() + for i := 0; i < len(objs); i++ { + tableCache.Delete(objs[i]) + } +} diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 06a7a9db641..e082b43b61d 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -41,38 +41,144 @@ import ( ) var ( - ClassifierTable = binding.NewOFTable(0, "Classification") - SpoofGuardTable = binding.NewOFTable(10, "SpoofGuard") - arpResponderTable = binding.NewOFTable(20, "ARPResponder") - IPv6Table = binding.NewOFTable(21, "IPv6") - MulticastTable = binding.NewOFTable(22, "Multicast") - ServiceHairpinTable = binding.NewOFTable(23, "ServiceHairpin") - ServiceConntrackTable = binding.NewOFTable(24, "ServiceConntrack") // serviceConntrackTable use a new ct_zone to transform SNAT'd connections. - ConntrackTable = binding.NewOFTable(30, "ConntrackZone") - ConntrackStateTable = binding.NewOFTable(31, "ConntrackState") - ServiceClassifierTable = binding.NewOFTable(35, "ServiceClassifier") - SessionAffinityTable = binding.NewOFTable(40, "SessionAffinity") - DNATTable = binding.NewOFTable(40, "DNAT(SessionAffinity)") - ServiceLBTable = binding.NewOFTable(41, "ServiceLB") - EndpointDNATTable = binding.NewOFTable(42, "EndpointDNAT") - AntreaPolicyEgressRuleTable = binding.NewOFTable(45, "AntreaPolicyEgressRule") - DefaultTierEgressRuleTable = binding.NewOFTable(49, "DefaultTierEgressRule") - EgressRuleTable = binding.NewOFTable(50, "EgressRule") - EgressDefaultTable = binding.NewOFTable(60, "EgressDefaultRule") - EgressMetricTable = binding.NewOFTable(61, "EgressMetric") - L3ForwardingTable = binding.NewOFTable(70, "L3Forwarding") - SNATTable = binding.NewOFTable(71, "SNAT") - L3DecTTLTable = binding.NewOFTable(72, "IPTTLDec") - L2ForwardingCalcTable = binding.NewOFTable(80, "L2Forwarding") - AntreaPolicyIngressRuleTable = binding.NewOFTable(85, "AntreaPolicyIngressRule") - DefaultTierIngressRuleTable = binding.NewOFTable(89, "DefaultTierIngressRule") - IngressRuleTable = binding.NewOFTable(90, "IngressRule") - IngressDefaultTable = binding.NewOFTable(100, "IngressDefaultRule") - IngressMetricTable = binding.NewOFTable(101, "IngressMetric") - ConntrackCommitTable = binding.NewOFTable(105, "ConntrackCommit") - ServiceConntrackCommitTable = binding.NewOFTable(106, "ServiceConntrackCommit") - HairpinSNATTable = binding.NewOFTable(108, "HairpinSNAT") - L2ForwardingOutTable = binding.NewOFTable(110, "Output") + // _ _ _ _ _ _ + // / \ | |_| |_ ___ _ __ | |_(_) ___ _ __ | | + // / _ \| __| __/ _ \ '_ \| __| |/ _ \| '_ \ | | + // / ___ \ |_| || __/ | | | |_| | (_) | | | | |_| + // /_/ \_\__|\__\___|_| |_|\__|_|\___/|_| |_| (_) + // + // Before adding a new table in FlexiblePipeline, please read the following instructions carefully. + // + // - Double confirm the necessity of adding a new table, and consider reusing an existing table to implement the + // functionality alternatively. + // - Choose a name that can help users to understand the function of the table. + // - Choose a stage. Existing stageIDs are defined in file pkg/agent/openflow/framework.go. If you want to add a new + // stage, please discuss with maintainers or OVS pipeline developers of Antrea. + // - Choose a pipeline. Existing pipelineIDs are defined in file pkg/agent/openflow/framework.go. If you want to add + // a new pipeline, please discuss with maintainers or OVS pipeline developers of Antrea. + // - Decide where to add the new table in the pipeline. 
The order of table declarations decides the order of tables in the
+	//   stage. For example:
+	//   * If you want to add a table called `FooTable` between `SpoofGuardTable` and `IPv6Table` in pipelineIP, then
+	//     the table should be declared after `SpoofGuardTable` and before `IPv6Table`:
+	//     ```go
+	//     SpoofGuardTable = newTable("SpoofGuard", stageValidation, pipelineIP)
+	//     FooTable        = newTable("Foo", stageValidation, pipelineIP)
+	//     IPv6Table       = newTable("IPv6", stageValidation, pipelineIP)
+	//     ```
+	//   * If you want to add a table called `FooTable` just before `ARPResponderTable` in pipelineARP, then the table
+	//     should be declared before `ARPResponderTable`:
+	//     ```go
+	//     FooTable          = newTable("Foo", stageOutput, pipelineARP)
+	//     ARPResponderTable = newTable("ARPResponder", stageOutput, pipelineARP)
+	//     ```
+	//   * If you want to add a table called `FooTable` just after `ConntrackStateTable` in pipelineIP, then the
+	//     table should be declared after `ConntrackStateTable`:
+	//     ```go
+	//     SNATConntrackTable  = newTable("SNATConntrackZone", stageConntrackState, pipelineIP)
+	//     ConntrackTable      = newTable("ConntrackZone", stageConntrackState, pipelineIP)
+	//     ConntrackStateTable = newTable("ConntrackState", stageConntrackState, pipelineIP)
+	//     FooTable            = newTable("Foo", stageConntrackState, pipelineIP)
+	//     ```
+	// - Reference the new table in a feature in file pkg/agent/openflow/framework.go. The table can be referenced by multiple
+	//   features if multiple features need to install flows in the table. Note that, if the newly added table is not
+	//   referenced by any feature, or the features referencing the table are all inactivated, then the table will not
+	//   be realized in OVS; if at least one feature referencing the table is activated, then the table will be realized
+	//   at the desired position in the OVS pipeline.
+	// - By default, the miss action of the new table is to forward packets to the next table. If the miss action needs
+	//   to drop packets, add the argument defaultDrop when creating the new table.
+	//
+	// How to forward packets between tables with a proper action in FlexiblePipeline?
+	//
+	//   | table A |  | table B |  | table C |  | table D |  | table E |  | table F |  | table G |
+	//   | stage S1 |  |                   stage S2                   |  |        stage S4       |
+	//
+	// - NextTable is used to forward packets to the next table. E.g. A -> B, B -> C, C -> D, etc.
+	// - GotoTable is used to forward packets to a specific table, and the target table ID should be greater than the
+	//   current table ID. Within a stage, GotoTable should be used to forward packets to a specific table, e.g. B -> D,
+	//   C -> E. Today we do not have such a case, but if in the future a packet needs to be forwarded to a table in
+	//   another stage directly, e.g. A -> C, B -> G, GotoTable can also be used.
+	// - GotoStage is used to forward packets to a specific stage. Note that, packets are forwarded to the first table of
+	//   the target stage, and the first table ID of the target stage should be greater than the current table ID. E.g.
+	//   A -> S4 (F), D -> S4 (F) are fine, but D -> S1 (A), F -> S2 (B) are not allowed. It is recommended to use
+	//   GotoStage to forward packets across stages.
+	// - ResubmitToTables is used to forward packets to one or multiple tables. It should be used only when the target
+	//   table ID is smaller than the current table ID, like E -> B, or when forwarding packets to multiple tables,
+	//   like B -> D, E. In all other cases, GotoTable should be used.
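+	//
+	// As a purely illustrative sketch (this flow is hypothetical and not installed by any feature), forwarding new
+	// connections from ConntrackStateTable to stagePreRouting with the GotoStage action described above would be
+	// built as follows; cookieID is assumed to be allocated by a cookie.Allocator:
+	// ```go
+	// flow := ConntrackStateTable.ofTable.BuildFlow(priorityNormal).
+	// 	Cookie(cookieID).
+	// 	MatchProtocol(binding.ProtocolIP).
+	// 	MatchCTStateNew(true).
+	// 	MatchCTStateTrk(true).
+	// 	Action().GotoStage(stagePreRouting).
+	// 	Done()
+	// ```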
+ + // Tables of PipelineRoot are declared below. + + // PipelineRootClassifierTable is the only table of pipelineRoot at this moment and its table ID should be 0. Packets + // are forwarded to pipelineIP or pipelineARP in this table. + PipelineRootClassifierTable = newTable("PipelineRootClassifier", stageStart, pipelineRoot, defaultDrop) + + // Tables of pipelineARP are declared below. + + // Tables in stageValidation: + ARPSpoofGuardTable = newTable("ARPSpoofGuard", stageValidation, pipelineARP) + + // Tables in stageOutput: + ARPResponderTable = newTable("ARPResponder", stageOutput, pipelineARP) + + // Tables of pipelineIP are declared below. + + // Tables in stageClassifier: + ClassifierTable = newTable("Classifier", stageClassifier, pipelineIP, defaultDrop) + + // Tables in stageValidation: + SpoofGuardTable = newTable("SpoofGuard", stageValidation, pipelineIP, defaultDrop) + IPv6Table = newTable("IPv6", stageValidation, pipelineIP) + PipelineIPClassifierTable = newTable("PipelineIPClassifier", stageValidation, pipelineIP) + + // Tables in stageConntrackState: + SNATConntrackTable = newTable("SNATConntrackZone", stageConntrackState, pipelineIP) + ConntrackTable = newTable("ConntrackZone", stageConntrackState, pipelineIP) + ConntrackStateTable = newTable("ConntrackState", stageConntrackState, pipelineIP) + + // Tables in stagePreRouting: + // When proxy is enabled. + PreRoutingClassifierTable = newTable("PreRoutingClassifier", stagePreRouting, pipelineIP) + NodePortMarkTable = newTable("NodePortMark", stagePreRouting, pipelineIP) + SessionAffinityTable = newTable("SessionAffinity", stagePreRouting, pipelineIP) + ServiceLBTable = newTable("ServiceLB", stagePreRouting, pipelineIP) + EndpointDNATTable = newTable("EndpointDNAT", stagePreRouting, pipelineIP) + // When proxy is disabled. 
+ DNATTable = newTable("DNAT", stagePreRouting, pipelineIP) + + // Tables in stageEgressSecurity: + AntreaPolicyEgressRuleTable = newTable("AntreaPolicyEgressRule", stageEgressSecurity, pipelineIP) + EgressRuleTable = newTable("EgressRule", stageEgressSecurity, pipelineIP) + EgressDefaultTable = newTable("EgressDefaultRule", stageEgressSecurity, pipelineIP) + EgressMetricTable = newTable("EgressMetric", stageEgressSecurity, pipelineIP) + + // Tables in stageRouting: + L3ForwardingTable = newTable("L3Forwarding", stageRouting, pipelineIP) + EgressMarkTable = newTable("EgressMark", stageRouting, pipelineIP) + L3DecTTLTable = newTable("L3DecTTL", stageRouting, pipelineIP) + + // Tables in stagePostRouting: + ServiceMarkTable = newTable("ServiceMark", stagePostRouting, pipelineIP) + SNATConntrackCommitTable = newTable("SNATConntrackCommit", stagePostRouting, pipelineIP) + + // Tables in stageSwitching: + L2ForwardingCalcTable = newTable("L2ForwardingCalc", stageSwitching, pipelineIP) + + // Tables in stageIngressSecurity: + IngressSecurityClassifierTable = newTable("IngressSecurityClassifier", stageIngressSecurity, pipelineIP) + AntreaPolicyIngressRuleTable = newTable("AntreaPolicyIngressRule", stageIngressSecurity, pipelineIP) + IngressRuleTable = newTable("IngressRule", stageIngressSecurity, pipelineIP) + IngressDefaultTable = newTable("IngressDefaultRule", stageIngressSecurity, pipelineIP) + IngressMetricTable = newTable("IngressMetric", stageIngressSecurity, pipelineIP) + + // Tables in stageConntrack: + ConntrackCommitTable = newTable("ConntrackCommit", stageConntrack, pipelineIP) + + // Tables in stageOutput: + L2ForwardingOutTable = newTable("Output", stageOutput, pipelineIP) + + // Tables of pipelineMulticast are declared below. Do don't declare any tables of other pipelines here! + + // Tables in stageRouting: + MulticastTable = newTable("Multicast", stageRouting, pipelineMulticast) // Flow priority level priorityHigh = uint16(210) @@ -119,35 +225,25 @@ func (a ofAction) String() string { } } -var ( - // egressTables map records all IDs of tables related to - // egress rules. - egressTables = map[uint8]struct{}{ - AntreaPolicyEgressRuleTable.GetID(): {}, - EgressRuleTable.GetID(): {}, - EgressDefaultTable.GetID(): {}, - } - - // ofTableCache caches the OpenFlow tables used in the pipeline, and it supports using the table ID and name as the index to query the OpenFlow table. - ofTableCache = cache.NewIndexer(tableIDKeyFunc, cache.Indexers{tableNameIndex: tableNameIndexFunc}) -) +// tableCache caches the OpenFlow tables used in pipelines, and it supports using the table ID and name as the index to query the OpenFlow table. +var tableCache = cache.NewIndexer(tableIDKeyFunc, cache.Indexers{tableNameIndex: tableNameIndexFunc}) func tableNameIndexFunc(obj interface{}) ([]string, error) { - ofTable := obj.(binding.Table) - return []string{ofTable.GetName()}, nil + table := obj.(*Table) + return []string{table.GetName()}, nil } func tableIDKeyFunc(obj interface{}) (string, error) { - ofTable := obj.(binding.Table) - return fmt.Sprintf("%d", ofTable.GetID()), nil + table := obj.(*Table) + return fmt.Sprintf("%d", table.GetID()), nil } func getTableByID(id uint8) binding.Table { - obj, exists, _ := ofTableCache.GetByKey(fmt.Sprintf("%d", id)) + obj, exists, _ := tableCache.GetByKey(fmt.Sprintf("%d", id)) if !exists { return nil } - return obj.(binding.Table) + return obj.(*Table).ofTable } // GetFlowTableName returns the flow table name given the table ID. 
An empty @@ -164,7 +260,7 @@ func GetFlowTableName(tableID uint8) string { // returns the flow table number if the table is found. Otherwise TableIDAll is // returned if the table cannot be found. func GetFlowTableID(tableName string) uint8 { - objs, _ := ofTableCache.ByIndex(tableNameIndex, tableName) + objs, _ := tableCache.ByIndex(tableNameIndex, tableName) if len(objs) == 0 { return binding.TableIDAll } @@ -173,53 +269,36 @@ func GetFlowTableID(tableName string) uint8 { func GetTableList() []binding.Table { tables := make([]binding.Table, 0) - for _, obj := range ofTableCache.List() { + for _, obj := range tableCache.List() { t := obj.(binding.Table) tables = append(tables, t) } return tables } -// CleanOFTableCache is used to reset ofTableCache and only used in integration tests. When all integration tests about -// openflow run in batch, unexpected flows could be installed on OVS due to stale ofTableCache, which may cause some tests -// to fail. For example, for TestFuncA, SNATTable is needed; for TestFuncB, SNATTable is not needed. If TestFuncB is run -// after TestFuncA, since ofTableCache (SNATTable is added by TestFuncA) is not reset, default flow of SNATTable will also -// be realized on OVS when running TestFuncB (see "func (c *client) defaultFlows() (flows []binding.Flow)"). Note that, -// the unexpected flows are not included in the map tableCache of OFBridge defined in pkg/ovs/openflow/ofctrl_bridge.go, -// because the bridge will be destroyed after every test. For some tests, function checkOVSFlowMetrics (defined in -// test/integration/agent/openflow_test.go) is used to check the flow number of every installed table. The expected table -// list is read from the map tableCache of OFBridge, but the actual table list is dumped from OVS bridge (including the -// unexpected flow). They are different, and as a result, TestFuncB will fail. -func CleanOFTableCache() { - objs := ofTableCache.List() - for i := 0; i < len(objs); i++ { - ofTableCache.Delete(objs[i]) - } -} - -func GetAntreaPolicyEgressTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyEgressTables() []*Table { + return []*Table{ AntreaPolicyEgressRuleTable, EgressDefaultTable, } } -func GetAntreaPolicyIngressTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyIngressTables() []*Table { + return []*Table{ AntreaPolicyIngressRuleTable, IngressDefaultTable, } } -func GetAntreaPolicyBaselineTierTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyBaselineTierTables() []*Table { + return []*Table{ EgressDefaultTable, IngressDefaultTable, } } -func GetAntreaPolicyMultiTierTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyMultiTierTables() []*Table { + return []*Table{ AntreaPolicyEgressRuleTable, AntreaPolicyIngressRuleTable, } @@ -271,10 +350,6 @@ var ( snatPktMarkRange = &binding.Range{0, 7} GlobalVirtualMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff") - hairpinIP = net.ParseIP("169.254.169.252").To4() - hairpinIPv6 = net.ParseIP("fc00::aabb:ccdd:eeff").To16() - - _, mcastCIDR, _ = net.ParseCIDR("224.0.0.0/4") ) type OFEntryOperations interface { @@ -309,36 +384,31 @@ type client struct { enableAntreaPolicy bool enableDenyTracking bool enableEgress bool - enableWireGuard bool enableMulticast bool connectUplinkToBridge bool roundInfo types.RoundInfo cookieAllocator cookie.Allocator bridge binding.Bridge - egressEntryTable uint8 - ingressEntryTable uint8 - // Flow caches for corresponding deletions. 
- nodeFlowCache, podFlowCache, serviceFlowCache, snatFlowCache, tfFlowCache, mcastFlowCache *flowCategoryCache - // "fixed" flows installed by the agent after initialization and which do not change during - // the lifetime of the client. - gatewayFlows, defaultServiceFlows, defaultTunnelFlows, hostNetworkingFlows []binding.Flow + + featurePodConnectivity *featurePodConnectivity + featureService *featureService + featureEgress *featureEgress + featureNetworkPolicy *featureNetworkPolicy + featureMulticast *featureMulticast + activatedFeatures []feature + + featureTraceflow *featureTraceflow + traceableFeatures []traceableFeature + + pipelines map[binding.PipelineID]binding.Pipeline + // ofEntryOperations is a wrapper interface for OpenFlow entry Add / Modify / Delete operations. It // enables convenient mocking in unit tests. ofEntryOperations OFEntryOperations - // policyCache is a storage that supports listing policyRuleConjunction with different indexers. - // It's guaranteed that one policyRuleConjunction is processed by at most one goroutine at any given time. - policyCache cache.Indexer - conjMatchFlowLock sync.Mutex // Lock for access globalConjMatchFlowCache - groupCache sync.Map - // globalConjMatchFlowCache is a global map for conjMatchFlowContext. The key is a string generated from the - // conjMatchFlowContext. - globalConjMatchFlowCache map[string]*conjMatchFlowContext // replayMutex provides exclusive access to the OFSwitch to the ReplayFlows method. replayMutex sync.RWMutex nodeConfig *config.NodeConfig networkConfig *config.NetworkConfig - egressConfig *config.EgressConfig - gatewayOFPort uint32 // ovsDatapathType is the type of the datapath used by the bridge. ovsDatapathType ovsconfig.OVSDatapathType // ovsMetersAreSupported indicates whether the OVS datapath supports OpenFlow meters. @@ -350,10 +420,6 @@ type client struct { ipProtocols []binding.Protocol // ovsctlClient is the interface for executing OVS "ovs-ofctl" and "ovs-appctl" commands. ovsctlClient ovsctl.OVSCtlClient - // deterministic represents whether to generate flows deterministically. - // For example, if a flow has multiple actions, setting it to true can get consistent flow. - // Enabling it may carry a performance impact. It's disabled by default and should only be used in testing. - deterministic bool } func (c *client) GetTunnelVirtualMAC() net.HardwareAddr { @@ -454,388 +520,411 @@ func (c *client) DeleteOFEntries(ofEntries []binding.OFEntry) error { return c.changeOFEntries(ofEntries, del) } -// defaultFlows generates the default flows of all tables. -func (c *client) defaultFlows() (flows []binding.Flow) { - for _, obj := range ofTableCache.List() { - table := obj.(binding.Table) - flowBuilder := table.BuildFlow(priorityMiss) - switch table.GetMissAction() { - case binding.TableMissActionNext: - flowBuilder = flowBuilder.Action().GotoTable(table.GetNext()) - case binding.TableMissActionNormal: - flowBuilder = flowBuilder.Action().Normal() - case binding.TableMissActionDrop: - flowBuilder = flowBuilder.Action().Drop() - case binding.TableMissActionNone: - fallthrough - default: - continue +func (c *client) defaultFlows() []binding.Flow { + cookieID := c.cookieAllocator.Request(cookie.Default).Raw() + var flows []binding.Flow + for id, pipeline := range c.pipelines { + // This generates the default flow for every table in every pipeline. 
+		for _, table := range pipeline.ListAllTables() {
+			flowBuilder := table.BuildFlow(priorityMiss).Cookie(cookieID)
+			switch table.GetMissAction() {
+			case binding.TableMissActionNext:
+				flowBuilder = flowBuilder.Action().NextTable()
+			case binding.TableMissActionNormal:
+				flowBuilder = flowBuilder.Action().Normal()
+			case binding.TableMissActionDrop:
+				flowBuilder = flowBuilder.Action().Drop()
+			case binding.TableMissActionNone:
+				fallthrough
+			default:
+				continue
+			}
+			flows = append(flows, flowBuilder.Done())
+		}
+
+		switch id {
+		case pipelineIP:
+			// This generates the flow to match IPv4 / IPv6 packets and forward them to the first table of pipelineIP in
+			// PipelineRootClassifierTable.
+			for _, ipProtocol := range c.ipProtocols {
+				flows = append(flows, pipelineClassifyFlow(cookieID, ipProtocol, pipeline))
+			}
+		case pipelineARP:
+			// This generates the flow to match ARP packets and forward them to the first table of pipelineARP in
+			// PipelineRootClassifierTable.
+			flows = append(flows, pipelineClassifyFlow(cookieID, binding.ProtocolARP, pipeline))
+		case pipelineMulticast:
+			// This generates the flow to match multicast packets and forward them to the first table of pipelineMulticast
+			// in PipelineIPClassifierTable. Note that, PipelineIPClassifierTable is in stageValidation of pipelineIP. In
+			// other words, pipelineMulticast is forked from PipelineIPClassifierTable in pipelineIP.
+			flows = append(flows, multicastPipelineClassifyFlow(cookieID, pipeline))
 		}
-		flows = append(flows, flowBuilder.Cookie(c.cookieAllocator.Request(cookie.Default).Raw()).Done())
+	}
+	return flows
 }

-// tunnelClassifierFlow generates the flow to mark traffic comes from the tunnelOFPort.
-func (c *client) tunnelClassifierFlow(tunnelOFPort uint32, category cookie.Category) binding.Flow {
-	nextTable := ConntrackTable
-	if c.proxyAll {
-		nextTable = ServiceConntrackTable
-	}
-	return ClassifierTable.BuildFlow(priorityNormal).
+// tunnelClassifierFlow generates the flow to mark the packets from the tunnel port.
+func (f *featurePodConnectivity) tunnelClassifierFlow(tunnelOFPort uint32) binding.Flow {
+	return ClassifierTable.ofTable.BuildFlow(priorityNormal).
+		Cookie(f.cookieAllocator.Request(f.category).Raw()).
 		MatchInPort(tunnelOFPort).
 		Action().LoadRegMark(FromTunnelRegMark).
 		Action().LoadRegMark(RewriteMACRegMark).
-		Action().GotoTable(nextTable.GetID()).
-		Cookie(c.cookieAllocator.Request(category).Raw()).
+		Action().GotoStage(stageConntrackState).
 		Done()
 }

-// gatewayClassifierFlow generates the flow to mark traffic comes from the gatewayOFPort.
-func (c *client) gatewayClassifierFlow(category cookie.Category) binding.Flow {
-	return ClassifierTable.BuildFlow(priorityNormal).
+// gatewayClassifierFlow generates the flow to mark the packets from the Antrea gateway port.
+func (f *featurePodConnectivity) gatewayClassifierFlow() binding.Flow {
+	return ClassifierTable.ofTable.BuildFlow(priorityNormal).
+		Cookie(f.cookieAllocator.Request(f.category).Raw()).
 		MatchInPort(config.HostGatewayOFPort).
 		Action().LoadRegMark(FromGatewayRegMark).
-		Action().GotoTable(ClassifierTable.GetNext()).
-		Cookie(c.cookieAllocator.Request(category).Raw()).
+		Action().GotoStage(stageValidation).
 		Done()
 }

-// podClassifierFlow generates the flow to mark traffic comes from the podOFPort.
-func (c *client) podClassifierFlow(podOFPort uint32, category cookie.Category, isAntreaFlexibleIPAM bool) binding.Flow {
-	flowBuilder := ClassifierTable.BuildFlow(priorityLow).
+// podClassifierFlow generates the flow to mark the packets from a local Pod port. +func (f *featurePodConnectivity) podClassifierFlow(podOFPort uint32, isAntreaFlexibleIPAM bool) binding.Flow { + flowBuilder := ClassifierTable.ofTable.BuildFlow(priorityLow). + Cookie(f.cookieAllocator.Request(f.category).Raw()). MatchInPort(podOFPort). Action().LoadRegMark(FromLocalRegMark). - Action().GotoTable(ClassifierTable.GetNext()) + Action().GotoStage(stageValidation) if isAntreaFlexibleIPAM { - // mark traffic from local AntreaFlexibleIPAM Pod - flowBuilder = flowBuilder.Action().LoadRegMark(AntreaFlexibleIPAMRegMark) - } - return flowBuilder.Cookie(c.cookieAllocator.Request(category).Raw()).Done() -} - -// podUplinkClassifierFlow generates the flows to mark traffic from uplink and bridge ports, which are needed when -// uplink is connected to OVS bridge when AntreaFlexibleIPAM is configured. -func (c *client) podUplinkClassifierFlows(dstMAC net.HardwareAddr, category cookie.Category) (flows []binding.Flow) { - flows = append(flows, ClassifierTable.BuildFlow(priorityHigh). - MatchInPort(config.UplinkOFPort). - MatchDstMAC(dstMAC). - Action().LoadRegMark(FromUplinkRegMark). - Action().GotoTable(ServiceHairpinTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - flows = append(flows, ClassifierTable.BuildFlow(priorityHigh). - MatchInPort(config.BridgeOFPort). - MatchDstMAC(dstMAC). - Action().LoadRegMark(FromBridgeRegMark). - Action().GotoTable(ServiceHairpinTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - return -} - -// connectionTrackFlows generates flows that redirect traffic to ct_zone and handle traffic according to ct_state: -// 1) commit new connections to ct_zone(0xfff0) in the ConntrackCommitTable. -// 2) Add ct_mark on the packet if it is sent to the switch from the host gateway. -// 3) Allow traffic if it hits ct_mark and is sent from the host gateway. -// 4) Drop all invalid traffic. -// 5) Let other traffic go to the SessionAffinityTable first and then the ServiceLBTable. -// The SessionAffinityTable is a side-effect table which means traffic will not -// be resubmitted to any table. serviceLB does Endpoint selection for traffic -// to a Service. -// 6) Add a flow to bypass reject response packet sent by the controller. -func (c *client) connectionTrackFlows(category cookie.Category) []binding.Flow { - flows := c.conntrackBasicFlows(category) - if c.enableProxy { - // Replace the default flow with multiple resubmits actions. - if c.proxyAll { - flows = append(flows, ConntrackStateTable.BuildFlow(priorityMiss). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().ResubmitToTable(ServiceClassifierTable.GetID()). - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()). - Done()) - } else { - flows = append(flows, ConntrackStateTable.BuildFlow(priorityMiss). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()). 
- Done()) - } - - for _, proto := range c.ipProtocols { - gatewayIP := c.nodeConfig.GatewayConfig.IPv4 - serviceVirtualIP := config.VirtualServiceIPv4 - snatZone := SNATCtZone - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - gatewayIP = c.nodeConfig.GatewayConfig.IPv6 - serviceVirtualIP = config.VirtualServiceIPv6 - snatZone = SNATCtZoneV6 - ctZone = CtZoneV6 - } - flows = append(flows, - // This flow is used to maintain DNAT conntrack for Service traffic. - ConntrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Action().CT(false, ConntrackTable.GetNext(), ctZone).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ConntrackCommitTable.BuildFlow(priorityLow).MatchProtocol(proto). - MatchCTStateTrk(true). - MatchCTMark(ServiceCTMark). - MatchRegMark(EpSelectedRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().GotoTable(ConntrackCommitTable.GetNext()). - Done(), - ) - - if c.proxyAll { - flows = append(flows, - // This flow is used to match the Service traffic from Antrea gateway. The Service traffic from gateway - // should enter table serviceConntrackCommitTable, otherwise it will be matched by other flows in - // table connectionTrackCommit. - ConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). - MatchCTMark(ServiceCTMark). - MatchRegMark(FromGatewayRegMark). - Action().GotoTable(ServiceConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // This flow is used to maintain SNAT conntrack for Service traffic. - ServiceConntrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Action().CT(false, ServiceConntrackTable.GetNext(), snatZone).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // This flow is used to match the following cases: - // - The first packet of NodePort/LoadBalancer whose Endpoint is not on local Pod CIDR or any remote - // Pod CIDRs. Note that, this flow will change the behavior of the packet that NodePort/LoadBalancer - // whose externalTrafficPolicy is Local and the Endpoint is on host network. According to the definition - // of externalTrafficPolicy Local, the source IP should be retained. If the Endpoint is on host network, - // there should be only one backend Pod of the Service on a Node (It is impossible to have more than - // one Pods which listen on the same port on host network), so it is not useful to expose the Pod as - // NodePort Service, as it makes no difference to access it directly. - // - The first packet of ClusterIP and the Endpoint is not on local Pod CIDR or any remote Pod CIDRs. - // As the packet is from Antrea gateway, and it will pass through Antrea gateway, a virtual IP is used - // to perform SNAT for the packet, rather than Antrea gateway's IP. - ServiceConntrackCommitTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchRegMark(ToGatewayRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchCTStateNew(true). - MatchCTStateTrk(true). - Action().CT(true, ServiceConntrackCommitTable.GetNext(), snatZone). - SNAT(&binding.IPRange{StartIP: serviceVirtualIP, EndIP: serviceVirtualIP}, nil). - CTDone(). - Done(), - // This flow is used to match the first packet of NodePort/LoadBalancer whose output port is not - // Antrea gateway, and externalTrafficPolicy is Cluster. This packet requires SNAT. Antrea gateway - // IP is used to perform SNAT for the packet. - ServiceConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). 
- MatchRegMark(ServiceNeedSNATRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchCTStateNew(true). - MatchCTStateTrk(true). - Action().CT(true, ServiceConntrackCommitTable.GetNext(), snatZone). - SNAT(&binding.IPRange{StartIP: gatewayIP, EndIP: gatewayIP}, nil). - CTDone(). - Done(), - // This flow is used to match the consequent request packets of Service traffic whose first request packet has been committed - // and performed SNAT. For example: - /* - * 192.168.77.1 is the IP address of client. - * 192.168.77.100 is the IP address of k8s node. - * 30001 is a NodePort port. - * 10.10.0.1 is the IP address of Antrea gateway. - * 10.10.0.3 is the Endpoint of NodePort Service. - - * pkt 1 (request) - * client 192.168.77.1:12345->192.168.77.100:30001 - * ct zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 - * ct zone DNAT 65520 192.168.77.1:12345->192.168.77.100:30001 - * ct commit DNAT zone 65520 192.168.77.1:12345->192.168.77.100:30001 => 192.168.77.1:12345->10.10.0.3:80 - * ct commit SNAT zone 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 - * output - * pkt 2 (response) - * Pod 10.10.0.3:80->10.10.0.1:12345 - * ct zone SNAT 65521 10.10.0.3:80->10.10.0.1:12345 => 10.10.0.3:80->192.168.77.1:12345 - * ct zone DNAT 65520 10.10.0.3:80->192.168.77.1:12345 => 192.168.77.1:30001->192.168.77.1:12345 - * output - * pkt 3 (request) - * client 192.168.77.1:12345->192.168.77.100:30001 - * ct zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 - * ct zone DNAT 65520 192.168.77.1:12345->10.10.0.3:80 - * ct zone SNAT 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 - * output - * pkt ... - - The source IP address of pkt 3 cannot be transformed through zone 65521 as there is no connection track about - 192.168.77.1:12345<->192.168.77.100:30001, and the source IP is still 192.168.77.100. - Before output, pkt 3 needs SNAT, but the connection has been committed. The flow is for pkt 3 to perform SNAT. - */ - ServiceConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchCTStateNew(false). - MatchCTStateTrk(true). - Action().CT(false, ServiceConntrackCommitTable.GetNext(), snatZone). - NAT(). - CTDone(). - Done(), - ) - } - } - } else { - flows = append(flows, c.kubeProxyFlows(category)...) + // This is used to mark the packets from a local Antrea IPAM Pod port. + flowBuilder = flowBuilder.Action().LoadRegMark(AntreaFlexibleIPAMRegMark). + Action().LoadRegMark(RewriteMACRegMark) } - - return flows + return flowBuilder.Done() } -// dnsResponseBypassConntrackFlow generates a flow which is used to bypass the -// dns response packetout from conntrack, to avoid unexpected packet drop. -func (c *client) dnsResponseBypassConntrackFlow() binding.Flow { - table := ConntrackTable - if c.proxyAll { - table = ServiceConntrackTable +// podUplinkClassifierFlows generates the flows to mark the packets with target destination MAC address from uplink/bridge +// port, which are needed when uplink is connected to OVS bridge and Antrea IPAM is configured. +func (f *featurePodConnectivity) podUplinkClassifierFlows(dstMAC net.HardwareAddr) []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() + return []binding.Flow{ + // This generates the flow to mark the packets from uplink port. + ClassifierTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchInPort(config.UplinkOFPort). + MatchDstMAC(dstMAC). + Action().LoadRegMark(FromUplinkRegMark). 
+ Action().GotoStage(stageConntrackState). + Done(), + // This generates the flow to mark the packets from bridge local port. + ClassifierTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchInPort(config.BridgeOFPort). + MatchDstMAC(dstMAC). + Action().LoadRegMark(FromBridgeRegMark). + Action().GotoStage(stageConntrackState). + Done(), } - return table.BuildFlow(priorityHigh). - MatchRegFieldWithValue(CustomReasonField, CustomReasonDNS). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Action().ResubmitToTable(L2ForwardingCalcTable.GetID()). - Done() } -// dnsResponseBypassPacketInFlow generates a flow which is used to bypass the -// dns packetIn conjunction flow for dns response packetOut. This packetOut -// should be sent directly to the requesting client without being intercepted -// again. -func (c *client) dnsResponseBypassPacketInFlow() binding.Flow { - // TODO: use a unified register bit to mark packetOuts. The pipeline does not need to be - // aware of why the packetOut is being set by the controller, it just needs to be aware that - // this is a packetOut message and that some pipeline stages (conntrack, policy enforcement) - // should therefore be skipped. - return AntreaPolicyIngressRuleTable.BuildFlow(priorityDNSBypass). - MatchRegFieldWithValue(CustomReasonField, CustomReasonDNS). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Action().ResubmitToTable(L2ForwardingOutTable.GetID()). - Done() -} - -func (c *client) conntrackBasicFlows(category cookie.Category) []binding.Flow { +// conntrackFlows generates the flows about conntrack for feature PodConnectivity. +func (f *featurePodConnectivity) conntrackFlows() []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - for _, proto := range c.ipProtocols { - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - ctZone = CtZoneV6 - } + for _, ipProtocol := range f.ipProtocols { flows = append(flows, - ConntrackStateTable.BuildFlow(priorityLow).MatchProtocol(proto). - MatchCTStateInv(true).MatchCTStateTrk(true). + // This generates the flow to transform the destination IP of request packets or source IP of reply packets + // from tracked connections in CT zone. + ConntrackTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + Action().CT(false, ConntrackTable.GetNext(), f.ctZones[ipProtocol]). + NAT(). + CTDone(). + Done(), + // This generates the flow to match the packets of tracked non-Service connection and forward them to + // stageEgressSecurity directly to bypass stagePreRouting. The first packet of non-Service connection passes + // through stagePreRouting, and the subsequent packets go to stageEgressSecurity directly. + ConntrackStateTable.ofTable.BuildFlow(priorityLow). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateNew(false). + MatchCTStateTrk(true). + Action().GotoStage(stageEgressSecurity). + Done(), + // This generates the flow to drop invalid packets. + ConntrackStateTable.ofTable.BuildFlow(priorityLow). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateInv(true). + MatchCTStateTrk(true). Action().Drop(). - Cookie(c.cookieAllocator.Request(category).Raw()). Done(), - ConntrackCommitTable.BuildFlow(priorityLow).MatchProtocol(proto). - MatchCTStateNew(true).MatchCTStateTrk(true). - Action().CT(true, ConntrackCommitTable.GetNext(), ctZone). 
+ // This generates the flow to match the first packet of non-Service connection and mark the source of the connection + // by copying PktSourceField to ConnSourceCTMarkField. + ConntrackCommitTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateNew(true). + MatchCTStateTrk(true). + Action().CT(true, ConntrackCommitTable.GetNext(), f.ctZones[ipProtocol]). MoveToCtMarkField(PktSourceField, ConnSourceCTMarkField). CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). Done(), ) } + // This generates default flow to match the first packet of a new connection and forward it to stagePreRouting. + flows = append(flows, ConntrackStateTable.ofTable.BuildFlow(priorityMiss). + Cookie(cookieID). + Action().GotoStage(stagePreRouting). + Done()) + return flows } -func (c *client) kubeProxyFlows(category cookie.Category) []binding.Flow { +// conntrackFlows generates the flows about conntrack for feature Service. +func (f *featureService) conntrackFlows() []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - for _, proto := range c.ipProtocols { - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - ctZone = CtZoneV6 - } + for _, ipProtocol := range f.ipProtocols { flows = append(flows, - ConntrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Action().CT(false, ConntrackTable.GetNext(), ctZone).CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). + // This generates the flow to mark tracked DNATed Service connection with RewriteMACRegMark (load-balanced by + // AntreaProxy) and forward the packets to stageEgressSecurity directly to bypass stagePreRouting. + ConntrackStateTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTMark(ServiceCTMark). + MatchCTStateNew(false). + MatchCTStateTrk(true). + Action().LoadRegMark(RewriteMACRegMark). + Action().GotoStage(stageEgressSecurity). + Done(), + // This generates the flow to avoid committing Service connections (with ServiceCTMark) another time. They + // have been committed in EndpointDNATTable, using the same CT zone. + ConntrackCommitTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTMark(ServiceCTMark). + Action().GotoStage(stageOutput). Done(), ) } return flows } -// TODO: Use DuplicateToBuilder or integrate this function into original one to avoid unexpected -// difference. -// traceflowConnectionTrackFlows generates Traceflow specific flows in the -// connectionTrackStateTable or L2ForwardingCalcTable. When packet is not -// provided, the flows bypass the drop flow in connectionTrackFlows to avoid -// unexpected drop of the injected Traceflow packet, and to drop any Traceflow -// packet that has ct_state +rpl, which may happen when the Traceflow request -// destination is the Node's IP. -// When packet is provided, a flow is added to mark - the first packet of the -// first connection that matches the provided packet - as the Traceflow packet. -// The flow is added in connectionTrackStateTable when receiverOnly is false and -// it also matches in_port to be the provided ofPort (the sender Pod); otherwise -// when receiverOnly is true, the flow is added into L2ForwardingCalcTable and -// matches the destination MAC (the receiver Pod MAC). 
-func (c *client) traceflowConnectionTrackFlows(dataplaneTag uint8, receiverOnly bool, packet *binding.Packet, ofPort uint32, timeout uint16, category cookie.Category) []binding.Flow { +// snatConntrackFlows generates the flows about conntrack of SNAT connection for feature Service. +func (f *featureService) snatConntrackFlows() []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - if packet == nil { - for _, ipProtocol := range c.ipProtocols { - flowBuilder := ConntrackStateTable.BuildFlow(priorityLow + 1). + for _, ipProtocol := range f.ipProtocols { + flows = append(flows, + // This generates the flow to transform destination IP of reply packets from tracked SNATed Service connection + // committed in SNAT CT zone. + SNATConntrackTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). MatchProtocol(ipProtocol). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()) - if c.enableProxy { - flowBuilder = flowBuilder. - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()) - } else { - flowBuilder = flowBuilder. - Action().ResubmitToTable(ConntrackStateTable.GetNext()) - } - flows = append(flows, flowBuilder.Done()) + Action().CT(false, SNATConntrackTable.GetNext(), f.snatCtZones[ipProtocol]). + NAT(). + CTDone(). + Done(), - flows = append(flows, ConntrackStateTable.BuildFlow(priorityLow+2). + // SNAT should be performed for the following connections: + // - Hairpin Service connection initiated through a local Pod, and SNAT should be performed with the Antrea + // gateway IP. + // - Hairpin Service connection initiated through the Antrea gateway, and SNAT should be performed with a + // virtual IP. + // - Nodeport / LoadBalancer connection initiated through the Antrea gateway and externalTrafficPolicy is + // Cluster, and SNAT should be performed with the Antrea gateway IP. + // Note that, for Service connections that require SNAT, ServiceCTMark is loaded in SNAT CT zone when performing + // SNAT since ServiceCTMark loaded in DNAT CT zone cannot be read in SNAT CT zone. For Service connections, + // ServiceCTMark (loaded in DNAT / SNAT CT zone) is used to bypass ConntrackCommitTable which is used to commit + // non-Service connections. For hairpin connections, HairpinCTMark is also loaded in SNAT CT zone when performing + // SNAT since HairpinCTMark loaded in DNAT CT zone also cannot be read in SNAT CT zone. HairpinCTMark is used + // to output packets of hairpin connections in L2ForwardingOutTable. + + // This generates the flow to match the first packet of hairpin Service connection initiated through the Antrea + // gateway with ConnSNATCTMark and HairpinCTMark, then perform SNAT in SNAT CT zone with a virtual IP. + SNATConntrackCommitTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). MatchProtocol(ipProtocol). - MatchIPDSCP(dataplaneTag). - MatchCTStateTrk(true).MatchCTStateRpl(true). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().Drop(). - Done()) + MatchCTStateNew(true). + MatchCTStateTrk(true). + MatchRegMark(FromGatewayRegMark). + MatchCTMark(ConnSNATCTMark). + MatchCTMark(HairpinCTMark). + Action().CT(true, SNATConntrackCommitTable.GetNext(), f.snatCtZones[ipProtocol]). + SNAT(&binding.IPRange{StartIP: f.virtualIPs[ipProtocol], EndIP: f.virtualIPs[ipProtocol]}, nil). + LoadToCtMark(ServiceCTMark). + LoadToCtMark(HairpinCTMark). + CTDone(). 
+ Done(), + // This generates the flow to match the first packet of hairpin Service connection initiated through a Pod with + // ConnSNATCTMark and HairpinCTMark, then perform SNAT in SNAT CT zone with the Antrea gateway IP. + SNATConntrackCommitTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateNew(true). + MatchCTStateTrk(true). + MatchRegMark(FromLocalRegMark). + MatchCTMark(ConnSNATCTMark). + MatchCTMark(HairpinCTMark). + Action().CT(true, SNATConntrackCommitTable.GetNext(), f.snatCtZones[ipProtocol]). + SNAT(&binding.IPRange{StartIP: f.gatewayIPs[ipProtocol], EndIP: f.gatewayIPs[ipProtocol]}, nil). + LoadToCtMark(ServiceCTMark). + LoadToCtMark(HairpinCTMark). + CTDone(). + Done(), + // This generates the flow to match the first packet of NodePort / LoadBalancer connection (non-hairpin) initiated + // through the Antrea gateway with ConnSNATCTMark, then perform SNAT in SNAT CT zone with the Antrea gateway IP. + SNATConntrackCommitTable.ofTable.BuildFlow(priorityLow). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateNew(true). + MatchCTStateTrk(true). + MatchRegMark(FromGatewayRegMark). + MatchCTMark(ConnSNATCTMark). + Action().CT(true, SNATConntrackCommitTable.GetNext(), f.snatCtZones[ipProtocol]). + SNAT(&binding.IPRange{StartIP: f.gatewayIPs[ipProtocol], EndIP: f.gatewayIPs[ipProtocol]}, nil). + LoadToCtMark(ServiceCTMark). + CTDone(). + Done(), + // This generates the flow to match the subsequent request packets of connection whose first request packet has + // been committed in SNAT CT zone, then commit the packets in SNAT CT zone again to perform SNAT. + // For example: + /* + * 192.168.77.1 is the IP address of client. + * 192.168.77.100 is the IP address of K8s Node. + * 30001 is the NodePort port. + * 10.10.0.1 is the IP address of Antrea gateway. + * 10.10.0.3 is the IP of NodePort Service Endpoint. + + * packet 1 (request) + * client 192.168.77.1:12345->192.168.77.100:30001 + * CT zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 + * CT zone DNAT 65520 192.168.77.1:12345->192.168.77.100:30001 + * CT commit DNAT zone 65520 192.168.77.1:12345->192.168.77.100:30001 => 192.168.77.1:12345->10.10.0.3:80 + * CT commit SNAT zone 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 + * output + * packet 2 (reply) + * Pod 10.10.0.3:80->10.10.0.1:12345 + * CT zone SNAT 65521 10.10.0.3:80->10.10.0.1:12345 => 10.10.0.3:80->192.168.77.1:12345 + * CT zone DNAT 65520 10.10.0.3:80->192.168.77.1:12345 => 192.168.77.1:30001->192.168.77.1:12345 + * output + * packet 3 (request) + * client 192.168.77.1:12345->192.168.77.100:30001 + * CT zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 + * CT zone DNAT 65520 192.168.77.1:12345->10.10.0.3:80 + * CT zone SNAT 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 + * output + * packet ... + */ + // As a result, subsequent request packets like packet 3 will only perform SNAT when they pass through SNAT + // CT zone the second time, after they are DNATed in DNAT CT zone. + SNATConntrackCommitTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTMark(ConnSNATCTMark). + MatchCTStateNew(false). + MatchCTStateTrk(true). + MatchCTStateRpl(false). + Action().CT(false, SNATConntrackCommitTable.GetNext(), f.snatCtZones[ipProtocol]). + NAT(). + CTDone(). 
+ Done(), + ) + } + return flows +} + +// dnsResponseBypassConntrackFlow generates the flow to bypass the dns response packetout from conntrack, to avoid unexpected +// packet drop. This flow should be installed on the first table of stageConntrackState. +func (f *featureNetworkPolicy) dnsResponseBypassConntrackFlow(table binding.Table) binding.Flow { + return table.BuildFlow(priorityHigh). + MatchRegFieldWithValue(CustomReasonField, CustomReasonDNS). + Cookie(f.cookieAllocator.Request(cookie.Default).Raw()). + Action().GotoStage(stageSwitching). + Done() +} + +// dnsResponseBypassPacketInFlow generates the flow to bypass the dns packetIn conjunction flow for dns response packetOut. +// This packetOut should be sent directly to the requesting client without being intercepted again. +func (f *featureNetworkPolicy) dnsResponseBypassPacketInFlow() binding.Flow { + // TODO: use a unified register bit to mark packetOuts. The pipeline does not need to be + // aware of why the packetOut is being set by the controller, it just needs to be aware that + // this is a packetOut message and that some pipeline stages (conntrack, policy enforcement) + // should therefore be skipped. + return AntreaPolicyIngressRuleTable.ofTable.BuildFlow(priorityDNSBypass). + Cookie(f.cookieAllocator.Request(cookie.Default).Raw()). + MatchRegFieldWithValue(CustomReasonField, CustomReasonDNS). + Action().GotoStage(stageOutput). + Done() +} + +// TODO: Use DuplicateToBuilder or integrate this function into original one to avoid unexpected difference. +// flowsToTrace generates Traceflow specific flows in the connectionTrackStateTable or L2ForwardingCalcTable for featurePodConnectivity. +// When packet is not provided, the flows bypass the drop flow in conntrackStateFlow to avoid unexpected drop of the +// injected Traceflow packet, and to drop any Traceflow packet that has ct_state +rpl, which may happen when the Traceflow +// request destination is the Node's IP. When packet is provided, a flow is added to mark - the first packet of the first +// connection that matches the provided packet - as the Traceflow packet. The flow is added in connectionTrackStateTable +// when receiverOnly is false and it also matches in_port to be the provided ofPort (the sender Pod); otherwise when +// receiverOnly is true, the flow is added into L2ForwardingCalcTable and matches the destination MAC (the receiver Pod MAC). +func (f *featurePodConnectivity) flowsToTrace(dataplaneTag uint8, + ovsMetersAreSupported, + liveTraffic, + droppedOnly, + receiverOnly bool, + packet *binding.Packet, + ofPort uint32, + timeout uint16) []binding.Flow { + cookieID := f.cookieAllocator.Request(cookie.Traceflow).Raw() + var flows []binding.Flow + if packet == nil { + for _, ipProtocol := range f.ipProtocols { + flows = append(flows, + ConntrackStateTable.ofTable.BuildFlow(priorityLow+1). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout). + Action().GotoStage(stagePreRouting). + Done(), + ConntrackStateTable.ofTable.BuildFlow(priorityLow+2). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateTrk(true). + MatchCTStateRpl(true). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout). + Action().Drop(). + Done(), + ) } } else { var flowBuilder binding.FlowBuilder if !receiverOnly { - flowBuilder = ConntrackStateTable.BuildFlow(priorityLow). + flowBuilder = ConntrackStateTable.ofTable.BuildFlow(priorityLow). + Cookie(cookieID). MatchInPort(ofPort). 
- Action().LoadIPDSCP(dataplaneTag) + MatchCTStateNew(true). + MatchCTStateTrk(true). + Action().LoadIPDSCP(dataplaneTag). + SetHardTimeout(timeout). + Action().GotoStage(stagePreRouting) if packet.DestinationIP != nil { flowBuilder = flowBuilder.MatchDstIP(packet.DestinationIP) } - if c.enableProxy { - flowBuilder = flowBuilder. - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()) - } else { - flowBuilder = flowBuilder. - Action().ResubmitToTable(ConntrackStateTable.GetNext()) - } } else { - nextTable := c.ingressEntryTable - flowBuilder = L2ForwardingCalcTable.BuildFlow(priorityHigh). + flowBuilder = L2ForwardingCalcTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchCTStateNew(true). + MatchCTStateTrk(true). MatchDstMAC(packet.DestinationMAC). Action().LoadToRegField(TargetOFPortField, ofPort). Action().LoadRegMark(OFPortFoundRegMark). Action().LoadIPDSCP(dataplaneTag). - Action().GotoTable(nextTable) + SetHardTimeout(timeout). + Action().GotoStage(stageIngressSecurity) if packet.SourceIP != nil { flowBuilder = flowBuilder.MatchSrcIP(packet.SourceIP) } } - - flowBuilder = flowBuilder.MatchCTStateNew(true).MatchCTStateTrk(true). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()) - // Match transport header switch packet.IPProto { case protocol.Type_ICMP: @@ -856,7 +945,6 @@ func (c *client) traceflowConnectionTrackFlows(dataplaneTag uint8, receiverOnly } default: flowBuilder = flowBuilder.MatchIPProtocolValue(packet.IsIPv6, packet.IPProto) - } if packet.IPProto == protocol.Type_TCP || packet.IPProto == protocol.Type_UDP { if packet.DestinationPort != 0 { @@ -866,44 +954,191 @@ func (c *client) traceflowConnectionTrackFlows(dataplaneTag uint8, receiverOnly flowBuilder = flowBuilder.MatchSrcPort(packet.SourcePort, nil) } } - flows = []binding.Flow{flowBuilder.Done()} + flows = append(flows, flowBuilder.Done()) + } + + // Do not send to the controller if only dropped packets are captured. + ifDroppedOnly := func(fb binding.FlowBuilder) binding.FlowBuilder { + if !droppedOnly { + if ovsMetersAreSupported { + fb = fb.Action().Meter(PacketInMeterIDTF) + } + fb = fb.Action().SendToController(uint8(PacketInReasonTF)) + } + return fb + } + // Clear the loaded DSCP bits before output. + ifLiveTraffic := func(fb binding.FlowBuilder) binding.FlowBuilder { + if liveTraffic { + return fb.Action().LoadIPDSCP(0). + Action().OutputToRegField(TargetOFPortField) + } + return fb + } + + // This generates the Traceflow specific flows that output non-hairpin Traceflow packets to the OVS port and the + // Antrea Agent after L2 forwarding calculation. + for _, ipProtocol := range f.ipProtocols { + if f.networkConfig.TrafficEncapMode.SupportsEncap() { + // SendToController and Output if output port is tunnel port. + fb := L2ForwardingOutTable.ofTable.BuildFlow(priorityNormal+3). + Cookie(cookieID). + MatchRegFieldWithValue(TargetOFPortField, config.DefaultTunOFPort). + MatchProtocol(ipProtocol). + MatchRegMark(OFPortFoundRegMark). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout). + Action().OutputToRegField(TargetOFPortField) + fb = ifDroppedOnly(fb) + flows = append(flows, fb.Done()) + // For injected packets, only SendToController if output port is local gateway. In encapMode, a Traceflow + // packet going out of the gateway port (i.e. exiting the overlay) essentially means that the Traceflow + // request is complete. + fb = L2ForwardingOutTable.ofTable.BuildFlow(priorityNormal+2). + Cookie(cookieID).
+ MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). + MatchProtocol(ipProtocol). + MatchRegMark(OFPortFoundRegMark). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout) + fb = ifDroppedOnly(fb) + fb = ifLiveTraffic(fb) + flows = append(flows, fb.Done()) + } else { + // SendToController and Output if output port is local gateway. Unlike in encapMode, inter-Node Pod-to-Pod + // traffic is expected to go out of the gateway port on the way to its destination. + fb := L2ForwardingOutTable.ofTable.BuildFlow(priorityNormal+2). + Cookie(cookieID). + MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). + MatchProtocol(ipProtocol). + MatchRegMark(OFPortFoundRegMark). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout). + Action().OutputToRegField(TargetOFPortField) + fb = ifDroppedOnly(fb) + flows = append(flows, fb.Done()) + } + // Only SendToController if output port is local gateway and destination IP is gateway. + gatewayIP := f.gatewayIPs[ipProtocol] + if gatewayIP != nil { + fb := L2ForwardingOutTable.ofTable.BuildFlow(priorityNormal+3). + Cookie(cookieID). + MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). + MatchProtocol(ipProtocol). + MatchDstIP(gatewayIP). + MatchRegMark(OFPortFoundRegMark). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout) + fb = ifDroppedOnly(fb) + fb = ifLiveTraffic(fb) + flows = append(flows, fb.Done()) + } + // Only SendToController if output port is Pod port. + fb := L2ForwardingOutTable.ofTable.BuildFlow(priorityNormal + 2). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchRegMark(OFPortFoundRegMark). + MatchIPDSCP(dataplaneTag). + SetHardTimeout(timeout) + fb = ifDroppedOnly(fb) + fb = ifLiveTraffic(fb) + flows = append(flows, fb.Done()) + } + + return flows +} + +// flowsToTrace is used to generate flows for Traceflow in featureService. +func (f *featureService) flowsToTrace(dataplaneTag uint8, + ovsMetersAreSupported, + liveTraffic, + droppedOnly, + receiverOnly bool, + packet *binding.Packet, + ofPort uint32, + timeout uint16) []binding.Flow { + cookieID := f.cookieAllocator.Request(cookie.Traceflow).Raw() + var flows []binding.Flow + // Do not send to the controller if only dropped packets are captured. + ifDroppedOnly := func(fb binding.FlowBuilder) binding.FlowBuilder { + if !droppedOnly { + if ovsMetersAreSupported { + fb = fb.Action().Meter(PacketInMeterIDTF) + } + fb = fb.Action().SendToController(uint8(PacketInReasonTF)) + } + return fb + } + // Clear the loaded DSCP bits before output. + ifLiveTraffic := func(fb binding.FlowBuilder) binding.FlowBuilder { + if liveTraffic { + return fb.Action().LoadIPDSCP(0). + Action().OutputToRegField(TargetOFPortField) + } + return fb + } + + // This generates the Traceflow specific flows that output hairpin Traceflow packets to the OVS port and the Antrea + // Agent after L2 forwarding calculation. + for _, ipProtocol := range f.ipProtocols { + if f.enableProxy { + // Only SendToController for hairpin traffic. + // This flow must have higher priority than the one installed by l2ForwardOutputHairpinServiceFlow. + fb := L2ForwardingOutTable.ofTable.BuildFlow(priorityHigh + 2). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTMark(HairpinCTMark). + MatchIPDSCP(dataplaneTag).
+ SetHardTimeout(timeout) + fb = ifDroppedOnly(fb) + fb = ifLiveTraffic(fb) + flows = append(flows, fb.Done()) + } } return flows } -func (c *client) traceflowNetworkPolicyFlows(dataplaneTag uint8, timeout uint16, category cookie.Category) []binding.Flow { - flows := []binding.Flow{} - c.conjMatchFlowLock.Lock() - defer c.conjMatchFlowLock.Unlock() - // Copy default drop rules. - for _, ctx := range c.globalConjMatchFlowCache { +// flowsToTrace is used to generate flows for Traceflow from globalConjMatchFlowCache and policyCache. +func (f *featureNetworkPolicy) flowsToTrace(dataplaneTag uint8, + ovsMetersAreSupported, + liveTraffic, + droppedOnly, + receiverOnly bool, + packet *binding.Packet, + ofPort uint32, + timeout uint16) []binding.Flow { + cookieID := f.cookieAllocator.Request(cookie.Traceflow).Raw() + var flows []binding.Flow + f.conjMatchFlowLock.Lock() + defer f.conjMatchFlowLock.Unlock() + for _, ctx := range f.globalConjMatchFlowCache { if ctx.dropFlow != nil { copyFlowBuilder := ctx.dropFlow.CopyToBuilder(priorityNormal+2, false) if ctx.dropFlow.FlowProtocol() == "" { copyFlowBuilderIPv6 := ctx.dropFlow.CopyToBuilder(priorityNormal+2, false) copyFlowBuilderIPv6 = copyFlowBuilderIPv6.MatchProtocol(binding.ProtocolIPv6) - if c.ovsMetersAreSupported { + if f.ovsMetersAreSupported { copyFlowBuilderIPv6 = copyFlowBuilderIPv6.Action().Meter(PacketInMeterIDTF) } flows = append(flows, copyFlowBuilderIPv6.MatchIPDSCP(dataplaneTag). + Cookie(cookieID). SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). Action().SendToController(uint8(PacketInReasonTF)). Done()) copyFlowBuilder = copyFlowBuilder.MatchProtocol(binding.ProtocolIP) } - if c.ovsMetersAreSupported { + if f.ovsMetersAreSupported { copyFlowBuilder = copyFlowBuilder.Action().Meter(PacketInMeterIDTF) } flows = append(flows, copyFlowBuilder.MatchIPDSCP(dataplaneTag). + Cookie(cookieID). SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). Action().SendToController(uint8(PacketInReasonTF)). Done()) } } // Copy Antrea NetworkPolicy drop rules. - for _, conj := range c.policyCache.List() { + for _, conj := range f.policyCache.List() { for _, flow := range conj.(*policyRuleConjunction).metricFlows { if flow.IsDropFlow() { copyFlowBuilder := flow.CopyToBuilder(priorityNormal+2, false) @@ -912,22 +1147,22 @@ func (c *client) traceflowNetworkPolicyFlows(dataplaneTag uint8, timeout uint16, if flow.FlowProtocol() == "" { copyFlowBuilderIPv6 := flow.CopyToBuilder(priorityNormal+2, false) copyFlowBuilderIPv6 = copyFlowBuilderIPv6.MatchProtocol(binding.ProtocolIPv6) - if c.ovsMetersAreSupported { + if f.ovsMetersAreSupported { copyFlowBuilderIPv6 = copyFlowBuilderIPv6.Action().Meter(PacketInMeterIDTF) } flows = append(flows, copyFlowBuilderIPv6.MatchIPDSCP(dataplaneTag). SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). + Cookie(cookieID). Action().SendToController(uint8(PacketInReasonTF)). Done()) copyFlowBuilder = copyFlowBuilder.MatchProtocol(binding.ProtocolIP) } - if c.ovsMetersAreSupported { + if f.ovsMetersAreSupported { copyFlowBuilder = copyFlowBuilder.Action().Meter(PacketInMeterIDTF) } flows = append(flows, copyFlowBuilder.MatchIPDSCP(dataplaneTag). SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). + Cookie(cookieID). Action().SendToController(uint8(PacketInReasonTF)). 
Done()) } @@ -936,428 +1171,233 @@ func (c *client) traceflowNetworkPolicyFlows(dataplaneTag uint8, timeout uint16, return flows } -// serviceLBBypassFlows makes packets that belong to a tracked connection bypass -// service LB tables and enter egressRuleTable directly. -func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flow { - flows := []binding.Flow{ - // Tracked connections with the ServiceCTMark (load-balanced by AntreaProxy) receive - // the macRewriteMark and are sent to egressRuleTable. - ConntrackStateTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchCTMark(ServiceCTMark). - MatchCTStateNew(false).MatchCTStateTrk(true). - Action().LoadRegMark(RewriteMACRegMark). - Action().GotoTable(EgressRuleTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done(), - // Tracked connections without the ServiceCTMark are sent to egressRuleTable - // directly. This is meant to match connections which were load-balanced by - // kube-proxy before AntreaProxy got enabled. - ConntrackStateTable.BuildFlow(priorityLow).MatchProtocol(ipProtocol). - MatchCTStateNew(false).MatchCTStateTrk(true). - Action().GotoTable(EgressRuleTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done(), - } - return flows -} - -// l2ForwardCalcFlow generates the flow that matches dst MAC and loads ofPort to reg. -func (c *client) l2ForwardCalcFlow(dstMAC net.HardwareAddr, ofPort uint32, skipIngressRules bool, category cookie.Category) binding.Flow { - nextTable := L2ForwardingCalcTable.GetNext() - if !skipIngressRules { - // Go to ingress NetworkPolicy tables for traffic to local Pods. - nextTable = c.ingressEntryTable - } - return L2ForwardingCalcTable.BuildFlow(priorityNormal). +// l2ForwardCalcFlow generates the flow to match the destination MAC and load the target ofPort to TargetOFPortField. +func (f *featurePodConnectivity) l2ForwardCalcFlow(dstMAC net.HardwareAddr, ofPort uint32) binding.Flow { + return L2ForwardingCalcTable.ofTable.BuildFlow(priorityNormal). + Cookie(f.cookieAllocator.Request(f.category).Raw()). MatchDstMAC(dstMAC). Action().LoadToRegField(TargetOFPortField, ofPort). Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). + Action().NextTable(). Done() - // Broadcast, multicast, and unknown unicast packets will be dropped by - // the default flow of L2ForwardingOutTable. -} - -// traceflowL2ForwardOutputFlows generates Traceflow specific flows that outputs traceflow packets -// to OVS port and Antrea Agent after L2forwarding calculation. -func (c *client) traceflowL2ForwardOutputFlows(dataplaneTag uint8, liveTraffic, droppedOnly bool, timeout uint16, category cookie.Category) []binding.Flow { - flows := []binding.Flow{} - for _, ipProtocol := range c.ipProtocols { - if c.networkConfig.TrafficEncapMode.SupportsEncap() { - // SendToController and Output if output port is tunnel port. - fb1 := L2ForwardingOutTable.BuildFlow(priorityNormal+3). - MatchRegFieldWithValue(TargetOFPortField, config.DefaultTunOFPort). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()) - // For injected packets, only SendToController if output port is local - // gateway. In encapMode, a Traceflow packet going out of the gateway - // port (i.e. 
exiting the overlay) essentially means that the Traceflow - // request is complete. - fb2 := L2ForwardingOutTable.BuildFlow(priorityNormal+2). - MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()) - - // Do not send to controller if captures only dropped packet. - if !droppedOnly { - if c.ovsMetersAreSupported { - fb1 = fb1.Action().Meter(PacketInMeterIDTF) - fb2 = fb2.Action().Meter(PacketInMeterIDTF) - } - fb1 = fb1.Action().SendToController(uint8(PacketInReasonTF)) - fb2 = fb2.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - // Clear the loaded DSCP bits before output. - fb2 = fb2.Action().LoadIPDSCP(0). - Action().OutputToRegField(TargetOFPortField) - } - flows = append(flows, fb1.Done(), fb2.Done()) - } else { - // SendToController and Output if output port is local gateway. Unlike in - // encapMode, inter-Node Pod-to-Pod traffic is expected to go out of the - // gateway port on the way to its destination. - fb1 := L2ForwardingOutTable.BuildFlow(priorityNormal+2). - MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fb1 = fb1.Action().Meter(PacketInMeterIDTF) - } - fb1 = fb1.Action().SendToController(uint8(PacketInReasonTF)) - } - flows = append(flows, fb1.Done()) - } - // Only SendToController if output port is local gateway and destination IP is gateway. - gatewayIP := c.nodeConfig.GatewayConfig.IPv4 - if ipProtocol == binding.ProtocolIPv6 { - gatewayIP = c.nodeConfig.GatewayConfig.IPv6 - } - if gatewayIP != nil { - fb := L2ForwardingOutTable.BuildFlow(priorityNormal+3). - MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). - MatchDstIP(gatewayIP). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fb = fb.Action().Meter(PacketInMeterIDTF) - } - fb = fb.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - fb = fb.Action().LoadIPDSCP(0). - Action().OutputToRegField(TargetOFPortField) - } - flows = append(flows, fb.Done()) - } - // Only SendToController if output port is Pod port. - fb := L2ForwardingOutTable.BuildFlow(priorityNormal + 2). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fb = fb.Action().Meter(PacketInMeterIDTF) - } - fb = fb.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - fb = fb.Action().LoadIPDSCP(0). - Action().OutputToRegField(TargetOFPortField) - } - flows = append(flows, fb.Done()) - if c.enableProxy { - // Only SendToController for hairpin traffic. - // This flow must have higher priority than the one installed by l2ForwardOutputServiceHairpinFlow - fbHairpin := L2ForwardingOutTable.BuildFlow(priorityHigh + 2). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(HairpinRegMark). 
- Cookie(c.cookieAllocator.Request(cookie.Service).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fbHairpin = fbHairpin.Action().Meter(PacketInMeterIDTF) - } - fbHairpin = fbHairpin.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - fbHairpin = fbHairpin.Action().LoadIPDSCP(0). - Action().OutputInPort() - } - flows = append(flows, fbHairpin.Done()) - } - } - return flows } -// l2ForwardOutputServiceHairpinFlow uses in_port action for Service -// hairpin packets to avoid packets from being dropped by OVS. -func (c *client) l2ForwardOutputServiceHairpinFlow() binding.Flow { - return L2ForwardingOutTable.BuildFlow(priorityHigh). - MatchRegMark(HairpinRegMark). +// l2ForwardOutputHairpinServiceFlow generates the flow to output the packet of hairpin Service connection with IN_PORT +// action. +func (f *featureService) l2ForwardOutputHairpinServiceFlow() binding.Flow { + return L2ForwardingOutTable.ofTable.BuildFlow(priorityHigh). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchCTMark(HairpinCTMark). Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). Done() } -// l2ForwardOutputFlows generates the flows that output packets to OVS port after L2 forwarding calculation. -func (c *client) l2ForwardOutputFlows(category cookie.Category) []binding.Flow { - var flows []binding.Flow - flows = append(flows, - L2ForwardingOutTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - L2ForwardingOutTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIPv6). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - return flows +// l2ForwardOutputFlow generates the flow to output the packets to target OVS port according to the value of TargetOFPortField. +func (f *featurePodConnectivity) l2ForwardOutputFlow() binding.Flow { + return L2ForwardingOutTable.ofTable.BuildFlow(priorityNormal). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchRegMark(OFPortFoundRegMark). + Action().OutputToRegField(TargetOFPortField). + Done() } -// l3FwdFlowToPod generates the L3 forward flows for traffic from tunnel to a -// local Pod. It rewrites the destination MAC (should be GlobalVirtualMAC) to -// the Pod interface MAC, and rewrites the source MAC to the gateway interface -// MAC. -func (c *client) l3FwdFlowToPod(localGatewayMAC net.HardwareAddr, podInterfaceIPs []net.IP, podInterfaceMAC net.HardwareAddr, category cookie.Category) []binding.Flow { +// l3FwdFlowToPod generates the flows to match the packets destined for a local Pod. For a per-Node IPAM Pod, the flow +// rewrites destination MAC to the Pod interface's MAC, and rewrites source MAC to Antrea gateway interface's MAC. For +// an Antrea IPAM Pod, the flow only rewrites the destination MAC to the Pod interface's MAC. 
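+// The source MAC is left unchanged for an Antrea IPAM Pod because such Pods are expected to be bridged to the underlay
+// network at L2 rather than routed through the Antrea gateway.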
+func (f *featurePodConnectivity) l3FwdFlowToPod(localGatewayMAC net.HardwareAddr, + podInterfaceIPs []net.IP, + podInterfaceMAC net.HardwareAddr, + isAntreaFlexibleIPAM bool) []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow for _, ip := range podInterfaceIPs { ipProtocol := getIPProtocol(ip) - flowBuilder := L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol) - if !c.connectUplinkToBridge { - // dstMAC will be overwritten always for AntreaFlexibleIPAM - flowBuilder = flowBuilder.MatchRegMark(RewriteMACRegMark) + if isAntreaFlexibleIPAM { + // This generates the flow to match the packets destined for a local Antrea IPAM Pod. + flows = append(flows, L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchDstIP(ip). + Action().SetDstMAC(podInterfaceMAC). + Action().GotoTable(L3DecTTLTable.GetID()). + Done()) + } else { + // This generates the flow to match the packets with RewriteMACRegMark and destined for a local per-Node IPAM Pod. + flows = append(flows, L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchRegMark(RewriteMACRegMark). + MatchDstIP(ip). + Action().SetSrcMAC(localGatewayMAC). + Action().SetDstMAC(podInterfaceMAC). + Action().GotoTable(L3DecTTLTable.GetID()). + Done()) } - flow := flowBuilder.MatchDstIP(ip). - Action().SetSrcMAC(localGatewayMAC). - // Rewrite src MAC to local gateway MAC, and rewrite dst MAC to pod MAC - Action().SetDstMAC(podInterfaceMAC). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() - flows = append(flows, flow) } return flows } -// l3FwdFlowRouteToPod generates the flows to route the traffic to a Pod based on -// the destination IP. It rewrites the destination MAC of the packets to the Pod -// interface MAC. The flow is used in the networkPolicyOnly mode for the traffic -// from the gateway to a local Pod. -func (c *client) l3FwdFlowRouteToPod(podInterfaceIPs []net.IP, podInterfaceMAC net.HardwareAddr, category cookie.Category) []binding.Flow { +// l3FwdFlowRouteToPod generates the flows to match the packets destined for a Pod based on the destination IPs. It rewrites +// destination MAC to the Pod interface's MAC. The flows are used in networkPolicyOnly mode to match the packets from the +// Antrea gateway. +func (f *featurePodConnectivity) l3FwdFlowRouteToPod(podInterfaceIPs []net.IP, podInterfaceMAC net.HardwareAddr) []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow for _, ip := range podInterfaceIPs { ipProtocol := getIPProtocol(ip) - flows = append(flows, L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). + flows = append(flows, L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). MatchDstIP(ip). Action().SetDstMAC(podInterfaceMAC). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). + Action().NextTable(). Done()) } return flows } -// l3FwdFlowRouteToGW generates the flows to route the traffic to the gateway -// interface. It rewrites the destination MAC of the packets to the gateway -// interface MAC. The flow is used in the networkPolicyOnly mode for the traffic -// from a local Pod to remote Pods, Nodes, or external network. 
-func (c *client) l3FwdFlowRouteToGW(gwMAC net.HardwareAddr, category cookie.Category) []binding.Flow { +// l3FwdFlowRouteToGW generates the flows to match the packets destined for the Antrea gateway. It rewrites destination MAC +// to the Antrea gateway interface's MAC. The flows are used in networkPolicyOnly mode to match the packets sourced from a +// local Pod and destined for remote Pods, Nodes, or external network. +func (f *featurePodConnectivity) l3FwdFlowRouteToGW() []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - for _, ipProto := range c.ipProtocols { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityLow).MatchProtocol(ipProto). - Action().SetDstMAC(gwMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). + for _, ipProtocol := range f.ipProtocols { + flows = append(flows, L3ForwardingTable.ofTable.BuildFlow(priorityLow). + Cookie(cookieID). + MatchProtocol(ipProtocol). + Action().SetDstMAC(f.nodeConfig.GatewayConfig.MAC). + Action().LoadRegMark(ToGatewayRegMark). + Action().NextTable(). Done(), ) } return flows } -// l3FwdFlowToGateway generates the L3 forward flows to rewrite the destination MAC of the packets to the gateway interface -// MAC if the destination IP is the gateway IP or the connection was initiated through the gateway interface. -func (c *client) l3FwdFlowToGateway(localGatewayIPs []net.IP, localGatewayMAC net.HardwareAddr, category cookie.Category) []binding.Flow { +// l3FwdFlowToGateway generates the flows to match the packets destined for the Antrea gateway. +func (f *featurePodConnectivity) l3FwdFlowToGateway() []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - for _, ip := range localGatewayIPs { - ipProtocol := getIPProtocol(ip) - flows = append(flows, L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchRegMark(RewriteMACRegMark). - MatchDstIP(ip). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - // Rewrite the destination MAC address with the local host gateway MAC if the packet is in the reply direction and - // is marked with FromGatewayCTMark. This is for connections which were initiated through the gateway, to ensure that - // this reply traffic gets forwarded correctly (back to the host network namespace, through the gateway). In - // particular, it is necessary in the following 2 cases: - // 1) reply traffic for connections from a local Pod to a ClusterIP Service (when AntreaProxy is disabled and - // kube-proxy is used). In this case the destination IP address of the reply traffic is the Pod which initiated the - // connection to the Service (no SNAT). We need to make sure that these packets are sent back through the gateway - // so that the source IP can be rewritten (Service backend IP -> Service ClusterIP). - // 2) when hair-pinning is involved, i.e. connections between 2 local Pods, for which NAT is performed. This - // applies regardless of whether AntreaProxy is enabled or not, and thus also applies to Windows Nodes (for which - // AntreaProxy is enabled by default). One example is a Pod accessing a NodePort Service for which - // externalTrafficPolicy is set to Local, using the local Node's IP address. 
- for _, proto := range c.ipProtocols { - // The following two OpenFlow entries are a workaround for issue: https://github.com/antrea-io/antrea/issues/2981. - // The issue is a Windows OVS bug, which identifies a reply packet as "new" in conntrack, and mark the connection - // with "FromGatewayCTMark". The OVS datapath might drop the packet if the reply packet is actually form - // antrea-gw0 because the input_port and output port number are the same. This workaround doesn't write the - // dst MAC if the reply packet of a connection marked with "FromGatewayCTMark" but it enters OVS from antrea-gw0. - flows = append(flows, L3ForwardingTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchRegMark(FromLocalRegMark). - MatchCTMark(FromGatewayCTMark). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - - if c.networkConfig.TrafficEncapMode.SupportsEncap() { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchRegMark(FromTunnelRegMark). + for ipProtocol, gatewayIP := range f.gatewayIPs { + flows = append(flows, + // This generates the flow to match the packets destined for Antrea gateway. + L3ForwardingTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchDstIP(gatewayIP). + Action().SetDstMAC(f.nodeConfig.GatewayConfig.MAC). + Action().LoadRegMark(ToGatewayRegMark). + Action().GotoTable(L3DecTTLTable.GetID()). + Done(), + // This generates the flow to match the reply packets of connection with FromGatewayCTMark. + L3ForwardingTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchProtocol(ipProtocol). MatchCTMark(FromGatewayCTMark). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - - if c.connectUplinkToBridge { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchCTMark(FromBridgeCTMark). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(c.nodeConfig.UplinkNetConfig.MAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } + MatchCTStateRpl(true). + MatchCTStateTrk(true). + Action().SetDstMAC(f.nodeConfig.GatewayConfig.MAC). + Action().LoadRegMark(ToGatewayRegMark). + Action().GotoTable(L3DecTTLTable.GetID()). + Done(), + ) } return flows } -// l3FwdFlowToRemote generates the L3 forward flow for traffic to a remote Node -// (Pods or gateway) through the tunnel. -func (c *client) l3FwdFlowToRemote( - localGatewayMAC net.HardwareAddr, - peerSubnet net.IPNet, - tunnelPeer net.IP, - category cookie.Category) binding.Flow { - ipProto := getIPProtocol(peerSubnet.IP) - return L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). +// l3FwdFlowToRemoteViaTun generates the flow to match the packets destined for remote Pods via tunnel. +func (f *featurePodConnectivity) l3FwdFlowToRemoteViaTun(localGatewayMAC net.HardwareAddr, peerSubnet net.IPNet, tunnelPeer net.IP) binding.Flow { + ipProtocol := getIPProtocol(peerSubnet.IP) + return L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchProtocol(ipProtocol). MatchDstIPNet(peerSubnet). 
- // Rewrite src MAC to local gateway MAC and rewrite dst MAC to virtual MAC. - Action().SetSrcMAC(localGatewayMAC). - Action().SetDstMAC(GlobalVirtualMAC). - // Flow based tunnel. Set tunnel destination. - Action().SetTunnelDst(tunnelPeer). + Action().SetSrcMAC(localGatewayMAC). // Rewrite src MAC to local gateway MAC. + Action().SetDstMAC(GlobalVirtualMAC). // Rewrite dst MAC to virtual MAC. + Action().SetTunnelDst(tunnelPeer). // Flow based tunnel. Set tunnel destination. + Action().LoadRegMark(ToTunnelRegMark). Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). Done() } -// l3FwdFlowToRemoteViaGW generates the L3 forward flow to support traffic to -// remote via gateway. It is used when the cross-Node traffic does not require -// encapsulation (in noEncap, networkPolicyOnly, or hybrid mode). -func (c *client) l3FwdFlowToRemoteViaGW( - localGatewayMAC net.HardwareAddr, - peerSubnet net.IPNet, - category cookie.Category, - isAntreaFlexibleIPAM bool) binding.Flow { - ipProto := getIPProtocol(peerSubnet.IP) - priority := priorityNormal - // AntreaFlexibleIPAM Pod -> Per-Node IPAM Pod traffic will be sent to remote Gw directly. - if isAntreaFlexibleIPAM { - priority = priorityHigh - } - flowBuilder := L3ForwardingTable.BuildFlow(priority).MatchProtocol(ipProto). +// l3FwdFlowToRemoteViaGW generates the flow to match the packets destined for remote Pods via the Antrea gateway. It is +// used for cross-Node connections that do not require encapsulation (in noEncap, networkPolicyOnly, or hybrid mode). +func (f *featurePodConnectivity) l3FwdFlowToRemoteViaGW(localGatewayMAC net.HardwareAddr, peerSubnet net.IPNet) binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() + ipProtocol := getIPProtocol(peerSubnet.IP) + // This generates the flow to match the packets destined for remote Pods. Note that this flow is installed on Linux Nodes, + // and on Windows Nodes when the remote Node's transport interface MAC is unknown. + fb := L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). MatchDstIPNet(peerSubnet) - if isAntreaFlexibleIPAM { - flowBuilder = flowBuilder.MatchRegMark(AntreaFlexibleIPAMRegMark) + if f.connectUplinkToBridge { + fb = fb.MatchRegMark(NotAntreaFlexibleIPAMRegMark) // Exclude the packets from Antrea IPAM Pods. } - return flowBuilder.Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). + return fb.Action().SetDstMAC(localGatewayMAC). + Action().LoadRegMark(ToGatewayRegMark). + Action().NextTable(). Done() } -// l3FwdServiceDefaultFlowsViaGW generates the default L3 forward flows to support Service traffic to pass through Antrea gateway. -func (c *client) l3FwdServiceDefaultFlowsViaGW(ipProto binding.Protocol, category cookie.Category) []binding.Flow { - gatewayMAC := c.nodeConfig.GatewayConfig.MAC - - flows := []binding.Flow{ - // This flow is used to match the packets of Service traffic: - // - NodePort/LoadBalancer request packets which pass through Antrea gateway and the Service Endpoint is not on - // local Pod CIDR or any remote Pod CIDRs. - // - ClusterIP request packets which are from Antrea gateway and the Service Endpoint is not on local Pod CIDR - // or any remote Pod CIDRs. - // - ClusterIP/NodePort/LoadBalancer response packets from external network. - // The matched packets should leave through Antrea gateway, however, they also enter through Antrea gateway.
This - // is hairpin traffic. - // Skip traffic from AntreaFlexibleIPAM Pods. - L3ForwardingTable.BuildFlow(priorityLow).MatchProtocol(ipProto). - MatchCTMark(ServiceCTMark). - MatchCTStateTrk(true). - MatchRegMark(RewriteMACRegMark). +// l3FwdFlowToRemoteViaUplink generates the flow to match the packets destined for remote Pods via uplink. It is used +// for cross-Node connections that do not require encapsulation (in noEncap, networkPolicyOnly, or hybrid mode). +func (f *featurePodConnectivity) l3FwdFlowToRemoteViaUplink(remoteGatewayMAC net.HardwareAddr, + peerSubnet net.IPNet, + isAntreaFlexibleIPAM bool) binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() + ipProtocol := getIPProtocol(peerSubnet.IP) + if !isAntreaFlexibleIPAM { + // This generates the flow to match the packets destined for remote Pods via uplink directly without passing + // through the Antrea gateway, by rewriting the destination MAC to the remote Node Antrea gateway's MAC. Note that + // this flow is only installed on Windows Nodes. + return L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). MatchRegMark(NotAntreaFlexibleIPAMRegMark). - Action().SetDstMAC(gatewayMAC). + MatchDstIPNet(peerSubnet). + Action().SetDstMAC(remoteGatewayMAC). Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), + Done() } - return flows + // This generates the flow to match the packets sourced from Antrea IPAM Pods and destined for remote Pods, and to rewrite + // the destination MAC to the remote Node Antrea gateway's MAC. Note that this flow is only used on Linux when AntreaIPAM + // is enabled. + return L3ForwardingTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchRegMark(AntreaFlexibleIPAMRegMark). + MatchDstIPNet(peerSubnet). + Action().SetDstMAC(remoteGatewayMAC). + Action().GotoTable(L3DecTTLTable.GetID()). + Done() } -// arpResponderFlow generates the ARP responder flow entry that replies request comes from local gateway for peer -// gateway MAC. -func (c *client) arpResponderFlow(peerGatewayIP net.IP, category cookie.Category) binding.Flow { - return arpResponderTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). +// arpResponderFlow generates the flow to reply to the ARP request with a MAC address for the target IP address. +func (f *featurePodConnectivity) arpResponderFlow(ipAddr net.IP, macAddr net.HardwareAddr) binding.Flow { + return ARPResponderTable.ofTable.BuildFlow(priorityNormal). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchProtocol(binding.ProtocolARP). MatchARPOp(arpOpRequest). - MatchARPTpa(peerGatewayIP). + MatchARPTpa(ipAddr). Action().Move(binding.NxmFieldSrcMAC, binding.NxmFieldDstMAC). - Action().SetSrcMAC(GlobalVirtualMAC). + Action().SetSrcMAC(macAddr). Action().LoadARPOperation(arpOpReply). Action().Move(binding.NxmFieldARPSha, binding.NxmFieldARPTha). - Action().SetARPSha(GlobalVirtualMAC). + Action().SetARPSha(macAddr). Action().Move(binding.NxmFieldARPSpa, binding.NxmFieldARPTpa). - Action().SetARPSpa(peerGatewayIP). + Action().SetARPSpa(ipAddr). Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). Done() } -// arpResponderStaticFlow generates ARP reply for any ARP request with the same global virtual MAC. -// This flow is used in policy-only mode, where traffic are routed via IP not MAC.
-func (c *client) arpResponderStaticFlow(category cookie.Category) binding.Flow { - return arpResponderTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). +// arpResponderStaticFlow generates the flow to reply to any ARP request with the same global virtual MAC. It is used +// in policy-only mode, where traffic is routed via IP, not MAC. +func (f *featurePodConnectivity) arpResponderStaticFlow() binding.Flow { + return ARPResponderTable.ofTable.BuildFlow(priorityNormal). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchProtocol(binding.ProtocolARP). MatchARPOp(arpOpRequest). Action().Move(binding.NxmFieldSrcMAC, binding.NxmFieldDstMAC). Action().SetSrcMAC(GlobalVirtualMAC). @@ -1368,34 +1408,38 @@ func (c *client) arpResponderStaticFlow(category cookie.Category) binding.Flow { Action().Move(binding.NxmFieldARPSpa, binding.NxmFieldARPTpa). Action().Move(SwapField.GetNXFieldName(), binding.NxmFieldARPSpa). Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). Done() - } -// podIPSpoofGuardFlow generates the flow to check IP traffic sent out from local pod. Traffic from host gateway interface -// will not be checked, since it might be pod to service traffic or host namespace traffic. -func (c *client) podIPSpoofGuardFlow(ifIPs []net.IP, ifMAC net.HardwareAddr, ifOFPort uint32, category cookie.Category) []binding.Flow { +// podIPSpoofGuardFlow generates the flow to check IP packets from local Pods. Packets from the Antrea gateway will not be +// checked, since they might belong to Pod-to-Service connections or host namespace connections. +func (f *featurePodConnectivity) podIPSpoofGuardFlow(ifIPs []net.IP, ifMAC net.HardwareAddr, ifOFPort uint32) []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - for _, ifIP := range ifIPs { - ipProtocol := getIPProtocol(ifIP) - if ipProtocol == binding.ProtocolIP { - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchInPort(ifOFPort). - MatchSrcMAC(ifMAC). - MatchSrcIP(ifIP). - Action().GotoTable(SpoofGuardTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } else if ipProtocol == binding.ProtocolIPv6 { - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchInPort(ifOFPort). - MatchSrcMAC(ifMAC). - MatchSrcIP(ifIP). - Action().GotoTable(IPv6Table.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } + targetTables := make(map[binding.Protocol]uint8) + // - When IPv4 is enabled only, IPv6Table is not initialized. All packets should be forwarded to the next table of + // SpoofGuardTable. + // - When IPv6 is enabled only, IPv6Table is initialized, and it is the next table of SpoofGuardTable. All packets + // should be forwarded to IPv6Table. + // - When both IPv4 and IPv6 are enabled, IPv4 packets should skip IPv6Table (which is the next table of SpoofGuardTable) + // to avoid unnecessary overhead. + if len(f.ipProtocols) == 1 { + targetTables[f.ipProtocols[0]] = SpoofGuardTable.GetNext() + } else { + targetTables[binding.ProtocolIP] = IPv6Table.GetNext() + targetTables[binding.ProtocolIPv6] = IPv6Table.GetID() + } + + for _, ifIP := range ifIPs { + ipProtocol := getIPProtocol(ifIP) + flows = append(flows, SpoofGuardTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchInPort(ifOFPort). + MatchSrcMAC(ifMAC). + MatchSrcIP(ifIP). + Action().GotoTable(targetTables[ipProtocol]).
+ Done()) } return flows } @@ -1410,163 +1454,98 @@ func getIPProtocol(ip net.IP) binding.Protocol { return ipProtocol } -// serviceHairpinResponseDNATFlow generates the flow which transforms destination -// IP of the hairpin packet to the source IP. -func (c *client) serviceHairpinResponseDNATFlow(ipProtocol binding.Protocol) binding.Flow { - hpIP := hairpinIP - from := binding.NxmFieldSrcIPv4 - to := binding.NxmFieldDstIPv4 - if ipProtocol == binding.ProtocolIPv6 { - hpIP = hairpinIPv6 - from = binding.NxmFieldSrcIPv6 - to = binding.NxmFieldDstIPv6 - } - return ServiceHairpinTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchDstIP(hpIP). - Action().Move(from, to). - Action().LoadRegMark(HairpinRegMark). - Action().GotoTable(ServiceHairpinTable.GetNext()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() -} - -// serviceHairpinRegSetFlows generates the flow to set the hairpin mark for the packet which is from Antrea gateway and -// its output interface is also Antrea gateway. In table L2ForwardingOutTable #110, a packet with hairpin mark will be -// sent out with action IN_PORT, otherwise the packet with action output will be dropped. -func (c *client) serviceHairpinRegSetFlows(ipProtocol binding.Protocol) binding.Flow { - return HairpinSNATTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchRegMark(FromGatewayRegMark). - MatchRegMark(ToGatewayRegMark). - Action().LoadRegMark(HairpinRegMark). - Action().GotoTable(L2ForwardingOutTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() -} - -// gatewayARPSpoofGuardFlow generates the flow to check ARP traffic sent out from the local gateway interface. -func (c *client) gatewayARPSpoofGuardFlows(gatewayIP net.IP, gatewayMAC net.HardwareAddr, category cookie.Category) (flows []binding.Flow) { - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchInPort(config.HostGatewayOFPort). - MatchARPSha(gatewayMAC). - MatchARPSpa(gatewayIP). - Action().GotoTable(arpResponderTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - if c.connectUplinkToBridge { - // When the uplink bridged and NodePort/HostPort is accessed, Node will send ARP request to HostGatewayOFPort - // with arp_spa=NodeIP. This flow is to accept these ARP requests. - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchInPort(config.HostGatewayOFPort). - MatchARPSha(gatewayMAC). - MatchARPSpa(c.nodeConfig.NodeIPv4Addr.IP). - Action().GotoTable(arpResponderTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return -} - -// arpSpoofGuardFlow generates the flow to check ARP traffic sent out from local pods interfaces. -func (c *client) arpSpoofGuardFlow(ifIP net.IP, ifMAC net.HardwareAddr, ifOFPort uint32, category cookie.Category) binding.Flow { - return SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). +// arpSpoofGuardFlow generates the flow to check the ARP packets sourced from local Pods or the Antrea gateway. +func (f *featurePodConnectivity) arpSpoofGuardFlow(ifIP net.IP, ifMAC net.HardwareAddr, ifOFPort uint32) binding.Flow { + return ARPSpoofGuardTable.ofTable.BuildFlow(priorityNormal). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchProtocol(binding.ProtocolARP). MatchInPort(ifOFPort). MatchARPSha(ifMAC). MatchARPSpa(ifIP). - Action().GotoTable(arpResponderTable.GetID()). 
- Cookie(c.cookieAllocator.Request(category).Raw()). + Action().NextTable(). Done() } -// sessionAffinityReselectFlow generates the flow which resubmits the service accessing -// packet back to ServiceLBTable if there is no endpointDNAT flow matched. This -// case will occur if an Endpoint is removed and is the learned Endpoint +// sessionAffinityReselectFlow generates the flow which resubmits the Service accessing packet back to ServiceLBTable +// if there is no endpointDNAT flow matched. This case will occur if an Endpoint is removed and is the learned Endpoint // selection of the Service. -func (c *client) sessionAffinityReselectFlow() binding.Flow { - return EndpointDNATTable.BuildFlow(priorityLow). +func (f *featureService) sessionAffinityReselectFlow() binding.Flow { + return EndpointDNATTable.ofTable.BuildFlow(priorityLow). + Cookie(f.cookieAllocator.Request(f.category).Raw()). MatchRegMark(EpSelectedRegMark). Action().LoadRegMark(EpToSelectRegMark). - Action().ResubmitToTable(ServiceLBTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Action().ResubmitToTables(ServiceLBTable.GetID()). Done() } -// gatewayIPSpoofGuardFlow generates the flow to skip spoof guard checking for traffic sent from gateway interface. -func (c *client) gatewayIPSpoofGuardFlows(category cookie.Category) []binding.Flow { +// gatewayIPSpoofGuardFlows generates the flows to skip spoof guard checking for packets from the Antrea gateway. +func (f *featurePodConnectivity) gatewayIPSpoofGuardFlows() []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow - for _, proto := range c.ipProtocols { - nextTable := SpoofGuardTable.GetNext() - if proto == binding.ProtocolIPv6 { - nextTable = IPv6Table.GetID() - } - flows = append(flows, - SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(proto). - MatchInPort(config.HostGatewayOFPort). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) + targetTables := make(map[binding.Protocol]uint8) + // - When IPv4 is enabled only, IPv6Table is not initialized. All packets should be forwarded to the next table of + // SpoofGuardTable. + // - When IPv6 is enabled only, IPv6Table is initialized, and it is the next table of SpoofGuardTable. All packets + // should be forwarded to IPv6Table. + // - When both IPv4 and IPv6 are enabled, IPv4 packets should skip IPv6Table (which is the next table of SpoofGuardTable) + // to avoid unnecessary overhead. + if len(f.ipProtocols) == 1 { + targetTables[f.ipProtocols[0]] = SpoofGuardTable.GetNext() + } else { + targetTables[binding.ProtocolIP] = IPv6Table.GetNext() + targetTables[binding.ProtocolIPv6] = IPv6Table.GetID() + } + + for _, ipProtocol := range f.ipProtocols { + flows = append(flows, SpoofGuardTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchInPort(config.HostGatewayOFPort). + Action().GotoTable(targetTables[ipProtocol]). + Done()) } return flows } -// serviceCIDRDNATFlow generates flows to match dst IP in service CIDR and output to host gateway interface directly. +// serviceCIDRDNATFlows generates the flows to match the destination IP in the Service CIDR and output the packets to the Antrea gateway directly.
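+// These flows are only needed when AntreaProxy is disabled, in which case the Service DNAT is performed by kube-proxy
+// on the host.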
+func (f *featureService) serviceCIDRDNATFlows(serviceCIDRs []*net.IPNet) []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow for _, serviceCIDR := range serviceCIDRs { if serviceCIDR != nil { - ipProto := getIPProtocol(serviceCIDR.IP) - flows = append(flows, DNATTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). + ipProtocol := getIPProtocol(serviceCIDR.IP) + flows = append(flows, DNATTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchProtocol(ipProtocol). MatchDstIPNet(*serviceCIDR). Action().LoadToRegField(TargetOFPortField, config.HostGatewayOFPort). Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Action().GotoStage(stageConntrack). Done()) } } return flows } -// serviceNeedLBFlow generates flows to mark packets as LB needed. -func (c *client) serviceNeedLBFlow() binding.Flow { - return SessionAffinityTable.BuildFlow(priorityMiss). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). +// serviceNeedLBFlow generates the default flow to mark packets with EpToSelectRegMark. +func (f *featureService) serviceNeedLBFlow() binding.Flow { + return SessionAffinityTable.ofTable.BuildFlow(priorityMiss). + Cookie(f.cookieAllocator.Request(f.category).Raw()). Action().LoadRegMark(EpToSelectRegMark). Done() } -// arpResponderLocalFlows generates the ARP responder flow entry that replies request from local Pods for local -// gateway MAC. -// Only used in AntreaFlexibleIPAM to avoid multiple ARP replies from antrea-gw0 and uplink. -// TODO(gran): use better method to process ARP and support IPv6. -func (c *client) arpResponderLocalFlows(category cookie.Category) (flows []binding.Flow) { - if c.connectUplinkToBridge && c.nodeConfig.GatewayConfig.IPv4 != nil { - flows = append(flows, arpResponderTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchARPOp(1). - MatchARPTpa(c.nodeConfig.GatewayConfig.IPv4). - Action().Move(binding.NxmFieldSrcMAC, binding.NxmFieldDstMAC). - Action().SetSrcMAC(c.nodeConfig.GatewayConfig.MAC). - Action().LoadARPOperation(2). - Action().Move(binding.NxmFieldARPSha, binding.NxmFieldARPTha). - Action().SetARPSha(c.nodeConfig.GatewayConfig.MAC). - Action().Move(binding.NxmFieldARPSpa, binding.NxmFieldARPTpa). - Action().SetARPSpa(c.nodeConfig.GatewayConfig.IPv4). - Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return -} - -// arpNormalFlow generates the flow to response arp in normal way if no flow in arpResponderTable is matched. -func (c *client) arpNormalFlow(category cookie.Category) binding.Flow { - return arpResponderTable.BuildFlow(priorityLow).MatchProtocol(binding.ProtocolARP). +// arpNormalFlow generates the flow to reply to the ARP request packets in normal way if no flow in ARPResponderTable is matched. +func (f *featurePodConnectivity) arpNormalFlow() binding.Flow { + return ARPResponderTable.ofTable.BuildFlow(priorityLow). + Cookie(f.cookieAllocator.Request(f.category).Raw()). + MatchProtocol(binding.ProtocolARP). Action().Normal(). - Cookie(c.cookieAllocator.Request(category).Raw()). 
Done() } -func (c *client) allowRulesMetricFlows(conjunctionID uint32, ingress bool) []binding.Flow { +func (f *featureNetworkPolicy) allowRulesMetricFlows(conjunctionID uint32, ingress bool) []binding.Flow { + cookieID := f.cookieAllocator.Request(f.category).Raw() metricTable := IngressMetricTable offset := 0 // We use the 0..31 bits of the ct_label to store the ingress rule ID and use the 32..63 bits to store the @@ -1578,12 +1557,12 @@ func (c *client) allowRulesMetricFlows(conjunctionID uint32, ingress bool) []bin field = EgressRuleCTLabel } metricFlow := func(isCTNew bool, protocol binding.Protocol) binding.Flow { - return metricTable.BuildFlow(priorityNormal). + return metricTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). MatchProtocol(protocol). MatchCTStateNew(isCTNew). MatchCTLabelField(0, uint64(conjunctionID)<> output port 4294967294 is out of range // Datapath actions: drop // TODO(gran): support Traceflow - L2ForwardingCalcTable.BuildFlow(priorityNormal). - MatchDstMAC(c.nodeConfig.UplinkNetConfig.MAC). + L2ForwardingCalcTable.ofTable.BuildFlow(priorityNormal). + Cookie(cookieID). + MatchDstMAC(f.nodeConfig.UplinkNetConfig.MAC). Action().LoadToRegField(TargetOFPortField, config.BridgeOFPort). Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). + Action().GotoStage(stageConntrack). Done(), - L2ForwardingOutTable.BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). - MatchRegMark(ToBridgeRegMark). + L2ForwardingOutTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchProtocol(binding.ProtocolIP). + MatchRegMark(OutputToBridgeRegMark). MatchRegMark(OFPortFoundRegMark). Action().Output(config.BridgeOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). Done(), // Handle outgoing packet from AntreaFlexibleIPAM Pods. Broadcast is not supported. - L2ForwardingCalcTable.BuildFlow(priorityLow). + L2ForwardingCalcTable.ofTable.BuildFlow(priorityLow). + Cookie(cookieID). MatchRegMark(AntreaFlexibleIPAMRegMark). Action().LoadToRegField(TargetOFPortField, config.UplinkOFPort). Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). + Action().GotoStage(stageConntrack). Done()) return flows } -func (c *client) l3FwdFlowToRemoteViaRouting(localGatewayMAC net.HardwareAddr, remoteGatewayMAC net.HardwareAddr, - category cookie.Category, peerIP net.IP, peerPodCIDR *net.IPNet) []binding.Flow { - return []binding.Flow{c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, category, false)} +func (f *featurePodConnectivity) l3FwdFlowToRemoteViaRouting(localGatewayMAC net.HardwareAddr, + remoteGatewayMAC net.HardwareAddr, + peerIP net.IP, + peerPodCIDR *net.IPNet) []binding.Flow { + return []binding.Flow{f.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR)} } diff --git a/pkg/agent/openflow/pipeline_test.go b/pkg/agent/openflow/pipeline_test.go new file mode 100644 index 00000000000..32e9e446604 --- /dev/null +++ b/pkg/agent/openflow/pipeline_test.go @@ -0,0 +1,265 @@ +// Copyright 2022 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type ipStack int + +const ( + ipv4Only ipStack = iota + ipv6Only + dualStack +) + +func TestBuildPipeline(t *testing.T) { + ipStackMap := map[ipStack][]binding.Protocol{ + ipv4Only: {binding.ProtocolIP}, + ipv6Only: {binding.ProtocolIPv6}, + dualStack: {binding.ProtocolIP, binding.ProtocolIPv6}, + } + for _, tc := range []struct { + ipStack ipStack + features []feature + expectedTables map[binding.PipelineID][]*Table + }{ + { + ipStack: dualStack, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[dualStack]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: true, proxyAll: true}, + &featureEgress{}, + }, + expectedTables: map[binding.PipelineID][]*Table{ + pipelineRoot: { + PipelineRootClassifierTable, + }, + pipelineIP: { + ClassifierTable, + SpoofGuardTable, + IPv6Table, + SNATConntrackTable, + ConntrackTable, + ConntrackStateTable, + PreRoutingClassifierTable, + NodePortMarkTable, + SessionAffinityTable, + ServiceLBTable, + EndpointDNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + L3ForwardingTable, + EgressMarkTable, + L3DecTTLTable, + ServiceMarkTable, + SNATConntrackCommitTable, + L2ForwardingCalcTable, + AntreaPolicyIngressRuleTable, + IngressRuleTable, + IngressDefaultTable, + IngressMetricTable, + ConntrackCommitTable, + L2ForwardingOutTable, + }, + pipelineARP: { + ARPSpoofGuardTable, + ARPResponderTable, + }, + }, + }, + { + ipStack: ipv6Only, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[ipv6Only]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: true, proxyAll: true}, + &featureEgress{}, + }, + expectedTables: map[binding.PipelineID][]*Table{ + pipelineRoot: { + PipelineRootClassifierTable, + }, + pipelineIP: { + ClassifierTable, + SpoofGuardTable, + IPv6Table, + SNATConntrackTable, + ConntrackTable, + ConntrackStateTable, + PreRoutingClassifierTable, + NodePortMarkTable, + SessionAffinityTable, + ServiceLBTable, + EndpointDNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + L3ForwardingTable, + EgressMarkTable, + L3DecTTLTable, + ServiceMarkTable, + SNATConntrackCommitTable, + L2ForwardingCalcTable, + AntreaPolicyIngressRuleTable, + IngressRuleTable, + IngressDefaultTable, + IngressMetricTable, + ConntrackCommitTable, + L2ForwardingOutTable, + }, + }, + }, + { + ipStack: ipv4Only, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[ipv4Only]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: false}, + &featureEgress{}, + }, + expectedTables: map[binding.PipelineID][]*Table{ + pipelineRoot: { + PipelineRootClassifierTable, + }, + pipelineIP: { + ClassifierTable, + SpoofGuardTable, + ConntrackTable, + ConntrackStateTable, + DNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + 
L3ForwardingTable,
+				L3DecTTLTable,
+				L2ForwardingCalcTable,
+				AntreaPolicyIngressRuleTable,
+				IngressRuleTable,
+				IngressDefaultTable,
+				IngressMetricTable,
+				ConntrackCommitTable,
+				L2ForwardingOutTable,
+			},
+			pipelineARP: {
+				ARPSpoofGuardTable,
+				ARPResponderTable,
+			},
+		},
+	},
+	{
+		ipStack: ipv4Only,
+		features: []feature{
+			&featurePodConnectivity{ipProtocols: ipStackMap[ipv4Only]},
+			&featureNetworkPolicy{enableAntreaPolicy: true},
+			&featureService{enableProxy: true, proxyAll: false},
+			&featureEgress{},
+		},
+		expectedTables: map[binding.PipelineID][]*Table{
+			pipelineRoot: {
+				PipelineRootClassifierTable,
+			},
+			pipelineIP: {
+				ClassifierTable,
+				SpoofGuardTable,
+				SNATConntrackTable,
+				ConntrackTable,
+				ConntrackStateTable,
+				PreRoutingClassifierTable,
+				SessionAffinityTable,
+				ServiceLBTable,
+				EndpointDNATTable,
+				AntreaPolicyEgressRuleTable,
+				EgressRuleTable,
+				EgressDefaultTable,
+				EgressMetricTable,
+				L3ForwardingTable,
+				EgressMarkTable,
+				L3DecTTLTable,
+				ServiceMarkTable,
+				SNATConntrackCommitTable,
+				L2ForwardingCalcTable,
+				AntreaPolicyIngressRuleTable,
+				IngressRuleTable,
+				IngressDefaultTable,
+				IngressMetricTable,
+				ConntrackCommitTable,
+				L2ForwardingOutTable,
+			},
+			pipelineARP: {
+				ARPSpoofGuardTable,
+				ARPResponderTable,
+			},
+		},
+	},
+} {
+	pipelineIDs := []binding.PipelineID{pipelineRoot, pipelineIP}
+	if tc.ipStack != ipv6Only {
+		pipelineIDs = append(pipelineIDs, pipelineARP)
+	}
+	pipelineRequiredTablesMap := make(map[binding.PipelineID]map[*Table]struct{})
+	for _, pipelineID := range pipelineIDs {
+		pipelineRequiredTablesMap[pipelineID] = make(map[*Table]struct{})
+	}
+	pipelineRequiredTablesMap[pipelineRoot][PipelineRootClassifierTable] = struct{}{}
+	for _, f := range tc.features {
+		for _, table := range f.getRequiredTables() {
+			if _, ok := pipelineRequiredTablesMap[table.pipeline]; ok {
+				pipelineRequiredTablesMap[table.pipeline][table] = struct{}{}
+			}
+		}
+	}
+
+	for pipelineID := firstPipeline; pipelineID <= lastPipeline; pipelineID++ {
+		if _, ok := pipelineRequiredTablesMap[pipelineID]; !ok {
+			continue
+		}
+		var requiredTables []*Table
+		for _, table := range tableOrderCache[pipelineID] {
+			if _, ok := pipelineRequiredTablesMap[pipelineID][table]; ok {
+				requiredTables = append(requiredTables, table)
+			}
+		}
+		generatePipeline(pipelineID, requiredTables)
+
+		tables := tc.expectedTables[pipelineID]
+		for i := 0; i < len(tables)-1; i++ {
+			require.NotNil(t, tables[i].ofTable, "table %q should be initialized", tables[i].name)
+			require.Less(t, tables[i].GetID(), tables[i+1].GetID(), fmt.Sprintf("id of table %q should be less than that of table %q", tables[i].GetName(), tables[i+1].GetName()))
+		}
+		require.NotNil(t, tables[len(tables)-1].ofTable, "table %q should be initialized", tables[len(tables)-1].name)
+	}
+	reset()
+}
+}
+
+func reset() {
+	objs := tableCache.List()
+	for i := 0; i < len(objs); i++ {
+		tableCache.Delete(objs[i])
+	}
+	binding.ResetTableID()
+}
diff --git a/pkg/agent/openflow/pipeline_windows.go b/pkg/agent/openflow/pipeline_windows.go
index a3df5f3285c..486911d26ae 100644
--- a/pkg/agent/openflow/pipeline_windows.go
+++ b/pkg/agent/openflow/pipeline_windows.go
@@ -21,71 +21,86 @@ import (
 	"net"

 	"antrea.io/antrea/pkg/agent/config"
-	"antrea.io/antrea/pkg/agent/openflow/cookie"
 	binding "antrea.io/antrea/pkg/ovs/openflow"
 )

-// hostBridgeUplinkFlows generates the flows that forward traffic between the
-// bridge local port and the uplink port to support the host traffic with
-// outside.
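A note on the ordering assertions in TestBuildPipeline above: they hold because table IDs are handed out sequentially while generatePipeline realizes the required tables, using the NextTableID/ResetTableID helpers added to pkg/ovs/openflow/ofctrl_bridge.go later in this diff. A minimal, self-contained sketch of that allocator contract (an illustration only, not part of the patch):

    package main

    import "fmt"

    // Mirrors the allocator added in ofctrl_bridge.go: IDs are handed out in
    // realization order, so a table realized later always gets a higher ID.
    var tableID uint8

    func NextTableID() (id uint8) {
    	id = tableID
    	tableID++
    	return
    }

    // ResetTableID restarts allocation from 0; the test's reset() calls it
    // between cases so each generated pipeline starts from a clean slate.
    func ResetTableID() {
    	tableID = 0
    }

    func main() {
    	first, second := NextTableID(), NextTableID()
    	fmt.Println(first < second) // true: realization order implies ID order
    	ResetTableID()
    	fmt.Println(NextTableID()) // 0: allocation restarts for the next case
    }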
-func (c *client) hostBridgeUplinkFlows(localSubnetMap map[binding.Protocol]net.IPNet, category cookie.Category) (flows []binding.Flow) {
-	flows = []binding.Flow{
-		ClassifierTable.BuildFlow(priorityNormal).
+// hostBridgeUplinkFlows generates the flows that forward traffic between the bridge local port and the uplink port to
+// support host traffic to and from the outside.
+func (f *featurePodConnectivity) hostBridgeUplinkFlows(localSubnetMap map[binding.Protocol]net.IPNet) []binding.Flow {
+	cookieID := f.cookieAllocator.Request(f.category).Raw()
+	flows := f.hostBridgeLocalFlows()
+	flows = append(flows,
+		// This generates the flow to forward ARP packets from the uplink port to the bridge local port, since
+		// flooding is disabled on the uplink port.
+		ARPSpoofGuardTable.ofTable.BuildFlow(priorityNormal).
+			Cookie(cookieID).
 			MatchInPort(config.UplinkOFPort).
 			Action().Output(config.BridgeOFPort).
-			Cookie(c.cookieAllocator.Request(category).Raw()).
 			Done(),
-		ClassifierTable.BuildFlow(priorityNormal).
+		// This generates the flow to forward ARP packets from the bridge local port to the uplink port, since
+		// flooding is disabled on the uplink port.
+		ARPSpoofGuardTable.ofTable.BuildFlow(priorityNormal).
+			Cookie(cookieID).
 			MatchInPort(config.BridgeOFPort).
 			Action().Output(config.UplinkOFPort).
-			Cookie(c.cookieAllocator.Request(category).Raw()).
 			Done(),
-	}
-	if c.networkConfig.TrafficEncapMode.SupportsNoEncap() {
+	)
+	if f.networkConfig.TrafficEncapMode.SupportsNoEncap() {
 		// If NoEncap is enabled, the reply packets from remote Pod can be forwarded to local Pod directly.
-		// by explicitly resubmitting them to ServiceHairpinTable and marking "macRewriteMark" at same time.
+		// This is done by explicitly resubmitting them to the ConntrackState stage and marking "macRewriteMark" at the same time.
 		for ipProtocol, localSubnet := range localSubnetMap {
-			flows = append(flows, ClassifierTable.BuildFlow(priorityHigh).
+			flows = append(flows, ClassifierTable.ofTable.BuildFlow(priorityHigh).
+				Cookie(cookieID).
 				MatchProtocol(ipProtocol).
 				MatchInPort(config.UplinkOFPort).
 				MatchDstIPNet(localSubnet).
 				Action().LoadRegMark(FromUplinkRegMark).
 				Action().LoadRegMark(RewriteMACRegMark).
-				Action().GotoTable(ServiceHairpinTable.GetID()).
-				Cookie(c.cookieAllocator.Request(category).Raw()).
+				Action().GotoStage(stageConntrackState).
 				Done())
 		}
 	}
 	return flows
 }

-func (c *client) l3FwdFlowToRemoteViaRouting(localGatewayMAC net.HardwareAddr, remoteGatewayMAC net.HardwareAddr,
-	category cookie.Category, peerIP net.IP, peerPodCIDR *net.IPNet) []binding.Flow {
-	if c.networkConfig.NeedsDirectRoutingToPeer(peerIP, c.nodeConfig.NodeTransportIPv4Addr) && remoteGatewayMAC != nil {
-		ipProto := getIPProtocol(peerIP)
+func (f *featurePodConnectivity) l3FwdFlowToRemoteViaRouting(localGatewayMAC net.HardwareAddr,
+	remoteGatewayMAC net.HardwareAddr,
+	peerIP net.IP,
+	peerPodCIDR *net.IPNet) []binding.Flow {
+	var flows []binding.Flow
+
+	if f.networkConfig.NeedsDirectRoutingToPeer(peerIP, f.nodeConfig.NodeTransportIPv4Addr) && remoteGatewayMAC != nil {
+		ipProtocol := getIPProtocol(peerIP)
+		cookieID := f.cookieAllocator.Request(f.category).Raw()
 		// It enhances Windows NoEncap mode performance by bypassing host network.
-		flows := []binding.Flow{L2ForwardingCalcTable.BuildFlow(priorityNormal).
-			MatchDstMAC(remoteGatewayMAC).
-			Action().LoadToRegField(TargetOFPortField, config.UplinkOFPort).
-			Action().LoadRegMark(OFPortFoundRegMark).
-			Action().GotoTable(ConntrackCommitTable.GetID()).
-			Cookie(c.cookieAllocator.Request(category).Raw()).
-			Done(),
+		flows = append(flows,
 			// Output the reply packet to the uplink interface if the destination is another Node's IP.
 			// This is for the scenario that another Node directly accesses Pods on this Node. Since the request
 			// packet enters OVS from the uplink interface, the reply should go back in the same path. Otherwise,
 			// Windows host will perform stateless SNAT on the reply, and the packets are possibly dropped on peer
 			// Node because of the wrong source address.
-			L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProto).
+			L3ForwardingTable.ofTable.BuildFlow(priorityNormal).
+				Cookie(cookieID).
+				MatchProtocol(ipProtocol).
 				MatchDstIP(peerIP).
-				MatchCTStateRpl(true).MatchCTStateTrk(true).
+				MatchCTStateRpl(true).
+				MatchCTStateTrk(true).
 				Action().SetDstMAC(remoteGatewayMAC).
-				Action().GotoTable(L3ForwardingTable.GetNext()).
-				Cookie(c.cookieAllocator.Request(category).Raw()).
+				Action().NextTable().
 				Done(),
-		}
-		flows = append(flows, c.l3FwdFlowToRemoteViaGW(remoteGatewayMAC, *peerPodCIDR, category, false))
-		return flows
+			// This generates the flow that matches packets destined for the remote Node by destination MAC, then
+			// loads the uplink's ofPort number into TargetOFPortField.
+			L2ForwardingCalcTable.ofTable.BuildFlow(priorityNormal).
+				Cookie(cookieID).
+				MatchDstMAC(remoteGatewayMAC).
+				Action().LoadToRegField(TargetOFPortField, config.UplinkOFPort).
+				Action().LoadRegMark(OFPortFoundRegMark).
+				Action().GotoStage(stageConntrack).
+				Done(),
+		)
+		flows = append(flows, f.l3FwdFlowToRemoteViaUplink(remoteGatewayMAC, *peerPodCIDR, false))
+	} else {
+		flows = append(flows, f.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR))
 	}
-	return []binding.Flow{c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, category, false)}
+	return flows
 }
diff --git a/pkg/agent/openflow/pod_connectivity.go b/pkg/agent/openflow/pod_connectivity.go
new file mode 100644
index 00000000000..8c6c11aa394
--- /dev/null
+++ b/pkg/agent/openflow/pod_connectivity.go
@@ -0,0 +1,136 @@
+// Copyright 2022 Antrea Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
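The file below is the first of three per-feature source files introduced by this patch (pod_connectivity.go here, then service.go and traceflow.go further down), and they all expose the same small method set. The shared feature interface itself is not part of this diff; judging from the call sites visible here (getRequiredTables in pipeline_test.go, plus getFeatureName/initFlows/replayFlows in the feature files), it is presumably along these lines. A sketch under those assumptions, not the authoritative definition:

    // Hypothetical reconstruction of the feature interface implied by this
    // diff; the real definition lives elsewhere in pkg/agent/openflow.
    type feature interface {
    	// getFeatureName returns a human-readable name, e.g. "PodConnectivity".
    	getFeatureName() string
    	// getRequiredTables lists the tables the feature needs; generatePipeline
    	// uses these lists to lay out each pipeline (see pipeline_test.go above).
    	getRequiredTables() []*Table
    	// initFlows returns the feature's fixed flows, installed at initialization.
    	initFlows() []binding.Flow
    	// replayFlows returns the flows to re-install after an OVS restart.
    	replayFlows() []binding.Flow
    }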
+ +package openflow + +import ( + "net" + + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featurePodConnectivity struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + + nodeCachedFlows *flowCategoryCache + podCachedFlows *flowCategoryCache + fixedFlows []binding.Flow + + gatewayIPs map[binding.Protocol]net.IP + ctZones map[binding.Protocol]int + localCIDRs map[binding.Protocol]net.IPNet + nodeIPs map[binding.Protocol]net.IP + nodeConfig *config.NodeConfig + networkConfig *config.NetworkConfig + + connectUplinkToBridge bool + enableMulticast bool + + category cookie.Category +} + +func (f *featurePodConnectivity) getFeatureName() string { + return "PodConnectivity" +} + +func newFeaturePodConnectivity( + cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + nodeConfig *config.NodeConfig, + networkConfig *config.NetworkConfig, + connectUplinkToBridge bool, + enableMulticast bool) *featurePodConnectivity { + ctZones := make(map[binding.Protocol]int) + gatewayIPs := make(map[binding.Protocol]net.IP) + localCIDRs := make(map[binding.Protocol]net.IPNet) + nodeIPs := make(map[binding.Protocol]net.IP) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP { + ctZones[ipProtocol] = CtZone + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv4 + localCIDRs[ipProtocol] = *nodeConfig.PodIPv4CIDR + nodeIPs[ipProtocol] = nodeConfig.NodeIPv4Addr.IP + } else if ipProtocol == binding.ProtocolIPv6 { + ctZones[ipProtocol] = CtZoneV6 + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv6 + localCIDRs[ipProtocol] = *nodeConfig.PodIPv6CIDR + nodeIPs[ipProtocol] = nodeConfig.NodeIPv6Addr.IP + } + } + + return &featurePodConnectivity{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + nodeCachedFlows: newFlowCategoryCache(), + podCachedFlows: newFlowCategoryCache(), + gatewayIPs: gatewayIPs, + ctZones: ctZones, + localCIDRs: localCIDRs, + nodeIPs: nodeIPs, + nodeConfig: nodeConfig, + networkConfig: networkConfig, + connectUplinkToBridge: connectUplinkToBridge, + enableMulticast: enableMulticast, + category: cookie.PodConnectivity, + } +} + +func (f *featurePodConnectivity) initFlows() []binding.Flow { + var flows []binding.Flow + + for _, ipProtocol := range f.ipProtocols { + if ipProtocol == binding.ProtocolIPv6 { + flows = append(flows, f.ipv6Flows()...) + } else if ipProtocol == binding.ProtocolIP { + flows = append(flows, f.arpNormalFlow()) + if f.connectUplinkToBridge { + flows = append(flows, f.arpResponderFlow(f.nodeConfig.GatewayConfig.IPv4, f.nodeConfig.GatewayConfig.MAC)) + } + } + } + flows = append(flows, f.l3FwdFlowToLocalPodCIDR()...) + if f.connectUplinkToBridge { + flows = append(flows, f.l3FwdFlowToNode()...) + } + flows = append(flows, f.l3FwdFlowToExternal()) + flows = append(flows, f.decTTLFlows()...) + flows = append(flows, f.conntrackFlows()...) + flows = append(flows, f.l2ForwardOutputFlow()) + if f.networkConfig.TrafficEncapMode.IsNetworkPolicyOnly() { + flows = append(flows, f.l3FwdFlowRouteToGW()...) + // If IPv6 is enabled, this flow will never get hit. + // Replies any ARP request with the same global virtual MAC. + flows = append(flows, f.arpResponderStaticFlow()) + } + return flows +} + +func (f *featurePodConnectivity) replayFlows() []binding.Flow { + var flows []binding.Flow + + // Get fixed flows. 
+ for _, flow := range f.fixedFlows { + flow.Reset() + flows = append(flows, flow) + } + // Get cached flows. + for _, cachedFlows := range []*flowCategoryCache{f.nodeCachedFlows, f.podCachedFlows} { + flows = append(flows, getCachedFlows(cachedFlows)...) + } + + return flows +} diff --git a/pkg/agent/openflow/service.go b/pkg/agent/openflow/service.go new file mode 100644 index 00000000000..23b23e44313 --- /dev/null +++ b/pkg/agent/openflow/service.go @@ -0,0 +1,133 @@ +// Copyright 2022 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "net" + "sync" + + "k8s.io/klog/v2" + + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featureService struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + bridge binding.Bridge + + cachedFlows *flowCategoryCache + fixedFlows []binding.Flow + groupCache sync.Map + + gatewayIPs map[binding.Protocol]net.IP + virtualIPs map[binding.Protocol]net.IP + dnatCtZones map[binding.Protocol]int + snatCtZones map[binding.Protocol]int + gatewayMAC net.HardwareAddr + + enableProxy bool + proxyAll bool + connectUplinkToBridge bool + + category cookie.Category +} + +func (f *featureService) getFeatureName() string { + return "Service" +} + +func newFeatureService( + cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + nodeConfig *config.NodeConfig, + bridge binding.Bridge, + enableProxy, + proxyAll, + connectUplinkToBridge bool) *featureService { + gatewayIPs := make(map[binding.Protocol]net.IP) + virtualIPs := make(map[binding.Protocol]net.IP) + dnatCtZones := make(map[binding.Protocol]int) + snatCtZones := make(map[binding.Protocol]int) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv4 + virtualIPs[ipProtocol] = config.VirtualServiceIPv4 + dnatCtZones[ipProtocol] = CtZone + snatCtZones[ipProtocol] = SNATCtZone + } else if ipProtocol == binding.ProtocolIPv6 { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv6 + virtualIPs[ipProtocol] = config.VirtualServiceIPv6 + dnatCtZones[ipProtocol] = CtZoneV6 + snatCtZones[ipProtocol] = SNATCtZoneV6 + } + } + + return &featureService{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + bridge: bridge, + cachedFlows: newFlowCategoryCache(), + groupCache: sync.Map{}, + gatewayIPs: gatewayIPs, + virtualIPs: virtualIPs, + dnatCtZones: dnatCtZones, + snatCtZones: snatCtZones, + gatewayMAC: nodeConfig.GatewayConfig.MAC, + enableProxy: enableProxy, + proxyAll: proxyAll, + connectUplinkToBridge: connectUplinkToBridge, + category: cookie.Service, + } +} + +func (f *featureService) initFlows() []binding.Flow { + var flows []binding.Flow + if f.enableProxy { + flows = append(flows, f.conntrackFlows()...) + flows = append(flows, f.preRoutingClassifierFlows()...) + flows = append(flows, f.l3FwdFlowsToExternalEndpoint()...) 
+ flows = append(flows, f.gatewaySNATFlows()...) + flows = append(flows, f.snatConntrackFlows()...) + } + return flows +} + +func (f *featureService) replayFlows() []binding.Flow { + var flows []binding.Flow + + // Get fixed flows. + for _, flow := range f.fixedFlows { + flow.Reset() + flows = append(flows, flow) + } + // Get cached flows. + flows = append(flows, getCachedFlows(f.cachedFlows)...) + + return flows +} + +func (f *featureService) replayGroups() { + f.groupCache.Range(func(id, value interface{}) bool { + group := value.(binding.Group) + group.Reset() + if err := group.Add(); err != nil { + klog.Errorf("Error when replaying cached group %d: %v", id, err) + } + return true + }) +} diff --git a/pkg/agent/openflow/traceflow.go b/pkg/agent/openflow/traceflow.go new file mode 100644 index 00000000000..e2fca257196 --- /dev/null +++ b/pkg/agent/openflow/traceflow.go @@ -0,0 +1,41 @@ +// Copyright 2022 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featureTraceflow struct { + cachedFlows *flowCategoryCache +} + +func (f *featureTraceflow) getFeatureName() string { + return "Traceflow" +} + +func newFeatureTraceflow() *featureTraceflow { + return &featureTraceflow{ + cachedFlows: newFlowCategoryCache(), + } +} + +func (f *featureTraceflow) initFlows() []binding.Flow { + return []binding.Flow{} +} + +func (f *featureTraceflow) replayFlows() []binding.Flow { + return []binding.Flow{} +} diff --git a/pkg/ovs/openflow/interfaces.go b/pkg/ovs/openflow/interfaces.go index 5f24f90ec69..86993b9ec2e 100644 --- a/pkg/ovs/openflow/interfaces.go +++ b/pkg/ovs/openflow/interfaces.go @@ -51,10 +51,10 @@ const ( ) const ( - TableMissActionDrop MissActionType = iota + TableMissActionNone MissActionType = iota + TableMissActionDrop TableMissActionNormal TableMissActionNext - TableMissActionNone ) const ( @@ -148,6 +148,18 @@ type Table interface { GetNext() uint8 SetNext(next uint8) SetMissAction(action MissActionType) + GetStageID() StageID +} + +type PipelineID uint8 + +type StageID uint8 + +type Pipeline interface { + GetFirstTableInStage(id StageID) Table + GetFirstTable() Table + ListAllTables() []Table + IsLastTable(t Table) bool } type EntryType string @@ -196,7 +208,7 @@ type Action interface { Move(from, to string) FlowBuilder MoveRange(fromName, toName string, from, to Range) FlowBuilder Resubmit(port uint16, table uint8) FlowBuilder - ResubmitToTable(table uint8) FlowBuilder + ResubmitToTables(tables ...uint8) FlowBuilder CT(commit bool, tableID uint8, zone int) CTAction Drop() FlowBuilder Output(port uint32) FlowBuilder @@ -218,6 +230,8 @@ type Action interface { Group(id GroupIDType) FlowBuilder Learn(id uint8, priority uint16, idleTimeout, hardTimeout uint16, cookieID uint64) LearnAction GotoTable(table uint8) FlowBuilder + NextTable() FlowBuilder + GotoStage(stage StageID) FlowBuilder SendToController(reason uint8) FlowBuilder Note(notes string) FlowBuilder Meter(meterID 
uint32) FlowBuilder diff --git a/pkg/ovs/openflow/ofctrl_action.go b/pkg/ovs/openflow/ofctrl_action.go index 76e6e6f7ba5..e4f9fb4e538 100644 --- a/pkg/ovs/openflow/ofctrl_action.go +++ b/pkg/ovs/openflow/ofctrl_action.go @@ -305,8 +305,12 @@ func (a *ofFlowAction) Resubmit(ofPort uint16, tableID uint8) FlowBuilder { return a.builder } -func (a *ofFlowAction) ResubmitToTable(table uint8) FlowBuilder { - return a.Resubmit(openflow13.OFPP_IN_PORT, table) +func (a *ofFlowAction) ResubmitToTables(tables ...uint8) FlowBuilder { + var fb FlowBuilder + for _, t := range tables { + fb = a.Resubmit(openflow13.OFPP_IN_PORT, t) + } + return fb } // DecTTL is an action to decrease TTL. It is used in routing functions implemented by Openflow. @@ -574,3 +578,16 @@ func (a *ofFlowAction) GotoTable(tableID uint8) FlowBuilder { a.builder.ofFlow.Goto(tableID) return a.builder } + +func (a *ofFlowAction) NextTable() FlowBuilder { + tableID := a.builder.ofFlow.table.next + a.builder.ofFlow.Goto(tableID) + return a.builder +} + +func (a *ofFlowAction) GotoStage(stage StageID) FlowBuilder { + pipeline := pipelineCache[a.builder.ofFlow.table.pipelineID] + table := pipeline.GetFirstTableInStage(stage) + a.builder.ofFlow.Goto(table.GetID()) + return a.builder +} diff --git a/pkg/ovs/openflow/ofctrl_bridge.go b/pkg/ovs/openflow/ofctrl_bridge.go index 80674340308..c1b5e11dffb 100644 --- a/pkg/ovs/openflow/ofctrl_bridge.go +++ b/pkg/ovs/openflow/ofctrl_bridge.go @@ -43,6 +43,8 @@ type ofTable struct { missAction MissActionType flowCount uint updateTime time.Time + stageID StageID + pipelineID PipelineID *ofctrl.Table } @@ -83,6 +85,14 @@ func (t *ofTable) SetMissAction(action MissActionType) { t.missAction = action } +func (t *ofTable) GetStageID() StageID { + return t.stageID +} + +func (t *ofTable) GetPipelineID() PipelineID { + return t.pipelineID +} + func (t *ofTable) UpdateStatus(flowCountDelta int) { t.Lock() defer t.Unlock() @@ -141,10 +151,13 @@ func (t *ofTable) DumpFlows(cookieID, cookieMask uint64) (map[uint64]*FlowStates return flowStats, nil } -func NewOFTable(id uint8, name string) Table { +func NewOFTable(id uint8, name string, stageID StageID, pipelineID PipelineID, missAction MissActionType) Table { return &ofTable{ - id: id, - name: name, + id: id, + name: name, + stageID: stageID, + pipelineID: pipelineID, + missAction: missAction, } } @@ -738,3 +751,17 @@ func NewOFBridge(br string, mgmtAddr string) Bridge { s.controller = ofctrl.NewController(s) return s } + +var tableID uint8 + +func NextTableID() (id uint8) { + id = tableID + tableID += 1 + return +} + +// ResetTableID is used to reset the initial tableID so that the table ID increases from 0. +// This function is only for test. +func ResetTableID() { + tableID = 0 +} diff --git a/pkg/ovs/openflow/ofctrl_pipeline.go b/pkg/ovs/openflow/ofctrl_pipeline.go new file mode 100644 index 00000000000..770fba82bfd --- /dev/null +++ b/pkg/ovs/openflow/ofctrl_pipeline.go @@ -0,0 +1,57 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +var pipelineCache = make(map[PipelineID]*ofPipeline) + +type ofPipeline struct { + pipelineID PipelineID + tableMap map[StageID][]Table + tableList []Table +} + +func (p *ofPipeline) GetFirstTableInStage(id StageID) Table { + tables, ok := p.tableMap[id] + if ok { + return tables[0] + } + return nil +} + +func (p *ofPipeline) GetFirstTable() Table { + return p.tableList[0] +} + +func (p *ofPipeline) IsLastTable(t Table) bool { + return t.GetID() == p.tableList[len(p.tableList)-1].GetID() +} + +func (p *ofPipeline) ListAllTables() []Table { + return p.tableList +} + +func NewPipeline(id PipelineID, ofTables []Table) Pipeline { + tableMap := make(map[StageID][]Table) + for _, t := range ofTables { + sid := t.GetStageID() + tableMap[sid] = append(tableMap[sid], t) + } + p := &ofPipeline{pipelineID: id, + tableMap: tableMap, + tableList: ofTables, + } + pipelineCache[id] = p + return p +} diff --git a/pkg/ovs/openflow/testing/mock_openflow.go b/pkg/ovs/openflow/testing/mock_openflow.go index af365d0b222..a88cf1a59bb 100644 --- a/pkg/ovs/openflow/testing/mock_openflow.go +++ b/pkg/ovs/openflow/testing/mock_openflow.go @@ -410,6 +410,20 @@ func (mr *MockTableMockRecorder) GetNext() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNext", reflect.TypeOf((*MockTable)(nil).GetNext)) } +// GetStageID mocks base method +func (m *MockTable) GetStageID() openflow.StageID { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetStageID") + ret0, _ := ret[0].(openflow.StageID) + return ret0 +} + +// GetStageID indicates an expected call of GetStageID +func (mr *MockTableMockRecorder) GetStageID() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetStageID", reflect.TypeOf((*MockTable)(nil).GetStageID)) +} + // SetMissAction mocks base method func (m *MockTable) SetMissAction(arg0 openflow.MissActionType) { m.ctrl.T.Helper() @@ -717,6 +731,20 @@ func (mr *MockActionMockRecorder) Drop() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Drop", reflect.TypeOf((*MockAction)(nil).Drop)) } +// GotoStage mocks base method +func (m *MockAction) GotoStage(arg0 openflow.StageID) openflow.FlowBuilder { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GotoStage", arg0) + ret0, _ := ret[0].(openflow.FlowBuilder) + return ret0 +} + +// GotoStage indicates an expected call of GotoStage +func (mr *MockActionMockRecorder) GotoStage(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GotoStage", reflect.TypeOf((*MockAction)(nil).GotoStage), arg0) +} + // GotoTable mocks base method func (m *MockAction) GotoTable(arg0 byte) openflow.FlowBuilder { m.ctrl.T.Helper() @@ -885,6 +913,20 @@ func (mr *MockActionMockRecorder) MoveRange(arg0, arg1, arg2, arg3 interface{}) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MoveRange", reflect.TypeOf((*MockAction)(nil).MoveRange), arg0, arg1, arg2, arg3) } +// NextTable mocks base method +func (m *MockAction) NextTable() openflow.FlowBuilder { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "NextTable") + ret0, _ := ret[0].(openflow.FlowBuilder) + return ret0 +} + +// NextTable indicates an expected call of NextTable +func (mr *MockActionMockRecorder) NextTable() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NextTable", 
reflect.TypeOf((*MockAction)(nil).NextTable)) +} + // Normal mocks base method func (m *MockAction) Normal() openflow.FlowBuilder { m.ctrl.T.Helper() @@ -983,18 +1025,22 @@ func (mr *MockActionMockRecorder) Resubmit(arg0, arg1 interface{}) *gomock.Call return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Resubmit", reflect.TypeOf((*MockAction)(nil).Resubmit), arg0, arg1) } -// ResubmitToTable mocks base method -func (m *MockAction) ResubmitToTable(arg0 byte) openflow.FlowBuilder { +// ResubmitToTables mocks base method +func (m *MockAction) ResubmitToTables(arg0 ...byte) openflow.FlowBuilder { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ResubmitToTable", arg0) + varargs := []interface{}{} + for _, a := range arg0 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "ResubmitToTables", varargs...) ret0, _ := ret[0].(openflow.FlowBuilder) return ret0 } -// ResubmitToTable indicates an expected call of ResubmitToTable -func (mr *MockActionMockRecorder) ResubmitToTable(arg0 interface{}) *gomock.Call { +// ResubmitToTables indicates an expected call of ResubmitToTables +func (mr *MockActionMockRecorder) ResubmitToTables(arg0 ...interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResubmitToTable", reflect.TypeOf((*MockAction)(nil).ResubmitToTable), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResubmitToTables", reflect.TypeOf((*MockAction)(nil).ResubmitToTables), arg0...) } // SendToController mocks base method diff --git a/test/e2e/basic_test.go b/test/e2e/basic_test.go index 8bb8bcdf953..c44ffab436b 100644 --- a/test/e2e/basic_test.go +++ b/test/e2e/basic_test.go @@ -763,7 +763,7 @@ func testGratuitousARP(t *testing.T, data *TestData, namespace string) { // be sent 100ms after processing CNI ADD request. time.Sleep(100 * time.Millisecond) - cmd := []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=10,arp,arp_spa=%s", podIP.ipv4.String())} + cmd := []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=ARPSpoofGuard,arp,arp_spa=%s", podIP.ipv4.String())} stdout, _, err := data.runCommandFromPod(antreaNamespace, antreaPodName, ovsContainerName, cmd) if err != nil { t.Fatalf("Error when querying openflow: %v", err) diff --git a/test/e2e/framework.go b/test/e2e/framework.go index 8d6458efaf6..85532bf4056 100644 --- a/test/e2e/framework.go +++ b/test/e2e/framework.go @@ -1520,6 +1520,11 @@ func (data *TestData) createNginxClusterIPService(name, namespace string, affini return data.createService(name, namespace, 80, 80, map[string]string{"app": "nginx"}, affinity, false, corev1.ServiceTypeClusterIP, ipFamily) } +// createAgnhostClusterIPService creates a ClusterIP agnhost service with the given name. +func (data *TestData) createAgnhostClusterIPService(serviceName string, affinity bool, ipFamily *corev1.IPFamily) (*corev1.Service, error) { + return data.createService(serviceName, testNamespace, 8080, 8080, map[string]string{"app": "agnhost"}, affinity, false, corev1.ServiceTypeClusterIP, ipFamily) +} + // createAgnhostNodePortService creates a NodePort agnhost service with the given name. 
 func (data *TestData) createAgnhostNodePortService(serviceName string, affinity, nodeLocalExternal bool, ipFamily *corev1.IPFamily) (*corev1.Service, error) {
 	return data.createService(serviceName, testNamespace, 8080, 8080, map[string]string{"app": "agnhost"}, affinity, nodeLocalExternal, corev1.ServiceTypeNodePort, ipFamily)
diff --git a/test/e2e/performance_test.go b/test/e2e/performance_test.go
index f2f1033cc0c..c09c8418ebf 100644
--- a/test/e2e/performance_test.go
+++ b/test/e2e/performance_test.go
@@ -28,6 +28,8 @@ import (
 	networkv1 "k8s.io/api/networking/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
+
+	"antrea.io/antrea/pkg/agent/openflow"
 )

 const (
@@ -283,17 +285,17 @@ func WaitNetworkPolicyRealize(policyRules int, data *TestData) error {

 // checkRealize checks if all CIDR rules in the Network Policy have been realized as OVS flows. It counts the number of
 // flows installed in the ingressRuleTable of the OVS bridge of the control-plane Node. This relies on the implementation
-// knowledge that given a single ingress policy, the Antrea agent will install exactly one flow per CIDR rule in table 90.
-// checkRealize returns true when the number of flows exceeds the number of CIDR, because each table has a default flow
-// entry which is used for default matching.
+// knowledge that given a single ingress policy, the Antrea agent will install exactly one flow per CIDR rule in table
+// IngressRule. checkRealize returns true when the number of flows exceeds the number of CIDRs, because each table has a
+// default flow entry which is used for default matching.
 // Since the check is done over SSH, the time measurement is not completely accurate.
 func checkRealize(policyRules int, data *TestData) (bool, error) {
 	antreaPodName, err := data.getAntreaPodOnNode(controlPlaneNodeName())
 	if err != nil {
 		return false, err
 	}
-	// table 90 is the ingressRuleTable where the rules in workload network policy is being applied to.
-	cmd := []string{"ovs-ofctl", "dump-flows", defaultBridgeName, "table=90"}
+	// table IngressRule is the ingressRuleTable where the rules in the workload network policy are applied.
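Replacing hard-coded table numbers (table=90, table=40, table=42) with table names throughout the tests is a direct consequence of this refactor: since table IDs are now assigned dynamically as the pipeline is generated (see NextTableID earlier in this diff), the numbers can shift between configurations, while the names stay stable. The command constructed just below follows that pattern; it could also be factored into a small helper, sketched here (dumpFlowsByTableName is hypothetical, not part of this patch):

    // Hypothetical e2e helper: dump flows by table name so assertions keep
    // working no matter how table IDs end up being assigned.
    func dumpFlowsByTableName(data *TestData, antreaPodName, tableName string) (string, error) {
    	cmd := []string{"ovs-ofctl", "dump-flows", defaultBridgeName,
    		fmt.Sprintf("table=%s", tableName)}
    	stdout, _, err := data.runCommandFromPod(antreaNamespace, antreaPodName, "antrea-agent", cmd)
    	return stdout, err
    }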
+ cmd := []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%s", openflow.IngressRuleTable.GetName())} stdout, _, err := data.runCommandFromPod(antreaNamespace, antreaPodName, "antrea-agent", cmd) if err != nil { return false, err diff --git a/test/e2e/proxy_test.go b/test/e2e/proxy_test.go index 3fcab8aa016..12fb64f7698 100644 --- a/test/e2e/proxy_test.go +++ b/test/e2e/proxy_test.go @@ -33,8 +33,8 @@ import ( ) type expectTableFlows struct { - tableID int - flows []string + tableName string + flows []string } // TestProxy is the top-level test which contains all subtests for @@ -52,9 +52,6 @@ func TestProxy(t *testing.T) { t.Run("testProxyServiceSessionAffinityCase", func(t *testing.T) { testProxyServiceSessionAffinityCase(t, data) }) - t.Run("testProxyHairpinCase", func(t *testing.T) { - testProxyHairpinCase(t, data) - }) t.Run("testProxyEndpointLifeCycleCase", func(t *testing.T) { testProxyEndpointLifeCycleCase(t, data) }) @@ -112,20 +109,37 @@ func probeClientIPFromNode(node string, baseUrl string) (string, error) { return host, err } -func probeFromPod(data *TestData, pod string, url string) error { - _, _, err := data.runWgetCommandOnBusyboxWithRetry(pod, testNamespace, url, 5) +func probeFromPod(data *TestData, pod, container string, url string) error { + var err error + if container == busyboxContainerName { + _, _, err = data.runWgetCommandOnBusyboxWithRetry(pod, testNamespace, url, 5) + } else { + _, _, err = data.runCommandFromPod(testNamespace, pod, container, []string{"wget", "-O", "-", url, "-T", "5"}) + } return err } -func probeHostnameFromPod(data *TestData, pod string, baseUrl string) (string, error) { +func probeHostnameFromPod(data *TestData, pod, container string, baseUrl string) (string, error) { url := fmt.Sprintf("%s/%s", baseUrl, "hostname") - hostname, _, err := data.runWgetCommandOnBusyboxWithRetry(pod, testNamespace, url, 5) + var err error + var hostname string + if container == busyboxContainerName { + hostname, _, err = data.runWgetCommandOnBusyboxWithRetry(pod, testNamespace, url, 5) + } else { + hostname, _, err = data.runCommandFromPod(testNamespace, pod, container, []string{"wget", "-O", "-", url, "-T", "5"}) + } return hostname, err } -func probeClientIPFromPod(data *TestData, pod string, baseUrl string) (string, error) { +func probeClientIPFromPod(data *TestData, pod, container string, baseUrl string) (string, error) { url := fmt.Sprintf("%s/%s", baseUrl, "clientip") - hostPort, _, err := data.runWgetCommandOnBusyboxWithRetry(pod, testNamespace, url, 5) + var err error + var hostPort string + if container == busyboxContainerName { + hostPort, _, err = data.runWgetCommandOnBusyboxWithRetry(pod, testNamespace, url, 5) + } else { + hostPort, _, err = data.runCommandFromPod(testNamespace, pod, container, []string{"wget", "-O", "-", url, "-T", "5"}) + } if err != nil { return "", err } @@ -239,7 +253,7 @@ func testLoadBalancerClusterFromNode(t *testing.T, data *TestData, nodes []strin func testLoadBalancerClusterFromPod(t *testing.T, data *TestData, pods []string, url string) { for _, pod := range pods { - require.NoError(t, probeFromPod(data, pod, url), "Service LoadBalancer whose externalTrafficPolicy is Cluster should be able to be connected from Pod") + require.NoError(t, probeFromPod(data, pod, busyboxContainerName, url), "Service LoadBalancer whose externalTrafficPolicy is Cluster should be able to be connected from Pod") } } @@ -255,11 +269,11 @@ func testLoadBalancerLocalFromNode(t *testing.T, data *TestData, nodes []string, 
 func testLoadBalancerLocalFromPod(t *testing.T, data *TestData, pods []string, url string, expectedClientIPs, expectedHostnames []string) {
 	errMsg := "Service LoadBalancer whose externalTrafficPolicy is Local should be able to be connected from Pod"
 	for idx, pod := range pods {
-		hostname, err := probeHostnameFromPod(data, pod, url)
+		hostname, err := probeHostnameFromPod(data, pod, busyboxContainerName, url)
 		require.NoError(t, err, errMsg)
 		require.Equal(t, hostname, expectedHostnames[idx])
-		clientIP, err := probeClientIPFromPod(data, pod, url)
+		clientIP, err := probeClientIPFromPod(data, pod, busyboxContainerName, url)
 		require.NoError(t, err, errMsg)
 		require.Equal(t, clientIP, expectedClientIPs[idx])
 	}
@@ -492,7 +506,7 @@ func testNodePortClusterFromNode(t *testing.T, data *TestData, nodes, urls []str
 func testNodePortClusterFromPod(t *testing.T, data *TestData, pods, urls []string) {
 	for _, url := range urls {
 		for _, pod := range pods {
-			require.NoError(t, probeFromPod(data, pod, url), "Service NodePort whose externalTrafficPolicy is Cluster should be able to be connected from Pod")
+			require.NoError(t, probeFromPod(data, pod, busyboxContainerName, url), "Service NodePort whose externalTrafficPolicy is Cluster should be able to be connected from Pod")
 		}
 	}
 }
@@ -523,11 +537,11 @@ func testNodePortLocalFromNode(t *testing.T, data *TestData, nodes, urls, expect
 func testNodePortLocalFromPod(t *testing.T, data *TestData, pods, urls, expectedClientIPs, expectedHostnames []string) {
 	errMsg := "There should be no errors when accessing Service NodePort whose externalTrafficPolicy is Local from Pod"
 	for idx, pod := range pods {
-		hostname, err := probeHostnameFromPod(data, pod, urls[idx])
+		hostname, err := probeHostnameFromPod(data, pod, busyboxContainerName, urls[idx])
 		require.NoError(t, err, errMsg)
 		require.Equal(t, expectedHostnames[idx], hostname)
-		clientIP, err := probeClientIPFromPod(data, pod, urls[idx])
+		clientIP, err := probeClientIPFromPod(data, pod, busyboxContainerName, urls[idx])
 		require.NoError(t, err, errMsg)
 		require.Equal(t, expectedClientIPs[idx], clientIP)
 	}
@@ -653,70 +667,230 @@ func testProxyServiceSessionAffinity(ipFamily *corev1.IPFamily, ingressIPs []str
 	agentName, err := data.getAntreaPodOnNode(nodeName)
 	require.NoError(t, err)
-	table40Output, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, "table=40"})
+	tableSessionAffinityName := "SessionAffinity"
+	tableSessionAffinityOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%s", tableSessionAffinityName)})
 	require.NoError(t, err)
 	if *ipFamily == corev1.IPv4Protocol {
-		require.Contains(t, table40Output, fmt.Sprintf("nw_dst=%s,tp_dst=80", svc.Spec.ClusterIP))
-		require.Contains(t, table40Output, fmt.Sprintf("load:0x%s->NXM_NX_REG3[]", strings.TrimLeft(hex.EncodeToString(nginxIP.ipv4.To4()), "0")))
+		require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("nw_dst=%s,tp_dst=80", svc.Spec.ClusterIP))
+		require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("load:0x%s->NXM_NX_REG3[]", strings.TrimLeft(hex.EncodeToString(nginxIP.ipv4.To4()), "0")))
 		for _, ingressIP := range ingressIPs {
-			require.Contains(t, table40Output, fmt.Sprintf("nw_dst=%s,tp_dst=80", ingressIP))
+			require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("nw_dst=%s,tp_dst=80", ingressIP))
 		}
 	} else {
-		require.Contains(t, table40Output,
fmt.Sprintf("ipv6_dst=%s,tp_dst=80", svc.Spec.ClusterIP)) - require.Contains(t, table40Output, fmt.Sprintf("load:0x%s->NXM_NX_XXREG3[0..63]", strings.TrimLeft(hex.EncodeToString([]byte(*nginxIP.ipv6)[8:16]), "0"))) - require.Contains(t, table40Output, fmt.Sprintf("load:0x%s->NXM_NX_XXREG3[64..127]", strings.TrimLeft(hex.EncodeToString([]byte(*nginxIP.ipv6)[0:8]), "0"))) + require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", svc.Spec.ClusterIP)) + require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("load:0x%s->NXM_NX_XXREG3[0..63]", strings.TrimLeft(hex.EncodeToString([]byte(*nginxIP.ipv6)[8:16]), "0"))) + require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("load:0x%s->NXM_NX_XXREG3[64..127]", strings.TrimLeft(hex.EncodeToString([]byte(*nginxIP.ipv6)[0:8]), "0"))) for _, ingressIP := range ingressIPs { - require.Contains(t, table40Output, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", ingressIP)) + require.Contains(t, tableSessionAffinityOutput, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", ingressIP)) } } } -func testProxyHairpinCase(t *testing.T, data *TestData) { - if len(clusterInfo.podV4NetworkCIDR) != 0 { - ipFamily := corev1.IPv4Protocol - testProxyHairpin(&ipFamily, data, t) - } - if len(clusterInfo.podV6NetworkCIDR) != 0 { - ipFamily := corev1.IPv6Protocol - testProxyHairpin(&ipFamily, data, t) - } +func TestProxyHairpinIPv4(t *testing.T) { + skipIfProxyDisabled(t) + skipIfHasWindowsNodes(t) + skipIfNotIPv4Cluster(t) + testProxyHairpin(t, false) } -func TestProxyHairpin(t *testing.T) { - skipIfHasWindowsNodes(t) +func TestProxyHairpinIPv6(t *testing.T) { skipIfProxyDisabled(t) + skipIfHasWindowsNodes(t) + skipIfNotIPv6Cluster(t) + testProxyHairpin(t, true) +} + +func testProxyHairpin(t *testing.T, isIPv6 bool) { data, err := setupTest(t) if err != nil { t.Fatalf("Error when setting up test: %v", err) } defer teardownTest(t, data) - if len(clusterInfo.podV4NetworkCIDR) != 0 { - ipFamily := corev1.IPv4Protocol - testProxyHairpin(&ipFamily, data, t) + node := nodeName(1) + workerNodeIP := workerNodeIPv4(1) + controllerNodeIP := controlPlaneNodeIPv4() + ipProtocol := corev1.IPv4Protocol + lbClusterIngressIP := []string{"192.168.240.1"} + lbLocalIngressIP := []string{"192.168.240.2"} + if isIPv6 { + workerNodeIP = workerNodeIPv6(1) + controllerNodeIP = controlPlaneNodeIPv6() + ipProtocol = corev1.IPv6Protocol + lbClusterIngressIP = []string{"fd75::aabb:ccdd:ef00"} + lbLocalIngressIP = []string{"fd75::aabb:ccdd:ef01"} } - if len(clusterInfo.podV6NetworkCIDR) != 0 { - ipFamily := corev1.IPv6Protocol - testProxyHairpin(&ipFamily, data, t) + + // Create a ClusterIP Service. + serviceClusterIP := fmt.Sprintf("clusterip-%v", isIPv6) + clusterIPSvc, err := data.createAgnhostClusterIPService(serviceClusterIP, true, &ipProtocol) + defer data.deleteServiceAndWait(defaultTimeout, serviceClusterIP) + require.NoError(t, err) + + // Create two NodePort Services. The externalTrafficPolicy of one Service is Cluster, and the externalTrafficPolicy + // of another one is Local. 
+	var nodePortCluster, nodePortLocal string
+	serviceNodePortCluster := fmt.Sprintf("nodeport-cluster-%v", isIPv6)
+	serviceNodePortLocal := fmt.Sprintf("nodeport-local-%v", isIPv6)
+	nodePortSvc, err := data.createAgnhostNodePortService(serviceNodePortCluster, true, false, &ipProtocol)
+	defer data.deleteServiceAndWait(defaultTimeout, serviceNodePortCluster)
+	require.NoError(t, err)
+	for _, port := range nodePortSvc.Spec.Ports {
+		if port.NodePort != 0 {
+			nodePortCluster = fmt.Sprint(port.NodePort)
+			break
+		}
 	}
+	require.NotEqual(t, "", nodePortCluster, "NodePort port number should not be empty")
+	nodePortSvc, err = data.createAgnhostNodePortService(serviceNodePortLocal, true, true, &ipProtocol)
+	require.NoError(t, err)
+	defer data.deleteServiceAndWait(defaultTimeout, serviceNodePortLocal)
+	for _, port := range nodePortSvc.Spec.Ports {
+		if port.NodePort != 0 {
+			nodePortLocal = fmt.Sprint(port.NodePort)
+			break
+		}
+	}
+	require.NotEqual(t, "", nodePortLocal, "NodePort port number should not be empty")

-	err := data.createPodOnNode(busybox, testNamespace, nodeName, busyboxImage, []string{"nc", "-lk", "-p", "80"}, nil, nil, []corev1.ContainerPort{{ContainerPort: 80, Protocol: corev1.ProtocolTCP}}, false, nil)
-	defer data.deletePodAndWait(defaultTimeout, busybox, testNamespace)
+	// Create two LoadBalancer Services. The externalTrafficPolicy of one Service is Cluster, and the externalTrafficPolicy
+	// of another one is Local.
+	serviceLBCluster := fmt.Sprintf("lb-cluster-%v", isIPv6)
+	serviceLBLocal := fmt.Sprintf("lb-local-%v", isIPv6)
+	_, err = data.createAgnhostLoadBalancerService(serviceLBCluster, true, false, lbClusterIngressIP, &ipProtocol)
 	require.NoError(t, err)
-	require.NoError(t, data.podWaitForRunning(defaultTimeout, busybox, testNamespace))
-	svc, err := data.createService(busybox, testNamespace, 80, 80, map[string]string{"antrea-e2e": busybox}, false, false, corev1.ServiceTypeClusterIP, ipFamily)
-	defer data.deleteServiceAndWait(defaultTimeout, busybox)
+	_, err = data.createAgnhostLoadBalancerService(serviceLBLocal, true, true, lbLocalIngressIP, &ipProtocol)
 	require.NoError(t, err)

-	// Hold on to make sure that the Service is realized.
-	time.Sleep(3 * time.Second)
+	// These are the test URLs.
+	port := "8080"
+	clusterIPUrl := net.JoinHostPort(clusterIPSvc.Spec.ClusterIP, port)
+	workerNodePortClusterUrl := net.JoinHostPort(workerNodeIP, nodePortCluster)
+	workerNodePortLocalUrl := net.JoinHostPort(workerNodeIP, nodePortLocal)
+	controllerNodePortClusterUrl := net.JoinHostPort(controllerNodeIP, nodePortCluster)
+	lbClusterUrl := net.JoinHostPort(lbClusterIngressIP[0], port)
+	lbLocalUrl := net.JoinHostPort(lbLocalIngressIP[0], port)
+
+	// These are the expected client IPs.
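The assignments just below encode which source IP the agnhost server should observe for each hairpin case, following the SNAT behavior documented in the comments further down: the Antrea gateway IP for an intra-Node Pod Endpoint, the virtual Service IP for an intra-Node hostNetwork Endpoint, and the client Node's own IP for the inter-Node cases. Schematically (a sketch of the tests' expectations; the helper and parameter names are illustrative, not part of the patch):

    // Sketch of the expected-source-IP logic these tests encode.
    func expectedHairpinClientIP(interNode, hostNetworkEndpoint bool, gatewayIP, virtualIP, clientNodeIP string) string {
    	switch {
    	case interNode:
    		// The virtual IP is SNATed once more on output, so the backend
    		// sees the client Node's IP.
    		return clientNodeIP
    	case hostNetworkEndpoint:
    		// Intra-Node, hostNetwork Endpoint: SNAT to the virtual Service IP.
    		return virtualIP
    	default:
    		// Intra-Node, Pod Endpoint: SNAT to the Antrea gateway IP.
    		return gatewayIP
    	}
    }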
+	expectedGatewayIP, _ := nodeGatewayIPs(1)
+	expectedVirtualIP := config.VirtualServiceIPv4.String()
+	expectedControllerIP := controllerNodeIP
+	if isIPv6 {
+		_, expectedGatewayIP = nodeGatewayIPs(1)
+		expectedVirtualIP = config.VirtualServiceIPv6.String()
+	}

-	stdout, stderr, err := data.runCommandFromPod(testNamespace, busybox, busyboxContainerName, []string{"nc", svc.Spec.ClusterIP, "80", "-w", "1", "-e", "ls", "/"})
-	require.NoError(t, err, fmt.Sprintf("ipFamily: %v\nstdout: %s\nstderr: %s\n", *ipFamily, stdout, stderr))
+	agnhost := fmt.Sprintf("agnhost-%v", isIPv6)
+	createAgnhostPod(t, data, agnhost, node, false)
+	t.Run("Non-HostNetwork Endpoints", func(t *testing.T) {
+		testProxyIntraNodeHairpinCases(data, t, expectedGatewayIP, agnhost, clusterIPUrl, workerNodePortClusterUrl, workerNodePortLocalUrl, lbClusterUrl, lbLocalUrl)
+		testProxyInterNodeHairpinCases(data, t, false, expectedControllerIP, nodeName(0), clusterIPUrl, controllerNodePortClusterUrl, lbClusterUrl)
+	})
+	require.NoError(t, data.deletePod(testNamespace, agnhost))
+
+	agnhostHost := fmt.Sprintf("agnhost-host-%v", isIPv6)
+	createAgnhostPod(t, data, agnhostHost, node, true)
+	t.Run("HostNetwork Endpoints", func(t *testing.T) {
+		skipIfProxyAllDisabled(t, data)
+		testProxyIntraNodeHairpinCases(data, t, expectedVirtualIP, agnhostHost, clusterIPUrl, workerNodePortClusterUrl, workerNodePortLocalUrl, lbClusterUrl, lbLocalUrl)
+		testProxyInterNodeHairpinCases(data, t, true, expectedControllerIP, nodeName(0), clusterIPUrl, controllerNodePortClusterUrl, lbClusterUrl)
+	})
+}
+
+// If a Pod is not on host network, when it accesses a ClusterIP/NodePort/LoadBalancer Service whose Endpoint is on itself,
+// the connection is a hairpin connection. The Antrea gateway IP is used to SNAT the connection. The IP changes of the
+// connection are:
+// - Pod : Pod IP -> Service IP
+// - OVS DNAT: Pod IP -> Pod IP
+// - OVS SNAT: Antrea gateway IP -> Pod IP
+// - Pod : Antrea gateway IP -> Pod IP
+//
+// If a Pod is on host network, when it accesses a ClusterIP/NodePort/LoadBalancer Service whose Endpoint is on itself
+// (this is equivalent to a Node accessing a ClusterIP/NodePort/LoadBalancer Service whose Endpoint is on host network
+// and on this Node), the connection is also a hairpin connection. A virtual IP is used to SNAT the connection to ensure
+// that the packet can be routed via the Antrea gateway.
The IP changes of the connection are:
+// - Antrea gateway: Antrea gateway IP -> Service IP
+// - OVS DNAT: Antrea gateway IP -> Node IP
+// - OVS SNAT: virtual IP -> Node IP
+// - Antrea gateway: virtual IP -> Node IP
+func testProxyIntraNodeHairpinCases(data *TestData, t *testing.T, expectedClientIP, pod, clusterIPUrl, nodePortClusterUrl, nodePortLocalUrl, lbClusterUrl, lbLocalUrl string) {
+	t.Run("IntraNode/ClusterIP", func(t *testing.T) {
+		clientIP, err := probeClientIPFromPod(data, pod, agnhostContainerName, clusterIPUrl)
+		require.NoError(t, err, "ClusterIP hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+	t.Run("IntraNode/NodePort/ExternalTrafficPolicy:Cluster", func(t *testing.T) {
+		skipIfProxyAllDisabled(t, data)
+		clientIP, err := probeClientIPFromPod(data, pod, agnhostContainerName, nodePortClusterUrl)
+		require.NoError(t, err, "NodePort whose externalTrafficPolicy is Cluster hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+	t.Run("IntraNode/NodePort/ExternalTrafficPolicy:Local", func(t *testing.T) {
+		skipIfProxyAllDisabled(t, data)
+		clientIP, err := probeClientIPFromPod(data, pod, agnhostContainerName, nodePortLocalUrl)
+		require.NoError(t, err, "NodePort whose externalTrafficPolicy is Local hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+	t.Run("IntraNode/LoadBalancer/ExternalTrafficPolicy:Cluster", func(t *testing.T) {
+		clientIP, err := probeClientIPFromPod(data, pod, agnhostContainerName, lbClusterUrl)
+		require.NoError(t, err, "LoadBalancer whose externalTrafficPolicy is Cluster hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+	t.Run("IntraNode/LoadBalancer/ExternalTrafficPolicy:Local", func(t *testing.T) {
+		clientIP, err := probeClientIPFromPod(data, pod, agnhostContainerName, lbLocalUrl)
+		require.NoError(t, err, "LoadBalancer whose externalTrafficPolicy is Local hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+}
+
+// If the client is Node A, when it accesses a ClusterIP/NodePort/LoadBalancer Service whose Endpoint is on Node B, the
+// cases below are hairpin cases (assuming that the AntreaIPAM feature is not enabled):
+// - Traffic mode: encap, Endpoint network: host network, OS: Linux/Windows
+// - Traffic mode: noEncap, Endpoint network: not host network, OS: Linux (packets are routed via uplink interface)
+// - Traffic mode: noEncap, Endpoint network: host network, OS: Linux/Windows
+// The IP changes of the hairpin connections are:
+// - Node A Antrea gateway: Antrea gateway IP -> Service IP
+// - OVS DNAT: Antrea gateway IP -> Endpoint IP
+// - OVS SNAT: virtual IP -> Endpoint IP
+// - Node A Antrea gateway: virtual IP -> Endpoint IP
+// - Node A output: Node A IP -> Endpoint IP (another SNAT for virtual IP, otherwise reply packets can't be routed back).
+// - Node B: Node A IP -> Endpoint IP
+func testProxyInterNodeHairpinCases(data *TestData, t *testing.T, hostNetwork bool, expectedClientIP, node, clusterIPUrl, nodePortClusterUrl, lbClusterUrl string) {
+	skipIfAntreaIPAMTest(t)
+	currentEncapMode, err := data.GetEncapMode()
+	if err != nil {
+		t.Fatalf("Failed to get encap mode: %v", err)
+	}
+	if !hostNetwork {
+		if testOptions.providerName == "kind" && (currentEncapMode == config.TrafficEncapModeEncap || currentEncapMode == config.TrafficEncapModeHybrid) {
+			t.Skipf("Skipping test because inter-Node Pod traffic is encapsulated when testbed is Kind and traffic mode is encap/hybrid")
+		} else if currentEncapMode == config.TrafficEncapModeEncap {
+			t.Skipf("Skipping test because inter-Node Pod traffic is encapsulated when testbed is not Kind and traffic mode is encap")
+		}
+	}
+
+	t.Run("InterNode/ClusterIP", func(t *testing.T) {
+		clientIP, err := probeClientIPFromNode(node, clusterIPUrl)
+		require.NoError(t, err, "ClusterIP hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+	t.Run("InterNode/NodePort/ExternalTrafficPolicy:Cluster", func(t *testing.T) {
+		skipIfProxyAllDisabled(t, data)
+		if !hostNetwork && currentEncapMode == config.TrafficEncapModeNoEncap {
+			skipIfHasWindowsNodes(t)
+		}
+		clientIP, err := probeClientIPFromNode(node, nodePortClusterUrl)
+		require.NoError(t, err, "NodePort whose externalTrafficPolicy is Cluster hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
+	t.Run("InterNode/LoadBalancer/ExternalTrafficPolicy:Cluster", func(t *testing.T) {
+		skipIfProxyAllDisabled(t, data)
+		if !hostNetwork && currentEncapMode == config.TrafficEncapModeNoEncap {
+			skipIfHasWindowsNodes(t)
+		}
+		clientIP, err := probeClientIPFromNode(node, lbClusterUrl)
+		require.NoError(t, err, "LoadBalancer whose externalTrafficPolicy is Cluster hairpin should be able to be connected")
+		require.Equal(t, expectedClientIP, clientIP)
+	})
 }

 func testProxyEndpointLifeCycleCase(t *testing.T, data *TestData) {
@@ -771,8 +945,8 @@ func testProxyEndpointLifeCycle(ipFamily *corev1.IPFamily, data *TestData, t *te
 		nginxIP = nginxIPs.ipv4.String()
 	}

-	keywords := make(map[int]string)
-	keywords[42] = fmt.Sprintf("nat(dst=%s)", net.JoinHostPort(nginxIP, "80")) // endpointNATTable
+	keywords := make(map[string]string)
+	keywords["EndpointDNAT"] = fmt.Sprintf("nat(dst=%s)", net.JoinHostPort(nginxIP, "80")) // endpointNATTable

 	var groupKeywords []string
 	if *ipFamily == corev1.IPv6Protocol {
@@ -781,8 +955,8 @@ func testProxyEndpointLifeCycle(ipFamily *corev1.IPFamily, data *TestData, t *te
 		groupKeywords = append(groupKeywords, fmt.Sprintf("0x%s->NXM_NX_REG3[]", strings.TrimPrefix(hex.EncodeToString(nginxIPs.ipv4.To4()), "0")))
 	}

-	for tableID, keyword := range keywords {
-		tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%d", tableID)})
+	for tableName, keyword := range keywords {
+		tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%s", tableName)})
 		require.NoError(t, err)
 		require.Contains(t, tableOutput, keyword)
 	}
@@ -798,8 +972,8 @@ func testProxyEndpointLifeCycle(ipFamily *corev1.IPFamily, data *TestData, t *te
 	// Wait for one second to make sure the pipeline is updated.
time.Sleep(time.Second) - for tableID, keyword := range keywords { - tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%d", tableID)}) + for tableName, keyword := range keywords { + tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%s", tableName)}) require.NoError(t, err) require.NotContains(t, tableOutput, keyword) } @@ -880,18 +1054,18 @@ func testProxyServiceLifeCycle(ipFamily *corev1.IPFamily, ingressIPs []string, d } } - table42Format := "nat(dst=%s:80)" + tableEndpointDNATFlowFormat := "nat(dst=%s:80)" if *ipFamily == corev1.IPv6Protocol { - table42Format = "nat(dst=[%s]:80)" + tableEndpointDNATFlowFormat = "nat(dst=[%s]:80)" } expectedFlows := []expectTableFlows{ { - 41, // serviceLBTable + "ServiceLB", // serviceLBTable svcLBflows, }, { - 42, - []string{fmt.Sprintf(table42Format, nginxIP)}, // endpointNATTable + "EndpointDNAT", + []string{fmt.Sprintf(tableEndpointDNATFlowFormat, nginxIP)}, // endpointNATTable }, } @@ -905,7 +1079,7 @@ func testProxyServiceLifeCycle(ipFamily *corev1.IPFamily, ingressIPs []string, d require.NoError(t, err) require.Contains(t, groupOutput, groupKeyword) for _, expectedTable := range expectedFlows { - tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%d", expectedTable.tableID)}) + tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%s", expectedTable.tableName)}) require.NoError(t, err) for _, expectedFlow := range expectedTable.flows { require.Contains(t, tableOutput, expectedFlow) @@ -922,7 +1096,7 @@ func testProxyServiceLifeCycle(ipFamily *corev1.IPFamily, ingressIPs []string, d require.NoError(t, err) require.NotContains(t, groupOutput, groupKeyword) for _, expectedTable := range expectedFlows { - tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%d", expectedTable.tableID)}) + tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%s", expectedTable.tableName)}) require.NoError(t, err) for _, expectedFlow := range expectedTable.flows { require.NotContains(t, tableOutput, expectedFlow) diff --git a/test/e2e/traceflow_test.go b/test/e2e/traceflow_test.go index 66227107b40..d1892c43db3 100644 --- a/test/e2e/traceflow_test.go +++ b/test/e2e/traceflow_test.go @@ -337,10 +337,6 @@ func testTraceflowIntraNode(t *testing.T, data *TestData) { } expectedTTL := int32(64) - if testOptions.enableAntreaIPAM { - // AntreaIPAM will always overwrite dstMAC and decrease TTL - expectedTTL = 63 - } testcases := []testcase{ { name: "intraNodeTraceflowIPv4", @@ -1093,6 +1089,7 @@ func testTraceflowInterNode(t *testing.T, data *TestData) { node1Pods, _, node1CleanupFn := createTestBusyboxPods(t, data, 1, testNamespace, node1) node2Pods, node2IPs, node2CleanupFn := createTestBusyboxPods(t, data, 2, testNamespace, node2) + gatewayIPv4, gatewayIPv6 := nodeGatewayIPs(1) defer node1CleanupFn() defer node2CleanupFn() var dstPodIPv4Str, dstPodIPv6Str string @@ -1468,7 
diff --git a/test/e2e/traceflow_test.go b/test/e2e/traceflow_test.go
index 66227107b40..d1892c43db3 100644
--- a/test/e2e/traceflow_test.go
+++ b/test/e2e/traceflow_test.go
@@ -337,10 +337,6 @@ func testTraceflowIntraNode(t *testing.T, data *TestData) {
 	}
 	expectedTTL := int32(64)
-	if testOptions.enableAntreaIPAM {
-		// AntreaIPAM will always overwrite dstMAC and decrease TTL
-		expectedTTL = 63
-	}
 	testcases := []testcase{
 		{
 			name: "intraNodeTraceflowIPv4",
@@ -1093,6 +1089,7 @@ func testTraceflowInterNode(t *testing.T, data *TestData) {
 	node1Pods, _, node1CleanupFn := createTestBusyboxPods(t, data, 1, testNamespace, node1)
 	node2Pods, node2IPs, node2CleanupFn := createTestBusyboxPods(t, data, 2, testNamespace, node2)
+	gatewayIPv4, gatewayIPv6 := nodeGatewayIPs(1)
 	defer node1CleanupFn()
 	defer node2CleanupFn()
 	var dstPodIPv4Str, dstPodIPv6Str string
@@ -1468,7 +1465,7 @@ func testTraceflowInterNode(t *testing.T, data *TestData) {
 				{
 					Component:       v1alpha1.ComponentLB,
 					Pod:             fmt.Sprintf("%s/%s", testNamespace, nginxPodName),
-					TranslatedSrcIP: "169.254.169.252",
+					TranslatedSrcIP: gatewayIPv4,
 					TranslatedDstIP: nginxIPv4Str,
 					Action:          v1alpha1.ActionForwarded,
 				},
@@ -1848,7 +1845,7 @@ func testTraceflowInterNode(t *testing.T, data *TestData) {
 				{
 					Component:       v1alpha1.ComponentLB,
 					Pod:             fmt.Sprintf("%s/%s", testNamespace, nginxPodName),
-					TranslatedSrcIP: "fc00::aabb:ccdd:eeff",
+					TranslatedSrcIP: gatewayIPv6,
 					TranslatedDstIP: nginxIPv6Str,
 					Action:          v1alpha1.ActionForwarded,
 				},
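Editor's note: the traceflow expectations stop hard-coding the virtual SNAT IPs (169.254.169.252 / fc00::aabb:ccdd:eeff) and instead expect the Node's gateway IP from nodeGatewayIPs(1). A minimal sketch of how a gateway address can be derived, assuming the Antrea convention that antrea-gw0 takes the first address of the Node's Pod CIDR; firstIPInCIDR is a hypothetical helper, not the e2e framework's implementation:

package main

import (
	"fmt"
	"net"
)

// firstIPInCIDR returns the first usable address of a CIDR (network address + 1).
func firstIPInCIDR(cidr string) (net.IP, error) {
	_, ipNet, err := net.ParseCIDR(cidr)
	if err != nil {
		return nil, err
	}
	ip := ipNet.IP.Mask(ipNet.Mask)
	// Increment the address by one, carrying across bytes.
	for i := len(ip) - 1; i >= 0; i-- {
		ip[i]++
		if ip[i] != 0 {
			break
		}
	}
	return ip, nil
}

func main() {
	gw4, _ := firstIPInCIDR("10.10.1.0/24")
	gw6, _ := firstIPInCIDR("fd74:ca9b:172:19::/64")
	fmt.Println(gw4, gw6) // 10.10.1.1 fd74:ca9b:172:19::1
}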
bridge: %v", err)) ofClient.CleanOFTableCache() + ofClient.ResetOFTable() }() ruleID := uint32(100) @@ -308,14 +313,14 @@ func testExternalFlows(t *testing.T, config *testConfig) { t.Errorf("Failed to install OpenFlow entries to allow Pod to communicate to the external addresses: %v", err) } - gwMAC := config.nodeConfig.GatewayConfig.MAC - if config.nodeConfig.NodeIPv4Addr != nil && config.nodeConfig.PodIPv4CIDR != nil { - for _, tableFlow := range expectedExternalFlows(config.nodeConfig.NodeIPv4Addr.IP, config.nodeConfig.PodIPv4CIDR, gwMAC) { + gwMACStr := config.nodeConfig.GatewayConfig.MAC.String() + if config.enableIPv4 { + for _, tableFlow := range expectedExternalFlows("ip", gwMACStr) { ofTestUtils.CheckFlowExists(t, ovsCtlClient, tableFlow.tableName, 0, true, tableFlow.flows) } } - if config.nodeConfig.NodeIPv6Addr != nil && config.nodeConfig.PodIPv6CIDR != nil { - for _, tableFlow := range expectedExternalFlows(config.nodeConfig.NodeIPv6Addr.IP, config.nodeConfig.PodIPv6CIDR, gwMAC) { + if config.enableIPv6 { + for _, tableFlow := range expectedExternalFlows("ipv6", gwMACStr) { ofTestUtils.CheckFlowExists(t, ovsCtlClient, tableFlow.tableName, 0, true, tableFlow.flows) } } @@ -442,7 +447,8 @@ func TestNetworkPolicyFlows(t *testing.T) { err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br)) - _, err = c.Initialize(roundInfo, &config1.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: podIPv6CIDR, GatewayConfig: gwConfig}, &config1.NetworkConfig{TrafficEncapMode: config1.TrafficEncapModeEncap, IPv4Enabled: true, IPv6Enabled: true}) + config := prepareConfiguration(true, true) + _, err = c.Initialize(roundInfo, config.nodeConfig, &config1.NetworkConfig{TrafficEncapMode: config1.TrafficEncapModeEncap, IPv4Enabled: true, IPv6Enabled: true}) require.Nil(t, err, "Failed to initialize OFClient") defer func() { @@ -451,6 +457,7 @@ func TestNetworkPolicyFlows(t *testing.T) { err = ofTestUtils.DeleteOVSBridge(br) assert.Nil(t, err, fmt.Sprintf("Error while deleting OVS bridge: %v", err)) ofClient.CleanOFTableCache() + ofClient.ResetOFTable() }() ruleID := uint32(100) @@ -560,8 +567,9 @@ func TestIPv6ConnectivityFlows(t *testing.T) { err = ofTestUtils.DeleteOVSBridge(br) assert.Nil(t, err, fmt.Sprintf("Error while deleting OVS bridge: %v", err)) ofClient.CleanOFTableCache() + ofClient.ResetOFTable() }() - config := prepareIPv6Configuration() + config := prepareConfiguration(false, true) t.Run("testInitialize", func(t *testing.T) { testInitialize(t, config) }) @@ -601,7 +609,8 @@ func TestProxyServiceFlows(t *testing.T) { err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br)) - _, err = c.Initialize(roundInfo, &config1.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: podIPv6CIDR, GatewayConfig: gwConfig}, &config1.NetworkConfig{TrafficEncapMode: config1.TrafficEncapModeEncap, IPv4Enabled: true}) + config := prepareConfiguration(true, false) + _, err = c.Initialize(roundInfo, config.nodeConfig, &config1.NetworkConfig{TrafficEncapMode: config1.TrafficEncapModeEncap, IPv4Enabled: true}) require.Nil(t, err, "Failed to initialize OFClient") defer func() { @@ -610,6 +619,7 @@ func TestProxyServiceFlows(t *testing.T) { err = ofTestUtils.DeleteOVSBridge(br) assert.Nil(t, err, fmt.Sprintf("Error while deleting OVS bridge: %v", err)) ofClient.CleanOFTableCache() + ofClient.ResetOFTable() }() endpoints := []k8sproxy.Endpoint{ @@ -721,20 +731,20 @@ func expectedProxyServiceGroupAndFlows(gid 
uint32, svc svcConfig, endpointList [ svcFlows := expectTableFlows{tableName: "ServiceLB", flows: []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,%s,reg4=0x10000/0x70000,nw_dst=%s,tp_dst=%d", string(svc.protocol), svc.ip.String(), svc.port), - ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG4[16..18],load:0x1->NXM_NX_REG0[19],load:0x%x->NXM_NX_REG7[],group:%d", serviceLearnReg, gid, gid), + ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG4[16..18],load:0x1->NXM_NX_REG0[9],load:0x%x->NXM_NX_REG7[],group:%d", serviceLearnReg, gid, gid), }, { MatchStr: fmt.Sprintf("priority=190,%s,reg4=0x30000/0x70000,nw_dst=%s,tp_dst=%d", string(svc.protocol), svc.ip.String(), svc.port), - ActStr: fmt.Sprintf("learn(table=SessionAffinity,hard_timeout=%d,priority=200,delete_learned,cookie=0x%x,eth_type=0x800,nw_proto=%d,%s,NXM_OF_IP_DST[],NXM_OF_IP_SRC[],load:NXM_NX_REG3[]->NXM_NX_REG3[],load:NXM_NX_REG4[0..15]->NXM_NX_REG4[0..15],load:0x2->NXM_NX_REG4[16..18],load:0x1->NXM_NX_REG0[19]),load:0x2->NXM_NX_REG4[16..18],goto_table:EndpointDNAT", stickyAge, cookieAllocator.RequestWithObjectID(4, gid).Raw(), nw_proto, learnProtoField), + ActStr: fmt.Sprintf("learn(table=SessionAffinity,hard_timeout=%d,priority=200,delete_learned,cookie=0x%x,eth_type=0x800,nw_proto=%d,%s,NXM_OF_IP_DST[],NXM_OF_IP_SRC[],load:NXM_NX_REG3[]->NXM_NX_REG3[],load:NXM_NX_REG4[0..15]->NXM_NX_REG4[0..15],load:0x2->NXM_NX_REG4[16..18],load:0x1->NXM_NX_REG0[9]),load:0x2->NXM_NX_REG4[16..18],goto_table:EndpointDNAT", stickyAge, cookieAllocator.RequestWithObjectID(cookie.Service, gid).Raw(), nw_proto, learnProtoField), }, }} epDNATFlows := expectTableFlows{tableName: "EndpointDNAT", flows: []*ofTestUtils.ExpectFlow{}} - hairpinFlows := expectTableFlows{tableName: "HairpinSNAT", flows: []*ofTestUtils.ExpectFlow{}} + hairpinFlows := expectTableFlows{tableName: "ServiceMark", flows: []*ofTestUtils.ExpectFlow{}} groupBuckets = make([]string, 0) for _, ep := range endpointList { epIP := ipToHexString(net.ParseIP(ep.IP())) epPort, _ := ep.Port() - bucket := fmt.Sprintf("weight:100,actions=load:%s->NXM_NX_REG3[],load:0x%x->NXM_NX_REG4[0..15],resubmit(,42)", epIP, epPort) + bucket := fmt.Sprintf("weight:100,actions=load:%s->NXM_NX_REG3[],load:0x%x->NXM_NX_REG4[0..15],resubmit(,%d)", epIP, epPort, ofClient.EndpointDNATTable.GetID()) groupBuckets = append(groupBuckets, bucket) unionVal := (0b010 << 16) + uint32(epPort) @@ -745,8 +755,8 @@ func expectedProxyServiceGroupAndFlows(gid uint32, svc svcConfig, endpointList [ if ep.GetIsLocal() { hairpinFlows.flows = append(hairpinFlows.flows, &ofTestUtils.ExpectFlow{ - MatchStr: fmt.Sprintf("priority=200,ip,nw_src=%s,nw_dst=%s", ep.IP(), ep.IP()), - ActStr: "set_field:169.254.169.252->ip_src,load:0x1->NXM_NX_REG0[18],goto_table:Output", + MatchStr: fmt.Sprintf("priority=190,ct_state=+new+trk,ip,nw_src=%s,nw_dst=%s", ep.IP(), ep.IP()), + ActStr: "ct(commit,table=SNATConntrackCommit,zone=65520,exec(load:0x1->NXM_NX_CT_MARK[5],load:0x1->NXM_NX_CT_MARK[6]))", }) } } @@ -955,123 +965,87 @@ func testInstallGatewayFlows(t *testing.T, config *testConfig) { } } -func prepareConfiguration() *testConfig { +func prepareConfiguration(enableIPv4, enableIPv6 bool) *testConfig { podMAC, _ := net.ParseMAC("aa:aa:aa:aa:aa:13") gwMAC, _ := net.ParseMAC("aa:aa:aa:aa:aa:11") peerNodeMAC, _ := net.ParseMAC("aa:aa:aa:aa:ab:00") - nodeIP, nodeSubnet, _ := net.ParseCIDR("10.10.10.1/24") - nodeSubnet.IP = nodeIP - gatewayConfig := &config1.GatewayConfig{ - IPv4: net.ParseIP("192.168.1.1"), - MAC: gwMAC, - } - nodeConfig := 
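Editor's note: the expected flow strings above encode register marks as value/mask pairs, and the refactor moves several marks to new bit positions (e.g. the Service mark moves from REG0[19] to REG0[9], printed as reg0=0x200/0x200). A minimal sketch of the arithmetic that maps a (bit range, value) mark to the form ovs-ofctl prints; regMarkString is a hypothetical helper, not an Antrea API:

package main

import "fmt"

// regMarkString renders REG<reg>[lo..hi]=value the way ovs-ofctl matches it:
// the value shifted into place, over a mask covering exactly bits lo..hi.
func regMarkString(reg int, lo, hi uint, value uint32) string {
	mask := uint32((uint64(1)<<(hi-lo+1))-1) << lo
	return fmt.Sprintf("reg%d=%#x/%#x", reg, value<<lo, mask)
}

func main() {
	fmt.Println(regMarkString(0, 9, 9, 1)) // reg0=0x200/0x200 (REG0[9], Service mark)
	fmt.Println(regMarkString(0, 0, 3, 2)) // reg0=0x2/0xf     (REG0[0..3], source type)
	fmt.Println(regMarkString(0, 4, 7, 2)) // reg0=0x20/0xf0   (REG0[4..7])
}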
&config1.NodeConfig{ - NodeIPv4Addr: nodeSubnet, - GatewayConfig: gatewayConfig, - PodIPv4CIDR: podIPv4CIDR, - } + nodeIPv4, nodeIPv4Subnet, _ := net.ParseCIDR("10.10.10.1/24") + nodeIPv4Subnet.IP = nodeIPv4 + nodeIPv6, nodeIPv6Subnet, _ := net.ParseCIDR("a963:ca9b:172:10::11/64") + nodeIPv6Subnet.IP = nodeIPv6 + _, peerIPv4Subnet, _ := net.ParseCIDR("192.168.2.0/24") + _, peerIPv6Subnet, _ := net.ParseCIDR("fd74:ca9b:172:20::/64") + gatewayConfig := &config1.GatewayConfig{MAC: gwMAC} + nodeConfig := &config1.NodeConfig{GatewayConfig: gatewayConfig} podCfg := &testLocalPodConfig{ name: "container-1", testPortConfig: &testPortConfig{ - ips: []net.IP{net.ParseIP("192.168.1.3")}, mac: podMAC, ofPort: uint32(11), }, } - _, serviceCIDR, _ := net.ParseCIDR("172.16.0.0/16") - _, peerSubnet, _ := net.ParseCIDR("192.168.2.0/24") peerNode := &testPeerConfig{ name: "n2", nodeAddress: net.ParseIP("10.1.1.2"), - subnet: *peerSubnet, - gateway: net.ParseIP("192.168.2.1"), nodeMAC: peerNodeMAC, } - vMAC, _ := net.ParseMAC("aa:bb:cc:dd:ee:ff") - return &testConfig{ - bridge: br, - nodeConfig: nodeConfig, - localPods: []*testLocalPodConfig{podCfg}, - peers: []*testPeerConfig{peerNode}, - serviceCIDR: serviceCIDR, - globalMAC: vMAC, - enableIPv4: true, - enableIPv6: false, - } -} -func prepareIPv6Configuration() *testConfig { - podMAC, _ := net.ParseMAC("aa:aa:aa:aa:aa:13") - nodeIP, nodeSubnet, _ := net.ParseCIDR("a963:ca9b:172:10::11/64") - nodeSubnet.IP = nodeIP - gwMAC, _ := net.ParseMAC("aa:aa:aa:aa:aa:11") - - gatewayConfig := &config1.GatewayConfig{ - IPv6: net.ParseIP("fd74:ca9b:172:19::1"), - MAC: gwMAC, + if enableIPv4 { + gatewayConfig.IPv4 = net.ParseIP("192.168.1.1") + nodeConfig.NodeIPv4Addr = nodeIPv4Subnet + nodeConfig.PodIPv4CIDR = podIPv4CIDR + podCfg.ips = append(podCfg.ips, net.ParseIP("192.168.1.3")) + peerNode.gateway = net.ParseIP("192.168.2.1") + peerNode.subnet = *peerIPv4Subnet } - nodeConfig := &config1.NodeConfig{ - NodeIPv4Addr: nodeSubnet, - GatewayConfig: gatewayConfig, - PodIPv6CIDR: podIPv6CIDR, + if enableIPv6 { + gatewayConfig.IPv6 = net.ParseIP("fd74:ca9b:172:19::1") + nodeConfig.NodeIPv6Addr = nodeIPv6Subnet + nodeConfig.PodIPv6CIDR = podIPv6CIDR + podCfg.ips = append(podCfg.ips, net.ParseIP("fd74:ca9b:172:19::3")) + peerNode.gateway = net.ParseIP("fd74:ca9b:172:20::1") + peerNode.subnet = *peerIPv6Subnet } - podCfg := &testLocalPodConfig{ - name: "container-1", - testPortConfig: &testPortConfig{ - ips: []net.IP{net.ParseIP("fd74:ca9b:172:19::3")}, - mac: podMAC, - ofPort: uint32(11), - }, - } - _, serviceCIDR, _ := net.ParseCIDR("ee74:ca9b:2345:a33::/64") - _, peerSubnet, _ := net.ParseCIDR("fd74:ca9b:172:20::/64") - peerNode := &testPeerConfig{ - name: "n2", - nodeAddress: net.ParseIP("10.1.1.2"), - subnet: *peerSubnet, - gateway: net.ParseIP("fd74:ca9b:172:20::1"), - } vMAC, _ := net.ParseMAC("aa:bb:cc:dd:ee:ff") return &testConfig{ - bridge: br, - nodeConfig: nodeConfig, - localPods: []*testLocalPodConfig{podCfg}, - peers: []*testPeerConfig{peerNode}, - serviceCIDR: serviceCIDR, - globalMAC: vMAC, - enableIPv4: false, - enableIPv6: true, + bridge: br, + nodeConfig: nodeConfig, + localPods: []*testLocalPodConfig{podCfg}, + peers: []*testPeerConfig{peerNode}, + globalMAC: vMAC, + enableIPv4: enableIPv4, + enableIPv6: enableIPv6, } } func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, gwMAC, vMAC net.HardwareAddr, nodeConfig *config1.NodeConfig, connectUplinkToBridge bool) []expectTableFlows { podIPv4 := util.GetIPv4Addr(podIPs) 
isAntreaFlexibleIPAM := connectUplinkToBridge && podIPv4 != nil && !nodeConfig.PodIPv4CIDR.Contains(podIPv4) - actionAntreaFlexibleIPAMMarkString := "" - matchRewriteMACMarkString := ",reg0=0x80000/0x80000" + actionNotAntreaFlexibleIPAMString := "" + matchRewriteMACMarkString := ",reg0=0x200/0x200" if isAntreaFlexibleIPAM { - actionAntreaFlexibleIPAMMarkString = ",load:0x1->NXM_NX_REG4[21]" + actionNotAntreaFlexibleIPAMString = ",load:0x1->NXM_NX_REG4[20],load:0x1->NXM_NX_REG0[9]" matchRewriteMACMarkString = "" } flows := []expectTableFlows{ { - "Classification", + "Classifier", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=190,in_port=%d", podOFPort), - ActStr: fmt.Sprintf("load:0x2->NXM_NX_REG0[0..3]%s,goto_table:SpoofGuard", actionAntreaFlexibleIPAMMarkString), + ActStr: fmt.Sprintf("load:0x3->NXM_NX_REG0[0..3]%s,goto_table:SpoofGuard", actionNotAntreaFlexibleIPAMString), }, }, }, { - "L2Forwarding", + "L2ForwardingCalc", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,dl_dst=%s", podMAC.String()), - ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[16],goto_table:IngressRule", podOFPort), + ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[8],goto_table:IngressSecurityClassifier", podOFPort), }, }, }, @@ -1079,20 +1053,20 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, if isAntreaFlexibleIPAM { flows = append(flows, []expectTableFlows{{ - "Classification", + "Classifier", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=210,in_port=%d,dl_dst=%s", 3, podMAC.String()), - ActStr: fmt.Sprintf("load:0x4->NXM_NX_REG0[0..3],goto_table:ServiceHairpin"), + ActStr: fmt.Sprintf("load:0x4->NXM_NX_REG0[0..3],goto_table:SNATConntrackZone"), }, }, }, { - "Classification", + "Classifier", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=210,in_port=LOCAL,dl_dst=%s", podMAC.String()), - ActStr: fmt.Sprintf("load:0x5->NXM_NX_REG0[0..3],goto_table:ServiceHairpin"), + ActStr: fmt.Sprintf("load:0x5->NXM_NX_REG0[0..3],goto_table:SNATConntrackZone"), }, }, }}...) 
@@ -1101,13 +1075,17 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, for _, podIP := range podIPs { var ipProto, nwSrcField, nwDstField string var nextTableForSpoofguard string + actionNotAntreaFlexibleIPAMString = "" + if !isAntreaFlexibleIPAM { + actionNotAntreaFlexibleIPAMString = fmt.Sprintf("set_field:%s->eth_src,", gwMAC) + } if podIP.To4() != nil { ipProto = "ip" nwSrcField = "nw_src" nwDstField = "nw_dst" flows = append(flows, expectTableFlows{ - "SpoofGuard", + "ARPSpoofGuard", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,arp,in_port=%d,arp_spa=%s,arp_sha=%s", podOFPort, podIP.String(), podMAC.String()), @@ -1115,7 +1093,7 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, }, }, }) - nextTableForSpoofguard = "ServiceHairpin" + nextTableForSpoofguard = "SNATConntrackZone" } else { ipProto = "ipv6" nwSrcField = "ipv6_src" @@ -1137,7 +1115,7 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,%s%s,%s=%s", ipProto, matchRewriteMACMarkString, nwDstField, podIP.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,goto_table:IPTTLDec", gwMAC.String(), podMAC.String()), + ActStr: fmt.Sprintf("%sset_field:%s->eth_dst,goto_table:L3DecTTL", actionNotAntreaFlexibleIPAMString, podMAC.String()), }, }, }, @@ -1150,20 +1128,20 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.HardwareAddr, nodeConfig *config1.NodeConfig, connectUplinkToBridge bool) []expectTableFlows { flows := []expectTableFlows{ { - "Classification", + "Classifier", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,in_port=%d", config1.HostGatewayOFPort), - ActStr: "load:0x1->NXM_NX_REG0[0..3],goto_table:SpoofGuard", + ActStr: "load:0x2->NXM_NX_REG0[0..3],goto_table:SpoofGuard", }, }, }, { - "L2Forwarding", + "L2ForwardingCalc", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,dl_dst=%s", gwMAC.String()), - ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[16],goto_table:IngressMetric", config1.HostGatewayOFPort), + ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[8],goto_table:IngressSecurityClassifier", config1.HostGatewayOFPort), }, }, }, @@ -1180,15 +1158,21 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa "SpoofGuard", []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,arp,in_port=%d,arp_spa=%s,arp_sha=%s", config1.HostGatewayOFPort, gwIP, gwMAC), - ActStr: "goto_table:ARPResponder", + MatchStr: fmt.Sprintf("priority=200,ip,in_port=%d", config1.HostGatewayOFPort), + ActStr: "goto_table:SNATConntrackZone", }, + }, + }, + expectTableFlows{ + "ARPSpoofGuard", + []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,ip,in_port=%d", config1.HostGatewayOFPort), - ActStr: "goto_table:ServiceHairpin", + MatchStr: fmt.Sprintf("priority=200,arp,in_port=%d,arp_spa=%s,arp_sha=%s", config1.HostGatewayOFPort, gwIP, gwMAC), + ActStr: "goto_table:ARPResponder", }, }, - }) + }, + ) if connectUplinkToBridge { flows[len(flows)-1].flows = append(flows[len(flows)-1].flows, &ofTestUtils.ExpectFlow{ MatchStr: fmt.Sprintf("priority=200,arp,in_port=%d,arp_spa=%s,arp_sha=%s", config1.HostGatewayOFPort, nodeConfig.NodeIPv4Addr.IP.String(), gwMAC), @@ -1202,16 +1186,7 @@ func prepareGatewayFlows(gwIPs 
[]net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa } flows = append(flows, expectTableFlows{ - "L3Forwarding", - []*ofTestUtils.ExpectFlow{ - { - MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x80000/0x80000,%s=%s", ipProtoStr, nwDstStr, gwIP.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:L2Forwarding", gwMAC.String()), - }, - }, - }, - expectTableFlows{ - tableName: "IngressRule", + tableName: "IngressSecurityClassifier", flows: []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=210,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()), @@ -1223,12 +1198,12 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa "L3Forwarding", []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=210,ct_state=+rpl+trk,ct_mark=0x1/0xf,%s,reg0=0x2/0xf", ipProtoStr), - ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:L2Forwarding", gwMAC.String()), + MatchStr: fmt.Sprintf("priority=210,ct_state=+rpl+trk,ct_mark=0x2/0xf,%s", ipProtoStr), + ActStr: fmt.Sprintf("set_field:%s->eth_dst,load:0x2->NXM_NX_REG0[4..7],goto_table:L3DecTTL", gwMAC.String()), }, { - MatchStr: fmt.Sprintf("priority=210,ct_state=+rpl+trk,ct_mark=0x1/0xf,%s,reg0=0/0xf", ipProtoStr), - ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:L2Forwarding", gwMAC.String()), + MatchStr: fmt.Sprintf("priority=210,%s,%s=%s", ipProtoStr, nwDstStr, gwIP.String()), + ActStr: fmt.Sprintf("set_field:%s->eth_dst,load:0x2->NXM_NX_REG0[4..7],goto_table:L3DecTTL", gwMAC.String()), }, }, }, @@ -1241,20 +1216,20 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa func prepareTunnelFlows(tunnelPort uint32, vMAC net.HardwareAddr) []expectTableFlows { return []expectTableFlows{ { - "Classification", + "Classifier", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,in_port=%d", tunnelPort), - ActStr: "load:0->NXM_NX_REG0[0..3],load:0x1->NXM_NX_REG0[19],goto_table:ConntrackZone", + ActStr: "load:0x1->NXM_NX_REG0[0..3],load:0x1->NXM_NX_REG0[9],goto_table:SNATConntrackZone", }, }, }, { - "L2Forwarding", + "L2ForwardingCalc", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,dl_dst=%s", vMAC.String()), - ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[16],goto_table:IngressMetric", config1.DefaultTunOFPort), + ActStr: fmt.Sprintf("load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[8],goto_table:IngressSecurityClassifier", config1.DefaultTunOFPort), }, }, }, @@ -1285,7 +1260,7 @@ func prepareNodeFlows(peerSubnet net.IPNet, peerGwIP, peerNodeIP net.IP, vMAC, l []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,%s,%s=%s", ipProtoStr, nwDstFieldName, peerSubnet.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,set_field:%s->tun_dst,goto_table:IPTTLDec", localGwMAC.String(), vMAC.String(), peerNodeIP.String()), + ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,set_field:%s->tun_dst,load:0x1->NXM_NX_REG0[4..7],goto_table:L3DecTTL", localGwMAC.String(), vMAC.String(), peerNodeIP.String()), }, }, }) @@ -1294,8 +1269,8 @@ func prepareNodeFlows(peerSubnet net.IPNet, peerGwIP, peerNodeIP net.IP, vMAC, l "L3Forwarding", []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=210,%s,reg4=0x200000/0x200000,%s=%s", ipProtoStr, nwDstFieldName, peerSubnet.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:L2Forwarding", peerNodeMAC.String()), + MatchStr: fmt.Sprintf("priority=200,%s,reg4=0x100000/0x100000,%s=%s", ipProtoStr, nwDstFieldName, peerSubnet.String()), + 
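Editor's note: prepareConfiguration(enableIPv4, enableIPv6) above collapses two near-duplicate fixture builders into one parameterized constructor, so dual-stack is just both flags set. A minimal sketch of the same pattern in isolation; the fixture type and addresses are simplified stand-ins for the test's config structs:

package main

import (
	"fmt"
	"net"
)

type fixture struct {
	gatewayIPv4, gatewayIPv6 net.IP
	podIPs                   []net.IP
	enableIPv4, enableIPv6   bool
}

// newFixture adds each address family's data only when that family is enabled,
// so newFixture(true, true) yields a dual-stack fixture with no duplication.
func newFixture(enableIPv4, enableIPv6 bool) *fixture {
	f := &fixture{enableIPv4: enableIPv4, enableIPv6: enableIPv6}
	if enableIPv4 {
		f.gatewayIPv4 = net.ParseIP("192.168.1.1")
		f.podIPs = append(f.podIPs, net.ParseIP("192.168.1.3"))
	}
	if enableIPv6 {
		f.gatewayIPv6 = net.ParseIP("fd74:ca9b:172:19::1")
		f.podIPs = append(f.podIPs, net.ParseIP("fd74:ca9b:172:19::3"))
	}
	return f
}

func main() {
	fmt.Println(newFixture(true, false).podIPs) // [192.168.1.3]
	fmt.Println(newFixture(true, true).podIPs)  // [192.168.1.3 fd74:ca9b:172:19::3]
}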
ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:L3DecTTL", peerNodeMAC.String()), }, }, }) @@ -1319,80 +1294,160 @@ func prepareServiceHelperFlows() []expectTableFlows { } func prepareDefaultFlows(config *testConfig) []expectTableFlows { - table20Flows := expectTableFlows{ + tableARPResponderFlows := expectTableFlows{ tableName: "ARPResponder", - flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "drop"}}, } - table31Flows := expectTableFlows{ + tableConntrackStateFlows := expectTableFlows{ tableName: "ConntrackState", - flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "resubmit(,SessionAffinity),resubmit(,ServiceLB)"}}, + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:PreRoutingClassifier"}}, } - table105Flows := expectTableFlows{ + tableConntrackCommitFlows := expectTableFlows{ tableName: "ConntrackCommit", - flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:HairpinSNAT"}}, + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:Output"}}, + } + tableSNATConntrackCommitFlows := expectTableFlows{ + tableName: "SNATConntrackCommit", + } + tableL3ForwardingFlows := expectTableFlows{ + "L3Forwarding", + []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "load:0x2->NXM_NX_REG0[4..7],goto_table:L2ForwardingCalc"}}, + } + tableL3DecTTLFlows := expectTableFlows{ + tableName: "L3DecTTL", + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:ServiceMark"}}, } - table72Flows := expectTableFlows{ - tableName: "IPTTLDec", - flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:L2Forwarding"}}, + tableSNATConntrackZoneFlows := expectTableFlows{ + tableName: "SNATConntrackZone", + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:ConntrackZone"}}, } - table30Flows := expectTableFlows{ + tableConntrackZoneFlows := expectTableFlows{ tableName: "ConntrackZone", + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:ConntrackState"}}, + } + tableServiceMarkFlows := expectTableFlows{ + tableName: "ServiceMark", + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:SNATConntrackCommit"}}, } if config.enableIPv4 { - table30Flows.flows = append(table30Flows.flows, + tableARPResponderFlows.flows = append(tableARPResponderFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=190,arp", ActStr: "NORMAL"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=0", ActStr: "drop"}, + ) + tableSNATConntrackZoneFlows.flows = append(tableSNATConntrackZoneFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ip", ActStr: "ct(table=ConntrackZone,zone=65521,nat)"}, + ) + tableConntrackZoneFlows.flows = append(tableConntrackZoneFlows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ip", ActStr: "ct(table=ConntrackState,zone=65520,nat)"}, ) - table31Flows.flows = append(table31Flows.flows, + tableConntrackStateFlows.flows = append(tableConntrackStateFlows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+inv+trk,ip", ActStr: "drop"}, ) - table105Flows.flows = append(table105Flows.flows, - &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ip", ActStr: "ct(commit,table=HairpinSNAT,zone=65520,exec(move:NXM_NX_REG0[0..3]->NXM_NX_CT_MARK[0..3]))"}, + tableConntrackCommitFlows.flows = append(tableConntrackCommitFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ct_mark=0x10/0x10,ip", ActStr: "goto_table:Output"}, + 
&ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ip", ActStr: "ct(commit,table=Output,zone=65520,exec(move:NXM_NX_REG0[0..3]->NXM_NX_CT_MARK[0..3]))"}, + ) + tableSNATConntrackCommitFlows.flows = append(tableSNATConntrackCommitFlows.flows, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=200,ct_state=+new+trk,ct_mark=0x40/0x40,ip,reg0=0x2/0xf", + ActStr: fmt.Sprintf("ct(commit,table=L2ForwardingCalc,zone=65521,nat(src=%s),exec(load:0x1->NXM_NX_CT_MARK[4],load:0x1->NXM_NX_CT_MARK[6]))", config1.VirtualServiceIPv4), + }, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=200,ct_state=+new+trk,ct_mark=0x40/0x40,ip,reg0=0x3/0xf", + ActStr: fmt.Sprintf("ct(commit,table=L2ForwardingCalc,zone=65521,nat(src=%s),exec(load:0x1->NXM_NX_CT_MARK[4],load:0x1->NXM_NX_CT_MARK[6]))", config.nodeConfig.GatewayConfig.IPv4), + }, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=190,ct_state=+new+trk,ct_mark=0x20/0x20,ip,reg0=0x2/0xf", + ActStr: fmt.Sprintf("ct(commit,table=L2ForwardingCalc,zone=65521,nat(src=%s),exec(load:0x1->NXM_NX_CT_MARK[4]))", config.nodeConfig.GatewayConfig.IPv4), + }, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=200,ct_state=-new-rpl+trk,ct_mark=0x20/0x20,ip", + ActStr: "ct(table=L2ForwardingCalc,zone=65521,nat)", + }, + ) + podCIDR := config.nodeConfig.PodIPv4CIDR.String() + tableL3ForwardingFlows.flows = append(tableL3ForwardingFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: fmt.Sprintf("priority=190,ip,reg0=0/0x200,nw_dst=%s", podCIDR), ActStr: "goto_table:L2ForwardingCalc"}, ) - table72Flows.flows = append(table72Flows.flows, - &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ip,reg0=0x1/0xf", ActStr: "goto_table:L2Forwarding"}, - &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ip", ActStr: "dec_ttl,goto_table:L2Forwarding"}, + tableServiceMarkFlows.flows = append(tableServiceMarkFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ip,reg0=0x22/0xff", ActStr: "ct(commit,table=SNATConntrackCommit,zone=65520,exec(load:0x1->NXM_NX_CT_MARK[5],load:0x1->NXM_NX_CT_MARK[6]))"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ip,reg0=0x2/0xf,reg4=0x200000/0x200000", ActStr: "ct(commit,table=SNATConntrackCommit,zone=65520,exec(load:0x1->NXM_NX_CT_MARK[5]))"}, + ) + tableL3DecTTLFlows.flows = append(tableL3DecTTLFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ip,reg0=0x2/0xf", ActStr: "goto_table:ServiceMark"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ip", ActStr: "dec_ttl,goto_table:ServiceMark"}, ) } if config.enableIPv6 { - table30Flows.flows = append(table30Flows.flows, + tableSNATConntrackZoneFlows.flows = append(tableSNATConntrackZoneFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ipv6", ActStr: "ct(table=ConntrackZone,zone=65511,nat)"}, + ) + tableConntrackZoneFlows.flows = append(tableConntrackZoneFlows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ipv6", ActStr: "ct(table=ConntrackState,zone=65510,nat)"}, ) - table31Flows.flows = append(table31Flows.flows, + tableConntrackStateFlows.flows = append(tableConntrackStateFlows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+inv+trk,ipv6", ActStr: "drop"}, ) - table105Flows.flows = append(table105Flows.flows, - &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ipv6", ActStr: "ct(commit,table=HairpinSNAT,zone=65510,exec(move:NXM_NX_REG0[0..3]->NXM_NX_CT_MARK[0..3]))"}, + tableConntrackCommitFlows.flows = append(tableConntrackCommitFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: 
"priority=210,ct_mark=0x10/0x10,ipv6", ActStr: "goto_table:Output"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ipv6", ActStr: "ct(commit,table=Output,zone=65510,exec(move:NXM_NX_REG0[0..3]->NXM_NX_CT_MARK[0..3]))"}, + ) + tableSNATConntrackCommitFlows.flows = append(tableSNATConntrackCommitFlows.flows, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=200,ct_state=+new+trk,ct_mark=0x40/0x40,ipv6,reg0=0x2/0xf", + ActStr: fmt.Sprintf("ct(commit,table=L2ForwardingCalc,zone=65511,nat(src=%s),exec(load:0x1->NXM_NX_CT_MARK[4],load:0x1->NXM_NX_CT_MARK[6]))", config1.VirtualServiceIPv6), + }, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=200,ct_state=+new+trk,ct_mark=0x40/0x40,ipv6,reg0=0x3/0xf", + ActStr: fmt.Sprintf("ct(commit,table=L2ForwardingCalc,zone=65511,nat(src=%s),exec(load:0x1->NXM_NX_CT_MARK[4],load:0x1->NXM_NX_CT_MARK[6]))", config.nodeConfig.GatewayConfig.IPv6), + }, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=190,ct_state=+new+trk,ct_mark=0x20/0x20,ipv6,reg0=0x2/0xf", + ActStr: fmt.Sprintf("ct(commit,table=L2ForwardingCalc,zone=65511,nat(src=%s),exec(load:0x1->NXM_NX_CT_MARK[4]))", config.nodeConfig.GatewayConfig.IPv6), + }, + &ofTestUtils.ExpectFlow{ + MatchStr: "priority=200,ct_state=-new-rpl+trk,ct_mark=0x20/0x20,ipv6", + ActStr: "ct(table=L2ForwardingCalc,zone=65511,nat)", + }, + ) + podCIDR := config.nodeConfig.PodIPv6CIDR.String() + tableL3ForwardingFlows.flows = append(tableL3ForwardingFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: fmt.Sprintf("priority=190,ipv6,reg0=0/0x200,ipv6_dst=%s", podCIDR), ActStr: "goto_table:L2ForwardingCalc"}, + ) + tableServiceMarkFlows.flows = append(tableServiceMarkFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ipv6,reg0=0x22/0xff", ActStr: "ct(commit,table=SNATConntrackCommit,zone=65510,exec(load:0x1->NXM_NX_CT_MARK[5],load:0x1->NXM_NX_CT_MARK[6]))"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ipv6,reg0=0x2/0xf,reg4=0x200000/0x200000", ActStr: "ct(commit,table=SNATConntrackCommit,zone=65510,exec(load:0x1->NXM_NX_CT_MARK[5]))"}, ) - table72Flows.flows = append(table72Flows.flows, - &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ipv6,reg0=0x1/0xf", ActStr: "goto_table:L2Forwarding"}, - &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ipv6", ActStr: "dec_ttl,goto_table:L2Forwarding"}, + tableL3DecTTLFlows.flows = append(tableL3DecTTLFlows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ipv6,reg0=0x2/0xf", ActStr: "goto_table:ServiceMark"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ipv6", ActStr: "dec_ttl,goto_table:ServiceMark"}, ) } - if config.connectUplinkToBridge { - table20Flows.flows = append(table20Flows.flows, + if config.enableIPv4 && config.connectUplinkToBridge { + tableARPResponderFlows.flows = append(tableARPResponderFlows.flows, &ofTestUtils.ExpectFlow{ MatchStr: fmt.Sprintf("priority=200,arp,arp_tpa=%s,arp_op=1", config.nodeConfig.GatewayConfig.IPv4.String()), ActStr: fmt.Sprintf("move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],set_field:%s->eth_src,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],set_field:%s->arp_sha,move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],set_field:%s->arp_spa,IN_PORT", config.nodeConfig.GatewayConfig.MAC.String(), config.nodeConfig.GatewayConfig.MAC.String(), config.nodeConfig.GatewayConfig.IPv4.String()), }, ) } - return []expectTableFlows{ - table20Flows, table30Flows, table31Flows, table105Flows, table72Flows, + + tableFlows := []expectTableFlows{ + tableConntrackZoneFlows, + 
tableConntrackStateFlows, + tableConntrackCommitFlows, + tableSNATConntrackCommitFlows, + tableL3ForwardingFlows, + tableL3DecTTLFlows, + tableSNATConntrackZoneFlows, + tableServiceMarkFlows, { - "Classification", + "Classifier", []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "drop"}}, }, { "SpoofGuard", []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "drop"}}, }, - { - "ARPResponder", - []*ofTestUtils.ExpectFlow{ - {MatchStr: "priority=190,arp", ActStr: "NORMAL"}, - {MatchStr: "priority=0", ActStr: "drop"}, - }, - }, { "EndpointDNAT", []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:EgressRule"}}, @@ -1410,12 +1465,16 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:L3Forwarding"}}, }, { - "L3Forwarding", - []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:L2Forwarding"}}, + "L2ForwardingCalc", + []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:IngressSecurityClassifier"}}, }, { - "L2Forwarding", - []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:IngressMetric"}}, + "IngressSecurityClassifier", + []*ofTestUtils.ExpectFlow{ + {MatchStr: "priority=0", ActStr: "goto_table:IngressRule"}, + {MatchStr: "priority=200,reg0=0x20/0xf0", ActStr: "goto_table:IngressMetric"}, + {MatchStr: "priority=200,reg0=0x10/0xf0", ActStr: "goto_table:IngressMetric"}, + }, }, { "IngressRule", @@ -1432,10 +1491,14 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { { "Output", []*ofTestUtils.ExpectFlow{ - {MatchStr: "priority=200,ip,reg0=0x10000/0x10000", ActStr: "output:NXM_NX_REG1[]"}, + {MatchStr: "priority=200,reg0=0x100/0x100", ActStr: "output:NXM_NX_REG1[]"}, }, }, } + if config.enableIPv4 { + tableFlows = append(tableFlows, tableARPResponderFlows) + } + return tableFlows } func prepareIPAddresses(addresses []string) []types.Address { @@ -1456,55 +1519,38 @@ func prepareIPNetAddresses(addresses []string) []types.Address { return ipAddresses } -func expectedExternalFlows(nodeIP net.IP, localSubnet *net.IPNet, gwMAC net.HardwareAddr) []expectTableFlows { - var ipProtoStr, nwDstFieldName string - if localSubnet.IP.To4() != nil { - ipProtoStr = "ip" - nwDstFieldName = "nw_dst" - } else { - ipProtoStr = "ipv6" - nwDstFieldName = "ipv6_dst" - } +func expectedExternalFlows(ipProtoStr, gwMACStr string) []expectTableFlows { return []expectTableFlows{ { - // snatCommonFlows() "L3Forwarding", []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,%s,reg0=0/0x80000,%s=%s", ipProtoStr, nwDstFieldName, localSubnet.String()), - ActStr: "goto_table:L2Forwarding", - }, - { - MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x2/0xf,%s=%s", ipProtoStr, nwDstFieldName, nodeIP.String()), - ActStr: "goto_table:L2Forwarding", + MatchStr: fmt.Sprintf("priority=190,ct_state=-rpl+trk,%s,reg0=0x3/0xf", ipProtoStr), + ActStr: "goto_table:EgressMark", }, { - MatchStr: fmt.Sprintf("priority=190,ct_state=-rpl+trk,%s,reg0=0x2/0xf", ipProtoStr), - ActStr: "goto_table:SNAT", - }, - { - MatchStr: fmt.Sprintf("priority=190,ct_state=-rpl+trk,%s,reg0=0/0xf", ipProtoStr), - ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:SNAT", gwMAC.String()), + MatchStr: fmt.Sprintf("priority=190,ct_state=-rpl+trk,%s,reg0=0x1/0xf", ipProtoStr), + ActStr: fmt.Sprintf("set_field:%s->eth_dst,goto_table:EgressMark", gwMACStr), }, }, }, { - "SNAT", + "EgressMark", []*ofTestUtils.ExpectFlow{ { - MatchStr: 
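Editor's note: prepareDefaultFlows builds per-table expectations that are later checked against live flow dumps. A minimal sketch of that table-driven verification pattern; ExpectFlow/expectTableFlows mirror the test helpers in this file, while checkFlows is a hypothetical stand-in for ofTestUtils.CheckFlowExists:

package main

import (
	"fmt"
	"strings"
)

type ExpectFlow struct {
	MatchStr string
	ActStr   string
}

type expectTableFlows struct {
	tableName string
	flows     []*ExpectFlow
}

// checkFlows scans one table's "ovs-ofctl dump-flows" output and reports
// whether every expected match/action pair appears on some line.
func checkFlows(dump string, expected expectTableFlows) bool {
	for _, f := range expected.flows {
		found := false
		for _, line := range strings.Split(dump, "\n") {
			if strings.Contains(line, f.MatchStr) && strings.Contains(line, f.ActStr) {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

func main() {
	dump := " cookie=0x1, table=L3DecTTL, priority=200,ip actions=dec_ttl,goto_table:ServiceMark"
	exp := expectTableFlows{
		tableName: "L3DecTTL",
		flows:     []*ExpectFlow{{MatchStr: "priority=200,ip", ActStr: "dec_ttl,goto_table:ServiceMark"}},
	}
	fmt.Println(checkFlows(dump, exp)) // true
}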
fmt.Sprintf("priority=190,ct_state=+new+trk,%s,reg0=0/0xf", ipProtoStr), + MatchStr: fmt.Sprintf("priority=190,ct_state=+new+trk,%s,reg0=0x1/0xf", ipProtoStr), ActStr: "drop", }, { MatchStr: "priority=0", - ActStr: "goto_table:L2Forwarding", + ActStr: "load:0x2->NXM_NX_REG0[4..7],goto_table:L2ForwardingCalc", }, }, }, } } -func prepareSNATFlows(snatIP net.IP, mark, podOFPort, podOFPortRemote uint32, vMAC, localGwMAC net.HardwareAddr) []expectTableFlows { +func prepareEgressMarkFlows(snatIP net.IP, mark, podOFPort, podOFPortRemote uint32, vMAC, localGwMAC net.HardwareAddr) []expectTableFlows { var ipProtoStr, tunDstFieldName string if snatIP.To4() != nil { tunDstFieldName = "tun_dst" @@ -1515,26 +1561,26 @@ func prepareSNATFlows(snatIP net.IP, mark, podOFPort, podOFPortRemote uint32, vM } return []expectTableFlows{ { - "SNAT", + "EgressMark", []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,ct_state=+new+trk,%s,%s=%s", ipProtoStr, tunDstFieldName, snatIP), - ActStr: fmt.Sprintf("load:0x%x->NXM_NX_PKT_MARK[0..7],goto_table:IPTTLDec", mark), + ActStr: fmt.Sprintf("load:0x%x->NXM_NX_PKT_MARK[0..7],load:0x2->NXM_NX_REG0[4..7],goto_table:L2ForwardingCalc", mark), }, { MatchStr: fmt.Sprintf("priority=200,ct_state=+new+trk,%s,in_port=%d", ipProtoStr, podOFPort), - ActStr: fmt.Sprintf("load:0x%x->NXM_NX_PKT_MARK[0..7],goto_table:L2Forwarding", mark), + ActStr: fmt.Sprintf("load:0x%x->NXM_NX_PKT_MARK[0..7],load:0x2->NXM_NX_REG0[4..7],goto_table:L2ForwardingCalc", mark), }, { MatchStr: fmt.Sprintf("priority=200,%s,in_port=%d", ipProtoStr, podOFPortRemote), - ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,set_field:%s->%s,goto_table:IPTTLDec", localGwMAC.String(), vMAC.String(), snatIP, tunDstFieldName), + ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,set_field:%s->%s,load:0x1->NXM_NX_REG0[4..7],goto_table:L2ForwardingCalc", localGwMAC.String(), vMAC.String(), snatIP, tunDstFieldName), }, }, }, } } -func TestSNATFlows(t *testing.T) { +func TestEgressMarkFlows(t *testing.T) { // Reset OVS metrics (Prometheus) and reinitialize them to test. legacyregistry.Reset() metrics.InitializeOVSMetrics() @@ -1543,8 +1589,8 @@ func TestSNATFlows(t *testing.T) { err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br)) - config := prepareConfiguration() - _, err = c.Initialize(roundInfo, config.nodeConfig, &config1.NetworkConfig{TrafficEncapMode: config1.TrafficEncapModeEncap, IPv4Enabled: true, IPv6Enabled: true}) + config := prepareConfiguration(true, true) + _, err = c.Initialize(roundInfo, config.nodeConfig, &config1.NetworkConfig{TrafficEncapMode: config1.TrafficEncapModeEncap}) require.Nil(t, err, "Failed to initialize OFClient") defer func() { @@ -1553,6 +1599,7 @@ func TestSNATFlows(t *testing.T) { err = ofTestUtils.DeleteOVSBridge(br) assert.Nil(t, err, fmt.Sprintf("Error while deleting OVS bridge: %v", err)) ofClient.CleanOFTableCache() + ofClient.ResetOFTable() }() snatIP := net.ParseIP("10.10.10.14") @@ -1566,8 +1613,8 @@ func TestSNATFlows(t *testing.T) { vMAC := config.globalMAC gwMAC := config.nodeConfig.GatewayConfig.MAC - expectedFlows := append(prepareSNATFlows(snatIP, snatMark, podOFPort, podOFPortRemote, vMAC, gwMAC), - prepareSNATFlows(snatIPV6, snatMarkV6, podOFPortV6, podOFPortRemoteV6, vMAC, gwMAC)...) 
+ expectedFlows := append(prepareEgressMarkFlows(snatIP, snatMark, podOFPort, podOFPortRemote, vMAC, gwMAC), + prepareEgressMarkFlows(snatIPV6, snatMarkV6, podOFPortV6, podOFPortRemoteV6, vMAC, gwMAC)...) c.InstallSNATMarkFlows(snatIP, snatMark) c.InstallSNATMarkFlows(snatIPV6, snatMarkV6) diff --git a/test/integration/ovs/ofctrl_test.go b/test/integration/ovs/ofctrl_test.go index d6ba04c71c7..9e2b127071c 100644 --- a/test/integration/ovs/ofctrl_test.go +++ b/test/integration/ovs/ofctrl_test.go @@ -68,12 +68,11 @@ var ( vMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff") ipDSCP = uint8(10) - - t0 = binding.NewOFTable(0, "t0") - t1 = binding.NewOFTable(1, "t1") - t2 = binding.NewOFTable(2, "t2") - t3 = binding.NewOFTable(3, "t3") - t4 = binding.NewOFTable(4, "t4") + t0 = binding.NewOFTable(0, "t0", 0, 0, 0) + t1 = binding.NewOFTable(1, "t1", 0, 0, 0) + t2 = binding.NewOFTable(2, "t2", 0, 0, 0) + t3 = binding.NewOFTable(3, "t3", 0, 0, 0) + t4 = binding.NewOFTable(4, "t4", 0, 0, 0) ) func newOFBridge(brName string) binding.Bridge { @@ -129,7 +128,7 @@ func prepareOverlapFlows(table binding.Table, ipStr string, sameCookie bool) ([] table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). Cookie(cookie2). MatchSrcIP(srcIP). - Action().GotoTable(table.GetNext()). + Action().NextTable(). Done(), } expectFlows := []*ExpectFlow{ @@ -447,7 +446,7 @@ func TestBundleErrorWhenOVSRestart(t *testing.T) { flows := []binding.Flow{table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). Cookie(getCookieID()). MatchInPort(uint32(count + 1)). - Action().GotoTable(table.GetNext()). + Action().NextTable(). Done()} err = bridge.AddFlowsInBundle(flows, nil, nil) if err != nil { @@ -649,7 +648,7 @@ func TestPacketOutIn(t *testing.T) { Action().LoadToRegField(reg2Field, reg2Data). Action().LoadToRegField(reg3Field, reg3Data). Action().SetTunnelDst(tunDst). - Action().ResubmitToTable(table0.GetNext()). + Action().ResubmitToTables(table0.GetNext()). Done() flow1 := table1.BuildFlow(100). MatchSrcMAC(srcMAC).MatchDstMAC(dstcMAC). @@ -686,14 +685,14 @@ func TestTLVMap(t *testing.T) { time.Sleep(1 * time.Second) flow1 := table.BuildFlow(100). MatchProtocol(binding.ProtocolIP).MatchTunMetadata(0, 0x1234). - Action().ResubmitToTable(table.GetNext()). + Action().NextTable(). Done() err = bridge.AddFlowsInBundle([]binding.Flow{flow1}, nil, nil) require.Nil(t, err) expectedFlows := []*ExpectFlow{ { MatchStr: "priority=100,ip,tun_metadata0=0x1234", - ActStr: fmt.Sprintf("resubmit(,%d)", table.GetNext()), + ActStr: fmt.Sprintf("goto_table:%d", table.GetNext()), }, } ovsCtlClient := ovsctl.NewClient(br) @@ -719,14 +718,14 @@ func TestMoveTunMetadata(t *testing.T) { flow1 := table.BuildFlow(100). MatchProtocol(binding.ProtocolIP).MatchTunMetadata(0, 0x1234). Action().MoveRange("NXM_NX_TUN_METADATA0", "NXM_NX_REG0", binding.Range{28, 31}, binding.Range{28, 31}). - Action().ResubmitToTable(table.GetNext()). + Action().NextTable(). Done() err = bridge.AddFlowsInBundle([]binding.Flow{flow1}, nil, nil) require.Nil(t, err) expectedFlows := []*ExpectFlow{ { MatchStr: "priority=100,ip,tun_metadata0=0x1234", - ActStr: fmt.Sprintf("move:NXM_NX_TUN_METADATA0[28..31]->NXM_NX_REG0[28..31],resubmit(,%d)", table.GetNext()), + ActStr: fmt.Sprintf("move:NXM_NX_TUN_METADATA0[28..31]->NXM_NX_REG0[28..31],goto_table:%d", table.GetNext()), }, } ovsCtlClient := ovsctl.NewClient(br) @@ -760,7 +759,7 @@ func TestFlowWithCTMatchers(t *testing.T) { MatchCTSrcPort(ctPortSrc). MatchCTDstPort(ctPortDst). MatchCTProtocol(binding.ProtocolTCP). 
diff --git a/test/integration/ovs/ofctrl_test.go b/test/integration/ovs/ofctrl_test.go
index d6ba04c71c7..9e2b127071c 100644
--- a/test/integration/ovs/ofctrl_test.go
+++ b/test/integration/ovs/ofctrl_test.go
@@ -68,12 +68,11 @@ var (
 	vMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff")
 	ipDSCP = uint8(10)
-
-	t0 = binding.NewOFTable(0, "t0")
-	t1 = binding.NewOFTable(1, "t1")
-	t2 = binding.NewOFTable(2, "t2")
-	t3 = binding.NewOFTable(3, "t3")
-	t4 = binding.NewOFTable(4, "t4")
+	t0 = binding.NewOFTable(0, "t0", 0, 0, 0)
+	t1 = binding.NewOFTable(1, "t1", 0, 0, 0)
+	t2 = binding.NewOFTable(2, "t2", 0, 0, 0)
+	t3 = binding.NewOFTable(3, "t3", 0, 0, 0)
+	t4 = binding.NewOFTable(4, "t4", 0, 0, 0)
 )
 
 func newOFBridge(brName string) binding.Bridge {
@@ -129,7 +128,7 @@ func prepareOverlapFlows(table binding.Table, ipStr string, sameCookie bool) ([]
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(cookie2).
 			MatchSrcIP(srcIP).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 	}
 	expectFlows := []*ExpectFlow{
@@ -447,7 +446,7 @@ func TestBundleErrorWhenOVSRestart(t *testing.T) {
 			flows := []binding.Flow{table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 				Cookie(getCookieID()).
 				MatchInPort(uint32(count + 1)).
-				Action().GotoTable(table.GetNext()).
+				Action().NextTable().
 				Done()}
 			err = bridge.AddFlowsInBundle(flows, nil, nil)
 			if err != nil {
@@ -649,7 +648,7 @@ func TestPacketOutIn(t *testing.T) {
 		Action().LoadToRegField(reg2Field, reg2Data).
 		Action().LoadToRegField(reg3Field, reg3Data).
 		Action().SetTunnelDst(tunDst).
-		Action().ResubmitToTable(table0.GetNext()).
+		Action().ResubmitToTables(table0.GetNext()).
 		Done()
 	flow1 := table1.BuildFlow(100).
 		MatchSrcMAC(srcMAC).MatchDstMAC(dstcMAC).
@@ -686,14 +685,14 @@ func TestTLVMap(t *testing.T) {
 	time.Sleep(1 * time.Second)
 	flow1 := table.BuildFlow(100).
 		MatchProtocol(binding.ProtocolIP).MatchTunMetadata(0, 0x1234).
-		Action().ResubmitToTable(table.GetNext()).
+		Action().NextTable().
 		Done()
 	err = bridge.AddFlowsInBundle([]binding.Flow{flow1}, nil, nil)
 	require.Nil(t, err)
 	expectedFlows := []*ExpectFlow{
 		{
 			MatchStr: "priority=100,ip,tun_metadata0=0x1234",
-			ActStr:   fmt.Sprintf("resubmit(,%d)", table.GetNext()),
+			ActStr:   fmt.Sprintf("goto_table:%d", table.GetNext()),
 		},
 	}
 	ovsCtlClient := ovsctl.NewClient(br)
@@ -719,14 +718,14 @@ func TestMoveTunMetadata(t *testing.T) {
 	flow1 := table.BuildFlow(100).
 		MatchProtocol(binding.ProtocolIP).MatchTunMetadata(0, 0x1234).
 		Action().MoveRange("NXM_NX_TUN_METADATA0", "NXM_NX_REG0", binding.Range{28, 31}, binding.Range{28, 31}).
-		Action().ResubmitToTable(table.GetNext()).
+		Action().NextTable().
 		Done()
 	err = bridge.AddFlowsInBundle([]binding.Flow{flow1}, nil, nil)
 	require.Nil(t, err)
 	expectedFlows := []*ExpectFlow{
 		{
 			MatchStr: "priority=100,ip,tun_metadata0=0x1234",
-			ActStr:   fmt.Sprintf("move:NXM_NX_TUN_METADATA0[28..31]->NXM_NX_REG0[28..31],resubmit(,%d)", table.GetNext()),
+			ActStr:   fmt.Sprintf("move:NXM_NX_TUN_METADATA0[28..31]->NXM_NX_REG0[28..31],goto_table:%d", table.GetNext()),
 		},
 	}
 	ovsCtlClient := ovsctl.NewClient(br)
@@ -760,7 +759,7 @@ func TestFlowWithCTMatchers(t *testing.T) {
 		MatchCTSrcPort(ctPortSrc).
 		MatchCTDstPort(ctPortDst).
 		MatchCTProtocol(binding.ProtocolTCP).
-		Action().ResubmitToTable(table.GetNext()).
+		Action().NextTable().
 		Done()
 	flow2 := table.BuildFlow(priority).
 		MatchProtocol(binding.ProtocolIP).
@@ -768,17 +767,17 @@ func TestFlowWithCTMatchers(t *testing.T) {
 		MatchCTSrcIPNet(*ctIPSrcNet).
 		MatchCTDstIPNet(*ctIPDstNet).
 		MatchCTProtocol(binding.ProtocolTCP).
-		Action().ResubmitToTable(table.GetNext()).
+		Action().NextTable().
 		Done()
 	expectFlows := []*ExpectFlow{
 		{fmt.Sprintf("priority=%d,ct_state=+new,ct_nw_src=%s,ct_nw_dst=%s,ct_nw_proto=6,ct_tp_src=%d,ct_tp_dst=%d,ip",
 			priority, ctIPSrc.String(), ctIPDst.String(), ctPortSrc, ctPortDst),
-			fmt.Sprintf("resubmit(,%d)", table.GetNext()),
+			fmt.Sprintf("goto_table:%d", table.GetNext()),
 		},
 		{
 			fmt.Sprintf("priority=%d,ct_state=+est,ct_nw_src=%s,ct_nw_dst=%s,ct_nw_proto=6,ip",
 				priority, ctIPSrcNet.String(), ctIPDstNet.String()),
-			fmt.Sprintf("resubmit(,%d)", table.GetNext()),
+			fmt.Sprintf("goto_table:%d", table.GetNext()),
 		},
 	}
 	for _, f := range []binding.Flow{flow1, flow2} {
@@ -814,7 +813,7 @@ func TestNoteAction(t *testing.T) {
 		MatchProtocol(binding.ProtocolIP).
 		MatchSrcIP(srcIP).
 		Action().Note(testNotes).
-		Action().GotoTable(table.GetNext()).
+		Action().NextTable().
 		Done()
 	convertNoteToHex := func(note string) string {
@@ -861,21 +860,21 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			Cookie(getCookieID()).
 			MatchInPort(podOFport).
 			Action().LoadRegMark(fromLocalMark).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP).
 			Cookie(getCookieID()).
 			MatchInPort(podOFport).
 			MatchARPSha(podMAC).
 			MatchARPSpa(podIP).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(getCookieID()).
 			MatchInPort(podOFport).
 			MatchSrcMAC(podMAC).
 			MatchSrcIP(podIP).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP).
 			Cookie(getCookieID()).
@@ -902,7 +901,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 				LoadFieldToField(regField0, regField0).
 				LoadRegMark(mark1).
 				Done(). // Finish learn action.
-			Action().ResubmitToTable(table.GetID()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(getCookieID()).
@@ -913,7 +912,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			MatchRegMark(fromGatewayMark).
 			MatchCTMark(gatewayCTMark).
 			MatchCTStateNew(false).MatchCTStateTrk(true).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(getCookieID()).
@@ -930,7 +929,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			MatchCTMark(gatewayCTMark).
 			MatchCTStateNew(false).MatchCTStateTrk(true).
 			Action().LoadRange(binding.NxmFieldDstMAC, gwMACData, &binding.Range{0, 47}).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(getCookieID()).
@@ -949,7 +948,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			Action().SetSrcMAC(gwMAC).
 			Action().SetDstMAC(podMAC).
 			Action().DecTTL().
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(getCookieID()).
@@ -958,7 +957,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			Action().SetSrcMAC(gwMAC).
 			Action().SetDstMAC(vMAC).
 			Action().SetTunnelDst(tunnelPeer).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIPv6).
 			Cookie(getCookieID()).
@@ -967,20 +966,20 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			Action().SetSrcMAC(gwMAC).
 			Action().SetDstMAC(vMAC).
 			Action().SetTunnelDst(tunnelPeerIPv6).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
 			Cookie(getCookieID()).
 			MatchDstIP(gwIP).
 			Action().SetDstMAC(gwMAC).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).
 			Cookie(getCookieID()).
 			MatchDstMAC(podMAC).
 			Action().LoadToRegField(portCacheField, podOFport).
 			Action().LoadRegMark(portFoundMark).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal).
 			Cookie(getCookieID()).
@@ -997,7 +996,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 			MatchProtocol(binding.ProtocolIP).
 			MatchSrcIP(podIP).
 			MatchIPDSCP(ipDSCP).
-			Action().GotoTable(table.GetNext()).
+			Action().NextTable().
 			Done(),
 		table.BuildFlow(priorityNormal+20).MatchProtocol(binding.ProtocolTCP).Cookie(getCookieID()).MatchDstPort(uint16(8080), nil).
 			Action().Conjunction(uint32(1001), uint8(3), uint8(3)).Done(),
@@ -1010,9 +1009,9 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 		table.BuildFlow(priorityNormal+20).MatchProtocol(binding.ProtocolIP).Cookie(getCookieID()).MatchRegFieldWithValue(portCacheField, podOFport).
 			Action().Conjunction(uint32(1001), uint8(2), uint8(3)).Done(),
 		table.BuildFlow(priorityNormal+20).MatchProtocol(binding.ProtocolIP).Cookie(getCookieID()).MatchConjID(1001).
-			Action().GotoTable(table.GetNext()).Done(),
+			Action().NextTable().Done(),
 		table.BuildFlow(priorityNormal+20).MatchProtocol(binding.ProtocolIP).Cookie(getCookieID()).MatchConjID(1001).MatchSrcIP(gwIP).
-			Action().GotoTable(table.GetNext()).Done(),
+			Action().NextTable().Done(),
 	)
 
 	gotoTableAction := fmt.Sprintf("goto_table:%d", table.GetNext())
@@ -1023,7 +1022,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) {
 		&ExpectFlow{"priority=200,ip,in_port=3,dl_src=aa:aa:aa:aa:aa:13,nw_src=192.168.1.3", gotoTableAction},
 		&ExpectFlow{"priority=200,arp,arp_tpa=192.168.2.1,arp_op=1", "move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],set_field:aa:bb:cc:dd:ee:ff->eth_src,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],set_field:aa:bb:cc:dd:ee:ff->arp_sha,move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],set_field:192.168.2.1->arp_spa,IN_PORT"},
 		&ExpectFlow{"priority=190,arp", "NORMAL"},
-		&ExpectFlow{"priority=200,tcp", fmt.Sprintf("learn(table=%d,idle_timeout=10,priority=190,delete_learned,cookie=0x1,eth_type=0x800,nw_proto=6,NXM_OF_TCP_DST[],NXM_NX_REG0[0..15]=0xfff,load:NXM_NX_REG0[0..15]->NXM_NX_REG0[0..15],load:0xffe->NXM_NX_REG0[16..31]),resubmit(,%d)", table.GetID(), table.GetID())},
+		&ExpectFlow{"priority=200,tcp", fmt.Sprintf("learn(table=%d,idle_timeout=10,priority=190,delete_learned,cookie=0x1,eth_type=0x800,nw_proto=6,NXM_OF_TCP_DST[],NXM_NX_REG0[0..15]=0xfff,load:NXM_NX_REG0[0..15]->NXM_NX_REG0[0..15],load:0xffe->NXM_NX_REG0[16..31]),goto_table:%d", table.GetID(), table.GetNext())},
 		&ExpectFlow{"priority=200,ip", fmt.Sprintf("ct(table=%d,zone=65520)", table.GetNext())},
 		&ExpectFlow{"priority=210,ct_state=-new+trk,ct_mark=0x2/0x2,ip,reg0=0x1/0xffff", gotoTableAction},
 		&ExpectFlow{"priority=200,ct_state=+new+trk,ip,reg0=0x1/0xffff", fmt.Sprintf("ct(commit,table=%d,zone=65520,exec(load:0x1->NXM_NX_CT_MARK[1])", table.GetNext())},
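Editor's note: the ofctrl changes above replace Action().GotoTable(table.GetNext()) and Action().ResubmitToTable(...) with Action().NextTable(), so flows name their successor symbolically and survive table renumbering. A minimal sketch of that fluent-builder idea; the types below are illustrative stand-ins, not the openflow binding API:

package main

import "fmt"

type Table struct {
	id   uint8
	name string
	next uint8
}

func (t *Table) GetID() uint8   { return t.id }
func (t *Table) GetNext() uint8 { return t.next }

type FlowBuilder struct {
	table   *Table
	match   string
	actions []string
}

func (t *Table) BuildFlow(match string) *FlowBuilder {
	return &FlowBuilder{table: t, match: match}
}

// NextTable resolves the successor when the flow is rendered, so reordering
// the pipeline only requires updating Table.next, not every flow definition.
func (b *FlowBuilder) NextTable() *FlowBuilder {
	b.actions = append(b.actions, fmt.Sprintf("goto_table:%d", b.table.GetNext()))
	return b
}

func (b *FlowBuilder) Done() string {
	return fmt.Sprintf("%s actions=%s", b.match, b.actions[0])
}

func main() {
	t0 := &Table{id: 0, name: "t0", next: 1}
	fmt.Println(t0.BuildFlow("priority=200,ip").NextTable().Done())
	// priority=200,ip actions=goto_table:1
}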