From 19fb48edf37b5b0f5bf35f3dc0cebf9551d73389 Mon Sep 17 00:00:00 2001 From: Hongliang Liu Date: Wed, 24 Nov 2021 17:45:09 +0800 Subject: [PATCH] Flexible pipeline #1: Architecture Signed-off-by: Hongliang Liu --- pkg/agent/openflow/egress.go | 41 + pkg/agent/openflow/feature_templates.go | 214 ++ pkg/agent/openflow/fields.go | 69 +- pkg/agent/openflow/interfaces.go | 93 + pkg/agent/openflow/network_policy.go | 68 + pkg/agent/openflow/pipeline.go | 2355 ++------------------- pkg/agent/openflow/pipeline_other.go | 98 - pkg/agent/openflow/pipeline_test.go | 237 +++ pkg/agent/openflow/pipeline_windows.go | 140 -- pkg/agent/openflow/pod_connectivity.go | 76 + pkg/agent/openflow/service.go | 88 + pkg/agent/openflow/traceflow.go | 68 + pkg/agent/openflow/vm_connectivity.go | 22 + pkg/ovs/openflow/interfaces.go | 33 +- pkg/ovs/openflow/ofctrl_action.go | 23 +- pkg/ovs/openflow/ofctrl_bridge.go | 32 +- pkg/ovs/openflow/ofctrl_pipeline.go | 113 + pkg/ovs/openflow/testing/mock_openflow.go | 58 +- 18 files changed, 1370 insertions(+), 2458 deletions(-) create mode 100644 pkg/agent/openflow/egress.go create mode 100644 pkg/agent/openflow/feature_templates.go create mode 100644 pkg/agent/openflow/interfaces.go delete mode 100644 pkg/agent/openflow/pipeline_other.go create mode 100644 pkg/agent/openflow/pipeline_test.go delete mode 100644 pkg/agent/openflow/pipeline_windows.go create mode 100644 pkg/agent/openflow/pod_connectivity.go create mode 100644 pkg/agent/openflow/service.go create mode 100644 pkg/agent/openflow/traceflow.go create mode 100644 pkg/agent/openflow/vm_connectivity.go create mode 100644 pkg/ovs/openflow/ofctrl_pipeline.go diff --git a/pkg/agent/openflow/egress.go b/pkg/agent/openflow/egress.go new file mode 100644 index 00000000000..2bc1872e3ff --- /dev/null +++ b/pkg/agent/openflow/egress.go @@ -0,0 +1,41 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featureEgress struct { + cookieAllocator cookie.Allocator + + snatFlowCache *flowCategoryCache + hostNetworkingFlows []binding.Flow + + enableProxy bool +} + +func (c *featureEgress) getFeatureID() featureID { + return Egress +} + +func newFeatureEgress(cookieAllocator cookie.Allocator, enableProxy bool) feature { + return &featureEgress{ + snatFlowCache: newFlowCategoryCache(), + cookieAllocator: cookieAllocator, + enableProxy: enableProxy, + } +} diff --git a/pkg/agent/openflow/feature_templates.go b/pkg/agent/openflow/feature_templates.go new file mode 100644 index 00000000000..5bae85be979 --- /dev/null +++ b/pkg/agent/openflow/feature_templates.go @@ -0,0 +1,214 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package openflow + +import ( + binding "antrea.io/antrea/pkg/ovs/openflow" + "antrea.io/antrea/pkg/util/runtime" +) + +func (c *featurePodConnectivity) getTemplate(protocol ofProtocol) *pipelineTemplate { + var template *pipelineTemplate + if protocol == ofProtocolIP { + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.ClassifierStage: { + tableRequest{ClassifierTable, 0x7f}, + }, + binding.ValidationStage: { + tableRequest{SpoofGuardTable, 0x7f}, + }, + binding.ConntrackStateStage: { + tableRequest{ConntrackTable, 0x7f}, + tableRequest{ConntrackStateTable, 0x6f}, + }, + binding.RoutingStage: { + tableRequest{L3ForwardingTable, 0x7f}, + tableRequest{L3DecTTLTable, 0x7d}, + }, + binding.SwitchingStage: { + tableRequest{L2ForwardingCalcTable, 0x7f}, + }, + binding.ConntrackStage: { + tableRequest{ConntrackCommitTable, 0x7f}, + }, + binding.OutputStage: { + tableRequest{L2ForwardingOutTable, 0x7f}, + }, + }, + } + for _, ipProtocol := range c.ipProtocols { + if ipProtocol == binding.ProtocolIPv6 { + template.stageTables[binding.ValidationStage] = append(template.stageTables[binding.ValidationStage], tableRequest{IPv6Table, 0x7e}) + break + } + } + } else if protocol == ofProtocolARP { + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.ValidationStage: { + tableRequest{ARPSpoofGuardTable, 0x7f}, + }, + binding.OutputStage: { + tableRequest{ARPResponderTable, 0x7f}, + }, + }, + } + } + + return template +} + +func (c *featureNetworkPolicy) getTemplate(protocol ofProtocol) *pipelineTemplate { + var template *pipelineTemplate + if protocol != ofProtocolIP { + return template + } + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.ValidationStage: { + tableRequest{SpoofGuardTable, 0x7f}, + }, + binding.ConntrackStateStage: { + tableRequest{SNATConntrackTable, 0xff}, + }, + binding.EgressSecurityStage: { + tableRequest{EgressRuleTable, 0x7f}, + 
tableRequest{EgressDefaultTable, 0x7e}, + tableRequest{EgressMetricTable, 0x7d}, + }, + binding.RoutingStage: { + tableRequest{L3ForwardingTable, 0x7f}, + }, + binding.IngressSecurityStage: { + tableRequest{IngressRuleTable, 0x7f}, + tableRequest{IngressDefaultTable, 0x7e}, + tableRequest{IngressMetricTable, 0x7d}, + }, + binding.ConntrackStage: { + tableRequest{ConntrackCommitTable, 0x7f}, + }, + }, + } + if c.enableAntreaPolicy { + template.stageTables[binding.EgressSecurityStage] = append(template.stageTables[binding.EgressSecurityStage], + tableRequest{AntreaPolicyEgressRuleTable, 0x8f}, + ) + template.stageTables[binding.IngressSecurityStage] = append(template.stageTables[binding.IngressSecurityStage], + tableRequest{AntreaPolicyIngressRuleTable, 0x8f}, + ) + } + return template +} + +func (c *featureService) getTemplate(protocol ofProtocol) *pipelineTemplate { + var template *pipelineTemplate + if protocol != ofProtocolIP { + return template + } + if c.enableProxy { + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.ConntrackStateStage: { + tableRequest{SNATConntrackTable, 0xff}, + }, + binding.PreRoutingStage: { + tableRequest{SessionAffinityTable, 0x7f}, + tableRequest{ServiceLBTable, 0x7e}, + tableRequest{EndpointDNATTable, 0x7d}, + }, + binding.RoutingStage: { + tableRequest{L3ForwardingTable, 0x7f}, + tableRequest{ServiceHairpinMarkTable, 0x7e}, + }, + binding.PostRoutingStage: { + tableRequest{SNATConntrackCommitTable, 0x7e}, + }, + binding.ConntrackStage: { + tableRequest{ConntrackCommitTable, 0x7f}, + }, + binding.OutputStage: { + tableRequest{L2ForwardingOutTable, 0x7f}, + }, + }, + } + if runtime.IsWindowsPlatform() { + template.stageTables[binding.ValidationStage] = append(template.stageTables[binding.ValidationStage], tableRequest{UplinkTable, 0x8f}) + } + if c.proxyAll { + template.stageTables[binding.PreRoutingStage] = append(template.stageTables[binding.PreRoutingStage], tableRequest{NodePortProbeTable, 
0x8f}) + } + } else { + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.PreRoutingStage: { + tableRequest{DNATTable, 0x7f}, + }, + }, + } + } + return template +} + +func (c *featureEgress) getTemplate(protocol ofProtocol) *pipelineTemplate { + var template *pipelineTemplate + if protocol != ofProtocolIP { + return template + } + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.RoutingStage: { + tableRequest{L3ForwardingTable, 0x7f}, + }, + binding.PostRoutingStage: { + tableRequest{SNATTable, 0x8f}, + }, + }, + } + return template +} + +func (c *featureTraceflow) getTemplate(protocol ofProtocol) *pipelineTemplate { + var template *pipelineTemplate + if protocol != ofProtocolIP { + return template + } + template = &pipelineTemplate{} + return template +} + +func (c *featureVMConnectivity) getTemplate(protocol ofProtocol) *pipelineTemplate { + var template *pipelineTemplate + if protocol != ofProtocolIP { + return template + } + template = &pipelineTemplate{ + stageTables: map[binding.StageID][]tableRequest{ + binding.ClassifierStage: { + tableRequest{ClassifierTable, 0x7f}, + }, + binding.ConntrackStateStage: { + tableRequest{ConntrackStateTable, 0x7f}, + }, + binding.ConntrackStage: { + tableRequest{ConntrackTable, 0x7f}, + }, + binding.OutputStage: { + tableRequest{L2ForwardingOutTable, 0x7f}, + }, + }, + } + return template +} diff --git a/pkg/agent/openflow/fields.go b/pkg/agent/openflow/fields.go index a34264efd71..623f4439495 100644 --- a/pkg/agent/openflow/fields.go +++ b/pkg/agent/openflow/fields.go @@ -34,32 +34,49 @@ var ( FromLocalRegMark = binding.NewRegMark(PktSourceField, 2) FromUplinkRegMark = binding.NewRegMark(PktSourceField, 4) FromBridgeRegMark = binding.NewRegMark(PktSourceField, 5) - // reg0[16]: Mark to indicate the ofPort number of an interface is found. 
- OFPortFoundRegMark = binding.NewOneBitRegMark(0, 16, "OFPortFound") - // reg0[18]: Mark to indicate the packet needs DNAT to virtual IP. + // reg0[4..7]: Field to mark the packet destination. Marks in this field include, + // - 0: to tunnel + // - 1: to gateway + // - 2: to local Pod; 4: to uplink + PktDestinationField = binding.NewRegField(0, 4, 7, "PacketDestination") + ToTunnelRegMark = binding.NewRegMark(PktDestinationField, 0) + ToGatewayRegMark = binding.NewRegMark(PktDestinationField, 1) + ToLocalRegMark = binding.NewRegMark(PktDestinationField, 2) + ToUplinkRegMark = binding.NewRegMark(PktDestinationField, 4) + PacketUnionField = binding.NewRegField(0, 0, 7, "PacketUnion") + GatewayHairpinRegMark = binding.NewRegMark(PacketUnionField, (1<<4)|1) + // reg0[8]: Mark to indicate the ofPort number of an interface is found. + OFPortFoundRegMark = binding.NewOneBitRegMark(0, 8, "OFPortFound") + // reg0[9]: Mark to indicate the packet needs DNAT to virtual IP. // If a packet uses HairpinRegMark, it will be output to the port where it enters OVS pipeline in L2ForwardingOutTable. - HairpinRegMark = binding.NewOneBitRegMark(0, 18, "Hairpin") - // reg0[19]: Mark to indicate the packet's MAC address needs to be rewritten. - RewriteMACRegMark = binding.NewOneBitRegMark(0, 19, "RewriteMAC") - // reg0[20]: Mark to indicate the packet is denied(Drop/Reject). - CnpDenyRegMark = binding.NewOneBitRegMark(0, 20, "CNPDeny") - // reg0[21..22]: Field to indicate disposition of Antrea Policy. It could have more bits to support more disposition + HairpinRegMark = binding.NewOneBitRegMark(0, 9, "Hairpin") + // reg0[10]: Field to indicate which IP should be used for hairpin connections. 
+ SNATWithGatewayIP = binding.NewOneBitRegMark(0, 10, "SNATWithGatewayIP") + SNATWithVirtualIP = binding.NewOneBitZeroRegMark(0, 10, "SNATWithVirtualIP") + HairpinSNATUnionField = binding.NewRegField(0, 9, 10, "HairpinSNATUnion") + HairpinSNATWithVirtualIP = binding.NewRegMark(HairpinSNATUnionField, 1) + HairpinSNATWithGatewayIP = binding.NewRegMark(HairpinSNATUnionField, 3) + // reg0[11]: Mark to indicate the packet's MAC address needs to be rewritten. + RewriteMACRegMark = binding.NewOneBitRegMark(0, 11, "RewriteMAC") + // reg0[12]: Mark to indicate the packet is denied(Drop/Reject). + CnpDenyRegMark = binding.NewOneBitRegMark(0, 12, "CNPDeny") + // reg0[13..14]: Field to indicate disposition of Antrea Policy. It could have more bits to support more disposition // that Antrea policy support in the future. // Marks in this field include, // - 0b00: allow // - 0b01: drop // - 0b10: reject - APDispositionField = binding.NewRegField(0, 21, 22, "APDisposition") + APDispositionField = binding.NewRegField(0, 13, 14, "APDisposition") DispositionAllowRegMark = binding.NewRegMark(APDispositionField, DispositionAllow) DispositionDropRegMark = binding.NewRegMark(APDispositionField, DispositionDrop) DispositionRejRegMark = binding.NewRegMark(APDispositionField, DispositionRej) - // reg0[24..27]: Field to indicate the reasons of sending packet to the controller. + // reg0[15..18]: Field to indicate the reasons of sending packet to the controller. 
// Marks in this field include, // - 0b0001: logging // - 0b0010: reject // - 0b0100: deny (used by Flow Exporter) // - 0b1000: DNS packet (used by FQDN) - CustomReasonField = binding.NewRegField(0, 24, 27, "PacketInReason") + CustomReasonField = binding.NewRegField(0, 15, 18, "PacketInReason") CustomReasonLoggingRegMark = binding.NewRegMark(CustomReasonField, CustomReasonLogging) CustomReasonRejectRegMark = binding.NewRegMark(CustomReasonField, CustomReasonReject) CustomReasonDenyRegMark = binding.NewRegMark(CustomReasonField, CustomReasonDeny) @@ -68,8 +85,6 @@ var ( // reg1(NXM_NX_REG1) // Field to cache the ofPort of the OVS interface where to output packet. TargetOFPortField = binding.NewRegField(1, 0, 31, "TargetOFPort") - // ToGatewayRegMark marks that the output interface is Antrea gateway. - ToGatewayRegMark = binding.NewRegMark(TargetOFPortField, config.HostGatewayOFPort) // ToBridgeRegMark marks that the output interface is OVS bridge. ToBridgeRegMark = binding.NewRegMark(TargetOFPortField, config.BridgeOFPort) @@ -103,16 +118,20 @@ var ( NodePortAddressField = binding.NewRegField(4, 19, 19, "NodePortAddress") // ToNodePortAddressRegMark marks that the Service type as NodePort. ToNodePortAddressRegMark = binding.NewRegMark(NodePortAddressField, 0b1) - // reg4[20]: Field to mark that whether the packet of Service NodePort/LoadBalancer from gateway requires SNAT. - ServiceSNATField = binding.NewRegField(4, 20, 20, "ServiceSNAT") - // ServiceNeedSNATRegMark marks that the packet of Service NodePort/LoadBalancer requires SNAT. - ServiceNeedSNATRegMark = binding.NewRegMark(ServiceSNATField, 0b1) // reg4[16..19]: Field to store the union value of Endpoint state and the mark of whether Service type is NodePort. NodePortUnionField = binding.NewRegField(4, 16, 19, "NodePortUnion") // reg4[21]: Mark to indicate the packet is from local AntreaFlexibleIPAM Pod. 
// NotAntreaFlexibleIPAMRegMark will be used with RewriteMACRegMark, thus the reg id must not be same due to the limitation of ofnet library. AntreaFlexibleIPAMRegMark = binding.NewOneBitRegMark(4, 21, "AntreaFlexibleIPAM") NotAntreaFlexibleIPAMRegMark = binding.NewOneBitZeroRegMark(4, 21, "AntreaFlexibleIPAM") + // reg4[22..23]: Field to store the state of a connection of Service NodePort/LoadBalancer from gateway which + // requires SNAT or not. + // - 0b01: connection requires SNAT and is not marked with a ct mark. + // - 0b11: connection requires SNAT and is marked with a ct mark. + ServiceSNATStateField = binding.NewRegField(4, 22, 23, "ServiceSNAT") + NotRequireSNATRegMark = binding.NewRegMark(ServiceSNATStateField, 0b00) + RequireSNATRegMark = binding.NewRegMark(ServiceSNATStateField, 0b01) + CTMarkedSNATRegMark = binding.NewRegMark(ServiceSNATStateField, 0b11) // reg5(NXM_NX_REG5) // Field to cache the Egress conjunction ID hit by TraceFlow packet. @@ -141,12 +160,24 @@ var ( // Mark to indicate the connection is initiated through the host gateway interface // (i.e. for which the first packet of the connection was received through the gateway). + // This CT mark is only used in CtZone / CtZoneV6. FromGatewayCTMark = binding.NewCTMark(0b1, 1, 1) // Mark to indicate DNAT is performed on the connection for Service. + // This CT mark is used in both CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6. ServiceCTMark = binding.NewCTMark(0b1, 2, 2) // Mark to indicate the connection is initiated through the host bridge interface // (i.e. for which the first packet of the connection was received through the bridge). - FromBridgeCTMark = binding.NewCTMark(0x1, 3, 3) + // This CT mark is only used in CtZone / CtZoneV6. + FromBridgeCTMark = binding.NewCTMark(0b1, 3, 3) + // Mark to indicate SNAT should be performed on the connection for Service. + // This CT mark is only used in CtZone / CtZoneV6. 
+ ServiceSNATCTMark = binding.NewCTMark(0b1, 4, 4) + // Mark to indicate the connection is hairpin. + // This CT mark is only used in SNATCtZone / SNATCtZoneV6. + HairpinCTMark = binding.NewCTMark(0b1, 5, 5) + // Mark to indicate the connection is hairpin and Service. + // This CT mark is only used in SNATCtZone / SNATCtZoneV6. + UnionHairpinServiceCTMark = binding.NewCTMark(0b11, 4, 5) ) // Fields using CT label. diff --git a/pkg/agent/openflow/interfaces.go b/pkg/agent/openflow/interfaces.go new file mode 100644 index 00000000000..65a56d500ae --- /dev/null +++ b/pkg/agent/openflow/interfaces.go @@ -0,0 +1,93 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package openflow + +import ( + "k8s.io/apimachinery/pkg/util/sets" + + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featureID int + +const ( + Shared featureID = iota + PodConnectivity + VMConnectivity + NetworkPolicy + Service + Egress + Traceflow +) + +type ofProtocol int + +const ( + ofProtocolIP ofProtocol = iota + ofProtocolARP +) + +type FeatureTable struct { + name string + ofTable binding.Table + features sets.Int +} + +func newFeatureTable(tableName string) *FeatureTable { + return &FeatureTable{ + name: tableName, + } +} + +func (c *FeatureTable) GetID() uint8 { + return c.ofTable.GetID() +} + +func (c *FeatureTable) GetNext() uint8 { + return c.ofTable.GetNext() +} + +func (c *FeatureTable) GetName() string { + return c.name +} + +func (c *FeatureTable) GetOFTable() binding.Table { + return c.ofTable +} + +// SetOFTable is only used for test code. +func (c *FeatureTable) SetOFTable(id uint8) { + c.ofTable = binding.NewOFTable(id, c.name, 0, 0) +} + +// A table with a higher priority is assigned with a lower tableID, which means a packet should enter the table +// before others with lower priorities in the same stage. +type tableRequest struct { + table *FeatureTable + priority uint8 +} + +type pipelineTemplate struct { + // Declare the tables and the corresponding priorities in the expected stage. + // If it is expected to enforce a packet to enter other tables in the same stage after leaving the current table, + // use a higher priority in the tableRequest. 
+ stageTables map[binding.StageID][]tableRequest + feature featureID +} + +type feature interface { + getFeatureID() featureID + getTemplate(protocol ofProtocol) *pipelineTemplate +} diff --git a/pkg/agent/openflow/network_policy.go b/pkg/agent/openflow/network_policy.go index 0de01e0bf21..b4761c68b89 100644 --- a/pkg/agent/openflow/network_policy.go +++ b/pkg/agent/openflow/network_policy.go @@ -15,10 +15,14 @@ package openflow import ( + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" "fmt" + "k8s.io/client-go/tools/cache" "net" "strconv" "strings" + "sync" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/klog/v2" @@ -1698,3 +1702,67 @@ func (c *client) NetworkPolicyMetrics() map[uint32]*types.RuleMetric { collectMetricsFromFlows(ingressFlows) return result } + +type featureNetworkPolicy struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + bridge binding.Bridge + + // globalConjMatchFlowCache is a global map for conjMatchFlowContext. The key is a string generated from the + // conjMatchFlowContext. + globalConjMatchFlowCache map[string]*conjMatchFlowContext + conjMatchFlowLock sync.Mutex // Lock for access globalConjMatchFlowCache + // policyCache is a storage that supports listing policyRuleConjunction with different indexers. + // It's guaranteed that one policyRuleConjunction is processed by at most one goroutine at any given time. + policyCache cache.Indexer + flowCategoryCache *flowCategoryCache + packetInHandlers map[uint8]map[string]PacketInHandler + + gatewayIPs map[binding.Protocol]net.IP + + proxyAll bool + ovsMetersAreSupported bool + enableDenyTracking bool + enableAntreaPolicy bool + // deterministic represents whether to generate flows deterministically. + // For example, if a flow has multiple actions, setting it to true can get consistent flow. + // Enabling it may carry a performance impact. It's disabled by default and should only be used in testing. 
+ deterministic bool +} + +func (c *featureNetworkPolicy) getFeatureID() featureID { + return NetworkPolicy +} + +func newFeatureNetworkPolicy( + cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + bridge binding.Bridge, + nodeConfig *config.NodeConfig, + proxyAll, + ovsMetersAreSupported, + enableDenyTracking, + enableAntreaPolicy bool) feature { + gatewayIPs := make(map[binding.Protocol]net.IP) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv4 + } else if ipProtocol == binding.ProtocolIPv6 { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv6 + } + } + return &featureNetworkPolicy{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + bridge: bridge, + flowCategoryCache: newFlowCategoryCache(), + globalConjMatchFlowCache: make(map[string]*conjMatchFlowContext), + packetInHandlers: map[uint8]map[string]PacketInHandler{}, + policyCache: cache.NewIndexer(policyConjKeyFunc, cache.Indexers{priorityIndex: priorityIndexFunc}), + gatewayIPs: gatewayIPs, + proxyAll: proxyAll, + ovsMetersAreSupported: ovsMetersAreSupported, + enableDenyTracking: enableDenyTracking, + enableAntreaPolicy: enableAntreaPolicy, + } +} diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index d634d3464d6..1271421424c 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -15,7 +15,6 @@ package openflow import ( - "encoding/binary" "fmt" "math" "net" @@ -23,9 +22,8 @@ import ( "sync" "time" - "antrea.io/libOpenflow/protocol" "antrea.io/ofnet/ofctrl" - v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -36,43 +34,40 @@ import ( binding "antrea.io/antrea/pkg/ovs/openflow" "antrea.io/antrea/pkg/ovs/ovsconfig" "antrea.io/antrea/pkg/ovs/ovsctl" - "antrea.io/antrea/pkg/util/runtime" - "antrea.io/antrea/third_party/proxy" ) var ( - ClassifierTable = 
binding.NewOFTable(0, "Classification") - UplinkTable = binding.NewOFTable(5, "Uplink") - SpoofGuardTable = binding.NewOFTable(10, "SpoofGuard") - arpResponderTable = binding.NewOFTable(20, "ARPResponder") - IPv6Table = binding.NewOFTable(21, "IPv6") - ServiceHairpinTable = binding.NewOFTable(23, "ServiceHairpin") - ServiceConntrackTable = binding.NewOFTable(24, "ServiceConntrack") // serviceConntrackTable use a new ct_zone to transform SNAT'd connections. - ConntrackTable = binding.NewOFTable(30, "ConntrackZone") - ConntrackStateTable = binding.NewOFTable(31, "ConntrackState") - ServiceClassifierTable = binding.NewOFTable(35, "ServiceClassifier") - SessionAffinityTable = binding.NewOFTable(40, "SessionAffinity") - DNATTable = binding.NewOFTable(40, "DNAT(SessionAffinity)") - ServiceLBTable = binding.NewOFTable(41, "ServiceLB") - EndpointDNATTable = binding.NewOFTable(42, "EndpointDNAT") - AntreaPolicyEgressRuleTable = binding.NewOFTable(45, "AntreaPolicyEgressRule") - DefaultTierEgressRuleTable = binding.NewOFTable(49, "DefaultTierEgressRule") - EgressRuleTable = binding.NewOFTable(50, "EgressRule") - EgressDefaultTable = binding.NewOFTable(60, "EgressDefaultRule") - EgressMetricTable = binding.NewOFTable(61, "EgressMetric") - L3ForwardingTable = binding.NewOFTable(70, "L3Forwarding") - SNATTable = binding.NewOFTable(71, "SNAT") - L3DecTTLTable = binding.NewOFTable(72, "IPTTLDec") - L2ForwardingCalcTable = binding.NewOFTable(80, "L2Forwarding") - AntreaPolicyIngressRuleTable = binding.NewOFTable(85, "AntreaPolicyIngressRule") - DefaultTierIngressRuleTable = binding.NewOFTable(89, "DefaultTierIngressRule") - IngressRuleTable = binding.NewOFTable(90, "IngressRule") - IngressDefaultTable = binding.NewOFTable(100, "IngressDefaultRule") - IngressMetricTable = binding.NewOFTable(101, "IngressMetric") - ConntrackCommitTable = binding.NewOFTable(105, "ConntrackCommit") - ServiceConntrackCommitTable = binding.NewOFTable(106, "ServiceConntrackCommit") - HairpinSNATTable = 
binding.NewOFTable(108, "HairpinSNAT") - L2ForwardingOutTable = binding.NewOFTable(110, "Output") + PipelineClassifierTable = newFeatureTable("PipelineClassification") + ARPSpoofGuardTable = newFeatureTable("ARPSpoofGuard") + ARPResponderTable = newFeatureTable("ARPResponder") + ClassifierTable = newFeatureTable("Classification") + UplinkTable = newFeatureTable("Uplink") + SpoofGuardTable = newFeatureTable("SpoofGuard") + IPv6Table = newFeatureTable("IPv6") + NodePortProbeTable = newFeatureTable("NodePortProbe") + SNATConntrackTable = newFeatureTable("SNATConntrackZone") + ConntrackTable = newFeatureTable("ConntrackZone") + ConntrackStateTable = newFeatureTable("ConntrackState") + SessionAffinityTable = newFeatureTable("SessionAffinity") + DNATTable = newFeatureTable("DNAT") + ServiceLBTable = newFeatureTable("ServiceLB") + EndpointDNATTable = newFeatureTable("EndpointDNAT") + AntreaPolicyEgressRuleTable = newFeatureTable("AntreaPolicyEgressRule") + EgressRuleTable = newFeatureTable("EgressRule") + EgressDefaultTable = newFeatureTable("EgressDefaultRule") + EgressMetricTable = newFeatureTable("EgressMetric") + L3ForwardingTable = newFeatureTable("L3Forwarding") + ServiceHairpinMarkTable = newFeatureTable("ServiceHairpinMark") + L3DecTTLTable = newFeatureTable("IPTTLDec") + SNATTable = newFeatureTable("SNAT") + SNATConntrackCommitTable = newFeatureTable("SNATConntrackCommit") + L2ForwardingCalcTable = newFeatureTable("L2Forwarding") + AntreaPolicyIngressRuleTable = newFeatureTable("AntreaPolicyIngressRule") + IngressRuleTable = newFeatureTable("IngressRule") + IngressDefaultTable = newFeatureTable("IngressDefaultRule") + IngressMetricTable = newFeatureTable("IngressMetric") + ConntrackCommitTable = newFeatureTable("ConntrackCommit") + L2ForwardingOutTable = newFeatureTable("L2ForwardingOut") // Flow priority level priorityHigh = uint16(210) @@ -122,32 +117,28 @@ func (a ofAction) String() string { var ( // egressTables map records all IDs of tables related to 
// egress rules. - egressTables = map[uint8]struct{}{ - AntreaPolicyEgressRuleTable.GetID(): {}, - EgressRuleTable.GetID(): {}, - EgressDefaultTable.GetID(): {}, - } + egressTables = map[uint8]struct{}{} - // ofTableCache caches the OpenFlow tables used in the pipeline, and it supports using the table ID and name as the index to query the OpenFlow table. - ofTableCache = cache.NewIndexer(tableIDKeyFunc, cache.Indexers{tableNameIndex: tableNameIndexFunc}) + // tableCache caches the OpenFlow tables used in the pipeline, and it supports using the table ID and name as the index to query the OpenFlow table. + tableCache = cache.NewIndexer(tableIDKeyFunc, cache.Indexers{tableNameIndex: tableNameIndexFunc}) ) func tableNameIndexFunc(obj interface{}) ([]string, error) { - ofTable := obj.(binding.Table) - return []string{ofTable.GetName()}, nil + ft := obj.(*FeatureTable) + return []string{ft.GetName()}, nil } func tableIDKeyFunc(obj interface{}) (string, error) { - ofTable := obj.(binding.Table) - return fmt.Sprintf("%d", ofTable.GetID()), nil + ft := obj.(*FeatureTable) + return fmt.Sprintf("%d", ft.GetID()), nil } func getTableByID(id uint8) binding.Table { - obj, exists, _ := ofTableCache.GetByKey(fmt.Sprintf("%d", id)) + obj, exists, _ := tableCache.GetByKey(fmt.Sprintf("%d", id)) if !exists { return nil } - return obj.(binding.Table) + return obj.(*FeatureTable).ofTable } // GetFlowTableName returns the flow table name given the table ID. An empty @@ -164,7 +155,7 @@ func GetFlowTableName(tableID uint8) string { // returns the flow table number if the table is found. Otherwise TableIDAll is // returned if the table cannot be found. 
func GetFlowTableID(tableName string) uint8 { - objs, _ := ofTableCache.ByIndex(tableNameIndex, tableName) + objs, _ := tableCache.ByIndex(tableNameIndex, tableName) if len(objs) == 0 { return binding.TableIDAll } @@ -173,36 +164,36 @@ func GetFlowTableID(tableName string) uint8 { func GetTableList() []binding.Table { tables := make([]binding.Table, 0) - for _, obj := range ofTableCache.List() { + for _, obj := range tableCache.List() { t := obj.(binding.Table) tables = append(tables, t) } return tables } -func GetAntreaPolicyEgressTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyEgressTables() []*FeatureTable { + return []*FeatureTable{ AntreaPolicyEgressRuleTable, EgressDefaultTable, } } -func GetAntreaPolicyIngressTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyIngressTables() []*FeatureTable { + return []*FeatureTable{ AntreaPolicyIngressRuleTable, IngressDefaultTable, } } -func GetAntreaPolicyBaselineTierTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyBaselineTierTables() []*FeatureTable { + return []*FeatureTable{ EgressDefaultTable, IngressDefaultTable, } } -func GetAntreaPolicyMultiTierTables() []binding.Table { - return []binding.Table{ +func GetAntreaPolicyMultiTierTables() []*FeatureTable { + return []*FeatureTable{ AntreaPolicyEgressRuleTable, AntreaPolicyIngressRuleTable, } @@ -251,8 +242,6 @@ var ( snatPktMarkRange = &binding.Range{0, 7} GlobalVirtualMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff") - hairpinIP = net.ParseIP("169.254.169.252").To4() - hairpinIPv6 = net.ParseIP("fc00::aabb:ccdd:eeff").To16() ) type OFEntryOperations interface { @@ -292,24 +281,19 @@ type client struct { roundInfo types.RoundInfo cookieAllocator cookie.Allocator bridge binding.Bridge - egressEntryTable uint8 - ingressEntryTable uint8 - // Flow caches for corresponding deletions. 
- nodeFlowCache, podFlowCache, serviceFlowCache, snatFlowCache, tfFlowCache *flowCategoryCache - // "fixed" flows installed by the agent after initialization and which do not change during - // the lifetime of the client. - gatewayFlows, defaultServiceFlows, defaultTunnelFlows, hostNetworkingFlows []binding.Flow + + featurePodConnectivity *featurePodConnectivity + featureService *featureService + featureEgress *featureEgress + featureNetworkPolicy *featureNetworkPolicy + featureTraceflow *featureTraceflow + + pipelines map[ofProtocol]binding.Pipeline + ipProtocols []binding.Protocol + // ofEntryOperations is a wrapper interface for OpenFlow entry Add / Modify / Delete operations. It // enables convenient mocking in unit tests. ofEntryOperations OFEntryOperations - // policyCache is a storage that supports listing policyRuleConjunction with different indexers. - // It's guaranteed that one policyRuleConjunction is processed by at most one goroutine at any given time. - policyCache cache.Indexer - conjMatchFlowLock sync.Mutex // Lock for access globalConjMatchFlowCache - groupCache sync.Map - // globalConjMatchFlowCache is a global map for conjMatchFlowContext. The key is a string generated from the - // conjMatchFlowContext. - globalConjMatchFlowCache map[string]*conjMatchFlowContext // replayMutex provides exclusive access to the OFSwitch to the ReplayFlows method. replayMutex sync.RWMutex nodeConfig *config.NodeConfig @@ -320,11 +304,6 @@ type client struct { ovsDatapathType ovsconfig.OVSDatapathType // ovsMetersAreSupported indicates whether the OVS datapath supports OpenFlow meters. ovsMetersAreSupported bool - // packetInHandlers stores handler to process PacketIn event. Each packetin reason can have multiple handlers registered. - // When a packetin arrives, openflow send packet to registered handlers in this map. - packetInHandlers map[uint8]map[string]PacketInHandler - // Supported IP Protocols (IP or IPv6) on the current Node. 
- ipProtocols []binding.Protocol // ovsctlClient is the interface for executing OVS "ovs-ofctl" and "ovs-appctl" commands. ovsctlClient ovsctl.OVSCtlClient // deterministic represents whether to generate flows deterministically. @@ -432,13 +411,13 @@ func (c *client) DeleteOFEntries(ofEntries []binding.OFEntry) error { } // defaultFlows generates the default flows of all tables. -func (c *client) defaultFlows() (flows []binding.Flow) { - for _, obj := range ofTableCache.List() { - table := obj.(binding.Table) +func defaultFlows(pipeline binding.Pipeline, category uint64) []binding.Flow { + var flows []binding.Flow + for _, table := range pipeline.ListAllTables() { flowBuilder := table.BuildFlow(priorityMiss) switch table.GetMissAction() { case binding.TableMissActionNext: - flowBuilder = flowBuilder.Action().GotoTable(table.GetNext()) + flowBuilder = flowBuilder.Action().NextTable() case binding.TableMissActionNormal: flowBuilder = flowBuilder.Action().Normal() case binding.TableMissActionDrop: @@ -448,954 +427,7 @@ func (c *client) defaultFlows() (flows []binding.Flow) { default: continue } - flows = append(flows, flowBuilder.Cookie(c.cookieAllocator.Request(cookie.Default).Raw()).Done()) - } - return flows -} - -// tunnelClassifierFlow generates the flow to mark traffic comes from the tunnelOFPort. -func (c *client) tunnelClassifierFlow(tunnelOFPort uint32, category cookie.Category) binding.Flow { - nextTable := ConntrackTable - if c.proxyAll { - nextTable = ServiceConntrackTable - } - return ClassifierTable.BuildFlow(priorityNormal). - MatchInPort(tunnelOFPort). - Action().LoadRegMark(FromTunnelRegMark). - Action().LoadRegMark(RewriteMACRegMark). - Action().GotoTable(nextTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// gatewayClassifierFlow generates the flow to mark traffic comes from the gatewayOFPort. 
-func (c *client) gatewayClassifierFlow(category cookie.Category) binding.Flow { - return ClassifierTable.BuildFlow(priorityNormal). - MatchInPort(config.HostGatewayOFPort). - Action().LoadRegMark(FromGatewayRegMark). - Action().GotoTable(ClassifierTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// podClassifierFlow generates the flow to mark traffic comes from the podOFPort. -func (c *client) podClassifierFlow(podOFPort uint32, category cookie.Category, isAntreaFlexibleIPAM bool) binding.Flow { - flowBuilder := ClassifierTable.BuildFlow(priorityLow). - MatchInPort(podOFPort). - Action().LoadRegMark(FromLocalRegMark). - Action().GotoTable(ClassifierTable.GetNext()) - if isAntreaFlexibleIPAM { - // mark traffic from local AntreaFlexibleIPAM Pod - flowBuilder = flowBuilder.Action().LoadRegMark(AntreaFlexibleIPAMRegMark) - } - return flowBuilder.Cookie(c.cookieAllocator.Request(category).Raw()).Done() -} - -// podUplinkClassifierFlow generates the flows to mark traffic from uplink and bridge ports, which are needed when -// uplink is connected to OVS bridge when AntreaFlexibleIPAM is configured. -func (c *client) podUplinkClassifierFlows(dstMAC net.HardwareAddr, category cookie.Category) (flows []binding.Flow) { - flows = append(flows, ClassifierTable.BuildFlow(priorityHigh). - MatchInPort(config.UplinkOFPort). - MatchDstMAC(dstMAC). - Action().LoadRegMark(FromUplinkRegMark). - Action().GotoTable(ServiceHairpinTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - flows = append(flows, ClassifierTable.BuildFlow(priorityHigh). - MatchInPort(config.BridgeOFPort). - MatchDstMAC(dstMAC). - Action().LoadRegMark(FromBridgeRegMark). - Action().GotoTable(ServiceHairpinTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). 
- Done()) - return -} - -// connectionTrackFlows generates flows that redirect traffic to ct_zone and handle traffic according to ct_state: -// 1) commit new connections to ct_zone(0xfff0) in the ConntrackCommitTable. -// 2) Add ct_mark on the packet if it is sent to the switch from the host gateway. -// 3) Allow traffic if it hits ct_mark and is sent from the host gateway. -// 4) Drop all invalid traffic. -// 5) Let other traffic go to the SessionAffinityTable first and then the ServiceLBTable. -// The SessionAffinityTable is a side-effect table which means traffic will not -// be resubmitted to any table. serviceLB does Endpoint selection for traffic -// to a Service. -// 6) Add a flow to bypass reject response packet sent by the controller. -func (c *client) connectionTrackFlows(category cookie.Category) []binding.Flow { - flows := c.conntrackBasicFlows(category) - if c.enableProxy { - // Replace the default flow with multiple resubmits actions. - if c.proxyAll { - flows = append(flows, ConntrackStateTable.BuildFlow(priorityMiss). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().ResubmitToTable(ServiceClassifierTable.GetID()). - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()). - Done()) - } else { - flows = append(flows, ConntrackStateTable.BuildFlow(priorityMiss). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()). - Done()) - } - - for _, proto := range c.ipProtocols { - gatewayIP := c.nodeConfig.GatewayConfig.IPv4 - serviceVirtualIP := config.VirtualServiceIPv4 - snatZone := SNATCtZone - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - gatewayIP = c.nodeConfig.GatewayConfig.IPv6 - serviceVirtualIP = config.VirtualServiceIPv6 - snatZone = SNATCtZoneV6 - ctZone = CtZoneV6 - } - flows = append(flows, - // This flow is used to maintain DNAT conntrack for Service traffic. 
- ConntrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Action().CT(false, ConntrackTable.GetNext(), ctZone).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ConntrackCommitTable.BuildFlow(priorityLow).MatchProtocol(proto). - MatchCTStateTrk(true). - MatchCTMark(ServiceCTMark). - MatchRegMark(EpSelectedRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().GotoTable(ConntrackCommitTable.GetNext()). - Done(), - ) - - if c.proxyAll { - flows = append(flows, - // This flow is used to match the Service traffic from Antrea gateway. The Service traffic from gateway - // should enter table serviceConntrackCommitTable, otherwise it will be matched by other flows in - // table connectionTrackCommit. - ConntrackCommitTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchCTMark(ServiceCTMark). - MatchRegMark(FromGatewayRegMark). - Action().GotoTable(ServiceConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // This flow is used to maintain SNAT conntrack for Service traffic. - ServiceConntrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Action().CT(false, ServiceConntrackTable.GetNext(), snatZone).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // This flow is used to match the following cases: - // - The first packet of NodePort/LoadBalancer whose Endpoint is not on local Pod CIDR or any remote - // Pod CIDRs. Note that, this flow will change the behavior of the packet that NodePort/LoadBalancer - // whose externalTrafficPolicy is Local and the Endpoint is on host network. According to the definition - // of externalTrafficPolicy Local, the source IP should be retained. 
If the Endpoint is on host network, - // there should be only one backend Pod of the Service on a Node (It is impossible to have more than - // one Pods which listen on the same port on host network), so it is not useful to expose the Pod as - // NodePort Service, as it makes no difference to access it directly. - // - The first packet of ClusterIP and the Endpoint is not on local Pod CIDR or any remote Pod CIDRs. - // As the packet is from Antrea gateway, and it will pass through Antrea gateway, a virtual IP is used - // to perform SNAT for the packet, rather than Antrea gateway's IP. - ServiceConntrackCommitTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchRegMark(ToGatewayRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchCTStateNew(true). - MatchCTStateTrk(true). - Action().CT(true, ServiceConntrackCommitTable.GetNext(), snatZone). - SNAT(&binding.IPRange{StartIP: serviceVirtualIP, EndIP: serviceVirtualIP}, nil). - CTDone(). - Done(), - // This flow is used to match the first packet of NodePort/LoadBalancer whose output port is not - // Antrea gateway, and externalTrafficPolicy is Cluster. This packet requires SNAT. Antrea gateway - // IP is used to perform SNAT for the packet. - ServiceConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). - MatchRegMark(ServiceNeedSNATRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchCTStateNew(true). - MatchCTStateTrk(true). - Action().CT(true, ServiceConntrackCommitTable.GetNext(), snatZone). - SNAT(&binding.IPRange{StartIP: gatewayIP, EndIP: gatewayIP}, nil). - CTDone(). - Done(), - // This flow is used to match the consequent request packets of Service traffic whose first request packet has been committed - // and performed SNAT. For example: - /* - * 192.168.77.1 is the IP address of client. - * 192.168.77.100 is the IP address of k8s node. - * 30001 is a NodePort port. - * 10.10.0.1 is the IP address of Antrea gateway. 
- * 10.10.0.3 is the Endpoint of NodePort Service. - - * pkt 1 (request) - * client 192.168.77.1:12345->192.168.77.100:30001 - * ct zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 - * ct zone DNAT 65520 192.168.77.1:12345->192.168.77.100:30001 - * ct commit DNAT zone 65520 192.168.77.1:12345->192.168.77.100:30001 => 192.168.77.1:12345->10.10.0.3:80 - * ct commit SNAT zone 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 - * output - * pkt 2 (response) - * Pod 10.10.0.3:80->10.10.0.1:12345 - * ct zone SNAT 65521 10.10.0.3:80->10.10.0.1:12345 => 10.10.0.3:80->192.168.77.1:12345 - * ct zone DNAT 65520 10.10.0.3:80->192.168.77.1:12345 => 192.168.77.1:30001->192.168.77.1:12345 - * output - * pkt 3 (request) - * client 192.168.77.1:12345->192.168.77.100:30001 - * ct zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 - * ct zone DNAT 65520 192.168.77.1:12345->10.10.0.3:80 - * ct zone SNAT 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 - * output - * pkt ... - - The source IP address of pkt 3 cannot be transformed through zone 65521 as there is no connection track about - 192.168.77.1:12345<->192.168.77.100:30001, and the source IP is still 192.168.77.100. - Before output, pkt 3 needs SNAT, but the connection has been committed. The flow is for pkt 3 to perform SNAT. - */ - ServiceConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchCTStateNew(false). - MatchCTStateTrk(true). - Action().CT(false, ServiceConntrackCommitTable.GetNext(), snatZone). - NAT(). - CTDone(). - Done(), - ) - } - } - } else { - flows = append(flows, c.kubeProxyFlows(category)...) - } - - // TODO: following flows should move to function "kubeProxyFlows". Since another PR(#1198) is trying - // to polish the relevant logic, code refactoring is needed after that PR is merged. 
- for _, proto := range c.ipProtocols { - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - ctZone = CtZoneV6 - } - flows = append(flows, - // Connections initiated through the gateway are marked with FromGatewayCTMark. - ConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). - MatchRegMark(FromGatewayRegMark). - MatchCTStateNew(true).MatchCTStateTrk(true). - Action().CT(true, ConntrackCommitTable.GetNext(), ctZone).LoadToCtMark(FromGatewayCTMark).CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Connections initiated through the bridge port are marked with FromBridgeCTMark. - ConntrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). - MatchRegMark(FromBridgeRegMark). - MatchCTStateNew(true).MatchCTStateTrk(true). - Action().CT(true, ConntrackCommitTable.GetNext(), ctZone).LoadToCtMark(FromBridgeCTMark).CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Add reject response packet bypass flow. - ) - } - return flows -} - -// dnsResponseBypassConntrackFlow generates a flow which is used to bypass the -// dns response packetout from conntrack, to avoid unexpected packet drop. -func (c *client) dnsResponseBypassConntrackFlow() binding.Flow { - table := ConntrackTable - if c.proxyAll { - table = ServiceConntrackTable - } - return table.BuildFlow(priorityHigh). - MatchRegFieldWithValue(CustomReasonField, CustomReasonDNS). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Action().ResubmitToTable(L2ForwardingCalcTable.GetID()). - Done() -} - -// dnsResponseBypassPacketInFlow generates a flow which is used to bypass the -// dns packetIn conjunction flow for dns response packetOut. This packetOut -// should be sent directly to the requesting client without being intercepted -// again. -func (c *client) dnsResponseBypassPacketInFlow() binding.Flow { - // TODO: use a unified register bit to mark packetOuts. 
The pipeline does not need to be - // aware of why the packetOut is being set by the controller, it just needs to be aware that - // this is a packetOut message and that some pipeline stages (conntrack, policy enforcement) - // should therefore be skipped. - return AntreaPolicyIngressRuleTable.BuildFlow(priorityDNSBypass). - MatchRegFieldWithValue(CustomReasonField, CustomReasonDNS). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Action().ResubmitToTable(L2ForwardingOutTable.GetID()). - Done() -} - -func (c *client) conntrackBasicFlows(category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, proto := range c.ipProtocols { - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - ctZone = CtZoneV6 - } - flows = append(flows, - ConntrackStateTable.BuildFlow(priorityLow).MatchProtocol(proto). - MatchCTStateInv(true).MatchCTStateTrk(true). - Action().Drop(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ConntrackCommitTable.BuildFlow(priorityLow).MatchProtocol(proto). - MatchCTStateNew(true).MatchCTStateTrk(true). - Action().CT(true, ConntrackCommitTable.GetNext(), ctZone).CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - } - return flows -} - -func (c *client) kubeProxyFlows(category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, proto := range c.ipProtocols { - ctZone := CtZone - if proto == binding.ProtocolIPv6 { - ctZone = CtZoneV6 - } - flows = append(flows, - ConntrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). - Action().CT(false, ConntrackTable.GetNext(), ctZone).CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - } - return flows -} - -// TODO: Use DuplicateToBuilder or integrate this function into original one to avoid unexpected -// difference. -// traceflowConnectionTrackFlows generates Traceflow specific flows in the -// connectionTrackStateTable or L2ForwardingCalcTable. 
When packet is not -// provided, the flows bypass the drop flow in connectionTrackFlows to avoid -// unexpected drop of the injected Traceflow packet, and to drop any Traceflow -// packet that has ct_state +rpl, which may happen when the Traceflow request -// destination is the Node's IP. -// When packet is provided, a flow is added to mark - the first packet of the -// first connection that matches the provided packet - as the Traceflow packet. -// The flow is added in connectionTrackStateTable when receiverOnly is false and -// it also matches in_port to be the provided ofPort (the sender Pod); otherwise -// when receiverOnly is true, the flow is added into L2ForwardingCalcTable and -// matches the destination MAC (the receiver Pod MAC). -func (c *client) traceflowConnectionTrackFlows(dataplaneTag uint8, receiverOnly bool, packet *binding.Packet, ofPort uint32, timeout uint16, category cookie.Category) []binding.Flow { - var flows []binding.Flow - if packet == nil { - for _, ipProtocol := range c.ipProtocols { - flowBuilder := ConntrackStateTable.BuildFlow(priorityLow + 1). - MatchProtocol(ipProtocol). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()) - if c.enableProxy { - flowBuilder = flowBuilder. - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()) - } else { - flowBuilder = flowBuilder. - Action().ResubmitToTable(ConntrackStateTable.GetNext()) - } - flows = append(flows, flowBuilder.Done()) - - flows = append(flows, ConntrackStateTable.BuildFlow(priorityLow+2). - MatchProtocol(ipProtocol). - MatchIPDSCP(dataplaneTag). - MatchCTStateTrk(true).MatchCTStateRpl(true). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().Drop(). - Done()) - } - } else { - var flowBuilder binding.FlowBuilder - if !receiverOnly { - flowBuilder = ConntrackStateTable.BuildFlow(priorityLow). - MatchInPort(ofPort). 
- Action().LoadIPDSCP(dataplaneTag) - if packet.DestinationIP != nil { - flowBuilder = flowBuilder.MatchDstIP(packet.DestinationIP) - } - if c.enableProxy { - flowBuilder = flowBuilder. - Action().ResubmitToTable(SessionAffinityTable.GetID()). - Action().ResubmitToTable(ServiceLBTable.GetID()) - } else { - flowBuilder = flowBuilder. - Action().ResubmitToTable(ConntrackStateTable.GetNext()) - } - } else { - nextTable := c.ingressEntryTable - flowBuilder = L2ForwardingCalcTable.BuildFlow(priorityHigh). - MatchDstMAC(packet.DestinationMAC). - Action().LoadToRegField(TargetOFPortField, ofPort). - Action().LoadRegMark(OFPortFoundRegMark). - Action().LoadIPDSCP(dataplaneTag). - Action().GotoTable(nextTable) - if packet.SourceIP != nil { - flowBuilder = flowBuilder.MatchSrcIP(packet.SourceIP) - } - } - - flowBuilder = flowBuilder.MatchCTStateNew(true).MatchCTStateTrk(true). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()) - - // Match transport header - switch packet.IPProto { - case protocol.Type_ICMP: - flowBuilder = flowBuilder.MatchProtocol(binding.ProtocolICMP) - case protocol.Type_IPv6ICMP: - flowBuilder = flowBuilder.MatchProtocol(binding.ProtocolICMPv6) - case protocol.Type_TCP: - if packet.IsIPv6 { - flowBuilder = flowBuilder.MatchProtocol(binding.ProtocolTCPv6) - } else { - flowBuilder = flowBuilder.MatchProtocol(binding.ProtocolTCP) - } - case protocol.Type_UDP: - if packet.IsIPv6 { - flowBuilder = flowBuilder.MatchProtocol(binding.ProtocolUDPv6) - } else { - flowBuilder = flowBuilder.MatchProtocol(binding.ProtocolUDP) - } - default: - flowBuilder = flowBuilder.MatchIPProtocolValue(packet.IsIPv6, packet.IPProto) - - } - if packet.IPProto == protocol.Type_TCP || packet.IPProto == protocol.Type_UDP { - if packet.DestinationPort != 0 { - flowBuilder = flowBuilder.MatchDstPort(packet.DestinationPort, nil) - } - if packet.SourcePort != 0 { - flowBuilder = flowBuilder.MatchSrcPort(packet.SourcePort, nil) - } - } - flows = 
[]binding.Flow{flowBuilder.Done()} - } - return flows -} - -func (c *client) traceflowNetworkPolicyFlows(dataplaneTag uint8, timeout uint16, category cookie.Category) []binding.Flow { - flows := []binding.Flow{} - c.conjMatchFlowLock.Lock() - defer c.conjMatchFlowLock.Unlock() - // Copy default drop rules. - for _, ctx := range c.globalConjMatchFlowCache { - if ctx.dropFlow != nil { - copyFlowBuilder := ctx.dropFlow.CopyToBuilder(priorityNormal+2, false) - if ctx.dropFlow.FlowProtocol() == "" { - copyFlowBuilderIPv6 := ctx.dropFlow.CopyToBuilder(priorityNormal+2, false) - copyFlowBuilderIPv6 = copyFlowBuilderIPv6.MatchProtocol(binding.ProtocolIPv6) - if c.ovsMetersAreSupported { - copyFlowBuilderIPv6 = copyFlowBuilderIPv6.Action().Meter(PacketInMeterIDTF) - } - flows = append(flows, copyFlowBuilderIPv6.MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().SendToController(uint8(PacketInReasonTF)). - Done()) - copyFlowBuilder = copyFlowBuilder.MatchProtocol(binding.ProtocolIP) - } - if c.ovsMetersAreSupported { - copyFlowBuilder = copyFlowBuilder.Action().Meter(PacketInMeterIDTF) - } - flows = append(flows, copyFlowBuilder.MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().SendToController(uint8(PacketInReasonTF)). - Done()) - } - } - // Copy Antrea NetworkPolicy drop rules. - for _, conj := range c.policyCache.List() { - for _, flow := range conj.(*policyRuleConjunction).metricFlows { - if flow.IsDropFlow() { - copyFlowBuilder := flow.CopyToBuilder(priorityNormal+2, false) - // Generate both IPv4 and IPv6 flows if the original drop flow doesn't match IP/IPv6. - // DSCP field is in IP/IPv6 headers so IP/IPv6 match is required in a flow. 
- if flow.FlowProtocol() == "" { - copyFlowBuilderIPv6 := flow.CopyToBuilder(priorityNormal+2, false) - copyFlowBuilderIPv6 = copyFlowBuilderIPv6.MatchProtocol(binding.ProtocolIPv6) - if c.ovsMetersAreSupported { - copyFlowBuilderIPv6 = copyFlowBuilderIPv6.Action().Meter(PacketInMeterIDTF) - } - flows = append(flows, copyFlowBuilderIPv6.MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().SendToController(uint8(PacketInReasonTF)). - Done()) - copyFlowBuilder = copyFlowBuilder.MatchProtocol(binding.ProtocolIP) - } - if c.ovsMetersAreSupported { - copyFlowBuilder = copyFlowBuilder.Action().Meter(PacketInMeterIDTF) - } - flows = append(flows, copyFlowBuilder.MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - Cookie(c.cookieAllocator.Request(category).Raw()). - Action().SendToController(uint8(PacketInReasonTF)). - Done()) - } - } - } - return flows -} - -// serviceLBBypassFlows makes packets that belong to a tracked connection bypass -// service LB tables and enter egressRuleTable directly. -func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flow { - flows := []binding.Flow{ - // Tracked connections with the ServiceCTMark (load-balanced by AntreaProxy) receive - // the macRewriteMark and are sent to egressRuleTable. - ConntrackStateTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchCTMark(ServiceCTMark). - MatchCTStateNew(false).MatchCTStateTrk(true). - Action().LoadRegMark(RewriteMACRegMark). - Action().GotoTable(EgressRuleTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done(), - // Tracked connections without the ServiceCTMark are sent to egressRuleTable - // directly. This is meant to match connections which were load-balanced by - // kube-proxy before AntreaProxy got enabled. - ConntrackStateTable.BuildFlow(priorityLow).MatchProtocol(ipProtocol). - MatchCTStateNew(false).MatchCTStateTrk(true). 
- Action().GotoTable(EgressRuleTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done(), - } - return flows -} - -// l2ForwardCalcFlow generates the flow that matches dst MAC and loads ofPort to reg. -func (c *client) l2ForwardCalcFlow(dstMAC net.HardwareAddr, ofPort uint32, skipIngressRules bool, category cookie.Category) binding.Flow { - nextTable := L2ForwardingCalcTable.GetNext() - if !skipIngressRules { - // Go to ingress NetworkPolicy tables for traffic to local Pods. - nextTable = c.ingressEntryTable - } - return L2ForwardingCalcTable.BuildFlow(priorityNormal). - MatchDstMAC(dstMAC). - Action().LoadToRegField(TargetOFPortField, ofPort). - Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() - // Broadcast, multicast, and unknown unicast packets will be dropped by - // the default flow of L2ForwardingOutTable. -} - -// traceflowL2ForwardOutputFlows generates Traceflow specific flows that outputs traceflow packets -// to OVS port and Antrea Agent after L2forwarding calculation. -func (c *client) traceflowL2ForwardOutputFlows(dataplaneTag uint8, liveTraffic, droppedOnly bool, timeout uint16, category cookie.Category) []binding.Flow { - flows := []binding.Flow{} - for _, ipProtocol := range c.ipProtocols { - if c.networkConfig.TrafficEncapMode.SupportsEncap() { - // SendToController and Output if output port is tunnel port. - fb1 := L2ForwardingOutTable.BuildFlow(priorityNormal+3). - MatchRegFieldWithValue(TargetOFPortField, config.DefaultTunOFPort). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()) - // For injected packets, only SendToController if output port is local - // gateway. In encapMode, a Traceflow packet going out of the gateway - // port (i.e. 
exiting the overlay) essentially means that the Traceflow - // request is complete. - fb2 := L2ForwardingOutTable.BuildFlow(priorityNormal+2). - MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()) - - // Do not send to controller if captures only dropped packet. - if !droppedOnly { - if c.ovsMetersAreSupported { - fb1 = fb1.Action().Meter(PacketInMeterIDTF) - fb2 = fb2.Action().Meter(PacketInMeterIDTF) - } - fb1 = fb1.Action().SendToController(uint8(PacketInReasonTF)) - fb2 = fb2.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - // Clear the loaded DSCP bits before output. - fb2 = fb2.Action().LoadIPDSCP(0). - Action().OutputToRegField(TargetOFPortField) - } - flows = append(flows, fb1.Done(), fb2.Done()) - } else { - // SendToController and Output if output port is local gateway. Unlike in - // encapMode, inter-Node Pod-to-Pod traffic is expected to go out of the - // gateway port on the way to its destination. - fb1 := L2ForwardingOutTable.BuildFlow(priorityNormal+2). - MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fb1 = fb1.Action().Meter(PacketInMeterIDTF) - } - fb1 = fb1.Action().SendToController(uint8(PacketInReasonTF)) - } - flows = append(flows, fb1.Done()) - } - // Only SendToController if output port is local gateway and destination IP is gateway. 
- gatewayIP := c.nodeConfig.GatewayConfig.IPv4 - if ipProtocol == binding.ProtocolIPv6 { - gatewayIP = c.nodeConfig.GatewayConfig.IPv6 - } - if gatewayIP != nil { - fb := L2ForwardingOutTable.BuildFlow(priorityNormal+3). - MatchRegFieldWithValue(TargetOFPortField, config.HostGatewayOFPort). - MatchDstIP(gatewayIP). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fb = fb.Action().Meter(PacketInMeterIDTF) - } - fb = fb.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - fb = fb.Action().LoadIPDSCP(0). - Action().OutputToRegField(TargetOFPortField) - } - flows = append(flows, fb.Done()) - } - // Only SendToController if output port is Pod port. - fb := L2ForwardingOutTable.BuildFlow(priorityNormal + 2). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(OFPortFoundRegMark). - Cookie(c.cookieAllocator.Request(category).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fb = fb.Action().Meter(PacketInMeterIDTF) - } - fb = fb.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - fb = fb.Action().LoadIPDSCP(0). - Action().OutputToRegField(TargetOFPortField) - } - flows = append(flows, fb.Done()) - if c.enableProxy { - // Only SendToController for hairpin traffic. - // This flow must have higher priority than the one installed by l2ForwardOutputServiceHairpinFlow - fbHairpin := L2ForwardingOutTable.BuildFlow(priorityHigh + 2). - MatchIPDSCP(dataplaneTag). - SetHardTimeout(timeout). - MatchProtocol(ipProtocol). - MatchRegMark(HairpinRegMark). 
- Cookie(c.cookieAllocator.Request(cookie.Service).Raw()) - if !droppedOnly { - if c.ovsMetersAreSupported { - fbHairpin = fbHairpin.Action().Meter(PacketInMeterIDTF) - } - fbHairpin = fbHairpin.Action().SendToController(uint8(PacketInReasonTF)) - } - if liveTraffic { - fbHairpin = fbHairpin.Action().LoadIPDSCP(0). - Action().OutputInPort() - } - flows = append(flows, fbHairpin.Done()) - } - } - return flows -} - -// l2ForwardOutputServiceHairpinFlow uses in_port action for Service -// hairpin packets to avoid packets from being dropped by OVS. -func (c *client) l2ForwardOutputServiceHairpinFlow() binding.Flow { - return L2ForwardingOutTable.BuildFlow(priorityHigh). - MatchRegMark(HairpinRegMark). - Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() -} - -// l2ForwardOutputFlows generates the flows that output packets to OVS port after L2 forwarding calculation. -func (c *client) l2ForwardOutputFlows(category cookie.Category) []binding.Flow { - var flows []binding.Flow - flows = append(flows, - L2ForwardingOutTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - L2ForwardingOutTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIPv6). - MatchRegMark(OFPortFoundRegMark). - Action().OutputToRegField(TargetOFPortField). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - return flows -} - -// l3FwdFlowToPod generates the L3 forward flows for traffic from tunnel to a -// local Pod. It rewrites the destination MAC (should be GlobalVirtualMAC) to -// the Pod interface MAC, and rewrites the source MAC to the gateway interface -// MAC. 
-func (c *client) l3FwdFlowToPod(localGatewayMAC net.HardwareAddr, podInterfaceIPs []net.IP, podInterfaceMAC net.HardwareAddr, category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, ip := range podInterfaceIPs { - ipProtocol := getIPProtocol(ip) - flowBuilder := L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol) - if !c.connectUplinkToBridge { - // dstMAC will be overwritten always for AntreaFlexibleIPAM - flowBuilder = flowBuilder.MatchRegMark(RewriteMACRegMark) - } - flow := flowBuilder.MatchDstIP(ip). - Action().SetSrcMAC(localGatewayMAC). - // Rewrite src MAC to local gateway MAC, and rewrite dst MAC to pod MAC - Action().SetDstMAC(podInterfaceMAC). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() - flows = append(flows, flow) - } - return flows -} - -// l3FwdFlowRouteToPod generates the flows to route the traffic to a Pod based on -// the destination IP. It rewrites the destination MAC of the packets to the Pod -// interface MAC. The flow is used in the networkPolicyOnly mode for the traffic -// from the gateway to a local Pod. -func (c *client) l3FwdFlowRouteToPod(podInterfaceIPs []net.IP, podInterfaceMAC net.HardwareAddr, category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, ip := range podInterfaceIPs { - ipProtocol := getIPProtocol(ip) - flows = append(flows, L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchDstIP(ip). - Action().SetDstMAC(podInterfaceMAC). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return flows -} - -// l3FwdFlowRouteToGW generates the flows to route the traffic to the gateway -// interface. It rewrites the destination MAC of the packets to the gateway -// interface MAC. The flow is used in the networkPolicyOnly mode for the traffic -// from a local Pod to remote Pods, Nodes, or external network. 
-func (c *client) l3FwdFlowRouteToGW(gwMAC net.HardwareAddr, category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, ipProto := range c.ipProtocols { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityLow).MatchProtocol(ipProto). - Action().SetDstMAC(gwMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - } - return flows -} - -// l3FwdFlowToGateway generates the L3 forward flows to rewrite the destination MAC of the packets to the gateway interface -// MAC if the destination IP is the gateway IP or the connection was initiated through the gateway interface. -func (c *client) l3FwdFlowToGateway(localGatewayIPs []net.IP, localGatewayMAC net.HardwareAddr, category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, ip := range localGatewayIPs { - ipProtocol := getIPProtocol(ip) - flows = append(flows, L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchRegMark(RewriteMACRegMark). - MatchDstIP(ip). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - // Rewrite the destination MAC address with the local host gateway MAC if the packet is in the reply direction and - // is marked with FromGatewayCTMark. This is for connections which were initiated through the gateway, to ensure that - // this reply traffic gets forwarded correctly (back to the host network namespace, through the gateway). In - // particular, it is necessary in the following 2 cases: - // 1) reply traffic for connections from a local Pod to a ClusterIP Service (when AntreaProxy is disabled and - // kube-proxy is used). In this case the destination IP address of the reply traffic is the Pod which initiated the - // connection to the Service (no SNAT). 
We need to make sure that these packets are sent back through the gateway - // so that the source IP can be rewritten (Service backend IP -> Service ClusterIP). - // 2) when hair-pinning is involved, i.e. connections between 2 local Pods, for which NAT is performed. This - // applies regardless of whether AntreaProxy is enabled or not, and thus also applies to Windows Nodes (for which - // AntreaProxy is enabled by default). One example is a Pod accessing a NodePort Service for which - // externalTrafficPolicy is set to Local, using the local Node's IP address. - for _, proto := range c.ipProtocols { - // The following two OpenFlow entries are a workaround for issue: https://github.com/antrea-io/antrea/issues/2981. - // The issue is a Windows OVS bug, which identifies a reply packet as "new" in conntrack, and mark the connection - // with "FromGatewayCTMark". The OVS datapath might drop the packet if the reply packet is actually form - // antrea-gw0 because the input_port and output port number are the same. This workaround doesn't write the - // dst MAC if the reply packet of a connection marked with "FromGatewayCTMark" but it enters OVS from antrea-gw0. - flows = append(flows, L3ForwardingTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchRegMark(FromLocalRegMark). - MatchCTMark(FromGatewayCTMark). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - - if c.networkConfig.TrafficEncapMode.SupportsEncap() { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchRegMark(FromTunnelRegMark). - MatchCTMark(FromGatewayCTMark). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). 
- Done()) - } - - if c.connectUplinkToBridge { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityHigh).MatchProtocol(proto). - MatchCTMark(FromBridgeCTMark). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(c.nodeConfig.UplinkNetConfig.MAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - } - return flows -} - -// l3FwdFlowToRemote generates the L3 forward flow for traffic to a remote Node -// (Pods or gateway) through the tunnel. -func (c *client) l3FwdFlowToRemote( - localGatewayMAC net.HardwareAddr, - peerSubnet net.IPNet, - tunnelPeer net.IP, - category cookie.Category) binding.Flow { - ipProto := getIPProtocol(peerSubnet.IP) - return L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). - MatchDstIPNet(peerSubnet). - // Rewrite src MAC to local gateway MAC and rewrite dst MAC to virtual MAC. - Action().SetSrcMAC(localGatewayMAC). - Action().SetDstMAC(GlobalVirtualMAC). - // Flow based tunnel. Set tunnel destination. - Action().SetTunnelDst(tunnelPeer). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// l3FwdFlowToRemoteViaGW generates the L3 forward flow to support traffic to -// remote via gateway. It is used when the cross-Node traffic does not require -// encapsulation (in noEncap, networkPolicyOnly, or hybrid mode). -func (c *client) l3FwdFlowToRemoteViaGW( - localGatewayMAC net.HardwareAddr, - peerSubnet net.IPNet, - category cookie.Category, - isAntreaFlexibleIPAM bool) binding.Flow { - ipProto := getIPProtocol(peerSubnet.IP) - priority := priorityNormal - // AntreaFlexibleIPAM Pod -> Per-Node IPAM Pod traffic will be sent to remote Gw directly. - if isAntreaFlexibleIPAM { - priority = priorityHigh - } - flowBuilder := L3ForwardingTable.BuildFlow(priority).MatchProtocol(ipProto). 
- MatchDstIPNet(peerSubnet) - if isAntreaFlexibleIPAM { - flowBuilder = flowBuilder.MatchRegMark(AntreaFlexibleIPAMRegMark) - } - return flowBuilder.Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// l3FwdServiceDefaultFlowsViaGW generates the default L3 forward flows to support Service traffic to pass through Antrea gateway. -func (c *client) l3FwdServiceDefaultFlowsViaGW(ipProto binding.Protocol, category cookie.Category) []binding.Flow { - gatewayMAC := c.nodeConfig.GatewayConfig.MAC - - flows := []binding.Flow{ - // This flow is used to match the packets of Service traffic: - // - NodePort/LoadBalancer request packets which pass through Antrea gateway and the Service Endpoint is not on - // local Pod CIDR or any remote Pod CIDRs. - // - ClusterIP request packets which are from Antrea gateway and the Service Endpoint is not on local Pod CIDR - // or any remote Pod CIDRs. - // - NodePort/LoadBalancer/ClusterIP response packets. - // The matched packets should leave through Antrea gateway, however, they also enter through Antrea gateway. This - // is hairpin traffic. - // Skip traffic from AntreaFlexibleIPAM Pods. - L3ForwardingTable.BuildFlow(priorityLow).MatchProtocol(ipProto). - MatchCTMark(ServiceCTMark). - MatchCTStateTrk(true). - MatchRegMark(RewriteMACRegMark). - MatchRegMark(NotAntreaFlexibleIPAMRegMark). - Action().SetDstMAC(gatewayMAC). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - } - return flows -} - -// arpResponderFlow generates the ARP responder flow entry that replies request comes from local gateway for peer -// gateway MAC. -func (c *client) arpResponderFlow(peerGatewayIP net.IP, category cookie.Category) binding.Flow { - return arpResponderTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchARPOp(arpOpRequest). - MatchARPTpa(peerGatewayIP). 
- Action().Move(binding.NxmFieldSrcMAC, binding.NxmFieldDstMAC). - Action().SetSrcMAC(GlobalVirtualMAC). - Action().LoadARPOperation(arpOpReply). - Action().Move(binding.NxmFieldARPSha, binding.NxmFieldARPTha). - Action().SetARPSha(GlobalVirtualMAC). - Action().Move(binding.NxmFieldARPSpa, binding.NxmFieldARPTpa). - Action().SetARPSpa(peerGatewayIP). - Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// arpResponderStaticFlow generates ARP reply for any ARP request with the same global virtual MAC. -// This flow is used in policy-only mode, where traffic are routed via IP not MAC. -func (c *client) arpResponderStaticFlow(category cookie.Category) binding.Flow { - return arpResponderTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchARPOp(arpOpRequest). - Action().Move(binding.NxmFieldSrcMAC, binding.NxmFieldDstMAC). - Action().SetSrcMAC(GlobalVirtualMAC). - Action().LoadARPOperation(arpOpReply). - Action().Move(binding.NxmFieldARPSha, binding.NxmFieldARPTha). - Action().SetARPSha(GlobalVirtualMAC). - Action().Move(binding.NxmFieldARPTpa, SwapField.GetNXFieldName()). - Action().Move(binding.NxmFieldARPSpa, binding.NxmFieldARPTpa). - Action().Move(SwapField.GetNXFieldName(), binding.NxmFieldARPSpa). - Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() - -} - -// podIPSpoofGuardFlow generates the flow to check IP traffic sent out from local pod. Traffic from host gateway interface -// will not be checked, since it might be pod to service traffic or host namespace traffic. -func (c *client) podIPSpoofGuardFlow(ifIPs []net.IP, ifMAC net.HardwareAddr, ifOFPort uint32, category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, ifIP := range ifIPs { - ipProtocol := getIPProtocol(ifIP) - if ipProtocol == binding.ProtocolIP { - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchInPort(ifOFPort). 
- MatchSrcMAC(ifMAC). - MatchSrcIP(ifIP). - Action().GotoTable(SpoofGuardTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } else if ipProtocol == binding.ProtocolIPv6 { - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchInPort(ifOFPort). - MatchSrcMAC(ifMAC). - MatchSrcIP(ifIP). - Action().GotoTable(IPv6Table.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } + flows = append(flows, flowBuilder.Cookie(category).Done()) } return flows } @@ -1410,345 +442,6 @@ func getIPProtocol(ip net.IP) binding.Protocol { return ipProtocol } -// serviceHairpinResponseDNATFlow generates the flow which transforms destination -// IP of the hairpin packet to the source IP. -func (c *client) serviceHairpinResponseDNATFlow(ipProtocol binding.Protocol) binding.Flow { - hpIP := hairpinIP - from := binding.NxmFieldSrcIPv4 - to := binding.NxmFieldDstIPv4 - if ipProtocol == binding.ProtocolIPv6 { - hpIP = hairpinIPv6 - from = binding.NxmFieldSrcIPv6 - to = binding.NxmFieldDstIPv6 - } - return ServiceHairpinTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchDstIP(hpIP). - Action().Move(from, to). - Action().LoadRegMark(HairpinRegMark). - Action().GotoTable(ServiceHairpinTable.GetNext()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() -} - -// serviceHairpinRegSetFlows generates the flow to set the hairpin mark for the packet which is from Antrea gateway and -// its output interface is also Antrea gateway. In table L2ForwardingOutTable #110, a packet with hairpin mark will be -// sent out with action IN_PORT, otherwise the packet with action output will be dropped. -func (c *client) serviceHairpinRegSetFlows(ipProtocol binding.Protocol) binding.Flow { - return HairpinSNATTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). - MatchRegMark(FromGatewayRegMark). - MatchRegMark(ToGatewayRegMark). - Action().LoadRegMark(HairpinRegMark). 
- Action().GotoTable(L2ForwardingOutTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() -} - -// gatewayARPSpoofGuardFlow generates the flow to check ARP traffic sent out from the local gateway interface. -func (c *client) gatewayARPSpoofGuardFlows(gatewayIP net.IP, gatewayMAC net.HardwareAddr, category cookie.Category) (flows []binding.Flow) { - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchInPort(config.HostGatewayOFPort). - MatchARPSha(gatewayMAC). - MatchARPSpa(gatewayIP). - Action().GotoTable(arpResponderTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - if c.connectUplinkToBridge { - // When the uplink bridged and NodePort/HostPort is accessed, Node will send ARP request to HostGatewayOFPort - // with arp_spa=NodeIP. This flow is to accept these ARP requests. - flows = append(flows, SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchInPort(config.HostGatewayOFPort). - MatchARPSha(gatewayMAC). - MatchARPSpa(c.nodeConfig.NodeIPv4Addr.IP). - Action().GotoTable(arpResponderTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return -} - -// arpSpoofGuardFlow generates the flow to check ARP traffic sent out from local pods interfaces. -func (c *client) arpSpoofGuardFlow(ifIP net.IP, ifMAC net.HardwareAddr, ifOFPort uint32, category cookie.Category) binding.Flow { - return SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchInPort(ifOFPort). - MatchARPSha(ifMAC). - MatchARPSpa(ifIP). - Action().GotoTable(arpResponderTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// sessionAffinityReselectFlow generates the flow which resubmits the service accessing -// packet back to ServiceLBTable if there is no endpointDNAT flow matched. 
This -// case will occur if an Endpoint is removed and is the learned Endpoint -// selection of the Service. -func (c *client) sessionAffinityReselectFlow() binding.Flow { - return EndpointDNATTable.BuildFlow(priorityLow). - MatchRegMark(EpSelectedRegMark). - Action().LoadRegMark(EpToSelectRegMark). - Action().ResubmitToTable(ServiceLBTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() -} - -// gatewayIPSpoofGuardFlow generates the flow to skip spoof guard checking for traffic sent from gateway interface. -func (c *client) gatewayIPSpoofGuardFlows(category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, proto := range c.ipProtocols { - nextTable := SpoofGuardTable.GetNext() - if proto == binding.ProtocolIPv6 { - nextTable = IPv6Table.GetID() - } - flows = append(flows, - SpoofGuardTable.BuildFlow(priorityNormal).MatchProtocol(proto). - MatchInPort(config.HostGatewayOFPort). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ) - } - return flows -} - -// serviceCIDRDNATFlow generates flows to match dst IP in service CIDR and output to host gateway interface directly. -func (c *client) serviceCIDRDNATFlows(serviceCIDRs []*net.IPNet) []binding.Flow { - var flows []binding.Flow - for _, serviceCIDR := range serviceCIDRs { - if serviceCIDR != nil { - ipProto := getIPProtocol(serviceCIDR.IP) - flows = append(flows, DNATTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). - MatchDstIPNet(*serviceCIDR). - Action().LoadToRegField(TargetOFPortField, config.HostGatewayOFPort). - Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done()) - } - } - return flows -} - -// serviceNeedLBFlow generates flows to mark packets as LB needed. -func (c *client) serviceNeedLBFlow() binding.Flow { - return SessionAffinityTable.BuildFlow(priorityMiss). 
- Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Action().LoadRegMark(EpToSelectRegMark). - Done() -} - -// arpResponderLocalFlows generates the ARP responder flow entry that replies request from local Pods for local -// gateway MAC. -// Only used in AntreaFlexibleIPAM to avoid multiple ARP replies from antrea-gw0 and uplink. -// TODO(gran): use better method to process ARP and support IPv6. -func (c *client) arpResponderLocalFlows(category cookie.Category) (flows []binding.Flow) { - if c.connectUplinkToBridge && c.nodeConfig.GatewayConfig.IPv4 != nil { - flows = append(flows, arpResponderTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolARP). - MatchARPOp(1). - MatchARPTpa(c.nodeConfig.GatewayConfig.IPv4). - Action().Move(binding.NxmFieldSrcMAC, binding.NxmFieldDstMAC). - Action().SetSrcMAC(c.nodeConfig.GatewayConfig.MAC). - Action().LoadARPOperation(2). - Action().Move(binding.NxmFieldARPSha, binding.NxmFieldARPTha). - Action().SetARPSha(c.nodeConfig.GatewayConfig.MAC). - Action().Move(binding.NxmFieldARPSpa, binding.NxmFieldARPTpa). - Action().SetARPSpa(c.nodeConfig.GatewayConfig.IPv4). - Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return -} - -// arpNormalFlow generates the flow to response arp in normal way if no flow in arpResponderTable is matched. -func (c *client) arpNormalFlow(category cookie.Category) binding.Flow { - return arpResponderTable.BuildFlow(priorityLow).MatchProtocol(binding.ProtocolARP). - Action().Normal(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -func (c *client) allowRulesMetricFlows(conjunctionID uint32, ingress bool) []binding.Flow { - metricTable := IngressMetricTable - offset := 0 - // We use the 0..31 bits of the ct_label to store the ingress rule ID and use the 32..63 bits to store the - // egress rule ID. 
- field := IngressRuleCTLabel - if !ingress { - metricTable = EgressMetricTable - offset = 32 - field = EgressRuleCTLabel - } - metricFlow := func(isCTNew bool, protocol binding.Protocol) binding.Flow { - return metricTable.BuildFlow(priorityNormal). - MatchProtocol(protocol). - MatchCTStateNew(isCTNew). - MatchCTLabelField(0, uint64(conjunctionID)< 0 { - fb = fb.MatchDstPort(portValue.Value, portValue.Mask) - } - case MatchTCPSrcPort: - fallthrough - case MatchTCPv6SrcPort: - fallthrough - case MatchUDPSrcPort: - fallthrough - case MatchUDPv6SrcPort: - fb = fb.MatchProtocol(matchKey.GetOFProtocol()) - portValue := matchValue.(types.BitRange) - if portValue.Value > 0 { - fb = fb.MatchSrcPort(portValue.Value, portValue.Mask) - } - case MatchServiceGroupID: - fb = fb.MatchRegFieldWithValue(ServiceGroupIDField, matchValue.(uint32)) - } - return fb -} - -// conjunctionExceptionFlow generates the flow to jump to a specific table if both policyRuleConjunction ID and except address are matched. -// Keeping this for reference to generic exception flow. -func (c *client) conjunctionExceptionFlow(conjunctionID uint32, tableID uint8, nextTable uint8, matchKey *types.MatchKey, matchValue interface{}) binding.Flow { - conjReg := TFIngressConjIDField - if tableID == EgressRuleTable.GetID() { - conjReg = TFEgressConjIDField - } - fb := getTableByID(tableID).BuildFlow(priorityNormal).MatchConjID(conjunctionID) - return c.addFlowMatch(fb, matchKey, matchValue). - Action().LoadToRegField(conjReg, conjunctionID). // Traceflow. - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(cookie.Policy).Raw()). - Done() -} - -// conjunctiveMatchFlow generates the flow to set conjunctive actions if the match condition is matched. 
-func (c *client) conjunctiveMatchFlow(tableID uint8, matchKey *types.MatchKey, matchValue interface{}, priority *uint16, actions []*conjunctiveAction) binding.Flow { - var ofPriority uint16 - if priority != nil { - ofPriority = *priority - } else { - ofPriority = priorityNormal - } - fb := getTableByID(tableID).BuildFlow(ofPriority) - fb = c.addFlowMatch(fb, matchKey, matchValue) - if c.deterministic { - sort.Sort(conjunctiveActionsInOrder(actions)) - } - for _, act := range actions { - fb.Action().Conjunction(act.conjID, act.clauseID, act.nClause) - } - return fb.Cookie(c.cookieAllocator.Request(cookie.Policy).Raw()).Done() -} - -// defaultDropFlow generates the flow to drop packets if the match condition is matched. -func (c *client) defaultDropFlow(table binding.Table, matchKey *types.MatchKey, matchValue interface{}) binding.Flow { - fb := table.BuildFlow(priorityNormal) - if c.enableDenyTracking { - return c.addFlowMatch(fb, matchKey, matchValue). - Action().Drop(). - Action().LoadRegMark(DispositionDropRegMark). - Action().LoadRegMark(CustomReasonDenyRegMark). - Action().SendToController(uint8(PacketInReasonNP)). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Done() - } - return c.addFlowMatch(fb, matchKey, matchValue). - Action().Drop(). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Done() -} - -// dnsPacketInFlow generates the flow to send dns response packets of fqdn policy selected -// Pods to the fqdnController for processing. -func (c *client) dnsPacketInFlow(conjunctionID uint32) binding.Flow { - return AntreaPolicyIngressRuleTable.BuildFlow(priorityDNSIntercept). - MatchConjID(conjunctionID). - Cookie(c.cookieAllocator.Request(cookie.Default).Raw()). - Action().LoadToRegField(CustomReasonField, CustomReasonDNS). - Action().SendToController(uint8(PacketInReasonNP)). 
- Done() -} - -// localProbeFlow generates the flow to forward locally generated packets to ConntrackCommitTable, bypassing ingress -// rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods. -// On Linux and when OVS kernel datapath is used, it identifies locally generated packets by matching the -// HostLocalSourceMark, otherwise it matches the source IP. The difference is because: -// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated -// traffic from kube-proxy proxied traffic. -// 2. pkt_mark field is not properly supported for OVS userspace (netdev) datapath. -// Note that there is a defect in the latter way that NodePort Service access by external clients will be masqueraded as -// a local gateway IP to bypass Network Policies. See https://github.com/antrea-io/antrea/issues/280. -// TODO: Fix it after replacing kube-proxy with AntreaProxy. -func (c *client) localProbeFlow(localGatewayIPs []net.IP, category cookie.Category) []binding.Flow { - var flows []binding.Flow - if runtime.IsWindowsPlatform() || c.ovsDatapathType == ovsconfig.OVSDatapathNetdev { - for _, ip := range localGatewayIPs { - ipProtocol := getIPProtocol(ip) - flows = append(flows, IngressRuleTable.BuildFlow(priorityHigh). - MatchProtocol(ipProtocol). - MatchSrcIP(ip). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - } else { - flows = append(flows, IngressRuleTable.BuildFlow(priorityHigh). - MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return flows -} - -// snatSkipNodeFlow installs a flow to skip SNAT for traffic to the transport IP of the a remote Node. 
-func (c *client) snatSkipNodeFlow(nodeIP net.IP, category cookie.Category) binding.Flow { - ipProto := getIPProtocol(nodeIP) - // This flow is for the traffic to the remote Node IP. - return L3ForwardingTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchRegMark(FromLocalRegMark). - MatchDstIP(nodeIP). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - -// snatCommonFlows installs the default flows for performing SNAT for traffic to -// the external network. The flows identify the packets to external, and send -// them to SNATTable, where SNAT IPs are looked up for the packets. -func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewayMAC net.HardwareAddr, exceptCIDRs []net.IPNet, category cookie.Category) []binding.Flow { - nextTable := L3ForwardingTable.GetNext() - ipProto := getIPProtocol(localSubnet.IP) - flows := []binding.Flow{ - // First install flows for traffic that should bypass SNAT. - // This flow is for traffic to the local Pod subnet that don't need MAC rewriting (L2 forwarding case). Other - // traffic to the local Pod subnet will be handled by L3 forwarding rules. - L3ForwardingTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchRegFieldWithValue(RewriteMACRegMark.GetField(), 0). - MatchDstIPNet(localSubnet). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // This flow is for the traffic to the local Node IP. - L3ForwardingTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchRegMark(FromLocalRegMark). - MatchDstIP(nodeIP). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // The return traffic of connections to a local Pod through the gateway interface (so FromGatewayCTMark is set) - // should bypass SNAT too. 
But it has been covered by the gatewayCT related flow generated in l3FwdFlowToGateway - // which forwards all reply traffic for such connections back to the gateway interface with the high priority. - - // Send the traffic to external to SNATTable. - L3ForwardingTable.BuildFlow(priorityLow). - MatchProtocol(ipProto). - MatchRegMark(FromLocalRegMark). - Action().GotoTable(SNATTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // For the traffic tunneled from remote Nodes, rewrite the - // destination MAC to the gateway interface MAC. - L3ForwardingTable.BuildFlow(priorityLow). - MatchProtocol(ipProto). - MatchRegMark(FromTunnelRegMark). - Action().SetDstMAC(localGatewayMAC). - Action().GotoTable(SNATTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - - // Drop the traffic from remote Nodes if no matched SNAT policy. - SNATTable.BuildFlow(priorityLow). - MatchProtocol(ipProto). - MatchCTStateNew(true).MatchCTStateTrk(true). - MatchRegMark(FromTunnelRegMark). - Action().Drop(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - } - for _, cidr := range exceptCIDRs { - flows = append(flows, L3ForwardingTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchRegMark(FromLocalRegMark). - MatchDstIPNet(cidr). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return flows -} - -// snatIPFromTunnelFlow generates a flow that marks SNAT packets tunnelled from -// remote Nodes. The SNAT IP matches the packet's tunnel destination IP. -func (c *client) snatIPFromTunnelFlow(snatIP net.IP, mark uint32) binding.Flow { - ipProto := getIPProtocol(snatIP) - return SNATTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchCTStateNew(true).MatchCTStateTrk(true). - MatchTunnelDst(snatIP). - Action().LoadPktMarkRange(mark, snatPktMarkRange). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). 
- Done() -} - -// snatRuleFlow generates a flow that applies the SNAT rule for a local Pod. If -// the SNAT IP exists on the local Node, it sets the packet mark with the ID of -// the SNAT IP, for the traffic from the ofPort to external; if the SNAT IP is -// on a remote Node, it tunnels the packets to the SNAT IP. -func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr) binding.Flow { - ipProto := getIPProtocol(snatIP) - if snatMark != 0 { - // Local SNAT IP. - return SNATTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchCTStateNew(true).MatchCTStateTrk(true). - MatchInPort(ofPort). - Action().LoadPktMarkRange(snatMark, snatPktMarkRange). - Action().GotoTable(SNATTable.GetNext()). - Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done() - } - // SNAT IP should be on a remote Node. - return SNATTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchInPort(ofPort). - Action().SetSrcMAC(localGatewayMAC). - Action().SetDstMAC(GlobalVirtualMAC). - // Set tunnel destination to the SNAT IP. - Action().SetTunnelDst(snatIP). - Action().GotoTable(L3DecTTLTable.GetID()). - Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done() -} - -// loadBalancerServiceFromOutsideFlow generates the flow to forward LoadBalancer service traffic from outside node -// to gateway. kube-proxy will then handle the traffic. -// This flow is for Windows Node only. -func (c *client) loadBalancerServiceFromOutsideFlow(svcIP net.IP, svcPort uint16, protocol binding.Protocol) binding.Flow { - return UplinkTable.BuildFlow(priorityHigh). - MatchProtocol(protocol). - MatchDstPort(svcPort, nil). - MatchRegMark(FromUplinkRegMark). - MatchDstIP(svcIP). - Action().Output(config.HostGatewayOFPort). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). 
- Done() -} - -// serviceClassifierFlows generate the flows to match the first packet of Service NodePort and set a bit of a register -// to mark the Service type as NodePort. -func (c *client) serviceClassifierFlows(nodePortAddresses []net.IP, ipProtocol binding.Protocol) []binding.Flow { - virtualServiceIP := config.VirtualServiceIPv4 - if ipProtocol == binding.ProtocolIPv6 { - virtualServiceIP = config.VirtualServiceIPv6 - } - // Generate flows for every NodePort IP address. The flows are used to match the first packet of Service NodePort from - // Pod. - var flows []binding.Flow - for i := range nodePortAddresses { - flows = append(flows, - ServiceClassifierTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - MatchProtocol(ipProtocol). - MatchDstIP(nodePortAddresses[i]). - Action().LoadRegMark(ToNodePortAddressRegMark). - Done()) - } - // Generate flow for the virtual IP. The flow is used to match the first packet of Service NodePort from Antrea gateway, - // because the destination IP of the packet has already performed DNAT with the virtual IP on host. - flows = append(flows, - ServiceClassifierTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - MatchProtocol(ipProtocol). - MatchDstIP(virtualServiceIP). - Action().LoadRegMark(ToNodePortAddressRegMark). - Done()) - - return flows -} - -// serviceLearnFlow generates the flow with learn action which adds new flows in -// sessionAffinityTable according to the Endpoint selection decision. -func (c *client) serviceLearnFlow(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16, nodeLocalExternal bool, svcType v1.ServiceType) binding.Flow { - // Using unique cookie ID here to avoid learned flow cascade deletion. 
- cookieID := c.cookieAllocator.RequestWithObjectID(cookie.Service, uint32(groupID)).Raw() - - var flowBuilder binding.FlowBuilder - if svcType == v1.ServiceTypeNodePort { - unionVal := (ToNodePortAddressRegMark.GetValue() << ServiceEPStateField.GetRange().Length()) + EpToLearnRegMark.GetValue() - flowBuilder = ServiceLBTable.BuildFlow(priorityLow). - Cookie(cookieID). - MatchRegFieldWithValue(NodePortUnionField, unionVal). - MatchProtocol(protocol). - MatchDstPort(svcPort, nil) - } else { - flowBuilder = ServiceLBTable.BuildFlow(priorityLow). - Cookie(cookieID). - MatchRegMark(EpToLearnRegMark). - MatchDstIP(svcIP). - MatchProtocol(protocol). - MatchDstPort(svcPort, nil) - } - - // affinityTimeout is used as the OpenFlow "hard timeout": learned flow will be removed from - // OVS after that time regarding of whether traffic is still hitting the flow. This is the - // desired behavior based on the K8s spec. Note that existing connections will keep going to - // the same endpoint because of connection tracking; and that is also the desired behavior. - learnFlowBuilderLearnAction := flowBuilder. - Action().Learn(SessionAffinityTable.GetID(), priorityNormal, 0, affinityTimeout, cookieID). 
- DeleteLearned() - ipProtocol := binding.ProtocolIP - switch protocol { - case binding.ProtocolTCP: - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.MatchLearnedTCPDstPort() - case binding.ProtocolUDP: - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.MatchLearnedUDPDstPort() - case binding.ProtocolSCTP: - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.MatchLearnedSCTPDstPort() - case binding.ProtocolTCPv6: - ipProtocol = binding.ProtocolIPv6 - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.MatchLearnedTCPv6DstPort() - case binding.ProtocolUDPv6: - ipProtocol = binding.ProtocolIPv6 - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.MatchLearnedUDPv6DstPort() - case binding.ProtocolSCTPv6: - ipProtocol = binding.ProtocolIPv6 - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.MatchLearnedSCTPv6DstPort() - } - // If externalTrafficPolicy of NodePort/LoadBalancer is Cluster, the learned flow which - // is used to match the first packet of NodePort/LoadBalancer also requires SNAT. - if (svcType == v1.ServiceTypeNodePort || svcType == v1.ServiceTypeLoadBalancer) && !nodeLocalExternal { - learnFlowBuilderLearnAction = learnFlowBuilderLearnAction.LoadRegMark(ServiceNeedSNATRegMark) - } - - if ipProtocol == binding.ProtocolIP { - return learnFlowBuilderLearnAction. - MatchLearnedDstIP(). - MatchLearnedSrcIP(). - LoadFieldToField(EndpointIPField, EndpointIPField). - LoadFieldToField(EndpointPortField, EndpointPortField). - LoadRegMark(EpSelectedRegMark). - LoadRegMark(RewriteMACRegMark). - Done(). - Action().LoadRegMark(EpSelectedRegMark). - Action().GotoTable(EndpointDNATTable.GetID()). - Done() - } else if ipProtocol == binding.ProtocolIPv6 { - return learnFlowBuilderLearnAction. - MatchLearnedDstIPv6(). - MatchLearnedSrcIPv6(). - LoadXXRegToXXReg(EndpointIP6Field, EndpointIP6Field). - LoadFieldToField(EndpointPortField, EndpointPortField). - LoadRegMark(EpSelectedRegMark). - LoadRegMark(RewriteMACRegMark). 
- Done(). - Action().LoadRegMark(EpSelectedRegMark). - Action().GotoTable(EndpointDNATTable.GetID()). - Done() - } - return nil -} - -// serviceLBFlow generates the flow which uses the specific group to do Endpoint -// selection. -func (c *client) serviceLBFlow(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, withSessionAffinity, nodeLocalExternal bool, svcType v1.ServiceType) binding.Flow { - var lbResultMark *binding.RegMark - if withSessionAffinity { - lbResultMark = EpToLearnRegMark - } else { - lbResultMark = EpSelectedRegMark - } - - var flowBuilder binding.FlowBuilder - if svcType == v1.ServiceTypeNodePort { - // If externalTrafficPolicy of NodePort is Cluster, the first packet of NodePort requires SNAT, so nodeLocalExternal - // will be false, and ServiceNeedSNATRegMark will be set. If externalTrafficPolicy of NodePort is Local, the first - // packet of NodePort doesn't require SNAT, ServiceNeedSNATRegMark won't be set. - unionVal := (ToNodePortAddressRegMark.GetValue() << ServiceEPStateField.GetRange().Length()) + EpToSelectRegMark.GetValue() - flowBuilder = ServiceLBTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - MatchProtocol(protocol). - MatchRegFieldWithValue(NodePortUnionField, unionVal). - MatchDstPort(svcPort, nil). - Action().LoadRegMark(lbResultMark). - Action().LoadRegMark(RewriteMACRegMark) - if !nodeLocalExternal { - flowBuilder = flowBuilder.Action().LoadRegMark(ServiceNeedSNATRegMark) - } - } else { - // If Service type is LoadBalancer, as above NodePort. - flowBuilder = ServiceLBTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - MatchProtocol(protocol). - MatchDstPort(svcPort, nil). - MatchDstIP(svcIP). - MatchRegMark(EpToSelectRegMark). - Action().LoadRegMark(lbResultMark). 
- Action().LoadRegMark(RewriteMACRegMark) - if svcType == v1.ServiceTypeLoadBalancer && !nodeLocalExternal { - flowBuilder = flowBuilder.Action().LoadRegMark(ServiceNeedSNATRegMark) - } - } - return flowBuilder. - Action().LoadToRegField(ServiceGroupIDField, uint32(groupID)). - Action().Group(groupID).Done() -} - -// endpointDNATFlow generates the flow which transforms the Service Cluster IP -// to the Endpoint IP according to the Endpoint selection decision which is stored -// in regs. -func (c *client) endpointDNATFlow(endpointIP net.IP, endpointPort uint16, protocol binding.Protocol) binding.Flow { - ipProtocol := getIPProtocol(endpointIP) - unionVal := (EpSelectedRegMark.GetValue() << EndpointPortField.GetRange().Length()) + uint32(endpointPort) - - flowBuilder := EndpointDNATTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - MatchRegFieldWithValue(EpUnionField, unionVal). - MatchProtocol(protocol) - ctZone := CtZone - if ipProtocol == binding.ProtocolIP { - ipVal := binary.BigEndian.Uint32(endpointIP.To4()) - flowBuilder = flowBuilder.MatchRegFieldWithValue(EndpointIPField, ipVal) - } else { - ctZone = CtZoneV6 - ipVal := []byte(endpointIP) - flowBuilder = flowBuilder.MatchXXReg(EndpointIP6Field.GetRegID(), ipVal) - } - return flowBuilder.Action().CT(true, EndpointDNATTable.GetNext(), ctZone). - DNAT( - &binding.IPRange{StartIP: endpointIP, EndIP: endpointIP}, - &binding.PortRange{StartPort: endpointPort, EndPort: endpointPort}, - ). - LoadToCtMark(ServiceCTMark). - CTDone(). - Done() -} - -// hairpinSNATFlow generates the flow which does SNAT for Service -// hairpin packets and loads the hairpin mark to markReg. -func (c *client) hairpinSNATFlow(endpointIP net.IP) binding.Flow { - ipProtocol := getIPProtocol(endpointIP) - hpIP := hairpinIP - if ipProtocol == binding.ProtocolIPv6 { - hpIP = hairpinIPv6 - } - return HairpinSNATTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). 
- MatchProtocol(ipProtocol). - MatchDstIP(endpointIP). - MatchSrcIP(endpointIP). - Action().SetSrcIP(hpIP). - Action().LoadRegMark(HairpinRegMark). - Action().GotoTable(L2ForwardingOutTable.GetID()). - Done() -} - -// serviceEndpointGroup creates/modifies the group/buckets of Endpoints. If the -// withSessionAffinity is true, then buckets will resubmit packets back to -// ServiceLBTable to trigger the learn flow, the learn flow will then send packets -// to EndpointDNATTable. Otherwise, buckets will resubmit packets to -// EndpointDNATTable directly. -func (c *client) serviceEndpointGroup(groupID binding.GroupIDType, withSessionAffinity bool, endpoints ...proxy.Endpoint) binding.Group { - group := c.bridge.CreateGroup(groupID).ResetBuckets() - var resubmitTableID uint8 - if withSessionAffinity { - resubmitTableID = ServiceLBTable.GetID() - } else { - resubmitTableID = EndpointDNATTable.GetID() - } - - for _, endpoint := range endpoints { - endpointPort, _ := endpoint.Port() - endpointIP := net.ParseIP(endpoint.IP()) - portVal := portToUint16(endpointPort) - ipProtocol := getIPProtocol(endpointIP) - if ipProtocol == binding.ProtocolIP { - ipVal := binary.BigEndian.Uint32(endpointIP.To4()) - group = group.Bucket().Weight(100). - LoadToRegField(EndpointIPField, ipVal). - LoadToRegField(EndpointPortField, uint32(portVal)). - ResubmitToTable(resubmitTableID). - Done() - } else if ipProtocol == binding.ProtocolIPv6 { - ipVal := []byte(endpointIP) - group = group.Bucket().Weight(100). - LoadXXReg(EndpointIP6Field.GetRegID(), ipVal). - LoadToRegField(EndpointPortField, uint32(portVal)). - ResubmitToTable(resubmitTableID). - Done() - } - } - return group -} - -// decTTLFlows decrements TTL by one for the packets forwarded across Nodes. -// The TTL decrement should be skipped for the packets which enter OVS pipeline -// from the gateway interface, as the host IP stack should have decremented the -// TTL already for such packets. 
-func (c *client) decTTLFlows(category cookie.Category) []binding.Flow { - var flows []binding.Flow - for _, proto := range c.ipProtocols { - flows = append(flows, - // Skip packets from the gateway interface. - L3DecTTLTable.BuildFlow(priorityHigh). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchProtocol(proto). - MatchRegMark(FromGatewayRegMark). - Action().GotoTable(L3DecTTLTable.GetNext()). - Done(), - L3DecTTLTable.BuildFlow(priorityNormal). - Cookie(c.cookieAllocator.Request(category).Raw()). - MatchProtocol(proto). - Action().DecTTL(). - Action().GotoTable(L3DecTTLTable.GetNext()). - Done(), - ) - } - return flows -} - -// externalFlows returns the flows needed to enable SNAT for external traffic. -func (c *client) externalFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewayMAC net.HardwareAddr, exceptCIDRs []net.IPNet) []binding.Flow { - if !c.enableEgress { - return nil - } - return c.snatCommonFlows(nodeIP, localSubnet, localGatewayMAC, exceptCIDRs, cookie.SNAT) -} - // policyConjKeyFuncKeyFunc knows how to get key of a *policyRuleConjunction. 
func policyConjKeyFunc(obj interface{}) (string, error) { conj := obj.(*policyRuleConjunction) @@ -2516,63 +476,93 @@ func (c *client) genPacketInMeter(meterID binding.MeterIDType, rate uint32) bind return meter } -func (c *client) generatePipeline() { - c.createOFTable(ClassifierTable, SpoofGuardTable.GetID(), binding.TableMissActionDrop) - c.createOFTable(arpResponderTable, binding.LastTableID, binding.TableMissActionDrop) - c.createOFTable(ConntrackTable, ConntrackStateTable.GetID(), binding.TableMissActionNone) - c.createOFTable(EgressRuleTable, EgressDefaultTable.GetID(), binding.TableMissActionNext) - c.createOFTable(EgressDefaultTable, EgressMetricTable.GetID(), binding.TableMissActionNext) - c.createOFTable(EgressMetricTable, L3ForwardingTable.GetID(), binding.TableMissActionNext) - c.createOFTable(L3ForwardingTable, L2ForwardingCalcTable.GetID(), binding.TableMissActionNext) - c.createOFTable(L3DecTTLTable, L2ForwardingCalcTable.GetID(), binding.TableMissActionNext) - // Packets from L2ForwardingCalcTable should be forwarded to IngressMetricTable by default to collect ingress stats. 
- c.createOFTable(L2ForwardingCalcTable, IngressMetricTable.GetID(), binding.TableMissActionNext) - c.createOFTable(IngressRuleTable, IngressDefaultTable.GetID(), binding.TableMissActionNext) - c.createOFTable(IngressDefaultTable, IngressMetricTable.GetID(), binding.TableMissActionNext) - c.createOFTable(IngressMetricTable, ConntrackCommitTable.GetID(), binding.TableMissActionNext) - c.createOFTable(L2ForwardingOutTable, binding.LastTableID, binding.TableMissActionDrop) - if c.enableProxy { - SpoofGuardTable = c.createOFTable(SpoofGuardTable, ServiceHairpinTable.GetID(), binding.TableMissActionDrop) - IPv6Table = c.createOFTable(IPv6Table, ServiceHairpinTable.GetID(), binding.TableMissActionNext) - if c.proxyAll { - ServiceHairpinTable = c.createOFTable(ServiceHairpinTable, ServiceConntrackTable.GetID(), binding.TableMissActionNext) - ServiceConntrackTable = c.createOFTable(ServiceConntrackTable, ConntrackTable.GetID(), binding.TableMissActionNext) - ServiceClassifierTable = c.createOFTable(ServiceClassifierTable, binding.LastTableID, binding.TableMissActionNone) - ServiceConntrackCommitTable = c.createOFTable(ServiceConntrackCommitTable, HairpinSNATTable.GetID(), binding.TableMissActionNext) - } else { - ServiceHairpinTable = c.createOFTable(ServiceHairpinTable, ConntrackTable.GetID(), binding.TableMissActionNext) +func generatePipeline(templates []*pipelineTemplate) binding.Pipeline { + pipelineID := binding.NewPipelineID() + sortedTableMap := make(map[binding.StageID][]binding.Table) + + // PipelineClassifierTable ID is always 0, and it is the first table for all pipelines. Create PipelineClassifierTable + // on the bridge when building a pipeline. 
+ if PipelineClassifierTable.ofTable == nil { + PipelineClassifierTable.ofTable = binding.NewOFTable(binding.NextTableID(), PipelineClassifierTable.name, binding.ClassifierStage, binding.AllPipelines) + } + for i := binding.ClassifierStage; i <= binding.LastStage; i++ { + tableMap := make(map[*FeatureTable]uint8, 0) + + for _, template := range templates { + if tables, found := template.stageTables[i]; found { + for _, tr := range tables { + t := tr.table + p := tr.priority + op, ok := tableMap[t] + if !ok { + t.features = sets.NewInt(int(template.feature)) + tableMap[t] = p + continue + } + if op < p { + tableMap[t] = p + t.features.Insert(int(template.feature)) + } + } + } } - ConntrackStateTable = c.createOFTable(ConntrackStateTable, EndpointDNATTable.GetID(), binding.TableMissActionNext) - SessionAffinityTable = c.createOFTable(SessionAffinityTable, binding.LastTableID, binding.TableMissActionNone) - ServiceLBTable = c.createOFTable(ServiceLBTable, EndpointDNATTable.GetID(), binding.TableMissActionNext) - EndpointDNATTable = c.createOFTable(EndpointDNATTable, c.egressEntryTable, binding.TableMissActionNext) - ConntrackCommitTable = c.createOFTable(ConntrackCommitTable, HairpinSNATTable.GetID(), binding.TableMissActionNext) - HairpinSNATTable = c.createOFTable(HairpinSNATTable, L2ForwardingOutTable.GetID(), binding.TableMissActionNext) - } else { - c.createOFTable(SpoofGuardTable, ConntrackTable.GetID(), binding.TableMissActionDrop) - c.createOFTable(IPv6Table, ConntrackTable.GetID(), binding.TableMissActionNext) - c.createOFTable(ConntrackStateTable, DNATTable.GetID(), binding.TableMissActionNext) - c.createOFTable(DNATTable, c.egressEntryTable, binding.TableMissActionNext) - c.createOFTable(ConntrackCommitTable, L2ForwardingOutTable.GetID(), binding.TableMissActionNext) - } - // The default SNAT is implemented with OVS on Windows. 
- if c.enableEgress || runtime.IsWindowsPlatform() { - c.createOFTable(SNATTable, L2ForwardingCalcTable.GetID(), binding.TableMissActionNext) - } - if runtime.IsWindowsPlatform() || c.connectUplinkToBridge { - c.createOFTable(UplinkTable, SpoofGuardTable.GetID(), binding.TableMissActionNone) + if len(tableMap) == 0 { + continue + } + + // Sort the tables according to the priority in the same stage. + type tablePriority struct { + *FeatureTable + priority uint8 + } + tempSlice := make([]tablePriority, 0) + for t, p := range tableMap { + tempSlice = append(tempSlice, tablePriority{t, p}) + } + sort.Slice(tempSlice, func(i, j int) bool { + return tempSlice[i].priority > tempSlice[j].priority + }) + + tableSlice := make([]binding.Table, 0) + for id := range tempSlice { + // Generate the sequencing IDs for tables. + tableID := binding.NextTableID() + ft := tempSlice[id].FeatureTable + ft.ofTable = binding.NewOFTable(tableID, ft.name, i, pipelineID) + addTableToCache(ft) + tableSlice = append(tableSlice, ft.ofTable) + } + sortedTableMap[i] = tableSlice } - if c.enableAntreaPolicy { - c.createOFTable(AntreaPolicyEgressRuleTable, EgressRuleTable.GetID(), binding.TableMissActionNext) - c.createOFTable(AntreaPolicyIngressRuleTable, IngressRuleTable.GetID(), binding.TableMissActionNext) + return binding.NewPipeline(pipelineID, sortedTableMap) +} + +func createPipelineOnBridge(bridge binding.Bridge, pipelines map[ofProtocol]binding.Pipeline) { + bridge.CreateTable(PipelineClassifierTable.ofTable, binding.LastTableID, binding.TableMissActionDrop) + for _, pipeline := range pipelines { + tables := pipeline.ListAllTables() + for i, t := range tables { + var nextID uint8 + var missAction binding.MissActionType + if pipeline.IsLastTable(t) { + nextID = binding.LastTableID + missAction = binding.TableMissActionDrop + } else { + nextID = tables[i+1].GetID() + missAction = binding.TableMissActionNext + } + tables[i].SetNext(nextID) + bridge.CreateTable(t, nextID, missAction) + } } } 
-// createOFTable sets the missAction and the next table ID of the given table according to the pipeline. Then it creates the table on the bridge. At last, it adds the table into the ofTableCache. -func (c *client) createOFTable(table binding.Table, nextID uint8, missAction binding.MissActionType) binding.Table { - c.bridge.CreateTable(table, nextID, missAction) - ofTableCache.Add(table) - return table +func pipelineClassifyFlow(protocol binding.Protocol, pipeline binding.Pipeline) binding.Flow { + targetTable := pipeline.GetFirstTable() + return PipelineClassifierTable.ofTable.BuildFlow(priorityNormal). + MatchProtocol(protocol). + Action().ResubmitToTables(targetTable.GetID()). + Done() } // NewClient is the constructor of the Client interface. @@ -2586,40 +576,20 @@ func NewClient(bridgeName string, proxyAll bool, connectUplinkToBridge bool) Client { bridge := binding.NewOFBridge(bridgeName, mgmtAddr) - policyCache := cache.NewIndexer( - policyConjKeyFunc, - cache.Indexers{priorityIndex: priorityIndexFunc}, - ) c := &client{ - bridge: bridge, - enableProxy: enableProxy, - proxyAll: proxyAll, - enableAntreaPolicy: enableAntreaPolicy, - enableDenyTracking: enableDenyTracking, - enableEgress: enableEgress, - connectUplinkToBridge: connectUplinkToBridge, - nodeFlowCache: newFlowCategoryCache(), - podFlowCache: newFlowCategoryCache(), - serviceFlowCache: newFlowCategoryCache(), - tfFlowCache: newFlowCategoryCache(), - policyCache: policyCache, - groupCache: sync.Map{}, - globalConjMatchFlowCache: map[string]*conjMatchFlowContext{}, - packetInHandlers: map[uint8]map[string]PacketInHandler{}, - ovsctlClient: ovsctl.NewClient(bridgeName), - ovsDatapathType: ovsDatapathType, - ovsMetersAreSupported: ovsMetersAreSupported(ovsDatapathType), + bridge: bridge, + enableProxy: enableProxy, + proxyAll: proxyAll, + enableAntreaPolicy: enableAntreaPolicy, + enableDenyTracking: enableDenyTracking, + enableEgress: enableEgress, + connectUplinkToBridge: connectUplinkToBridge, + 
pipelines: make(map[ofProtocol]binding.Pipeline), + ovsctlClient: ovsctl.NewClient(bridgeName), + ovsDatapathType: ovsDatapathType, + ovsMetersAreSupported: ovsMetersAreSupported(ovsDatapathType), } c.ofEntryOperations = c - if enableAntreaPolicy { - c.egressEntryTable, c.ingressEntryTable = AntreaPolicyEgressRuleTable.GetID(), AntreaPolicyIngressRuleTable.GetID() - } else { - c.egressEntryTable, c.ingressEntryTable = EgressRuleTable.GetID(), IngressRuleTable.GetID() - } - if enableEgress { - c.snatFlowCache = newFlowCategoryCache() - } - c.generatePipeline() return c } @@ -2636,3 +606,10 @@ func (sl conjunctiveActionsInOrder) Less(i, j int) bool { } return sl[i].nClause < sl[j].nClause } + +func addTableToCache(ft *FeatureTable) { + _, exists, _ := tableCache.GetByKey(fmt.Sprintf("%d", ft.GetID())) + if !exists { + tableCache.Add(ft) + } +} diff --git a/pkg/agent/openflow/pipeline_other.go b/pkg/agent/openflow/pipeline_other.go deleted file mode 100644 index 3794280041a..00000000000 --- a/pkg/agent/openflow/pipeline_other.go +++ /dev/null @@ -1,98 +0,0 @@ -//go:build !windows -// +build !windows - -// package openflow is needed by antctl which is compiled for macOS too. - -// Copyright 2021 Antrea Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package openflow - -import ( - "net" - - "antrea.io/antrea/pkg/agent/config" - "antrea.io/antrea/pkg/agent/openflow/cookie" - binding "antrea.io/antrea/pkg/ovs/openflow" -) - -func (c *client) snatMarkFlows(snatIP net.IP, mark uint32) []binding.Flow { - return []binding.Flow{c.snatIPFromTunnelFlow(snatIP, mark)} -} - -// hostBridgeUplinkFlows generates the flows that forward traffic between the -// bridge local port and the uplink port to support the host traffic. -// TODO(gran): sync latest changes from pipeline_windows.go -func (c *client) hostBridgeUplinkFlows(localSubnet net.IPNet, category cookie.Category) (flows []binding.Flow) { - flows = []binding.Flow{ - ClassifierTable.BuildFlow(priorityNormal). - MatchInPort(config.UplinkOFPort). - Action().Output(config.BridgeOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ClassifierTable.BuildFlow(priorityNormal). - MatchInPort(config.BridgeOFPort). - Action().Output(config.UplinkOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - } - // Do not forward packet to per-Node IPAM Pod directly to avoid traffic issue. - flows = append(flows, - // Handle incoming ARP request for AntreaFlexibleIPAM Pods. - ClassifierTable.BuildFlow(priorityHigh). - MatchInPort(config.UplinkOFPort). - MatchProtocol(binding.ProtocolARP). - Action().Normal(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ClassifierTable.BuildFlow(priorityHigh). - MatchInPort(config.BridgeOFPort). - MatchProtocol(binding.ProtocolARP). - Action().Normal(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Handle packet to Node. - // Must use a separate flow to Output(config.BridgeOFPort), otherwise OVS will drop the packet: - // output:NXM_NX_REG1[] - // >> output port 4294967294 is out of range - // Datapath actions: drop - // TODO(gran): support Traceflow - L2ForwardingCalcTable.BuildFlow(priorityNormal). - MatchDstMAC(c.nodeConfig.UplinkNetConfig.MAC). 
- Action().LoadToRegField(TargetOFPortField, config.BridgeOFPort). - Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - L2ForwardingOutTable.BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). - MatchRegMark(ToBridgeRegMark). - MatchRegMark(OFPortFoundRegMark). - Action().Output(config.BridgeOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Handle outgoing packet from AntreaFlexibleIPAM Pods. Broadcast is not supported. - L2ForwardingCalcTable.BuildFlow(priorityLow). - MatchRegMark(AntreaFlexibleIPAMRegMark). - Action().LoadToRegField(TargetOFPortField, config.UplinkOFPort). - Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - return flows -} - -func (c *client) l3FwdFlowToRemoteViaRouting(localGatewayMAC net.HardwareAddr, remoteGatewayMAC net.HardwareAddr, - category cookie.Category, peerIP net.IP, peerPodCIDR *net.IPNet) []binding.Flow { - return []binding.Flow{c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, category, false)} -} diff --git a/pkg/agent/openflow/pipeline_test.go b/pkg/agent/openflow/pipeline_test.go new file mode 100644 index 00000000000..2af2306ef23 --- /dev/null +++ b/pkg/agent/openflow/pipeline_test.go @@ -0,0 +1,237 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type ipStack int + +const ( + ipv4Only ipStack = iota + ipv6Only + dualStack +) + +func TestBuildPipeline(t *testing.T) { + ipStackMap := map[ipStack][]binding.Protocol{ + ipv4Only: {binding.ProtocolIP}, + ipv6Only: {binding.ProtocolIPv6}, + dualStack: {binding.ProtocolIP, binding.ProtocolIPv6}, + } + for _, tc := range []struct { + ipStack ipStack + features []feature + expectedTables map[ofProtocol][]*FeatureTable + }{ + { + ipStack: dualStack, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[dualStack]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: true, proxyAll: true}, + &featureEgress{}, + }, + expectedTables: map[ofProtocol][]*FeatureTable{ + ofProtocolIP: { + ClassifierTable, + SpoofGuardTable, + IPv6Table, + SNATConntrackTable, + ConntrackTable, + ConntrackStateTable, + NodePortProbeTable, + SessionAffinityTable, + ServiceLBTable, + EndpointDNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + L3ForwardingTable, + ServiceHairpinMarkTable, + L3DecTTLTable, + SNATTable, + SNATConntrackCommitTable, + L2ForwardingCalcTable, + AntreaPolicyIngressRuleTable, + IngressRuleTable, + IngressDefaultTable, + IngressMetricTable, + ConntrackCommitTable, + L2ForwardingOutTable, + }, + ofProtocolARP: { + ARPSpoofGuardTable, + ARPResponderTable, + }, + }, + }, + { + ipStack: ipv6Only, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[ipv6Only]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: true, proxyAll: true}, + &featureEgress{}, + }, + expectedTables: map[ofProtocol][]*FeatureTable{ + ofProtocolIP: { + ClassifierTable, + SpoofGuardTable, + IPv6Table, 
+ SNATConntrackTable, + ConntrackTable, + ConntrackStateTable, + NodePortProbeTable, + SessionAffinityTable, + ServiceLBTable, + EndpointDNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + L3ForwardingTable, + ServiceHairpinMarkTable, + L3DecTTLTable, + SNATTable, + SNATConntrackCommitTable, + L2ForwardingCalcTable, + AntreaPolicyIngressRuleTable, + IngressRuleTable, + IngressDefaultTable, + IngressMetricTable, + ConntrackCommitTable, + L2ForwardingOutTable, + }, + }, + }, + { + ipStack: ipv4Only, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[ipv4Only]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: false}, + &featureEgress{}, + }, + expectedTables: map[ofProtocol][]*FeatureTable{ + ofProtocolIP: { + ClassifierTable, + SpoofGuardTable, + ConntrackTable, + ConntrackStateTable, + DNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + L3ForwardingTable, + L3DecTTLTable, + SNATTable, + L2ForwardingCalcTable, + AntreaPolicyIngressRuleTable, + IngressRuleTable, + IngressDefaultTable, + IngressMetricTable, + ConntrackCommitTable, + L2ForwardingOutTable, + }, + ofProtocolARP: { + ARPSpoofGuardTable, + ARPResponderTable, + }, + }, + }, + { + ipStack: ipv4Only, + features: []feature{ + &featurePodConnectivity{ipProtocols: ipStackMap[ipv4Only]}, + &featureNetworkPolicy{enableAntreaPolicy: true}, + &featureService{enableProxy: true, proxyAll: false}, + &featureEgress{enableProxy: true}, + }, + expectedTables: map[ofProtocol][]*FeatureTable{ + ofProtocolIP: { + ClassifierTable, + SpoofGuardTable, + SNATConntrackTable, + ConntrackTable, + ConntrackStateTable, + SessionAffinityTable, + ServiceLBTable, + EndpointDNATTable, + AntreaPolicyEgressRuleTable, + EgressRuleTable, + EgressDefaultTable, + EgressMetricTable, + L3ForwardingTable, + ServiceHairpinMarkTable, + L3DecTTLTable, + SNATTable, + 
SNATConntrackCommitTable, + L2ForwardingCalcTable, + AntreaPolicyIngressRuleTable, + IngressRuleTable, + IngressDefaultTable, + IngressMetricTable, + ConntrackCommitTable, + L2ForwardingOutTable, + }, + ofProtocolARP: { + ARPSpoofGuardTable, + ARPResponderTable, + }, + }, + }, + } { + templatesMap := make(map[ofProtocol][]*pipelineTemplate) + for _, f := range tc.features { + templatesMap[ofProtocolIP] = append(templatesMap[ofProtocolIP], f.getTemplate(ofProtocolIP)) + if tc.ipStack != ipv6Only { + template := f.getTemplate(ofProtocolARP) + if template != nil { + templatesMap[ofProtocolARP] = append(templatesMap[ofProtocolARP], template) + } + } + } + + for proto, templates := range templatesMap { + generatePipeline(templates) + tables := tc.expectedTables[proto] + + for i := 0; i < len(tables)-1; i++ { + require.NotNil(t, tables[i].ofTable, fmt.Sprintf("table %s should be initialized", tables[i].name)) + require.Less(t, tables[i].GetID(), tables[i+1].GetID(), fmt.Sprintf("id of table %s should less than that of table %s", tables[i].GetName(), tables[i+1].GetName())) + } + require.NotNil(t, tables[len(tables)-1].ofTable, fmt.Sprintf("table %s should be initialized", tables[len(tables)-1].name)) + + reset(tables) + } + } +} + +func reset(tables []*FeatureTable) { + PipelineClassifierTable.ofTable = nil + for _, table := range tables { + table.ofTable = nil + } + binding.ResetTableID() +} diff --git a/pkg/agent/openflow/pipeline_windows.go b/pkg/agent/openflow/pipeline_windows.go deleted file mode 100644 index ccec1250340..00000000000 --- a/pkg/agent/openflow/pipeline_windows.go +++ /dev/null @@ -1,140 +0,0 @@ -//go:build windows -// +build windows - -// Copyright 2021 Antrea Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package openflow - -import ( - "net" - - "antrea.io/antrea/pkg/agent/config" - "antrea.io/antrea/pkg/agent/openflow/cookie" - "antrea.io/antrea/pkg/agent/types" - binding "antrea.io/antrea/pkg/ovs/openflow" -) - -const ( - // ctZoneSNAT is only used on Windows and only when AntreaProxy is enabled. - // When a Pod access a ClusterIP Service, and the IP of the selected endpoint - // is not in "cluster-cidr". The request packets need to be SNAT'd(set src IP to local Node IP) - // after have been DNAT'd(set dst IP to endpoint IP). - // For example, the endpoint Pod may run in hostNetwork mode and the IP of the endpoint - // will be the current Node IP. - // We need to use a different ct_zone to track the SNAT'd connection because OVS - // does not support doing both DNAT and SNAT in the same ct_zone. - // - // An example of the connection is a Pod accesses kubernetes API service: - // Pod --> DNAT(CtZone) --> SNAT(ctZoneSNAT) --> Endpoint(API server NodeIP) - // Pod <-- unDNAT(CtZone) <-- unSNAT(ctZoneSNAT) <-- Endpoint(API server NodeIP) - ctZoneSNAT = 0xffdc -) - -var ( - // snatCTMark indicates SNAT is performed for packets of the connection. - snatCTMark = binding.NewCTMark(0x40, 0, 31) -) - -func (c *client) snatMarkFlows(snatIP net.IP, mark uint32) []binding.Flow { - snatIPRange := &binding.IPRange{StartIP: snatIP, EndIP: snatIP} - nextTable := ConntrackCommitTable.GetNext() - flows := []binding.Flow{ - c.snatIPFromTunnelFlow(snatIP, mark), - ConntrackCommitTable.BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). 
- MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false). - MatchPktMark(mark, &types.SNATIPMarkMask). - Action().CT(true, nextTable, CtZone). - SNAT(snatIPRange, nil). - LoadToCtMark(snatCTMark).CTDone(). - Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done(), - } - - if c.enableProxy { - flows = append(flows, ConntrackCommitTable.BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true). - MatchPktMark(mark, &types.SNATIPMarkMask). - Action().CT(true, nextTable, ctZoneSNAT). - SNAT(snatIPRange, nil). - LoadToCtMark(snatCTMark).CTDone(). - Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done()) - } - return flows -} - -// hostBridgeUplinkFlows generates the flows that forward traffic between the -// bridge local port and the uplink port to support the host traffic with -// outside. -func (c *client) hostBridgeUplinkFlows(localSubnet net.IPNet, category cookie.Category) (flows []binding.Flow) { - flows = []binding.Flow{ - ClassifierTable.BuildFlow(priorityNormal). - MatchInPort(config.UplinkOFPort). - Action().Output(config.BridgeOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - ClassifierTable.BuildFlow(priorityNormal). - MatchInPort(config.BridgeOFPort). - Action().Output(config.UplinkOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - } - if c.networkConfig.TrafficEncapMode.SupportsNoEncap() { - // If NoEncap is enabled, the reply packets from remote Pod can be forwarded to local Pod directly. - // by explicitly resubmitting them to ServiceHairpinTable and marking "macRewriteMark" at same time. - flows = append(flows, ClassifierTable.BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). - MatchInPort(config.UplinkOFPort). - MatchDstIPNet(localSubnet). - Action().LoadRegMark(FromUplinkRegMark). - Action().LoadRegMark(RewriteMACRegMark). - Action().GotoTable(ServiceHairpinTable.GetID()). 
- Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return flows -} - -func (c *client) l3FwdFlowToRemoteViaRouting(localGatewayMAC net.HardwareAddr, remoteGatewayMAC net.HardwareAddr, - category cookie.Category, peerIP net.IP, peerPodCIDR *net.IPNet) []binding.Flow { - if c.networkConfig.NeedsDirectRoutingToPeer(peerIP, c.nodeConfig.NodeTransportIPv4Addr) && remoteGatewayMAC != nil { - ipProto := getIPProtocol(peerIP) - // It enhances Windows Noencap mode performance by bypassing host network. - flows := []binding.Flow{L2ForwardingCalcTable.BuildFlow(priorityNormal). - MatchDstMAC(remoteGatewayMAC). - Action().LoadToRegField(TargetOFPortField, config.UplinkOFPort). - Action().LoadRegMark(OFPortFoundRegMark). - Action().GotoTable(ConntrackCommitTable.GetID()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Output the reply packet to the uplink interface if the destination is another Node's IP. - // This is for the scenario that another Node directly accesses Pods on this Node. Since the request - // packet enters OVS from the uplink interface, the reply should go back in the same path. Otherwise, - // Windows host will perform stateless SNAT on the reply, and the packets are possibly dropped on peer - // Node because of the wrong source address. - L3ForwardingTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). - MatchDstIP(peerIP). - MatchCTStateRpl(true).MatchCTStateTrk(true). - Action().SetDstMAC(remoteGatewayMAC). - Action().GotoTable(L3ForwardingTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). 
- Done(), - } - flows = append(flows, c.l3FwdFlowToRemoteViaGW(remoteGatewayMAC, *peerPodCIDR, category, false)) - return flows - } - return []binding.Flow{c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, category, false)} -} diff --git a/pkg/agent/openflow/pod_connectivity.go b/pkg/agent/openflow/pod_connectivity.go new file mode 100644 index 00000000000..21a4d0ffe52 --- /dev/null +++ b/pkg/agent/openflow/pod_connectivity.go @@ -0,0 +1,76 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package openflow + +import ( + "net" + + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featurePodConnectivity struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + + nodeFlowCache *flowCategoryCache + podFlowCache *flowCategoryCache + gatewayFlows []binding.Flow + defaultTunnelFlows []binding.Flow + hostNetworkingFlows []binding.Flow + + gatewayIPs map[binding.Protocol]net.IP + ctZones map[binding.Protocol]int + nodeConfig *config.NodeConfig + networkConfig *config.NetworkConfig + + connectUplinkToBridge bool +} + +func (c *featurePodConnectivity) getFeatureID() featureID { + return PodConnectivity +} + +func newFeaturePodConnectivity( + cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + nodeConfig *config.NodeConfig, + networkConfig *config.NetworkConfig, + connectUplinkToBridge bool) feature { + ctZones := make(map[binding.Protocol]int) + gatewayIPs := make(map[binding.Protocol]net.IP) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP { + ctZones[ipProtocol] = CtZone + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv4 + } else if ipProtocol == binding.ProtocolIPv6 { + ctZones[ipProtocol] = CtZoneV6 + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv6 + } + } + + return &featurePodConnectivity{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + nodeFlowCache: newFlowCategoryCache(), + podFlowCache: newFlowCategoryCache(), + gatewayIPs: gatewayIPs, + ctZones: ctZones, + nodeConfig: nodeConfig, + networkConfig: networkConfig, + connectUplinkToBridge: connectUplinkToBridge, + } +} diff --git a/pkg/agent/openflow/service.go b/pkg/agent/openflow/service.go new file mode 100644 index 00000000000..322bb545e88 --- /dev/null +++ b/pkg/agent/openflow/service.go @@ -0,0 +1,88 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "net" + "sync" + + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" +) + +type featureService struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + bridge binding.Bridge + + serviceFlowCache *flowCategoryCache + defaultServiceFlows []binding.Flow + groupCache sync.Map + + gatewayIPs map[binding.Protocol]net.IP + virtualIPs map[binding.Protocol]net.IP + dnatCtZones map[binding.Protocol]int + snatCtZones map[binding.Protocol]int + gatewayMAC net.HardwareAddr + + enableProxy bool + proxyAll bool +} + +func (c *featureService) getFeatureID() featureID { + return Service +} + +func newFeatureService( + cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + nodeConfig *config.NodeConfig, + bridge binding.Bridge, + enableProxy, + proxyAll bool) feature { + gatewayIPs := make(map[binding.Protocol]net.IP) + virtualIPs := make(map[binding.Protocol]net.IP) + dnatCtZones := make(map[binding.Protocol]int) + snatCtZones := make(map[binding.Protocol]int) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv4 + virtualIPs[ipProtocol] = config.VirtualServiceIPv4 + dnatCtZones[ipProtocol] = CtZone + snatCtZones[ipProtocol] = SNATCtZone + } else if ipProtocol == binding.ProtocolIPv6 { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv6 + virtualIPs[ipProtocol] 
= config.VirtualServiceIPv6 + dnatCtZones[ipProtocol] = CtZoneV6 + snatCtZones[ipProtocol] = SNATCtZoneV6 + } + } + + return &featureService{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + bridge: bridge, + serviceFlowCache: newFlowCategoryCache(), + groupCache: sync.Map{}, + gatewayIPs: gatewayIPs, + virtualIPs: virtualIPs, + dnatCtZones: dnatCtZones, + snatCtZones: snatCtZones, + gatewayMAC: nodeConfig.GatewayConfig.MAC, + enableProxy: enableProxy, + proxyAll: proxyAll, + } +} diff --git a/pkg/agent/openflow/traceflow.go b/pkg/agent/openflow/traceflow.go new file mode 100644 index 00000000000..4cbfe82a19a --- /dev/null +++ b/pkg/agent/openflow/traceflow.go @@ -0,0 +1,68 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package openflow + +import ( + "net" + + "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow/cookie" + binding "antrea.io/antrea/pkg/ovs/openflow" + "antrea.io/antrea/pkg/ovs/ovsconfig" +) + +type featureTraceflow struct { + cookieAllocator cookie.Allocator + ipProtocols []binding.Protocol + + tfFlowCache *flowCategoryCache + + gatewayIPs map[binding.Protocol]net.IP + ovsMetersAreSupported bool + enableProxy bool + enableAntreaPolicy bool + supportEncap bool +} + +func (c *featureTraceflow) getFeatureID() featureID { + return Traceflow +} + +func newFeatureTraceflow(cookieAllocator cookie.Allocator, + ipProtocols []binding.Protocol, + ovsDatapathType ovsconfig.OVSDatapathType, + nodeConfig *config.NodeConfig, + enableProxy, + enableAntreaPolicy, + supportEncap bool) feature { + gatewayIPs := make(map[binding.Protocol]net.IP) + for _, ipProtocol := range ipProtocols { + if ipProtocol == binding.ProtocolIP && nodeConfig.GatewayConfig.IPv4 != nil { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv4 + } else if ipProtocol == binding.ProtocolIPv6 && nodeConfig.GatewayConfig.IPv6 != nil { + gatewayIPs[ipProtocol] = nodeConfig.GatewayConfig.IPv6 + } + } + + return &featureTraceflow{ + cookieAllocator: cookieAllocator, + ipProtocols: ipProtocols, + tfFlowCache: newFlowCategoryCache(), + ovsMetersAreSupported: ovsMetersAreSupported(ovsDatapathType), + enableProxy: enableProxy, + enableAntreaPolicy: enableAntreaPolicy, + supportEncap: supportEncap, + } +} diff --git a/pkg/agent/openflow/vm_connectivity.go b/pkg/agent/openflow/vm_connectivity.go new file mode 100644 index 00000000000..28059f58003 --- /dev/null +++ b/pkg/agent/openflow/vm_connectivity.go @@ -0,0 +1,22 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// StageID identifies a stage of a pipeline. Every Table reports the stage it
// belongs to through GetStageID.
type StageID uint8

// Stage IDs are assigned with iota, so their numeric values increase in
// declaration order, starting from ClassifierStage (0).
const (
	ClassifierStage StageID = iota
	ValidationStage
	ConntrackStateStage
	PreRoutingStage
	EgressSecurityStage
	RoutingStage
	PostRoutingStage
	SwitchingStage
	IngressSecurityStage
	ConntrackStage
	OutputStage

	// LastStage is an alias of OutputStage, the highest stage ID declared above.
	LastStage StageID = OutputStage
)

// Pipeline groups tables by stage and provides lookups over them.
type Pipeline interface {
	// GetNextStage returns a stage that follows id, or LastStage.
	GetNextStage(id StageID) StageID
	// GetFirstTableInStage returns the first table of stage id, or nil when
	// the pipeline has no entry for that stage.
	GetFirstTableInStage(id StageID) Table
	// ListTablesInStage returns the tables registered for stage id.
	ListTablesInStage(id StageID) []Table
	// GetFirstTable returns the first table of the pipeline.
	GetFirstTable() Table
	// GetLastTable returns the last table of the pipeline.
	GetLastTable() Table
	// ListAllTables returns the tables of every stage.
	ListAllTables() []Table
	// IsLastTable reports whether t is the last table of the pipeline.
	IsLastTable(t Table) bool
}
cookieID uint64) LearnAction GotoTable(table uint8) FlowBuilder + NextTable() FlowBuilder + GotoStage(stage StageID) FlowBuilder SendToController(reason uint8) FlowBuilder Note(notes string) FlowBuilder Meter(meterID uint32) FlowBuilder diff --git a/pkg/ovs/openflow/ofctrl_action.go b/pkg/ovs/openflow/ofctrl_action.go index 93e876bc25b..177a43cb7c9 100644 --- a/pkg/ovs/openflow/ofctrl_action.go +++ b/pkg/ovs/openflow/ofctrl_action.go @@ -283,8 +283,12 @@ func (a *ofFlowAction) Resubmit(ofPort uint16, tableID uint8) FlowBuilder { return a.builder } -func (a *ofFlowAction) ResubmitToTable(table uint8) FlowBuilder { - return a.Resubmit(openflow13.OFPP_IN_PORT, table) +func (a *ofFlowAction) ResubmitToTables(tables ...uint8) FlowBuilder { + var fb FlowBuilder + for _, t := range tables { + fb = a.Resubmit(openflow13.OFPP_IN_PORT, t) + } + return fb } // DecTTL is an action to decrease TTL. It is used in routing functions implemented by Openflow. @@ -552,3 +556,18 @@ func (a *ofFlowAction) GotoTable(tableID uint8) FlowBuilder { a.builder.ofFlow.Goto(tableID) return a.builder } + +func (a *ofFlowAction) NextTable() FlowBuilder { + tableID := a.builder.ofFlow.table.next + a.builder.ofFlow.Goto(tableID) + return a.builder +} + +func (a *ofFlowAction) GotoStage(stage StageID) FlowBuilder { + pipeline := pipelineCache[a.builder.ofFlow.table.pipelineID] + table := pipeline.GetFirstTableInStage(stage) + if table != nil { + a.builder.ofFlow.Goto(table.GetID()) + } + return a.builder +} diff --git a/pkg/ovs/openflow/ofctrl_bridge.go b/pkg/ovs/openflow/ofctrl_bridge.go index 467363a1e1d..2e79f6b7280 100644 --- a/pkg/ovs/openflow/ofctrl_bridge.go +++ b/pkg/ovs/openflow/ofctrl_bridge.go @@ -29,6 +29,8 @@ type ofTable struct { missAction MissActionType flowCount uint updateTime time.Time + stage StageID + pipelineID uint8 *ofctrl.Table } @@ -69,6 +71,14 @@ func (t *ofTable) SetMissAction(action MissActionType) { t.missAction = action } +func (t *ofTable) GetStageID() StageID { + 
// tableID is the ID that the next call to NextTableID hands out.
var tableID uint8

// NextTableID returns the current table ID and advances the counter by one.
// The counter is a uint8, so it wraps around to 0 after 255.
func NextTableID() (id uint8) {
	id, tableID = tableID, tableID+1
	return id
}

// ResetTableID rewinds the counter so that the next allocated table ID is 0.
// It exists for tests only.
func ResetTableID() {
	tableID = 0
}
+ +package openflow + +import "sort" + +const ( + AllPipelines uint8 = 255 +) + +var ( + pipelineCache = make(map[uint8]*ofPipeline) + pipelineID uint8 +) + +type ofPipeline struct { + pipelineID uint8 + sortedTableMap map[StageID][]Table + firstTable Table + lastTable Table + firstStage StageID + lastStage StageID +} + +func (p *ofPipeline) GetNextStage(id StageID) StageID { + for { + stage := id + 1 + if stage >= LastStage { + return LastStage + } + if _, ok := p.sortedTableMap[stage]; ok { + return stage + } + } +} + +func (p *ofPipeline) GetFirstTableInStage(id StageID) Table { + tables, ok := p.sortedTableMap[id] + if ok { + return tables[0] + } + return nil +} + +func (p *ofPipeline) ListTablesInStage(id StageID) []Table { + return p.sortedTableMap[id] +} + +func (p *ofPipeline) IsStageValid(stage StageID) bool { + _, ok := p.sortedTableMap[stage] + return ok +} + +func (p *ofPipeline) GetFirstTable() Table { + return p.firstTable +} + +func (p *ofPipeline) GetLastTable() Table { + return p.lastTable +} + +func (p *ofPipeline) IsLastTable(t Table) bool { + return t.GetID() == p.lastTable.GetID() +} + +func (p *ofPipeline) ListAllTables() []Table { + tables := make([]Table, 0) + for _, t := range p.sortedTableMap { + tables = append(tables, t...) 
+ } + sort.Slice(tables, func(i, j int) bool { + return tables[i].GetID() < tables[j].GetID() + }) + return tables +} + +func NewPipeline(id uint8, stageTableMap map[StageID][]Table) Pipeline { + p := &ofPipeline{pipelineID: id, sortedTableMap: stageTableMap} + for s := ClassifierStage; s <= LastStage; s++ { + if tables, ok := stageTableMap[s]; ok { + p.firstStage = s + p.firstTable = tables[0] + break + } + } + for s := LastStage; true; s-- { + if tables, ok := stageTableMap[s]; ok { + p.lastStage = s + tableCount := len(tables) + p.lastTable = tables[tableCount-1] + break + } + } + pipelineCache[id] = p + return p +} + +func NewPipelineID() uint8 { + pipelineID += 1 + return pipelineID +} diff --git a/pkg/ovs/openflow/testing/mock_openflow.go b/pkg/ovs/openflow/testing/mock_openflow.go index e654db4ca99..cdca0dfd374 100644 --- a/pkg/ovs/openflow/testing/mock_openflow.go +++ b/pkg/ovs/openflow/testing/mock_openflow.go @@ -410,6 +410,20 @@ func (mr *MockTableMockRecorder) GetNext() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNext", reflect.TypeOf((*MockTable)(nil).GetNext)) } +// GetStageID mocks base method +func (m *MockTable) GetStageID() openflow.StageID { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetStageID") + ret0, _ := ret[0].(openflow.StageID) + return ret0 +} + +// GetStageID indicates an expected call of GetStageID +func (mr *MockTableMockRecorder) GetStageID() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetStageID", reflect.TypeOf((*MockTable)(nil).GetStageID)) +} + // SetMissAction mocks base method func (m *MockTable) SetMissAction(arg0 openflow.MissActionType) { m.ctrl.T.Helper() @@ -717,6 +731,20 @@ func (mr *MockActionMockRecorder) Drop() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Drop", reflect.TypeOf((*MockAction)(nil).Drop)) } +// GotoStage mocks base method +func (m *MockAction) GotoStage(arg0 openflow.StageID) openflow.FlowBuilder { 
+ m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GotoStage", arg0) + ret0, _ := ret[0].(openflow.FlowBuilder) + return ret0 +} + +// GotoStage indicates an expected call of GotoStage +func (mr *MockActionMockRecorder) GotoStage(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GotoStage", reflect.TypeOf((*MockAction)(nil).GotoStage), arg0) +} + // GotoTable mocks base method func (m *MockAction) GotoTable(arg0 byte) openflow.FlowBuilder { m.ctrl.T.Helper() @@ -885,6 +913,20 @@ func (mr *MockActionMockRecorder) MoveRange(arg0, arg1, arg2, arg3 interface{}) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MoveRange", reflect.TypeOf((*MockAction)(nil).MoveRange), arg0, arg1, arg2, arg3) } +// NextTable mocks base method +func (m *MockAction) NextTable() openflow.FlowBuilder { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "NextTable") + ret0, _ := ret[0].(openflow.FlowBuilder) + return ret0 +} + +// NextTable indicates an expected call of NextTable +func (mr *MockActionMockRecorder) NextTable() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NextTable", reflect.TypeOf((*MockAction)(nil).NextTable)) +} + // Normal mocks base method func (m *MockAction) Normal() openflow.FlowBuilder { m.ctrl.T.Helper() @@ -983,18 +1025,22 @@ func (mr *MockActionMockRecorder) Resubmit(arg0, arg1 interface{}) *gomock.Call return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Resubmit", reflect.TypeOf((*MockAction)(nil).Resubmit), arg0, arg1) } -// ResubmitToTable mocks base method -func (m *MockAction) ResubmitToTable(arg0 byte) openflow.FlowBuilder { +// ResubmitToTables mocks base method +func (m *MockAction) ResubmitToTables(arg0 ...byte) openflow.FlowBuilder { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ResubmitToTable", arg0) + varargs := []interface{}{} + for _, a := range arg0 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "ResubmitToTables", varargs...) 
ret0, _ := ret[0].(openflow.FlowBuilder) return ret0 } -// ResubmitToTable indicates an expected call of ResubmitToTable -func (mr *MockActionMockRecorder) ResubmitToTable(arg0 interface{}) *gomock.Call { +// ResubmitToTables indicates an expected call of ResubmitToTables +func (mr *MockActionMockRecorder) ResubmitToTables(arg0 ...interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResubmitToTable", reflect.TypeOf((*MockAction)(nil).ResubmitToTable), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResubmitToTables", reflect.TypeOf((*MockAction)(nil).ResubmitToTables), arg0...) } // SendToController mocks base method