Skip to content

Commit

Permalink
Configure conntrack cache table size
Browse files Browse the repository at this point in the history
  • Loading branch information
jayanthvn committed Jun 20, 2024
1 parent dc9a6a8 commit bf64bc5
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 12 deletions.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,23 @@ Network Policy agent can operate in either IPv4 or IPv6 mode. Setting this flag

**Note:** VPC CNI by default creates an egress only IPv4 interface for IPv6 pods and this network interface will not be secured by the Network policy feature. Network policies will only be enforced on the Pod's primary interface (i.e.,) `eth0`. If you want to block the egress IPv4 access, please disable the interface creation via [ENABLE_V4_EGRESS](https://github.com/aws/amazon-vpc-cni-k8s#enable_v4_egress-v1151) flag in VPC CNI.

#### `conntrack-cache-cleanup-period` (from v1.0.7+)

Type: Integer

Default: 300

Network Policy agent maintains a local conntrack cache. This configuration (in seconds) determines how frequently the local conntrack cache is cleaned up of stale/expired entries. At each interval, the network policy agent checks every entry in the local conntrack cache against the kernel conntrack table and determines whether the entry has to be deleted.

#### `conntrack-table-cache-size` (from v1.1.3+)

Type: Integer

Default: 1024 * 256

Network Policy agent maintains a local conntrack cache. Ideally this should be the same size as the kernel conntrack table. Note that this should be configured on new nodes before enabling network policy; if network policy is already enabled, a change to this configuration requires a reload of the nodes. Dynamically updating the conntrack map size would lead to traffic disruption, so it is not supported. Supported values range from 32K to 1024K.


## Network Policy Agent CLI
The Amazon VPC CNI plugin for Kubernetes installs eBPF SDK collection of tools on the nodes. You can use the eBPF SDK tools to identify issues with network policies. For example, the following command lists the programs that are running on the node.

Expand Down
4 changes: 2 additions & 2 deletions controllers/policyendpoints_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func prometheusRegister() {

// NewPolicyEndpointsReconciler constructs new PolicyEndpointReconciler
func NewPolicyEndpointsReconciler(k8sClient client.Client, log logr.Logger,
enablePolicyEventLogs, enableCloudWatchLogs bool, enableIPv6 bool, enableNetworkPolicy bool, conntrackTTL int) (*PolicyEndpointsReconciler, error) {
enablePolicyEventLogs, enableCloudWatchLogs bool, enableIPv6 bool, enableNetworkPolicy bool, conntrackTTL int, conntrackTableSize int) (*PolicyEndpointsReconciler, error) {
r := &PolicyEndpointsReconciler{
k8sClient: k8sClient,
log: log,
Expand All @@ -89,7 +89,7 @@ func NewPolicyEndpointsReconciler(k8sClient client.Client, log logr.Logger,
var err error
if enableNetworkPolicy {
r.ebpfClient, err = ebpf.NewBpfClient(&r.policyEndpointeBPFContext, r.nodeIP,
enablePolicyEventLogs, enableCloudWatchLogs, enableIPv6, conntrackTTL)
enablePolicyEventLogs, enableCloudWatchLogs, enableIPv6, conntrackTTL, conntrackTableSize)

// Start prometheus
prometheusRegister()
Expand Down
6 changes: 3 additions & 3 deletions controllers/policyendpoints_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ func TestDeriveIngressAndEgressFirewallRules(t *testing.T) {

mockClient := mock_client.NewMockClient(ctrl)
policyEndpointReconciler, _ := NewPolicyEndpointsReconciler(mockClient, logr.New(&log.NullLogSink{}),
false, false, false, false, 300)
false, false, false, false, 300, 524288)
var policyEndpointsList []string
policyEndpointsList = append(policyEndpointsList, tt.policyEndpointName)
policyEndpointReconciler.podIdentifierToPolicyEndpointMap.Store(tt.podIdentifier, policyEndpointsList)
Expand Down Expand Up @@ -748,7 +748,7 @@ func TestArePoliciesAvailableInLocalCache(t *testing.T) {

mockClient := mock_client.NewMockClient(ctrl)
policyEndpointReconciler, _ := NewPolicyEndpointsReconciler(mockClient, logr.New(&log.NullLogSink{}),
false, false, false, false, 300)
false, false, false, false, 300, 524288)
var policyEndpointsList []string
policyEndpointsList = append(policyEndpointsList, tt.policyEndpointName...)
policyEndpointReconciler.podIdentifierToPolicyEndpointMap.Store(tt.podIdentifier, policyEndpointsList)
Expand Down Expand Up @@ -994,7 +994,7 @@ func TestDeriveFireWallRulesPerPodIdentifier(t *testing.T) {

mockClient := mock_client.NewMockClient(ctrl)
policyEndpointReconciler, _ := NewPolicyEndpointsReconciler(mockClient, logr.New(&log.NullLogSink{}),
false, false, false, false, 300)
false, false, false, false, 300, 524288)
var policyEndpointsList []string
policyEndpointsList = append(policyEndpointsList, tt.policyEndpointName)
policyEndpointReconciler.podIdentifierToPolicyEndpointMap.Store(tt.podIdentifier, policyEndpointsList)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.21

require (
github.com/aws/amazon-vpc-cni-k8s v1.18.1
github.com/aws/aws-ebpf-sdk-go v1.0.8
github.com/aws/aws-ebpf-sdk-go v1.0.9
github.com/aws/aws-sdk-go v1.50.30
github.com/go-logr/logr v1.4.1
github.com/go-logr/zapr v1.3.0
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
github.com/aws/amazon-vpc-cni-k8s v1.18.1 h1:u/OeBgnUUX6f3PCEOpA4dbG0+iZ71CnY6tEljjrl3iw=
github.com/aws/amazon-vpc-cni-k8s v1.18.1/go.mod h1:m/J5GsxF0Th2iQTOE3ww4W9LFvwdC0tGyA9dIL4h6iQ=
github.com/aws/aws-ebpf-sdk-go v1.0.8 h1:GyfMwkfS6Z8+5FgqRWlq+Sa3J97Qyb4fVY3KPkkyTW0=
github.com/aws/aws-ebpf-sdk-go v1.0.8/go.mod h1:RR0L0fJn8cJGgRH6zEYU4N64j6aee5P8gpUUFgkUQMA=
github.com/aws/aws-ebpf-sdk-go v1.0.9-rc1 h1:vDtkvNEvdF8L+2/qBahIuyLvOTeQs+ToVbkGw4QGJvI=
github.com/aws/aws-ebpf-sdk-go v1.0.9-rc1/go.mod h1:6lwTHtNgTp/kQzx4pdnp09LJevvIVqYf0ce8pP2u66E=
github.com/aws/aws-ebpf-sdk-go v1.0.9-rc2 h1:W2mdC1KjMk/fh7jfF/YP6s+Y9FsiEYc33PdJVsfix1g=
github.com/aws/aws-ebpf-sdk-go v1.0.9-rc2/go.mod h1:SBy1vl1WXMingLbqPZfHd1VXTqB9cD473JwUfoEM+Qs=
github.com/aws/aws-ebpf-sdk-go v1.0.9 h1:FvkyeRUKNvbUFgzh+Ia7XbBb5U86dHW6dCrljt76Fao=
github.com/aws/aws-ebpf-sdk-go v1.0.9/go.mod h1:SBy1vl1WXMingLbqPZfHd1VXTqB9cD473JwUfoEM+Qs=
github.com/aws/aws-sdk-go v1.50.30 h1:2OelKH1eayeaH7OuL1Y9Ombfw4HK+/k0fEnJNWjyLts=
github.com/aws/aws-sdk-go v1.50.30/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down
17 changes: 16 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,15 @@ func main() {
os.Exit(1)
}

if !validControllerFlags(ctrlConfig) {
setupLog.Error(err, "Controller flags validation failed")
os.Exit(1)
}

ctx := ctrl.SetupSignalHandler()
policyEndpointController, err := controllers.NewPolicyEndpointsReconciler(mgr.GetClient(),
ctrl.Log.WithName("controllers").WithName("policyEndpoints"), ctrlConfig.EnablePolicyEventLogs, ctrlConfig.EnableCloudWatchLogs,
ctrlConfig.EnableIPv6, ctrlConfig.EnableNetworkPolicy, ctrlConfig.ConntrackCacheCleanupPeriod)
ctrlConfig.EnableIPv6, ctrlConfig.EnableNetworkPolicy, ctrlConfig.ConntrackCacheCleanupPeriod, ctrlConfig.ConntrackCacheTableSize)
if err != nil {
setupLog.Error(err, "unable to setup controller", "controller", "PolicyEndpoints init failed")
os.Exit(1)
Expand Down Expand Up @@ -136,3 +141,13 @@ func getLoggerWithLogLevel(logLevel string, logFilePath string) (logr.Logger, er
ctrlLogger := logger.New(logLevel, logFilePath)
return zapr.NewLogger(ctrlLogger), nil
}

// validControllerFlags reports whether the parsed controller configuration is
// acceptable. It currently checks only that the conntrack cache table size
// falls within the supported range of 32K to 1024K entries; an out-of-range
// value is logged and rejected.
func validControllerFlags(ctrlConfig config.ControllerConfig) bool {
	const (
		minConntrackTableSize = 32 * 1024
		maxConntrackTableSize = 1024 * 1024
	)
	size := ctrlConfig.ConntrackCacheTableSize
	if size >= minConntrackTableSize && size <= maxConntrackTableSize {
		return true
	}
	setupLog.Info("Invalid conntrack cache table size, should be between 32K and 1024K")
	return false
}
6 changes: 6 additions & 0 deletions pkg/config/controller_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ const (
defaultLogFile = "/var/log/aws-routed-eni/network-policy-agent.log"
defaultMaxConcurrentReconciles = 3
defaultConntrackCacheCleanupPeriod = 300
defaultConntrackCacheTableSize = 256 * 1024
flagEnablePolicyEventLogs = "enable-policy-event-logs"
flagEnableCloudWatchLogs = "enable-cloudwatch-logs"
flagEnableIPv6 = "enable-ipv6"
flagEnableNetworkPolicy = "enable-network-policy"
flagConntrackCacheCleanupPeriod = "conntrack-cache-cleanup-period"
flagConntrackCacheTableSize = "conntrack-cache-table-size"
)

// ControllerConfig contains the controller configuration
Expand All @@ -35,6 +37,8 @@ type ControllerConfig struct {
EnableNetworkPolicy bool
// ConntrackCacheCleanupPeriod specifies the cleanup period
ConntrackCacheCleanupPeriod int
// ConntrackTableSize specifies the conntrack table size for the agent
ConntrackCacheTableSize int
// Configurations for the Controller Runtime
RuntimeConfig RuntimeConfig
}
Expand All @@ -52,6 +56,8 @@ func (cfg *ControllerConfig) BindFlags(fs *pflag.FlagSet) {
fs.BoolVar(&cfg.EnableNetworkPolicy, flagEnableNetworkPolicy, false, "If enabled, Network Policy agent will initialize BPF maps and start reconciler")
fs.IntVar(&cfg.ConntrackCacheCleanupPeriod, flagConntrackCacheCleanupPeriod, defaultConntrackCacheCleanupPeriod, ""+
"Cleanup interval for network policy agent conntrack cache")
fs.IntVar(&cfg.ConntrackCacheTableSize, flagConntrackCacheTableSize, defaultConntrackCacheTableSize, ""+
"Table size for network policy agent conntrack cache")

cfg.RuntimeConfig.BindFlags(fs)
}
15 changes: 12 additions & 3 deletions pkg/ebpf/bpf_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ type EbpfFirewallRules struct {
}

func NewBpfClient(policyEndpointeBPFContext *sync.Map, nodeIP string, enablePolicyEventLogs, enableCloudWatchLogs bool,
enableIPv6 bool, conntrackTTL int) (*bpfClient, error) {
enableIPv6 bool, conntrackTTL int, conntrackTableSize int) (*bpfClient, error) {
var conntrackMap goebpfmaps.BpfMap

ebpfClient := &bpfClient{
Expand Down Expand Up @@ -181,10 +181,19 @@ func NewBpfClient(policyEndpointeBPFContext *sync.Map, nodeIP string, enablePoli
if enableIPv6 {
eventsProbe = EVENTS_V6_BINARY
}
_, globalMapInfo, err := ebpfClient.bpfSDKClient.LoadBpfFile(eventsProbe, "global")
var bpfSdkInputData goelf.BpfCustomData
bpfSdkInputData.FilePath = eventsProbe
bpfSdkInputData.CustomPinPath = "global"
bpfSdkInputData.CustomMapSize = make(map[string]int)

bpfSdkInputData.CustomMapSize[AWS_CONNTRACK_MAP] = conntrackTableSize

ebpfClient.logger.Info("Setting conntrack cache map size: ", "max entries", conntrackTableSize)

_, globalMapInfo, err := ebpfClient.bpfSDKClient.LoadBpfFileWithCustomData(bpfSdkInputData)
if err != nil {
ebpfClient.logger.Error(err, "Unable to load events binary. Required for policy enforcement, exiting..")
sdkAPIErr.WithLabelValues("LoadBpfFile").Inc()
sdkAPIErr.WithLabelValues("LoadBpfFileWithCustomData").Inc()
return nil, err
}
ebpfClient.logger.Info("Successfully loaded events probe")
Expand Down

0 comments on commit bf64bc5

Please sign in to comment.