Skip to content

Commit

Permalink
Add Prometheus metrics to count ANP and ACNP Status updates (#1801)
Browse files Browse the repository at this point in the history
Overly frequent Status updates can generate many versions of each Custom
Resource, all of which need to be stored in etcd until the next compaction
by kube-apiserver. Too many updates can also cause fragmentation of the
database. It is therefore useful to be able to track the number of updates
over time in production clusters.
  • Loading branch information
antoninbas committed Feb 3, 2021
1 parent 33a3749 commit 9a8939a
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
4 changes: 4 additions & 0 deletions docs/prometheus-integration.md
Expand Up @@ -172,10 +172,14 @@ tables.

#### Antrea Controller Metrics

- **antrea_controller_acnp_status_updates:** The total number of actual
status updates performed for Antrea ClusterNetworkPolicy Custom Resources
- **antrea_controller_address_group_processed:** The total number of
address-group processed
- **antrea_controller_address_group_sync_duration_milliseconds:** The duration
of syncing address-group
- **antrea_controller_anp_status_updates:** The total number of actual status
updates performed for Antrea NetworkPolicy Custom Resources
- **antrea_controller_applied_to_group_processed:** The total number of
applied-to-group processed
- **antrea_controller_applied_to_group_sync_duration_milliseconds:** The
Expand Down
20 changes: 20 additions & 0 deletions pkg/controller/metrics/prometheus.go
Expand Up @@ -89,6 +89,20 @@ var (
Help: "The length of InternalNetworkPolicyQueue",
StabilityLevel: metrics.STABLE,
})
// AntreaNetworkPolicyStatusUpdates is an ALPHA-stability counter of status
// updates performed for Antrea NetworkPolicy Custom Resources. It is
// registered under the name antrea_controller_anp_status_updates.
// NOTE(review): at the visible call site the counter is incremented after the
// "status unchanged" early return but before the UpdateStatus API call, so
// failed update attempts appear to be counted as well — confirm this is intended.
AntreaNetworkPolicyStatusUpdates = metrics.NewCounter(&metrics.CounterOpts{
Namespace: metricNamespaceAntrea,
Subsystem: metricSubsystemController,
Name: "anp_status_updates",
Help: "The total number of actual status updates performed for Antrea NetworkPolicy Custom Resources",
StabilityLevel: metrics.ALPHA,
})
// AntreaClusterNetworkPolicyStatusUpdates is an ALPHA-stability counter of
// status updates performed for Antrea ClusterNetworkPolicy Custom Resources.
// It is registered under the name antrea_controller_acnp_status_updates.
// NOTE(review): at the visible call site the counter is incremented after the
// "status unchanged" early return but before the UpdateStatus API call, so
// failed update attempts appear to be counted as well — confirm this is intended.
AntreaClusterNetworkPolicyStatusUpdates = metrics.NewCounter(&metrics.CounterOpts{
Namespace: metricNamespaceAntrea,
Subsystem: metricSubsystemController,
Name: "acnp_status_updates",
Help: "The total number of actual status updates performed for Antrea ClusterNetworkPolicy Custom Resources",
StabilityLevel: metrics.ALPHA,
})
)

// Initialize Prometheus metrics collection.
Expand Down Expand Up @@ -122,4 +136,10 @@ func InitializePrometheusMetrics() {
if err := legacyregistry.Register(LengthInternalNetworkPolicyQueue); err != nil {
klog.Errorf("Failed to register antrea_controller_length_network_policy_queue with Prometheus: %s", err.Error())
}
if err := legacyregistry.Register(AntreaNetworkPolicyStatusUpdates); err != nil {
klog.Errorf("Failed to register antrea_controller_anp_status_updates with Prometheus: %s", err.Error())
}
if err := legacyregistry.Register(AntreaClusterNetworkPolicyStatusUpdates); err != nil {
klog.Errorf("Failed to register antrea_controller_acnp_status_updates with Prometheus: %s", err.Error())
}
}
3 changes: 3 additions & 0 deletions pkg/controller/networkpolicy/status_controller.go
Expand Up @@ -34,6 +34,7 @@ import (
antreaclientset "github.com/vmware-tanzu/antrea/pkg/client/clientset/versioned"
secinformers "github.com/vmware-tanzu/antrea/pkg/client/informers/externalversions/security/v1alpha1"
seclisters "github.com/vmware-tanzu/antrea/pkg/client/listers/security/v1alpha1"
"github.com/vmware-tanzu/antrea/pkg/controller/metrics"
antreatypes "github.com/vmware-tanzu/antrea/pkg/controller/types"
)

Expand Down Expand Up @@ -328,6 +329,7 @@ func (c *networkPolicyControl) UpdateAntreaNetworkPolicyStatus(namespace, name s
if anp.Status == *status {
return nil
}
metrics.AntreaNetworkPolicyStatusUpdates.Inc()
toUpdate := anp.DeepCopy()
toUpdate.Status = *status
_, err = c.antreaClient.SecurityV1alpha1().NetworkPolicies(namespace).UpdateStatus(context.TODO(), toUpdate, v1.UpdateOptions{})
Expand All @@ -344,6 +346,7 @@ func (c *networkPolicyControl) UpdateAntreaClusterNetworkPolicyStatus(name strin
if cnp.Status == *status {
return nil
}
metrics.AntreaClusterNetworkPolicyStatusUpdates.Inc()
toUpdate := cnp.DeepCopy()
toUpdate.Status = *status
_, err = c.antreaClient.SecurityV1alpha1().ClusterNetworkPolicies().UpdateStatus(context.TODO(), toUpdate, v1.UpdateOptions{})
Expand Down

0 comments on commit 9a8939a

Please sign in to comment.