Skip to content

Commit

Permalink
controllers/vmrule: skip bad rules that exceed max size
Browse files Browse the repository at this point in the history
Skip rules whose content is too big for a configmap earlier. It should fix errors with a broken vmalert pipeline.
Improves logging for the content-exceeds-limit error.
Adds webhook validation for vmrule size
  • Loading branch information
f41gh7 committed Apr 13, 2023
1 parent 32c439f commit bb754d5
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 23 deletions.
5 changes: 5 additions & 0 deletions api/v1beta1/vmrule_types.go
@@ -1,10 +1,15 @@
package v1beta1

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"net/url"
)

// MaxConfigMapDataSize is the maximum allowed size of the `Data` field of a ConfigMap.
// It is limited to half of v1.MaxSecretSize, since the effective limit
// may differ between Kubernetes versions.
var MaxConfigMapDataSize = int(float64(v1.MaxSecretSize) * 0.5)

// VMRuleSpec defines the desired state of VMRule
type VMRuleSpec struct {
// Groups list of group rules
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta1/vmrule_webhook.go
Expand Up @@ -25,6 +25,7 @@ var _ webhook.Validator = &VMRule{}
func (r *VMRule) sanityCheck() error {

uniqNames := make(map[string]struct{})
var totalSize int
for i := range r.Spec.Groups {
group := &r.Spec.Groups[i]
errContext := fmt.Sprintf("VMRule: %s/%s group: %s", r.Namespace, r.Name, group.Name)
Expand All @@ -37,13 +38,17 @@ func (r *VMRule) sanityCheck() error {
return fmt.Errorf("cannot marshal %s, err: %w", errContext, err)
}
var vmalertGroup config.Group
totalSize += len(groupBytes)
if err := yaml.Unmarshal(groupBytes, &vmalertGroup); err != nil {
return fmt.Errorf("cannot parse vmalert group %s, err: %w, r: \n%s", errContext, err, string(groupBytes))
}
if err := vmalertGroup.Validate(nil, true); err != nil {
return fmt.Errorf("validation failed for %s err: %w", errContext, err)
}
}
if totalSize > MaxConfigMapDataSize {
return fmt.Errorf("VMRule's content size: %d exceed single rule limit: %d", totalSize, MaxConfigMapDataSize)
}
vmrulelog.Info("successfully validated rule", "name", r.Name)
return nil
}
Expand Down
34 changes: 14 additions & 20 deletions controllers/factory/rulescm.go
Expand Up @@ -19,12 +19,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

// The maximum `Data` size of a ConfigMap seems to differ between
// environments. This is probably due to different metadata sizes, which count
// toward the overall maximum size of a ConfigMap. Therefore, let's leave a
// large buffer by using only half of v1.MaxSecretSize.
var maxConfigMapDataSize = int(float64(v1.MaxSecretSize) * 0.5)

var (
managedByOperatorLabel = "managed-by"
managedByOperatorLabelValue = "vm-operator"
Expand Down Expand Up @@ -250,7 +244,17 @@ func SelectRules(ctx context.Context, cr *victoriametricsv1beta1.VMAlert, rclien
errors = append(errors, fmt.Sprintf("cannot generate content for rule: %s, err :%s", pRule.Name, err))
continue
}
rules[fmt.Sprintf("%v-%v.yaml", pRule.Namespace, pRule.Name)] = content

// check if none of the rule files is too large for a single ConfigMap
if len(content) > victoriametricsv1beta1.MaxConfigMapDataSize {
badRules++
errors = append(errors, fmt.Sprintf(
"rule file %q with size %d is too large for a single Kubernetes ConfigMap limit: %d",
pRule.Namespace+"-"+pRule.Name, len(content), victoriametricsv1beta1.MaxConfigMapDataSize,
))
continue
}
rules[fmt.Sprintf("%s-%s.yaml", pRule.Namespace, pRule.Name)] = content
}

ruleNames := make([]string, 0, len(rules))
Expand Down Expand Up @@ -298,23 +302,13 @@ func generateContent(promRule victoriametricsv1beta1.VMRuleSpec, enforcedNsLabel
}

// makeRulesConfigMaps takes a VMAlert configuration and rule files and
// returns a list of Kubernetes ConfigMaps to be later on mounted into the
// Prometheus instance.
// returns a list of Kubernetes ConfigMaps to be later on mounted
// If the total size of rule files exceeds the Kubernetes ConfigMap limit,
// they are split up via the simple first-fit [1] bin packing algorithm. In the
// future this can be replaced by a more sophisticated algorithm, but for now
// simplicity should be sufficient.
// [1] https://en.wikipedia.org/wiki/Bin_packing_problem#First-fit_algorithm
func makeRulesConfigMaps(cr *victoriametricsv1beta1.VMAlert, ruleFiles map[string]string) ([]v1.ConfigMap, error) {
// check if none of the rule files is too large for a single ConfigMap
for filename, file := range ruleFiles {
if len(file) > maxConfigMapDataSize {
return nil, fmt.Errorf(
"rule file '%v' is too large for a single Kubernetes ConfigMap",
filename,
)
}
}

buckets := []map[string]string{
{},
Expand All @@ -331,14 +325,14 @@ func makeRulesConfigMaps(cr *victoriametricsv1beta1.VMAlert, ruleFiles map[strin

for _, filename := range fileNames {
// If rule file doesn't fit into current bucket, create new bucket.
if bucketSize(buckets[currBucketIndex])+len(ruleFiles[filename]) > maxConfigMapDataSize {
if bucketSize(buckets[currBucketIndex])+len(ruleFiles[filename]) > victoriametricsv1beta1.MaxConfigMapDataSize {
buckets = append(buckets, map[string]string{})
currBucketIndex++
}
buckets[currBucketIndex][filename] = ruleFiles[filename]
}

ruleFileConfigMaps := []v1.ConfigMap{}
ruleFileConfigMaps := make([]v1.ConfigMap, 0, len(buckets))
for i, bucket := range buckets {
cm := makeRulesConfigMap(cr, bucket)
cm.Name = cm.Name + "-" + strconv.Itoa(i)
Expand Down
5 changes: 2 additions & 3 deletions controllers/vmrule_controller.go
Expand Up @@ -79,7 +79,7 @@ func (r *VMRuleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res
currVMAlert := &vmalert
match, err := isSelectorsMatches(instance, currVMAlert, currVMAlert.Spec.RuleSelector)
if err != nil {
reqLogger.Error(err, "cannot match vmalert and vmRule")
reqLogger.Error(err, "cannot match vmalert and vmRule", "vmalert", currVMAlert.Name)
continue
}
// fast path, not match
Expand All @@ -88,8 +88,7 @@ func (r *VMRuleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res
}
maps, err := factory.CreateOrUpdateRuleConfigMaps(ctx, currVMAlert, r)
if err != nil {
reqLogger.Error(err, "cannot update rules configmaps")
return ctrl.Result{}, err
return ctrl.Result{}, fmt.Errorf("cannot update rules configmaps: %w", err)
}

if err := factory.CreateOrUpdateVMAlert(ctx, currVMAlert, r, r.BaseConf, maps); err != nil {
Expand Down

0 comments on commit bb754d5

Please sign in to comment.