Skip to content

Commit

Permalink
operator: Add a custom metric that collects Lokistacks requiring a sc…
Browse files Browse the repository at this point in the history
…hema upgrade (grafana#11513)

Co-authored-by: Robert Jacob <rojacob@redhat.com>
Co-authored-by: Periklis Tsirakidis <periklis@redhat.com>
  • Loading branch information
3 people authored and rhnasc committed Apr 12, 2024
1 parent bea6c09 commit 7942b4e
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 2 deletions.
1 change: 1 addition & 0 deletions operator/CHANGELOG.md
@@ -1,5 +1,6 @@
## Main

- [11513](https://github.com/grafana/loki/pull/11513) **btaani**: Add a custom metric that collects Lokistacks requiring a schema upgrade
- [11718](https://github.com/grafana/loki/pull/11718) **periklis**: Upgrade k8s.io, sigs.k8s.io and openshift deps
- [11671](https://github.com/grafana/loki/pull/11671) **JoaoBraveCoding**: Update mixins to fix structured metadata dashboards
- [11624](https://github.com/grafana/loki/pull/11624) **xperimental**: React to changes in ConfigMap used for storage CA
Expand Down
25 changes: 25 additions & 0 deletions operator/docs/lokistack/sop.md
Expand Up @@ -308,3 +308,28 @@ The query queue is currently under high load.
### Steps

- Increase the number of queriers

## Lokistack Schema Upgrades Required

### Impact

The LokiStack reports a warning because a newer object storage schema version is available and has not yet been configured.

### Summary

The schema configuration does not contain the most recent schema version and needs an update.

### Severity

`Warning`

### Access Required

- Console access to the cluster
- Edit access to the namespace where the LokiStack is deployed:
  - OpenShift
    - `openshift-logging` (LokiStack)

### Steps

- Add a new entry to the LokiStack object storage schema configuration that uses schema version V13 and has an EffectiveDate in the future
4 changes: 2 additions & 2 deletions operator/internal/handlers/lokistack_create_or_update.go
Expand Up @@ -208,9 +208,9 @@ func CreateOrUpdateLokiStack(
return kverrors.New("failed to configure lokistack resources", "name", req.NamespacedName)
}

// 1x.extra-small is used only for development, so the metrics will not
// 1x.demo is used only for development, so the metrics will not
// be collected.
if opts.Stack.Size != lokiv1.SizeOneXExtraSmall && opts.Stack.Size != lokiv1.SizeOneXDemo {
if opts.Stack.Size != lokiv1.SizeOneXDemo {
metrics.Collect(&opts.Stack, opts.Name)
}

Expand Down
10 changes: 10 additions & 0 deletions operator/internal/manifests/internal/alerts/prometheus-alerts.yaml
Expand Up @@ -175,3 +175,13 @@ groups:
for: 15m
labels:
severity: warning
# Warning-severity alert on LokiStack warnings: the expression sums
# lokistack_warnings_count per stack_id and fires when the total is above zero.
# NOTE(review): the sum spans every value of the "reason" label, so any future
# warning reason would also trigger this schema-specific alert; consider
# filtering on the schema-update reason.
# NOTE(review): no "for:" clause is visible on this rule, so it presumably
# fires on the first evaluation above zero; confirm that this is intended.
- alert: LokistackSchemaUpgradesRequired
annotations:
message: |-
Object storage schema needs upgrade.
summary: "The applied storage schema config is old and should be upgraded."
runbook_url: "[[ .RunbookURL ]]#Lokistack-Schema-Upgrades-Required"
expr: sum by(stack_id) (lokistack_warnings_count) > 0
labels:
severity: warning
resource: '{{ $labels.stack_id}}'
20 changes: 20 additions & 0 deletions operator/internal/metrics/metrics.go
Expand Up @@ -51,6 +51,14 @@ var (
},
[]string{"size", "stack_id"},
)

// lokistackWarningsCount is a gauge vector exported as
// "lokistack_warnings_count" and partitioned by the "reason" and "stack_id"
// labels.
// NOTE(review): Prometheus tooling (promlint) expects the "_count" suffix only
// on histogram and summary series; renaming would break existing queries and
// alerts, so confirm before changing it.
lokistackWarningsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "lokistack_warnings_count",
Help: "Counts the number of warnings set on a LokiStack.",
},
[]string{"reason", "stack_id"},
)
)

// RegisterMetricCollectors registers the prometheus collectors with the k8 default metrics
Expand All @@ -60,6 +68,7 @@ func RegisterMetricCollectors() {
userDefinedLimitsMetric,
globalStreamLimitMetric,
averageTenantStreamLimitMetric,
lokistackWarningsCount,
}

for _, collector := range metricCollectors {
Expand Down Expand Up @@ -104,6 +113,17 @@ func Collect(spec *lokiv1.LokiStackSpec, stackName string) {
setGlobalStreamLimitMetric(size, stackName, globalRate)
setAverageTenantStreamLimitMetric(size, stackName, tenantRate)
}

if len(spec.Storage.Schemas) > 0 && spec.Storage.Schemas[len(spec.Storage.Schemas)-1].Version != lokiv1.ObjectStorageSchemaV13 {
setLokistackSchemaUpgradesRequired(stackName, true)
}
}

func setLokistackSchemaUpgradesRequired(identifier string, active bool) {
lokistackWarningsCount.With(prometheus.Labels{
"reason": string(lokiv1.ReasonStorageNeedsSchemaUpdate),
"stack_id": identifier,
}).Set(boolValue(active))
}

func setDeploymentMetric(size lokiv1.LokiStackSizeType, identifier string, active bool) {
Expand Down

0 comments on commit 7942b4e

Please sign in to comment.