Skip to content

Commit

Permalink
[v9.5.x] Alerting: Remove and revert flag alertingBigTransactions (#7…
Browse files Browse the repository at this point in the history
…0910)

Alerting: Remove and revert flag alertingBigTransactions (#65976)

* Alerting: Remove and revert flag alertingBigTransactions

This is a partial revert of #56575 and a removal of the `alertingBigTransactions` flag.

Real-word use has seen no clear performance incentive to maintain this flag. Lowered db connection count
came at the cost of significant increase in CPU usage and query latency.

* Fix lint backend

* Removed last bits of alertingBigTransactions

---------

Co-authored-by: Armand Grillet <2117580+armandgrillet@users.noreply.github.com>
(cherry picked from commit 63187fa)

Co-authored-by: Matthew Jacobson <matthew.jacobson@grafana.com>
  • Loading branch information
santihernandezc and JacobsonMT committed Jun 29, 2023
1 parent 2745d3b commit fd80720
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 218 deletions.
Expand Up @@ -59,7 +59,6 @@ Alpha features might be changed or removed without prior notice.

| Feature toggle name | Description |
| ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `alertingBigTransactions` | Use big transactions for alerting database writes |
| `dashboardPreviews` | Create and show thumbnails for dashboard search results |
| `live-service-web-worker` | This will use a webworker thread to processes events rather than the main thread |
| `queryOverLive` | Use Grafana Live WebSocket to execute backend queries |
Expand Down
1 change: 0 additions & 1 deletion packages/grafana-data/src/types/featureToggles.gen.ts
Expand Up @@ -18,7 +18,6 @@
* @public
*/
export interface FeatureToggles {
alertingBigTransactions?: boolean;
trimDefaults?: boolean;
disableEnvelopeEncryption?: boolean;
database_metrics?: boolean;
Expand Down
6 changes: 0 additions & 6 deletions pkg/services/featuremgmt/registry.go
Expand Up @@ -9,12 +9,6 @@ package featuremgmt
var (
// Register each toggle here
standardFeatureFlags = []FeatureFlag{
{
Name: "alertingBigTransactions",
Description: "Use big transactions for alerting database writes",
State: FeatureStateAlpha,
Owner: grafanaAlertingSquad,
},
{
Name: "trimDefaults",
Description: "Use cue schema to remove values that will be applied automatically",
Expand Down
1 change: 0 additions & 1 deletion pkg/services/featuremgmt/toggles_gen.csv
@@ -1,5 +1,4 @@
Name,State,Owner,requiresDevMode,RequiresLicense,RequiresRestart,FrontendOnly
alertingBigTransactions,alpha,@grafana/alerting-squad,false,false,false,false
trimDefaults,beta,@grafana/grafana-as-code,false,false,false,false
disableEnvelopeEncryption,stable,@grafana/grafana-as-code,false,false,false,false
database_metrics,stable,@grafana/hosted-grafana-team,false,false,false,false
Expand Down
4 changes: 0 additions & 4 deletions pkg/services/featuremgmt/toggles_gen.go
Expand Up @@ -7,10 +7,6 @@
package featuremgmt

const (
// FlagAlertingBigTransactions
// Use big transactions for alerting database writes
FlagAlertingBigTransactions = "alertingBigTransactions"

// FlagTrimDefaults
// Use cue schema to remove values that will be applied automatically
FlagTrimDefaults = "trimDefaults"
Expand Down
22 changes: 4 additions & 18 deletions pkg/services/ngalert/state/manager.go
Expand Up @@ -351,8 +351,6 @@ func (st *Manager) saveAlertStates(ctx context.Context, logger log.Logger, state
}

logger.Debug("Saving alert states", "count", len(states))
instances := make([]ngModels.AlertInstance, 0, len(states))

for _, s := range states {
// Do not save normal state to database and remove transition to Normal state but keep mapped states
if st.doNotSaveNormalState && IsNormalStateWithNoReason(s.State) && !s.Changed() {
Expand All @@ -364,7 +362,7 @@ func (st *Manager) saveAlertStates(ctx context.Context, logger log.Logger, state
logger.Error("Failed to create a key for alert state to save it to database. The state will be ignored ", "cacheID", s.CacheID, "error", err, "labels", s.Labels.String())
continue
}
fields := ngModels.AlertInstance{
instance := ngModels.AlertInstance{
AlertInstanceKey: key,
Labels: ngModels.InstanceLabels(s.Labels),
CurrentState: ngModels.InstanceStateType(s.State.State.String()),
Expand All @@ -373,23 +371,11 @@ func (st *Manager) saveAlertStates(ctx context.Context, logger log.Logger, state
CurrentStateSince: s.StartsAt,
CurrentStateEnd: s.EndsAt,
}
instances = append(instances, fields)
}

if len(instances) == 0 {
return
}

if err := st.instanceStore.SaveAlertInstances(ctx, instances...); err != nil {
type debugInfo struct {
State string
Labels string
}
debug := make([]debugInfo, 0)
for _, inst := range instances {
debug = append(debug, debugInfo{string(inst.CurrentState), data.Labels(inst.Labels).String()})
err = st.instanceStore.SaveAlertInstance(ctx, instance)
if err != nil {
logger.Error("Failed to save alert state", "labels", s.Labels.String(), "state", s.State, "error", err)
}
logger.Error("Failed to save alert states", "states", debug, "error", err)
}
}

Expand Down
36 changes: 23 additions & 13 deletions pkg/services/ngalert/state/manager_test.go
Expand Up @@ -113,9 +113,11 @@ func TestWarmStateCache(t *testing.T) {
},
}

instances := make([]models.AlertInstance, 0)

labels := models.InstanceLabels{"test1": "testValue1"}
_, hash, _ := labels.StringAndHash()
instance1 := models.AlertInstance{
instances = append(instances, models.AlertInstance{
AlertInstanceKey: models.AlertInstanceKey{
RuleOrgID: rule.OrgID,
RuleUID: rule.UID,
Expand All @@ -126,11 +128,11 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
}
})

labels = models.InstanceLabels{"test2": "testValue2"}
_, hash, _ = labels.StringAndHash()
instance2 := models.AlertInstance{
instances = append(instances, models.AlertInstance{
AlertInstanceKey: models.AlertInstanceKey{
RuleOrgID: rule.OrgID,
RuleUID: rule.UID,
Expand All @@ -141,11 +143,11 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
}
})

labels = models.InstanceLabels{"test3": "testValue3"}
_, hash, _ = labels.StringAndHash()
instance3 := models.AlertInstance{
instances = append(instances, models.AlertInstance{
AlertInstanceKey: models.AlertInstanceKey{
RuleOrgID: rule.OrgID,
RuleUID: rule.UID,
Expand All @@ -156,11 +158,11 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
}
})

labels = models.InstanceLabels{"test4": "testValue4"}
_, hash, _ = labels.StringAndHash()
instance4 := models.AlertInstance{
instances = append(instances, models.AlertInstance{
AlertInstanceKey: models.AlertInstanceKey{
RuleOrgID: rule.OrgID,
RuleUID: rule.UID,
Expand All @@ -171,11 +173,11 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
}
})

labels = models.InstanceLabels{"test5": "testValue5"}
_, hash, _ = labels.StringAndHash()
instance5 := models.AlertInstance{
instances = append(instances, models.AlertInstance{
AlertInstanceKey: models.AlertInstanceKey{
RuleOrgID: rule.OrgID,
RuleUID: rule.UID,
Expand All @@ -186,8 +188,10 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
})
for _, instance := range instances {
_ = dbstore.SaveAlertInstance(ctx, instance)
}
_ = dbstore.SaveAlertInstances(ctx, instance1, instance2, instance3, instance4, instance5)

cfg := state.ManagerCfg{
Metrics: testMetrics.GetStateMetrics(),
Expand Down Expand Up @@ -2370,7 +2374,9 @@ func TestStaleResultsHandler(t *testing.T) {
},
}

_ = dbstore.SaveAlertInstances(ctx, instances...)
for _, instance := range instances {
_ = dbstore.SaveAlertInstance(ctx, instance)
}

testCases := []struct {
desc string
Expand Down Expand Up @@ -2621,7 +2627,9 @@ func TestDeleteStateByRuleUID(t *testing.T) {
},
}

_ = dbstore.SaveAlertInstances(ctx, instances...)
for _, instance := range instances {
_ = dbstore.SaveAlertInstance(ctx, instance)
}

testCases := []struct {
desc string
Expand Down Expand Up @@ -2755,7 +2763,9 @@ func TestResetStateByRuleUID(t *testing.T) {
},
}

_ = dbstore.SaveAlertInstances(ctx, instances...)
for _, instance := range instances {
_ = dbstore.SaveAlertInstance(ctx, instance)
}

testCases := []struct {
desc string
Expand Down
2 changes: 1 addition & 1 deletion pkg/services/ngalert/state/persist.go
Expand Up @@ -11,7 +11,7 @@ import (
type InstanceStore interface {
FetchOrgIds(ctx context.Context) ([]int64, error)
ListAlertInstances(ctx context.Context, cmd *models.ListAlertInstancesQuery) ([]*models.AlertInstance, error)
SaveAlertInstances(ctx context.Context, cmd ...models.AlertInstance) error
SaveAlertInstance(ctx context.Context, instance models.AlertInstance) error
DeleteAlertInstances(ctx context.Context, keys ...models.AlertInstanceKey) error
DeleteAlertInstancesByRule(ctx context.Context, key models.AlertRuleKey) error
}
Expand Down
6 changes: 2 additions & 4 deletions pkg/services/ngalert/state/testing.go
Expand Up @@ -28,12 +28,10 @@ func (f *FakeInstanceStore) ListAlertInstances(_ context.Context, q *models.List
return nil, nil
}

func (f *FakeInstanceStore) SaveAlertInstances(_ context.Context, q ...models.AlertInstance) error {
func (f *FakeInstanceStore) SaveAlertInstance(_ context.Context, q models.AlertInstance) error {
f.mtx.Lock()
defer f.mtx.Unlock()
for _, inst := range q {
f.RecordedOps = append(f.RecordedOps, inst)
}
f.RecordedOps = append(f.RecordedOps, q)
return nil
}

Expand Down
93 changes: 0 additions & 93 deletions pkg/services/ngalert/store/instance_database.go
Expand Up @@ -43,99 +43,6 @@ func (st DBstore) ListAlertInstances(ctx context.Context, cmd *models.ListAlertI
return result, err
}

// SaveAlertInstances saves all the provided alert instances to the store.
func (st DBstore) SaveAlertInstances(ctx context.Context, cmd ...models.AlertInstance) error {
if !st.FeatureToggles.IsEnabled(featuremgmt.FlagAlertingBigTransactions) {
// This mimics the replace code-path by calling SaveAlertInstance in a loop, with a transaction per call.
for _, c := range cmd {
err := st.SaveAlertInstance(ctx, c)
if err != nil {
return err
}
}
return nil
} else {
// Batches write into statements with `maxRows` instances per statements.
// This makes sure we don't create statements that are too long for some
// databases to process. For example, SQLite has a limit of 999 variables
// per write.
keyNames := []string{"rule_org_id", "rule_uid", "labels_hash"}
fieldNames := []string{
"rule_org_id", "rule_uid", "labels", "labels_hash", "current_state",
"current_reason", "current_state_since", "current_state_end", "last_eval_time",
}
fieldsPerRow := len(fieldNames)
maxRows := 20
maxArgs := maxRows * fieldsPerRow

bigUpsertSQL, err := st.SQLStore.GetDialect().UpsertMultipleSQL(
"alert_instance", keyNames, fieldNames, maxRows)
if err != nil {
return err
}

// Args contains the SQL statement, and the values to fill into the SQL statement.
args := make([]interface{}, 0, maxArgs)
args = append(args, bigUpsertSQL)
values := func(a []interface{}) int {
return len(a) - 1
}

// Generate batches of `maxRows` and write the statements when full.
for _, alertInstance := range cmd {
labelTupleJSON, err := alertInstance.Labels.StringKey()
if err != nil {
return err
}

if err := models.ValidateAlertInstance(alertInstance); err != nil {
return err
}

args = append(args,
alertInstance.RuleOrgID, alertInstance.RuleUID, labelTupleJSON, alertInstance.LabelsHash,
alertInstance.CurrentState, alertInstance.CurrentReason, alertInstance.CurrentStateSince.Unix(),
alertInstance.CurrentStateEnd.Unix(), alertInstance.LastEvalTime.Unix())

// If we've reached the maximum batch size, write to the database.
if values(args) >= maxArgs {
err = st.SQLStore.WithDbSession(ctx, func(sess *db.Session) error {
_, err := sess.Exec(args...)
return err
})
if err != nil {
return fmt.Errorf("failed to save alert instances: %w", err)
}

// Reset args so we can re-use the allocated interface pointers.
args = args[:1]
}
}

// Write the final batch of up to maxRows in size.
if values(args) != 0 && values(args)%fieldsPerRow == 0 {
upsertSQL, err := st.SQLStore.GetDialect().UpsertMultipleSQL(
"alert_instance", keyNames, fieldNames, values(args)/fieldsPerRow)
if err != nil {
return err
}

args[0] = upsertSQL
err = st.SQLStore.WithDbSession(ctx, func(sess *db.Session) error {
_, err := sess.Exec(args...)
return err
})
if err != nil {
return fmt.Errorf("failed to save alert instances: %w", err)
}
} else if values(args) != 0 {
return fmt.Errorf("failed to upsert alert instances. Last statements had %v fields, which is not a multiple of the number of fields, %v", len(args), fieldsPerRow)
}

return nil
}
}

// SaveAlertInstance is a handler for saving a new alert instance.
func (st DBstore) SaveAlertInstance(ctx context.Context, alertInstance models.AlertInstance) error {
return st.SQLStore.WithDbSession(ctx, func(sess *db.Session) error {
Expand Down

0 comments on commit fd80720

Please sign in to comment.