Alerting: In migration, create one label per channel #76527

Merged: 8 commits, Dec 19, 2023
115 changes: 66 additions & 49 deletions pkg/services/ngalert/migration/alert_rule.go
@@ -8,108 +8,117 @@ import (

"github.com/prometheus/common/model"

"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
"github.com/grafana/grafana/pkg/services/datasources"
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/tsdb/graphite"
"github.com/grafana/grafana/pkg/util"
)

const (
// ContactLabel is a private label created during migration and used in notification policies.
// It stores a string array of all contact point names an alert rule should send to.
// It was created as a means to simplify post-migration notification policies.
ContactLabel = "__contacts__"
)
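// Reviewer note (illustrative, not part of this diff): the migration previously
// packed every contact point name into the single ContactLabel value above;
// this PR instead attaches one label per channel via contactLabel(c.Name)
// below. The helper is defined elsewhere in the migration package, but a
// minimal sketch of the idea, with an assumed key format, might look like:
//
//	func contactLabel(name string) string {
//		return fmt.Sprintf("__contacts_%s__", name) // hypothetical format
//	}
//
// so a rule notifying "email" and "slack" would carry two boolean-style labels
// rather than one label holding a serialized array of names.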
func addLabelsAndAnnotations(l log.Logger, alert *legacymodels.Alert, dashboardUID string, channels []*legacymodels.AlertNotification) (data.Labels, data.Labels) {
tags := alert.GetTagsFromSettings()
lbls := make(data.Labels, len(tags)+len(channels)+1)

func addMigrationInfo(da *migrationStore.DashAlert, dashboardUID string) (map[string]string, map[string]string) {
tagsMap := simplejson.NewFromAny(da.ParsedSettings.AlertRuleTags).MustMap()
lbls := make(map[string]string, len(tagsMap))
for _, t := range tags {
lbls[t.Key] = t.Value
}

for k, v := range tagsMap {
lbls[k] = simplejson.NewFromAny(v).MustString()
// Add a label for routing
lbls[ngmodels.MigratedUseLegacyChannelsLabel] = "true"
for _, c := range channels {
lbls[contactLabel(c.Name)] = "true"
}

annotations := make(map[string]string, 3)
annotations := make(data.Labels, 4)
annotations[ngmodels.DashboardUIDAnnotation] = dashboardUID
annotations[ngmodels.PanelIDAnnotation] = fmt.Sprintf("%v", da.PanelID)
annotations["__alertId__"] = fmt.Sprintf("%v", da.ID)
annotations[ngmodels.PanelIDAnnotation] = fmt.Sprintf("%v", alert.PanelID)
annotations[ngmodels.MigratedAlertIdAnnotation] = fmt.Sprintf("%v", alert.ID)

message := MigrateTmpl(l.New("field", "message"), alert.Message)
annotations[ngmodels.MigratedMessageAnnotation] = message

return lbls, annotations
}

// MigrateAlert migrates a single dashboard alert from legacy alerting to unified alerting.
func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, da *migrationStore.DashAlert, info migmodels.DashboardUpgradeInfo) (*ngmodels.AlertRule, error) {
// migrateAlert migrates a single dashboard alert from legacy alerting to unified alerting.
func (om *OrgMigration) migrateAlert(ctx context.Context, l log.Logger, alert *legacymodels.Alert, info migmodels.DashboardUpgradeInfo) (*ngmodels.AlertRule, error) {
l.Debug("Migrating alert rule to Unified Alerting")
cond, err := transConditions(ctx, l, da, om.migrationStore)
rawSettings, err := json.Marshal(alert.Settings)
if err != nil {
return nil, fmt.Errorf("get settings: %w", err)
}
var parsedSettings dashAlertSettings
err = json.Unmarshal(rawSettings, &parsedSettings)
if err != nil {
return nil, fmt.Errorf("parse settings: %w", err)
}
cond, err := transConditions(ctx, l, parsedSettings, alert.OrgID, om.migrationStore)
if err != nil {
return nil, fmt.Errorf("transform conditions: %w", err)
}

lbls, annotations := addMigrationInfo(da, info.DashboardUID)
channels := om.extractChannels(l, parsedSettings)

message := MigrateTmpl(l.New("field", "message"), da.Message)
annotations["message"] = message
lbls, annotations := addLabelsAndAnnotations(l, alert, info.DashboardUID, channels)

data, err := migrateAlertRuleQueries(l, cond.Data)
if err != nil {
return nil, fmt.Errorf("failed to migrate alert rule queries: %w", err)
return nil, fmt.Errorf("queries: %w", err)
}

isPaused := false
if da.State == "paused" {
if alert.State == "paused" {
isPaused = true
}

// Here we ensure that the alert rule title is unique within the folder.
titleDeduplicator := om.titleDeduplicatorForFolder(info.NewFolderUID)
name, err := titleDeduplicator.Deduplicate(da.Name)
name, err := titleDeduplicator.Deduplicate(alert.Name)
if err != nil {
return nil, err
}
if name != da.Name {
l.Info(fmt.Sprintf("Alert rule title modified to be unique within the folder and fit within the maximum length of %d", store.AlertDefinitionMaxTitleLength), "old", da.Name, "new", name)
if name != alert.Name {
l.Info(fmt.Sprintf("Alert rule title modified to be unique within the folder and fit within the maximum length of %d", store.AlertDefinitionMaxTitleLength), "old", alert.Name, "new", name)
}
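// Illustrative note (not part of this diff): Deduplicate is expected to keep
// titles unique per folder and within store.AlertDefinitionMaxTitleLength,
// for example by truncating and appending an incrementing suffix on collision;
// the actual strategy lives in the migration models package.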

dashUID := info.DashboardUID
ar := &ngmodels.AlertRule{
OrgID: da.OrgID,
OrgID: alert.OrgID,
Title: name,
UID: util.GenerateShortUID(),
Condition: cond.Condition,
Data: data,
IntervalSeconds: ruleAdjustInterval(da.Frequency),
IntervalSeconds: ruleAdjustInterval(alert.Frequency),
Version: 1,
NamespaceUID: info.NewFolderUID,
DashboardUID: &dashUID,
PanelID: &da.PanelID,
RuleGroup: groupName(ruleAdjustInterval(da.Frequency), info.DashboardName),
For: da.For,
PanelID: &alert.PanelID,
RuleGroup: groupName(ruleAdjustInterval(alert.Frequency), info.DashboardName),
For: alert.For,
Updated: time.Now().UTC(),
Annotations: annotations,
Labels: lbls,
RuleGroupIndex: 1, // Every rule is in its own group.
IsPaused: isPaused,
NoDataState: transNoData(l, da.ParsedSettings.NoDataState),
ExecErrState: transExecErr(l, da.ParsedSettings.ExecutionErrorState),
NoDataState: transNoData(l, parsedSettings.NoDataState),
ExecErrState: transExecErr(l, parsedSettings.ExecutionErrorState),
}

// Label for routing and silences.
n, v := getLabelForSilenceMatching(ar.UID)
ar.Labels[n] = v
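// Illustrative note (not part of this diff): getLabelForSilenceMatching
// presumably derives a label key/value from the rule UID so that the
// KeepState silences created below can match exactly this rule and nothing
// else; the helper itself is defined elsewhere in this package.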

if da.ParsedSettings.ExecutionErrorState == string(legacymodels.ExecutionErrorKeepState) {
if parsedSettings.ExecutionErrorState == string(legacymodels.ExecutionErrorKeepState) {
if err := om.addErrorSilence(ar); err != nil {
om.log.Error("Alert migration error: failed to create silence for Error", "rule_name", ar.Title, "err", err)
}
}

if da.ParsedSettings.NoDataState == string(legacymodels.NoDataKeepState) {
if parsedSettings.NoDataState == string(legacymodels.NoDataKeepState) {
if err := om.addNoDataSilence(ar); err != nil {
om.log.Error("Alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err)
}
@@ -220,7 +229,7 @@ func isPrometheusQuery(queryData map[string]json.RawMessage) (bool, error) {
Type string `json:"type"`
}
if err := json.Unmarshal(ds, &datasource); err != nil {
return false, fmt.Errorf("failed to parse datasource '%s': %w", string(ds), err)
return false, fmt.Errorf("parse datasource '%s': %w", string(ds), err)
}
if datasource.Type == "" {
return false, fmt.Errorf("missing type field '%s'", string(ds))
@@ -277,21 +286,29 @@ func truncate(daName string, length int) string {
return daName
}

func extractChannelIDs(d *migrationStore.DashAlert) (channelUids []migrationStore.UidOrID) {
// Extracting channel UID/ID.
for _, ui := range d.ParsedSettings.Notifications {
if ui.UID != "" {
channelUids = append(channelUids, ui.UID)
continue
// extractChannels extracts notification channels from the given legacy dashboard alert parsed settings.
func (om *OrgMigration) extractChannels(l log.Logger, parsedSettings dashAlertSettings) []*legacymodels.AlertNotification {
// Extracting channels.
channels := make([]*legacymodels.AlertNotification, 0, len(parsedSettings.Notifications))
for _, key := range parsedSettings.Notifications {
// Either id or uid can be defined in the dashboard alert notification settings. See alerting.NewRuleFromDBAlert.
if key.ID > 0 {
if c, ok := om.channelCache.GetChannelByID(key.ID); ok {
channels = append(channels, c)
continue
}
Contributor:
I think this answers my question above: we are going to drop a notification without a UID. I wonder what the reason for such a decision is?

Member Author:
This is basically a simplified version of how it works in legacy alerting; see:

for _, v := range ruleDef.Settings.Get("notifications").MustArray() {
jsonModel := simplejson.NewFromAny(v)
if id, err := jsonModel.Get("id").Int64(); err == nil {
uid, err := translateNotificationIDToUID(ctx, store, id, ruleDef.OrgID)
if err != nil {
if !errors.Is(err, models.ErrAlertNotificationFailedTranslateUniqueID) {
logger.Error("Failed to translate notification id to uid", "error", err.Error(), "dashboardId", model.DashboardID, "alert", model.Name, "panelId", model.PanelID, "notificationId", id)
}
if logTranslationFailures {
logger.Warn("Unable to translate notification id to uid", "dashboardId", model.DashboardID, "alert", model.Name, "panelId", model.PanelID, "notificationId", id)
}
} else {
model.Notifications = append(model.Notifications, uid)
}
} else if uid, err := jsonModel.Get("uid").String(); err == nil {
model.Notifications = append(model.Notifications, uid)
} else {
return nil, ValidationError{Reason: "Neither id nor uid is specified in 'notifications' block, " + err.Error(), DashboardID: model.DashboardID, AlertID: model.ID, PanelID: model.PanelID}
}
}

At least one of id or uid is guaranteed to be present on each entry of parsedSettings.Notifications. So we perform a cached lookup of the UID for a given ID, or use the UID if one exists.

There was a time when UIDs didn't exist for notification channels, but we are guaranteed to have one now since we run after the sqlstore migrations (specifically

mg.AddMigration("Update uid column values in alert_notification", new(RawSQLMigration).
SQLite("UPDATE alert_notification SET uid=printf('%09d',id) WHERE uid IS NULL;").
Postgres("UPDATE alert_notification SET uid=lpad('' || id::text,9,'0') WHERE uid IS NULL;").
Mysql("UPDATE alert_notification SET uid=lpad(id,9,'0') WHERE uid IS NULL;"))
)
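For context: the channelCache that extractChannels relies on is not part of this diff. A minimal, self-contained sketch of the dual-index lookup implied by its GetChannelByID/GetChannelByUID calls could look like the following; the type and field names here are assumptions for illustration, not the actual migration store implementation:

// Hypothetical sketch of a dual-index channel cache.
type notificationChannel struct {
	ID   int64
	UID  string
	Name string
}

type channelCache struct {
	byID  map[int64]*notificationChannel
	byUID map[string]*notificationChannel
}

func newChannelCache(channels []*notificationChannel) *channelCache {
	c := &channelCache{
		byID:  make(map[int64]*notificationChannel, len(channels)),
		byUID: make(map[string]*notificationChannel, len(channels)),
	}
	for _, ch := range channels {
		// Index each channel by both identifiers so either can resolve it.
		c.byID[ch.ID] = ch
		c.byUID[ch.UID] = ch
	}
	return c
}

func (c *channelCache) GetChannelByID(id int64) (*notificationChannel, bool) {
	ch, ok := c.byID[id]
	return ch, ok
}

func (c *channelCache) GetChannelByUID(uid string) (*notificationChannel, bool) {
	ch, ok := c.byUID[uid]
	return ch, ok
}

With a cache like this, an entry whose id resolves is taken first and the uid lookup serves only as a fallback, matching the order in extractChannels.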

Member Author:
If neither id nor uid is present on the entry, or if the id is invalid, then that legacy alert would already be failing to send to the channel in legacy alerting anyway.

}
// In certain circumstances, id is used instead of uid.
// We add this if there was no uid.
if ui.ID > 0 {
channelUids = append(channelUids, ui.ID)

if key.UID != "" {
if c, ok := om.channelCache.GetChannelByUID(key.UID); ok {
channels = append(channels, c)
continue
}
}
}

return channelUids
l.Warn("Failed to get alert notification, skipping", "notificationKey", key)
}
return channels
}

// groupName constructs a group name from the dashboard title and the interval. It truncates the dashboard title
71 changes: 36 additions & 35 deletions pkg/services/ngalert/migration/alert_rule_test.go
@@ -7,14 +7,14 @@ import (
"testing"

"github.com/google/uuid"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/stretchr/testify/require"

"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/log/logtest"
legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
migmodels "github.com/grafana/grafana/pkg/services/ngalert/migration/models"
migrationStore "github.com/grafana/grafana/pkg/services/ngalert/migration/store"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
)
@@ -97,30 +97,33 @@ func TestMigrateAlertRuleQueries(t *testing.T) {
func TestAddMigrationInfo(t *testing.T) {
tt := []struct {
name string
alert *migrationStore.DashAlert
alert *legacymodels.Alert
dashboard string
expectedLabels map[string]string
expectedAnnotations map[string]string
expectedLabels data.Labels
expectedAnnotations data.Labels
}{
{
name: "when alert rule tags are a JSON array, they're ignored.",
alert: &migrationStore.DashAlert{Alert: &legacymodels.Alert{ID: 43, PanelID: 42}, ParsedSettings: &migrationStore.DashAlertSettings{AlertRuleTags: []string{"one", "two", "three", "four"}}},
name: "when alert rule tags are a JSON array, they're ignored.",
alert: &legacymodels.Alert{ID: 43, PanelID: 42, Message: "message", Settings: simplejson.NewFromAny(map[string]any{
"alertRuleTags": []string{"one", "two", "three", "four"},
})},
dashboard: "dashboard",
expectedLabels: map[string]string{},
expectedAnnotations: map[string]string{"__alertId__": "43", "__dashboardUid__": "dashboard", "__panelId__": "42"},
expectedLabels: data.Labels{models.MigratedUseLegacyChannelsLabel: "true"},
expectedAnnotations: data.Labels{models.MigratedAlertIdAnnotation: "43", models.DashboardUIDAnnotation: "dashboard", models.PanelIDAnnotation: "42", "message": "message"},
},
{
name: "when alert rule tags are a JSON object",
alert: &migrationStore.DashAlert{Alert: &legacymodels.Alert{ID: 43, PanelID: 42}, ParsedSettings: &migrationStore.DashAlertSettings{AlertRuleTags: map[string]any{"key": "value", "key2": "value2"}}},
dashboard: "dashboard",
expectedLabels: map[string]string{"key": "value", "key2": "value2"},
expectedAnnotations: map[string]string{"__alertId__": "43", "__dashboardUid__": "dashboard", "__panelId__": "42"},
name: "when alert rule tags are a JSON object",
alert: &legacymodels.Alert{ID: 43, PanelID: 42, Message: "message", Settings: simplejson.NewFromAny(map[string]any{
"alertRuleTags": map[string]any{"key": "value", "key2": "value2"},
})}, dashboard: "dashboard",
expectedLabels: data.Labels{models.MigratedUseLegacyChannelsLabel: "true", "key": "value", "key2": "value2"},
expectedAnnotations: data.Labels{models.MigratedAlertIdAnnotation: "43", models.DashboardUIDAnnotation: "dashboard", models.PanelIDAnnotation: "42", "message": "message"},
},
}

for _, tc := range tt {
t.Run(tc.name, func(t *testing.T) {
labels, annotations := addMigrationInfo(tc.alert, tc.dashboard)
labels, annotations := addLabelsAndAnnotations(&logtest.Fake{}, tc.alert, tc.dashboard, nil)
require.Equal(t, tc.expectedLabels, labels)
require.Equal(t, tc.expectedAnnotations, annotations)
})
@@ -132,7 +135,7 @@ func TestMakeAlertRule(t *testing.T) {
info := migmodels.DashboardUpgradeInfo{
DashboardUID: "dashboarduid",
DashboardName: "dashboardname",
NewFolderUID: "ewfolderuid",
NewFolderUID: "newfolderuid",
NewFolderName: "newfoldername",
}
t.Run("when mapping rule names", func(t *testing.T) {
@@ -141,7 +144,7 @@
m := service.newOrgMigration(1)
da := createTestDashAlert()

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)

require.NoError(t, err)
require.Equal(t, da.Name, ar.Title)
@@ -153,7 +156,7 @@
da := createTestDashAlert()
da.Name = strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1)

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)

require.NoError(t, err)
require.Len(t, ar.Title, store.AlertDefinitionMaxTitleLength)
@@ -165,15 +168,15 @@
da := createTestDashAlert()
da.Name = strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1)

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)

require.NoError(t, err)
require.Len(t, ar.Title, store.AlertDefinitionMaxTitleLength)

da = createTestDashAlert()
da.Name = strings.Repeat("a", store.AlertDefinitionMaxTitleLength+1)

ar, err = m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err = m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)

require.NoError(t, err)
require.Len(t, ar.Title, store.AlertDefinitionMaxTitleLength)
@@ -186,7 +189,7 @@
m := service.newOrgMigration(1)
da := createTestDashAlert()

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)
require.NoError(t, err)
require.False(t, ar.IsPaused)
})
@@ -197,7 +200,7 @@
da := createTestDashAlert()
da.State = "paused"

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)
require.NoError(t, err)
require.True(t, ar.IsPaused)
})
@@ -206,9 +209,9 @@
service := NewTestMigrationService(t, sqlStore, nil)
m := service.newOrgMigration(1)
da := createTestDashAlert()
da.ParsedSettings.NoDataState = uuid.NewString()
da.Settings.Set("noDataState", uuid.NewString())

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)
require.Nil(t, err)
require.Equal(t, models.NoData, ar.NoDataState)
})
@@ -217,9 +220,9 @@
service := NewTestMigrationService(t, sqlStore, nil)
m := service.newOrgMigration(1)
da := createTestDashAlert()
da.ParsedSettings.ExecutionErrorState = uuid.NewString()
da.Settings.Set("executionErrorState", uuid.NewString())

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)
require.Nil(t, err)
require.Equal(t, models.ErrorErrState, ar.ExecErrState)
})
@@ -230,7 +233,7 @@
da := createTestDashAlert()
da.Message = "Instance ${instance} is down"

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)
require.Nil(t, err)
expected :=
"{{- $mergedLabels := mergeLabelValues $values -}}\n" +
@@ -279,7 +282,7 @@
t.Run(fmt.Sprintf("interval %ds should be %s", test.interval, test.expected), func(t *testing.T) {
da.Frequency = test.interval

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)

require.NoError(t, err)
require.Equal(t, fmt.Sprintf("%s - %s", info.DashboardName, test.expected), ar.RuleGroup)
@@ -298,7 +301,7 @@
NewFolderName: "newfoldername",
}

ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, &da, info)
ar, err := m.migrateAlert(context.Background(), &logtest.Fake{}, da, info)

require.NoError(t, err)
require.Len(t, ar.RuleGroup, store.AlertRuleMaxRuleGroupNameLength)
@@ -307,12 +310,10 @@
})
}

func createTestDashAlert() migrationStore.DashAlert {
return migrationStore.DashAlert{
Alert: &legacymodels.Alert{
ID: 1,
Name: "test",
},
ParsedSettings: &migrationStore.DashAlertSettings{},
func createTestDashAlert() *legacymodels.Alert {
return &legacymodels.Alert{
ID: 1,
Name: "test",
Settings: simplejson.New(),
}
}