From 206ddc89dcf59517a22055c6b90415e5f44972b3 Mon Sep 17 00:00:00 2001 From: Hannah Vernon Date: Mon, 9 Mar 2026 12:36:14 -0500 Subject: [PATCH 01/26] Add alert muting feature (both editions) Implement pattern-based and instance-based alert mute rules that suppress notifications while continuing to log alerts for auditability. Core: - MuteRule model with AND-logic matching across server, metric, database, query text, wait type, and job name fields - MuteRuleService with JSON persistence (Dashboard) and DuckDB persistence (Lite) - Optional expiration (1h/24h/7d/permanent) with automatic skip on expired rules Alert Pipeline: - Mute checks wired into all 7 alert metrics in both editions - Muted alerts logged with muted=true flag, notifications suppressed - DuckDB schema v20: muted column on config_alert_log, config_mute_rules table UI: - MuteRuleDialog for creating/editing rules with match criteria and expiration - ManageMuteRulesWindow with DataGrid listing, enable/disable/delete/purge - Settings button in both editions to access mute rule management - Right-click context menus in alert history: Mute This Alert / Mute Similar - Muted alerts shown with reduced opacity and italic text in alert history MCP: - get_mute_rules tool added to both editions' MCP servers - MuteRuleService registered in DI for MCP tool resolution Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Dashboard/Controls/AlertsHistoryContent.xaml | 11 + .../Controls/AlertsHistoryContent.xaml.cs | 53 +++- Dashboard/MainWindow.xaml.cs | 247 ++++++++++++------ Dashboard/ManageMuteRulesWindow.xaml | 80 ++++++ Dashboard/ManageMuteRulesWindow.xaml.cs | 102 ++++++++ Dashboard/Mcp/McpAlertTools.cs | 39 +++ Dashboard/Mcp/McpHostService.cs | 5 +- Dashboard/Models/MuteRule.cs | 86 ++++++ Dashboard/MuteRuleDialog.xaml | 111 ++++++++ Dashboard/MuteRuleDialog.xaml.cs | 124 +++++++++ Dashboard/Services/EmailAlertService.cs | 6 +- Dashboard/Services/MuteRuleService.cs | 144 ++++++++++ Dashboard/SettingsWindow.xaml | 8 + Dashboard/SettingsWindow.xaml.cs | 11 +- Lite.Tests/DuckDbSchemaTests.cs | 7 +- Lite/Controls/AlertsHistoryTab.xaml | 11 + Lite/Controls/AlertsHistoryTab.xaml.cs | 47 ++++ Lite/Database/DuckDbInitializer.cs | 15 +- Lite/Database/Schema.cs | 19 +- Lite/MainWindow.xaml.cs | 138 +++++++--- Lite/Mcp/McpAlertTools.cs | 39 +++ Lite/Mcp/McpHostService.cs | 5 +- Lite/Models/MuteRule.cs | 85 ++++++ Lite/Services/EmailAlertService.cs | 18 +- .../Services/LocalDataService.AlertHistory.cs | 10 +- Lite/Services/MuteRuleService.cs | 238 +++++++++++++++++ Lite/Windows/ManageMuteRulesWindow.xaml | 81 ++++++ Lite/Windows/ManageMuteRulesWindow.xaml.cs | 101 +++++++ Lite/Windows/MuteRuleDialog.xaml | 112 ++++++++ Lite/Windows/MuteRuleDialog.xaml.cs | 123 +++++++++ Lite/Windows/SettingsWindow.xaml | 10 + Lite/Windows/SettingsWindow.xaml.cs | 12 +- 32 files changed, 1952 insertions(+), 146 deletions(-) create mode 100644 Dashboard/ManageMuteRulesWindow.xaml create mode 100644 Dashboard/ManageMuteRulesWindow.xaml.cs create mode 100644 Dashboard/Models/MuteRule.cs create mode 100644 Dashboard/MuteRuleDialog.xaml create mode 100644 Dashboard/MuteRuleDialog.xaml.cs create mode 100644 Dashboard/Services/MuteRuleService.cs create mode 100644 Lite/Models/MuteRule.cs create mode 100644 Lite/Services/MuteRuleService.cs create mode 100644 Lite/Windows/ManageMuteRulesWindow.xaml create mode 100644 Lite/Windows/ManageMuteRulesWindow.xaml.cs create mode 100644 Lite/Windows/MuteRuleDialog.xaml create mode 100644 Lite/Windows/MuteRuleDialog.xaml.cs diff --git a/Dashboard/Controls/AlertsHistoryContent.xaml b/Dashboard/Controls/AlertsHistoryContent.xaml index b97fbc3d..944247a6 100644 --- a/Dashboard/Controls/AlertsHistoryContent.xaml +++ b/Dashboard/Controls/AlertsHistoryContent.xaml @@ -22,6 +22,13 @@ + + + + + + + diff --git a/Dashboard/Controls/AlertsHistoryContent.xaml.cs b/Dashboard/Controls/AlertsHistoryContent.xaml.cs index 22b9ed79..1f35bc17 100644 --- a/Dashboard/Controls/AlertsHistoryContent.xaml.cs +++ b/Dashboard/Controls/AlertsHistoryContent.xaml.cs @@ -25,6 +25,8 @@ public partial class AlertsHistoryContent : UserControl { public event EventHandler? AlertsDismissed; + public MuteRuleService? MuteRuleService { get; set; } + private List _allAlerts = new(); /* Column filter state */ @@ -71,7 +73,8 @@ private void LoadAlerts() IsResolved = e.MetricName.Contains("Cleared") || e.MetricName.Contains("Resolved"), IsCritical = e.MetricName.Contains("Deadlock") || e.MetricName.Contains("Poison"), IsWarning = !e.MetricName.Contains("Cleared") && !e.MetricName.Contains("Resolved") - && !e.MetricName.Contains("Deadlock") && !e.MetricName.Contains("Poison") + && !e.MetricName.Contains("Deadlock") && !e.MetricName.Contains("Poison"), + Muted = e.Muted }).ToList(); ApplyFilters(); @@ -432,6 +435,53 @@ private void ExportToCsv_Click(object sender, RoutedEventArgs e) } #endregion + + #region Mute Handlers + + private void MuteThisAlert_Click(object sender, RoutedEventArgs e) + { + if (MuteRuleService == null) return; + if (sender is not MenuItem menuItem) return; + var contextMenu = menuItem.Parent as ContextMenu; + var dataGrid = TabHelpers.FindDataGridFromContextMenu(contextMenu!); + if (dataGrid?.SelectedItem is not AlertHistoryDisplayItem item) return; + + var rule = new MuteRule + { + ServerName = item.ServerName, + MetricName = item.MetricName + }; + + var dialog = new MuteRuleDialog(rule) { Owner = Window.GetWindow(this) }; + if (dialog.ShowDialog() == true) + { + MuteRuleService.AddRule(dialog.Rule); + LoadAlerts(); + } + } + + private void MuteSimilarAlerts_Click(object sender, RoutedEventArgs e) + { + if (MuteRuleService == null) return; + if (sender is not MenuItem menuItem) return; + var contextMenu = menuItem.Parent as ContextMenu; + var dataGrid = TabHelpers.FindDataGridFromContextMenu(contextMenu!); + if (dataGrid?.SelectedItem is not AlertHistoryDisplayItem item) return; + + var rule = new MuteRule + { + MetricName = item.MetricName + }; + + var dialog = new MuteRuleDialog(rule) { Owner = Window.GetWindow(this) }; + if (dialog.ShowDialog() == true) + { + MuteRuleService.AddRule(dialog.Rule); + LoadAlerts(); + } + } + + #endregion } public class AlertHistoryDisplayItem @@ -447,5 +497,6 @@ public class AlertHistoryDisplayItem public bool IsResolved { get; set; } public bool IsCritical { get; set; } public bool IsWarning { get; set; } + public bool Muted { get; set; } } } diff --git a/Dashboard/MainWindow.xaml.cs b/Dashboard/MainWindow.xaml.cs index 54d9f5d3..00a5a676 100644 --- a/Dashboard/MainWindow.xaml.cs +++ b/Dashboard/MainWindow.xaml.cs @@ -41,6 +41,7 @@ public partial class MainWindow : Window private readonly DispatcherTimer _connectionStatusTimer; private NotificationService? _notificationService; private readonly AlertStateService _alertStateService; + private readonly MuteRuleService _muteRuleService; private readonly Dictionary _previousConnectionStates; private readonly Dictionary _tabBadges; private readonly Dictionary _latestHealthStatus; @@ -92,6 +93,7 @@ public MainWindow() _openTabs = new Dictionary(); _preferencesService = new UserPreferencesService(); _alertStateService = new AlertStateService(); + _muteRuleService = new MuteRuleService(); _serverListItems = new ObservableCollection(); _previousConnectionStates = new Dictionary(); _tabBadges = new Dictionary(); @@ -209,7 +211,7 @@ private async void StartMcpServerIfEnabled() return; } - _mcpHostService = new McpHostService(_serverManager, _credentialService, prefs.McpPort); + _mcpHostService = new McpHostService(_serverManager, _credentialService, _muteRuleService, prefs.McpPort); _mcpCts = new CancellationTokenSource(); _ = _mcpHostService.StartAsync(_mcpCts.Token); } @@ -673,6 +675,7 @@ private void OpenAlertsTab() } _alertsHistoryContent = new AlertsHistoryContent(); + _alertsHistoryContent.MuteRuleService = _muteRuleService; _alertsHistoryContent.AlertsDismissed += (_, _) => UpdateAlertBadge(); var headerPanel = new StackPanel { Orientation = Orientation.Horizontal }; @@ -993,7 +996,7 @@ private void Settings_Click(object sender, RoutedEventArgs e) bool wasEnabled = oldPrefs.McpEnabled; int oldPort = oldPrefs.McpPort; - var dialog = new SettingsWindow(_preferencesService); + var dialog = new SettingsWindow(_preferencesService, _muteRuleService); dialog.Owner = this; if (dialog.ShowDialog() == true) { @@ -1221,25 +1224,34 @@ private async Task EvaluateAlertConditionsAsync( _activeBlockingAlert[serverId] = true; if (!_lastBlockingAlert.TryGetValue(serverId, out var lastAlert) || (now - lastAlert) >= alertCooldown) { - _notificationService?.ShowBlockingNotification( - serverName, - (int)health.TotalBlocked, - (int)health.LongestBlockedSeconds); + var muteCtx = new AlertMuteContext { ServerName = serverName, MetricName = "Blocking Detected" }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastBlockingAlert[serverId] = now; + if (!isMuted) + { + _notificationService?.ShowBlockingNotification( + serverName, + (int)health.TotalBlocked, + (int)health.LongestBlockedSeconds); + } + _emailAlertService.RecordAlert(serverId, serverName, "Blocking Detected", $"{(int)health.TotalBlocked} session(s), longest {(int)health.LongestBlockedSeconds}s", - $"{prefs.BlockingThresholdSeconds}s", true, "tray"); - - var blockingContext = await BuildBlockingContextAsync(databaseService, prefs.AlertExcludedDatabases); + $"{prefs.BlockingThresholdSeconds}s", !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - await _emailAlertService.TrySendAlertEmailAsync( - "Blocking Detected", - serverName, - $"{(int)health.TotalBlocked} session(s), longest {(int)health.LongestBlockedSeconds}s", - $"{prefs.BlockingThresholdSeconds}s", - serverId, - blockingContext); + if (!isMuted) + { + var blockingContext = await BuildBlockingContextAsync(databaseService, prefs.AlertExcludedDatabases); + + await _emailAlertService.TrySendAlertEmailAsync( + "Blocking Detected", + serverName, + $"{(int)health.TotalBlocked} session(s), longest {(int)health.LongestBlockedSeconds}s", + $"{prefs.BlockingThresholdSeconds}s", + serverId, + blockingContext); + } } } else if (_activeBlockingAlert.TryRemove(serverId, out var wasBlocking) && wasBlocking) @@ -1272,24 +1284,33 @@ Falls back to the raw delta when no databases are excluded. */ _activeDeadlockAlert[serverId] = true; if (!_lastDeadlockAlert.TryGetValue(serverId, out var lastAlert) || (now - lastAlert) >= alertCooldown) { - _notificationService?.ShowDeadlockNotification( - serverName, - (int)effectiveDeadlockDelta); + var muteCtx = new AlertMuteContext { ServerName = serverName, MetricName = "Deadlocks Detected" }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastDeadlockAlert[serverId] = now; + if (!isMuted) + { + _notificationService?.ShowDeadlockNotification( + serverName, + (int)effectiveDeadlockDelta); + } + _emailAlertService.RecordAlert(serverId, serverName, "Deadlocks Detected", effectiveDeadlockDelta.ToString(), - prefs.DeadlockThreshold.ToString(), true, "tray"); - - var deadlockContext = await BuildDeadlockContextAsync(databaseService, prefs.AlertExcludedDatabases); + prefs.DeadlockThreshold.ToString(), !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - await _emailAlertService.TrySendAlertEmailAsync( - "Deadlocks Detected", - serverName, - effectiveDeadlockDelta.ToString(), - prefs.DeadlockThreshold.ToString(), - serverId, - deadlockContext); + if (!isMuted) + { + var deadlockContext = await BuildDeadlockContextAsync(databaseService, prefs.AlertExcludedDatabases); + + await _emailAlertService.TrySendAlertEmailAsync( + "Deadlocks Detected", + serverName, + effectiveDeadlockDelta.ToString(), + prefs.DeadlockThreshold.ToString(), + serverId, + deadlockContext); + } } } else if (_activeDeadlockAlert.TryRemove(serverId, out var wasDeadlock) && wasDeadlock) @@ -1311,21 +1332,30 @@ await _emailAlertService.TrySendAlertEmailAsync( _activeHighCpuAlert[serverId] = true; if (!_lastHighCpuAlert.TryGetValue(serverId, out var lastAlert) || (now - lastAlert) >= alertCooldown) { - _notificationService?.ShowHighCpuNotification( - serverName, - totalCpu); + var muteCtx = new AlertMuteContext { ServerName = serverName, MetricName = "High CPU" }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastHighCpuAlert[serverId] = now; + if (!isMuted) + { + _notificationService?.ShowHighCpuNotification( + serverName, + totalCpu); + } + _emailAlertService.RecordAlert(serverId, serverName, "High CPU", $"{totalCpu:F0}%", - $"{prefs.CpuThresholdPercent}%", true, "tray"); + $"{prefs.CpuThresholdPercent}%", !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - await _emailAlertService.TrySendAlertEmailAsync( - "High CPU", - serverName, - $"{totalCpu:F0}%", - $"{prefs.CpuThresholdPercent}%", - serverId); + if (!isMuted) + { + await _emailAlertService.TrySendAlertEmailAsync( + "High CPU", + serverName, + $"{totalCpu:F0}%", + $"{prefs.CpuThresholdPercent}%", + serverId); + } } } else if (_activeHighCpuAlert.TryRemove(serverId, out var wasCpu) && wasCpu) @@ -1348,23 +1378,33 @@ await _emailAlertService.TrySendAlertEmailAsync( if (!_lastPoisonWaitAlert.TryGetValue(serverId, out var lastAlert) || (now - lastAlert) >= alertCooldown) { var worst = triggeredWaits[0]; - _notificationService?.ShowPoisonWaitNotification(serverName, worst.WaitType, worst.AvgMsPerWait); + var allWaitNames = string.Join(", ", triggeredWaits.ConvertAll(w => $"{w.WaitType} ({w.AvgMsPerWait:F0}ms)")); + + var muteCtx = new AlertMuteContext { ServerName = serverName, MetricName = "Poison Wait", WaitType = worst.WaitType }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastPoisonWaitAlert[serverId] = now; - var allWaitNames = string.Join(", ", triggeredWaits.ConvertAll(w => $"{w.WaitType} ({w.AvgMsPerWait:F0}ms)")); + if (!isMuted) + { + _notificationService?.ShowPoisonWaitNotification(serverName, worst.WaitType, worst.AvgMsPerWait); + } + _emailAlertService.RecordAlert(serverId, serverName, "Poison Wait", allWaitNames, - $"{prefs.PoisonWaitThresholdMs}ms avg", true, "tray"); - - var poisonContext = BuildPoisonWaitContext(triggeredWaits); + $"{prefs.PoisonWaitThresholdMs}ms avg", !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - await _emailAlertService.TrySendAlertEmailAsync( - "Poison Wait", - serverName, - allWaitNames, - $"{prefs.PoisonWaitThresholdMs}ms avg", - serverId, - poisonContext); + if (!isMuted) + { + var poisonContext = BuildPoisonWaitContext(triggeredWaits); + + await _emailAlertService.TrySendAlertEmailAsync( + "Poison Wait", + serverName, + allWaitNames, + $"{prefs.PoisonWaitThresholdMs}ms avg", + serverId, + poisonContext); + } } } else if (_activePoisonWaitAlert.TryRemove(serverId, out var wasPoisonWait) && wasPoisonWait) @@ -1395,23 +1435,39 @@ await _emailAlertService.TrySendAlertEmailAsync( var worst = lrqList[0]; var elapsedMinutes = worst.ElapsedSeconds / 60; var preview = Truncate(worst.QueryText, 80); - _notificationService?.ShowLongRunningQueryNotification( - serverName, worst.SessionId, elapsedMinutes, preview); + + var muteCtx = new AlertMuteContext + { + ServerName = serverName, + MetricName = "Long-Running Query", + DatabaseName = worst.DatabaseName, + QueryText = worst.QueryText + }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastLongRunningQueryAlert[serverId] = now; + if (!isMuted) + { + _notificationService?.ShowLongRunningQueryNotification( + serverName, worst.SessionId, elapsedMinutes, preview); + } + _emailAlertService.RecordAlert(serverId, serverName, "Long-Running Query", $"Session #{worst.SessionId} running {elapsedMinutes}m", - $"{prefs.LongRunningQueryThresholdMinutes}m", true, "tray"); - - var lrqContext = BuildLongRunningQueryContext(lrqList); + $"{prefs.LongRunningQueryThresholdMinutes}m", !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - await _emailAlertService.TrySendAlertEmailAsync( - "Long-Running Query", - serverName, - $"{lrqList.Count} query(s), longest {elapsedMinutes}m", - $"{prefs.LongRunningQueryThresholdMinutes}m", - serverId, - lrqContext); + if (!isMuted) + { + var lrqContext = BuildLongRunningQueryContext(lrqList); + + await _emailAlertService.TrySendAlertEmailAsync( + "Long-Running Query", + serverName, + $"{lrqList.Count} query(s), longest {elapsedMinutes}m", + $"{prefs.LongRunningQueryThresholdMinutes}m", + serverId, + lrqContext); + } } } else if (_activeLongRunningQueryAlert.TryRemove(serverId, out var wasLongRunning) && wasLongRunning) @@ -1433,22 +1489,31 @@ await _emailAlertService.TrySendAlertEmailAsync( _activeTempDbSpaceAlert[serverId] = true; if (!_lastTempDbSpaceAlert.TryGetValue(serverId, out var lastAlert) || (now - lastAlert) >= alertCooldown) { - _notificationService?.ShowTempDbSpaceNotification(serverName, tempDb.UsedPercent); + var muteCtx = new AlertMuteContext { ServerName = serverName, MetricName = "TempDB Space" }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastTempDbSpaceAlert[serverId] = now; + if (!isMuted) + { + _notificationService?.ShowTempDbSpaceNotification(serverName, tempDb.UsedPercent); + } + _emailAlertService.RecordAlert(serverId, serverName, "TempDB Space", $"{tempDb.UsedPercent:F0}% used ({tempDb.TotalReservedMb:F0} MB)", - $"{prefs.TempDbSpaceThresholdPercent}%", true, "tray"); + $"{prefs.TempDbSpaceThresholdPercent}%", !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - var tempDbContext = BuildTempDbSpaceContext(tempDb); - - await _emailAlertService.TrySendAlertEmailAsync( - "TempDB Space", - serverName, - $"{tempDb.UsedPercent:F0}% used ({tempDb.TotalReservedMb:F0} MB)", - $"{prefs.TempDbSpaceThresholdPercent}%", - serverId, - tempDbContext); + if (!isMuted) + { + var tempDbContext = BuildTempDbSpaceContext(tempDb); + + await _emailAlertService.TrySendAlertEmailAsync( + "TempDB Space", + serverName, + $"{tempDb.UsedPercent:F0}% used ({tempDb.TotalReservedMb:F0} MB)", + $"{prefs.TempDbSpaceThresholdPercent}%", + serverId, + tempDbContext); + } } } else if (_activeTempDbSpaceAlert.TryRemove(serverId, out var wasTempDb) && wasTempDb) @@ -1473,23 +1538,33 @@ await _emailAlertService.TrySendAlertEmailAsync( if (!_lastLongRunningJobAlert.TryGetValue(jobKey, out var lastAlert) || (now - lastAlert) >= alertCooldown) { var currentMinutes = worst.CurrentDurationSeconds / 60; - _notificationService?.ShowLongRunningJobNotification( - serverName, worst.JobName, currentMinutes, worst.PercentOfAverage ?? 0); + + var muteCtx = new AlertMuteContext { ServerName = serverName, MetricName = "Long-Running Job", JobName = worst.JobName }; + bool isMuted = _muteRuleService.IsAlertMuted(muteCtx); _lastLongRunningJobAlert[jobKey] = now; + if (!isMuted) + { + _notificationService?.ShowLongRunningJobNotification( + serverName, worst.JobName, currentMinutes, worst.PercentOfAverage ?? 0); + } + _emailAlertService.RecordAlert(serverId, serverName, "Long-Running Job", $"{worst.JobName} at {worst.PercentOfAverage:F0}% of avg ({currentMinutes}m)", - $"{prefs.LongRunningJobMultiplier}x avg", true, "tray"); + $"{prefs.LongRunningJobMultiplier}x avg", !isMuted, isMuted ? "muted" : "tray", muted: isMuted); - var jobContext = BuildAnomalousJobContext(health.AnomalousJobs); - - await _emailAlertService.TrySendAlertEmailAsync( - "Long-Running Job", - serverName, - $"{health.AnomalousJobs.Count} job(s) exceeding {prefs.LongRunningJobMultiplier}x average", - $"{prefs.LongRunningJobMultiplier}x historical avg", - serverId, - jobContext); + if (!isMuted) + { + var jobContext = BuildAnomalousJobContext(health.AnomalousJobs); + + await _emailAlertService.TrySendAlertEmailAsync( + "Long-Running Job", + serverName, + $"{health.AnomalousJobs.Count} job(s) exceeding {prefs.LongRunningJobMultiplier}x average", + $"{prefs.LongRunningJobMultiplier}x historical avg", + serverId, + jobContext); + } } } else if (_activeLongRunningJobAlert.TryRemove(serverId, out var wasJob) && wasJob) diff --git a/Dashboard/ManageMuteRulesWindow.xaml b/Dashboard/ManageMuteRulesWindow.xaml new file mode 100644 index 00000000..1275574b --- /dev/null +++ b/Dashboard/ManageMuteRulesWindow.xaml @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +